In [201]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import log_loss
from sklearn.metrics import roc_auc_score

from catboost import CatBoostClassifier

from skopt import BayesSearchCV

from xgboost import XGBClassifier

import pickle

import animalhelper as ah

%load_ext autoreload
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [138]:
!ls data

[31maac_intakes.csv[m[m          df_dog.csv               master12218.csv
[31maac_intakes_outcomes.csv[m[m df_mod.csv               master_df.csv
[31maac_outcomes.csv[m[m         engineered.csv           master_df_1128.csv
all_categoricals.csv     group_shade.csv          master_df_1129.csv
concat_backup.csv        hour_ids.csv             model12218.csv
df_cat.csv               hr_name.csv


In [139]:
df = pd.read_csv('./data/aac_intakes_outcomes.csv')

In [233]:
df2 = pd.read_csv('./data/aac_intakes.csv')

In [235]:
df2 = df2[['animal_id', 'name']]

In [236]:
name_freqs = pickle.load(open('name_freqs.pkl', 'rb'))

In [238]:
def get_name_frequencies(x):
    return name_freqs[x]

In [240]:
df2['name_freqs'] = df2['name'].apply(lambda x: get_name_frequencies(x))

In [242]:
df2 = df2.drop(labels=['name'], axis=1)

In [243]:
df2.head()

Unnamed: 0,animal_id,name_freqs
0,A706918,52
1,A724273,1
2,A665644,0
3,A682524,22
4,A743852,32


In [140]:
df.head()

Unnamed: 0,age_upon_outcome,animal_id_outcome,date_of_birth,outcome_subtype,outcome_type,sex_upon_outcome,age_upon_outcome_(days),age_upon_outcome_(years),age_upon_outcome_age_group,outcome_datetime,...,age_upon_intake_age_group,intake_datetime,intake_month,intake_year,intake_monthyear,intake_weekday,intake_hour,intake_number,time_in_shelter,time_in_shelter_days
0,10 years,A006100,2007-07-09 00:00:00,,Return to Owner,Neutered Male,3650,10.0,"(7.5, 10.0]",2017-12-07 14:07:00,...,"(7.5, 10.0]",2017-12-07 00:00:00,12,2017,2017-12,Thursday,14,1.0,0 days 14:07:00.000000000,0.588194
1,7 years,A006100,2007-07-09 00:00:00,,Return to Owner,Neutered Male,2555,7.0,"(5.0, 7.5]",2014-12-20 16:35:00,...,"(5.0, 7.5]",2014-12-19 10:21:00,12,2014,2014-12,Friday,10,2.0,1 days 06:14:00.000000000,1.259722
2,6 years,A006100,2007-07-09 00:00:00,,Return to Owner,Neutered Male,2190,6.0,"(5.0, 7.5]",2014-03-08 17:10:00,...,"(5.0, 7.5]",2014-03-07 14:26:00,3,2014,2014-03,Friday,14,3.0,1 days 02:44:00.000000000,1.113889
3,10 years,A047759,2004-04-02 00:00:00,Partner,Transfer,Neutered Male,3650,10.0,"(7.5, 10.0]",2014-04-07 15:12:00,...,"(7.5, 10.0]",2014-04-02 15:55:00,4,2014,2014-04,Wednesday,15,1.0,4 days 23:17:00.000000000,4.970139
4,16 years,A134067,1997-10-16 00:00:00,,Return to Owner,Neutered Male,5840,16.0,"(15.0, 17.5]",2013-11-16 11:54:00,...,"(15.0, 17.5]",2013-11-16 09:02:00,11,2013,2013-11,Saturday,9,1.0,0 days 02:52:00.000000000,0.119444


In [141]:
df.columns.values

array(['age_upon_outcome', 'animal_id_outcome', 'date_of_birth',
       'outcome_subtype', 'outcome_type', 'sex_upon_outcome',
       'age_upon_outcome_(days)', 'age_upon_outcome_(years)',
       'age_upon_outcome_age_group', 'outcome_datetime', 'outcome_month',
       'outcome_year', 'outcome_monthyear', 'outcome_weekday',
       'outcome_hour', 'outcome_number', 'dob_year', 'dob_month',
       'dob_monthyear', 'age_upon_intake', 'animal_id_intake',
       'animal_type', 'breed', 'color', 'found_location',
       'intake_condition', 'intake_type', 'sex_upon_intake', 'count',
       'age_upon_intake_(days)', 'age_upon_intake_(years)',
       'age_upon_intake_age_group', 'intake_datetime', 'intake_month',
       'intake_year', 'intake_monthyear', 'intake_weekday', 'intake_hour',
       'intake_number', 'time_in_shelter', 'time_in_shelter_days'],
      dtype=object)

In [142]:
df = df.iloc[:, 18:]

In [143]:
df2 = pd.read_csv('./data/aac_intakes_outcomes.csv')

In [144]:
df = pd.concat([df, df2['outcome_type']], axis=1)

In [145]:
len(df['animal_id_intake'].unique())

71961

In [146]:
df = df.drop_duplicates(subset='animal_id_intake')

In [147]:
df = df.drop(labels=['found_location', 'count', 'age_upon_intake_(years)', 'intake_month', 'intake_year', 
                'intake_monthyear', 'intake_number', 'time_in_shelter'], axis=1)

In [148]:
df = df[(df['animal_type'] == 'Dog') | (df['animal_type'] == 'Cat')]

In [149]:
df['intake_datetime'] = pd.to_datetime(df['intake_datetime'])

In [150]:
df.columns.values

array(['dob_monthyear', 'age_upon_intake', 'animal_id_intake',
       'animal_type', 'breed', 'color', 'intake_condition', 'intake_type',
       'sex_upon_intake', 'age_upon_intake_(days)',
       'age_upon_intake_age_group', 'intake_datetime', 'intake_weekday',
       'intake_hour', 'time_in_shelter_days', 'outcome_type'],
      dtype=object)

In [151]:
df['intake_hour'] = df['intake_datetime'].dt.hour

In [167]:
df = df.drop(labels=['age_upon_intake'], axis=1)

KeyError: "labels ['age_upon_intake'] not contained in axis"

In [168]:
df = df.drop(labels=['dob_monthyear'], axis=1)

## Reduce color to basic colors

In [153]:
df['color'] = df['color'].apply(lambda x: ah.color(x))

## Group breeds

In [154]:
df['breed'] = df['breed'].apply(lambda x: ah.group_dogs(x, False))

In [155]:
df_cat = df[df['animal_type'] == 'Cat']

In [156]:
df_cat['breed'] = df_cat['breed'].apply(lambda x: ah.cat_breed(x, False))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [157]:
df = pd.concat([df, df_cat])

In [158]:
df.columns.values

array(['dob_monthyear', 'animal_id_intake', 'animal_type', 'breed',
       'color', 'intake_condition', 'intake_type', 'sex_upon_intake',
       'age_upon_intake_(days)', 'age_upon_intake_age_group',
       'intake_datetime', 'intake_weekday', 'intake_hour',
       'time_in_shelter_days', 'outcome_type'], dtype=object)

In [159]:
df = df.drop_duplicates(subset='animal_id_intake', keep='last')

In [160]:
df['breed'].value_counts()

short hair       27060
Terrier           9524
Toy               7376
Sporting          6529
Herding           5897
Working           4604
Hound             2996
Non-Sporting      1871
long hair         1243
Terrier & Toy      111
Unknown              1
Name: breed, dtype: int64

In [161]:
def fix_breed(s):
    if s == 'Terrier & Toy':
        return 'Terrier'
    else:
        return s

In [162]:
df['breed'] = df['breed'].apply(lambda x: fix_breed(x))

In [163]:
df['breed'].value_counts()

short hair      27060
Terrier          9635
Toy              7376
Sporting         6529
Herding          5897
Working          4604
Hound            2996
Non-Sporting     1871
long hair        1243
Unknown             1
Name: breed, dtype: int64

In [164]:
df = df[df['breed'] != 'Unknown']

In [169]:
df.head()

Unnamed: 0,animal_id_intake,animal_type,breed,color,intake_condition,intake_type,sex_upon_intake,age_upon_intake_(days),age_upon_intake_age_group,intake_datetime,intake_weekday,intake_hour,time_in_shelter_days,outcome_type
0,A006100,Dog,Sporting,Yellow,Normal,Stray,Neutered Male,3650,"(7.5, 10.0]",2017-12-07 00:00:00,Thursday,0,0.588194,Return to Owner
3,A047759,Dog,Hound,Tricolor,Normal,Owner Surrender,Neutered Male,3650,"(7.5, 10.0]",2014-04-02 15:55:00,Wednesday,15,4.970139,Transfer
4,A134067,Dog,Herding,Brown,Injured,Public Assist,Neutered Male,5840,"(15.0, 17.5]",2013-11-16 09:02:00,Saturday,9,0.119444,Return to Owner
5,A141142,Dog,Sporting,Black,Aged,Stray,Spayed Female,5475,"(12.5, 15.0]",2013-11-16 14:46:00,Saturday,14,0.870833,Return to Owner
6,A163459,Dog,Terrier,Black,Normal,Stray,Intact Female,5475,"(12.5, 15.0]",2014-11-14 15:11:00,Friday,15,0.178472,Return to Owner


## Add in name frequencies

In [246]:
df = df2.merge(df, left_on='animal_id', right_on='animal_id_intake')

In [250]:
df = df.drop_duplicates()

## Remove rare occurences of features and response

In [176]:
df = df[(df['outcome_type'] != 'Rto-Adopt') & (df['outcome_type'] != 'Missing') & (df['outcome_type'] != 'Disposal')]

In [180]:
df = df[df['intake_type'] != 'Euthanasia Request']

In [181]:
df.head()

Unnamed: 0,animal_id_intake,animal_type,breed,color,intake_condition,intake_type,sex_upon_intake,age_upon_intake_(days),age_upon_intake_age_group,intake_datetime,intake_weekday,intake_hour,time_in_shelter_days,outcome_type
0,A006100,Dog,Sporting,Yellow,Normal,Stray,Neutered Male,3650,"(7.5, 10.0]",2017-12-07 00:00:00,Thursday,0,0.588194,Return to Owner
3,A047759,Dog,Hound,Tricolor,Normal,Owner Surrender,Neutered Male,3650,"(7.5, 10.0]",2014-04-02 15:55:00,Wednesday,15,4.970139,Transfer
4,A134067,Dog,Herding,Brown,Injured,Public Assist,Neutered Male,5840,"(15.0, 17.5]",2013-11-16 09:02:00,Saturday,9,0.119444,Return to Owner
5,A141142,Dog,Sporting,Black,Aged,Stray,Spayed Female,5475,"(12.5, 15.0]",2013-11-16 14:46:00,Saturday,14,0.870833,Return to Owner
6,A163459,Dog,Terrier,Black,Normal,Stray,Intact Female,5475,"(12.5, 15.0]",2014-11-14 15:11:00,Friday,15,0.178472,Return to Owner


## Model

In [251]:
df_model = df.copy()

In [252]:
df_model = df_model.drop(labels=['animal_id_intake', 'intake_datetime', 'time_in_shelter_days'], axis=1)

In [254]:
df_model = df_model.drop(labels=['animal_id'], axis=1)

In [255]:
df_model.head()

Unnamed: 0,name_freqs,animal_type,breed,color,intake_condition,intake_type,sex_upon_intake,age_upon_intake_(days),age_upon_intake_age_group,intake_weekday,intake_hour,outcome_type
0,52,Dog,Sporting,White,Normal,Stray,Spayed Female,2920,"(7.5, 10.0]",Sunday,12,Return to Owner
1,1,Dog,Hound,Sable,Normal,Stray,Intact Male,330,"(-0.025, 2.5]",Thursday,18,Return to Owner
2,0,Cat,short hair,Tricolor,Sick,Stray,Intact Female,28,"(-0.025, 2.5]",Monday,7,Transfer
3,22,Dog,Working,Yellow,Normal,Stray,Neutered Male,1460,"(2.5, 5.0]",Sunday,10,Return to Owner
4,32,Dog,Sporting,Brown,Normal,Owner Surrender,Neutered Male,730,"(-0.025, 2.5]",Saturday,12,Return to Owner


In [256]:
def simplify_response(s):
    if s == 'Adoption':
        return s
    else:
        return 'Not Adoption'

In [257]:
df_model['outcome_type'] = df_model['outcome_type'].apply(lambda x: simplify_response(x))

In [258]:
df_model = df_model.dropna()

In [259]:
df_model.head()

Unnamed: 0,name_freqs,animal_type,breed,color,intake_condition,intake_type,sex_upon_intake,age_upon_intake_(days),age_upon_intake_age_group,intake_weekday,intake_hour,outcome_type
0,52,Dog,Sporting,White,Normal,Stray,Spayed Female,2920,"(7.5, 10.0]",Sunday,12,Not Adoption
1,1,Dog,Hound,Sable,Normal,Stray,Intact Male,330,"(-0.025, 2.5]",Thursday,18,Not Adoption
2,0,Cat,short hair,Tricolor,Sick,Stray,Intact Female,28,"(-0.025, 2.5]",Monday,7,Not Adoption
3,22,Dog,Working,Yellow,Normal,Stray,Neutered Male,1460,"(2.5, 5.0]",Sunday,10,Not Adoption
4,32,Dog,Sporting,Brown,Normal,Owner Surrender,Neutered Male,730,"(-0.025, 2.5]",Saturday,12,Not Adoption


In [260]:
X = df_model.iloc[:, :10]

In [261]:
y = df_model.iloc[:, -1]

In [262]:
le = LabelEncoder()
y = le.fit_transform(y)

In [277]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 2325, stratify=y)

In [278]:
X_train.columns.values

array(['name_freqs', 'animal_type', 'breed', 'color', 'intake_condition',
       'intake_type', 'sex_upon_intake', 'age_upon_intake_(days)',
       'age_upon_intake_age_group', 'intake_weekday'], dtype=object)

In [279]:
X_train_val, X_test_val, y_train_val, y_test_val = train_test_split(X_train, y_train)

In [280]:
c = CatBoostClassifier(cat_features=[1, 2, 3, 4, 5, 6, 8, 9], depth=8, learning_rate = 0.23,
                   n_estimators = 800, one_hot_max_size=24, od_type = 'IncToDec', od_pval=1e-5,
                       l2_leaf_reg=1, eval_metric='AUC', class_weights=[1.1887, 1])

c.fit(X_train_val, y_train_val, use_best_model=True, eval_set=(X_test_val, y_test_val))

0:	test: 0.7327654	best: 0.7327654 (0)	total: 66.6ms	remaining: 53.2s
1:	test: 0.7569298	best: 0.7569298 (1)	total: 121ms	remaining: 48.4s
2:	test: 0.7588114	best: 0.7588114 (2)	total: 179ms	remaining: 47.4s
3:	test: 0.7610660	best: 0.7610660 (3)	total: 236ms	remaining: 46.9s
4:	test: 0.7619140	best: 0.7619140 (4)	total: 282ms	remaining: 44.8s
5:	test: 0.7643484	best: 0.7643484 (5)	total: 356ms	remaining: 47.2s
6:	test: 0.7662951	best: 0.7662951 (6)	total: 412ms	remaining: 46.7s
7:	test: 0.7661756	best: 0.7662951 (6)	total: 467ms	remaining: 46.3s
8:	test: 0.7669103	best: 0.7669103 (8)	total: 535ms	remaining: 47s
9:	test: 0.7672996	best: 0.7672996 (9)	total: 577ms	remaining: 45.6s
10:	test: 0.7672276	best: 0.7672996 (9)	total: 634ms	remaining: 45.5s
11:	test: 0.7673141	best: 0.7673141 (11)	total: 684ms	remaining: 44.9s
12:	test: 0.7673001	best: 0.7673141 (11)	total: 727ms	remaining: 44s
13:	test: 0.7674208	best: 0.7674208 (13)	total: 778ms	remaining: 43.7s
14:	test: 0.7687889	best: 0.76

117:	test: 0.7737880	best: 0.7738510 (114)	total: 6.86s	remaining: 39.6s
118:	test: 0.7737817	best: 0.7738510 (114)	total: 6.93s	remaining: 39.6s
119:	test: 0.7737713	best: 0.7738510 (114)	total: 6.99s	remaining: 39.6s
120:	test: 0.7737675	best: 0.7738510 (114)	total: 7.04s	remaining: 39.5s
121:	test: 0.7737680	best: 0.7738510 (114)	total: 7.09s	remaining: 39.4s
122:	test: 0.7737694	best: 0.7738510 (114)	total: 7.15s	remaining: 39.3s
123:	test: 0.7737717	best: 0.7738510 (114)	total: 7.19s	remaining: 39.2s
124:	test: 0.7737653	best: 0.7738510 (114)	total: 7.23s	remaining: 39s
125:	test: 0.7737653	best: 0.7738510 (114)	total: 7.29s	remaining: 39s
126:	test: 0.7737613	best: 0.7738510 (114)	total: 7.35s	remaining: 39s
127:	test: 0.7737608	best: 0.7738510 (114)	total: 7.4s	remaining: 38.9s
128:	test: 0.7735387	best: 0.7738510 (114)	total: 7.46s	remaining: 38.8s
129:	test: 0.7735337	best: 0.7738510 (114)	total: 7.51s	remaining: 38.7s
130:	test: 0.7735328	best: 0.7738510 (114)	total: 7.57s	re

234:	test: 0.7733485	best: 0.7738510 (114)	total: 13.1s	remaining: 31.5s
235:	test: 0.7733513	best: 0.7738510 (114)	total: 13.2s	remaining: 31.5s
236:	test: 0.7733619	best: 0.7738510 (114)	total: 13.2s	remaining: 31.4s
237:	test: 0.7733622	best: 0.7738510 (114)	total: 13.3s	remaining: 31.4s
238:	test: 0.7733157	best: 0.7738510 (114)	total: 13.3s	remaining: 31.3s
239:	test: 0.7733176	best: 0.7738510 (114)	total: 13.4s	remaining: 31.2s
240:	test: 0.7733108	best: 0.7738510 (114)	total: 13.4s	remaining: 31.2s
241:	test: 0.7733116	best: 0.7738510 (114)	total: 13.5s	remaining: 31.1s
242:	test: 0.7733095	best: 0.7738510 (114)	total: 13.5s	remaining: 31s
243:	test: 0.7733093	best: 0.7738510 (114)	total: 13.6s	remaining: 31s
244:	test: 0.7733084	best: 0.7738510 (114)	total: 13.6s	remaining: 30.9s
245:	test: 0.7733166	best: 0.7738510 (114)	total: 13.7s	remaining: 30.8s
246:	test: 0.7732930	best: 0.7738510 (114)	total: 13.7s	remaining: 30.8s
247:	test: 0.7732930	best: 0.7738510 (114)	total: 13.8s

350:	test: 0.7735183	best: 0.7738510 (114)	total: 19.1s	remaining: 24.5s
351:	test: 0.7735248	best: 0.7738510 (114)	total: 19.2s	remaining: 24.4s
352:	test: 0.7735221	best: 0.7738510 (114)	total: 19.2s	remaining: 24.4s
353:	test: 0.7735171	best: 0.7738510 (114)	total: 19.3s	remaining: 24.3s
354:	test: 0.7735169	best: 0.7738510 (114)	total: 19.3s	remaining: 24.2s
355:	test: 0.7735163	best: 0.7738510 (114)	total: 19.4s	remaining: 24.2s
356:	test: 0.7735182	best: 0.7738510 (114)	total: 19.4s	remaining: 24.1s
357:	test: 0.7735087	best: 0.7738510 (114)	total: 19.5s	remaining: 24.1s
358:	test: 0.7735116	best: 0.7738510 (114)	total: 19.5s	remaining: 24s
359:	test: 0.7734728	best: 0.7738510 (114)	total: 19.6s	remaining: 24s
360:	test: 0.7734725	best: 0.7738510 (114)	total: 19.7s	remaining: 23.9s
361:	test: 0.7734726	best: 0.7738510 (114)	total: 19.7s	remaining: 23.9s
362:	test: 0.7734494	best: 0.7738510 (114)	total: 19.8s	remaining: 23.8s
363:	test: 0.7734488	best: 0.7738510 (114)	total: 19.8s

466:	test: 0.7731419	best: 0.7738510 (114)	total: 25.3s	remaining: 18s
467:	test: 0.7731427	best: 0.7738510 (114)	total: 25.3s	remaining: 18s
468:	test: 0.7731440	best: 0.7738510 (114)	total: 25.4s	remaining: 17.9s
469:	test: 0.7731131	best: 0.7738510 (114)	total: 25.4s	remaining: 17.8s
470:	test: 0.7731166	best: 0.7738510 (114)	total: 25.5s	remaining: 17.8s
471:	test: 0.7731134	best: 0.7738510 (114)	total: 25.5s	remaining: 17.7s
472:	test: 0.7731134	best: 0.7738510 (114)	total: 25.6s	remaining: 17.7s
473:	test: 0.7731055	best: 0.7738510 (114)	total: 25.6s	remaining: 17.6s
474:	test: 0.7730980	best: 0.7738510 (114)	total: 25.7s	remaining: 17.6s
475:	test: 0.7730956	best: 0.7738510 (114)	total: 25.7s	remaining: 17.5s
476:	test: 0.7730971	best: 0.7738510 (114)	total: 25.8s	remaining: 17.5s
477:	test: 0.7731003	best: 0.7738510 (114)	total: 25.8s	remaining: 17.4s
478:	test: 0.7731011	best: 0.7738510 (114)	total: 25.9s	remaining: 17.4s
479:	test: 0.7730994	best: 0.7738510 (114)	total: 26s	r

579:	test: 0.7731114	best: 0.7738510 (114)	total: 32.6s	remaining: 12.4s
580:	test: 0.7731121	best: 0.7738510 (114)	total: 32.6s	remaining: 12.3s
581:	test: 0.7731142	best: 0.7738510 (114)	total: 32.7s	remaining: 12.2s
582:	test: 0.7731131	best: 0.7738510 (114)	total: 32.8s	remaining: 12.2s
583:	test: 0.7731026	best: 0.7738510 (114)	total: 32.8s	remaining: 12.1s
584:	test: 0.7730969	best: 0.7738510 (114)	total: 32.9s	remaining: 12.1s
585:	test: 0.7730963	best: 0.7738510 (114)	total: 33s	remaining: 12s
586:	test: 0.7730954	best: 0.7738510 (114)	total: 33s	remaining: 12s
587:	test: 0.7730952	best: 0.7738510 (114)	total: 33.1s	remaining: 11.9s
588:	test: 0.7730964	best: 0.7738510 (114)	total: 33.2s	remaining: 11.9s
589:	test: 0.7731052	best: 0.7738510 (114)	total: 33.2s	remaining: 11.8s
590:	test: 0.7731058	best: 0.7738510 (114)	total: 33.3s	remaining: 11.8s
591:	test: 0.7731065	best: 0.7738510 (114)	total: 33.4s	remaining: 11.7s
592:	test: 0.7731038	best: 0.7738510 (114)	total: 33.4s	rem

694:	test: 0.7731972	best: 0.7738510 (114)	total: 40.1s	remaining: 6.06s
695:	test: 0.7731974	best: 0.7738510 (114)	total: 40.2s	remaining: 6s
696:	test: 0.7731936	best: 0.7738510 (114)	total: 40.2s	remaining: 5.95s
697:	test: 0.7731930	best: 0.7738510 (114)	total: 40.3s	remaining: 5.89s
698:	test: 0.7731931	best: 0.7738510 (114)	total: 40.3s	remaining: 5.83s
699:	test: 0.7731964	best: 0.7738510 (114)	total: 40.4s	remaining: 5.77s
700:	test: 0.7731867	best: 0.7738510 (114)	total: 40.5s	remaining: 5.71s
701:	test: 0.7731878	best: 0.7738510 (114)	total: 40.5s	remaining: 5.65s
702:	test: 0.7732902	best: 0.7738510 (114)	total: 40.6s	remaining: 5.6s
703:	test: 0.7732946	best: 0.7738510 (114)	total: 40.6s	remaining: 5.54s
704:	test: 0.7732964	best: 0.7738510 (114)	total: 40.7s	remaining: 5.48s
705:	test: 0.7733001	best: 0.7738510 (114)	total: 40.7s	remaining: 5.42s
706:	test: 0.7732900	best: 0.7738510 (114)	total: 40.8s	remaining: 5.37s
707:	test: 0.7732902	best: 0.7738510 (114)	total: 40.8s

<catboost.core.CatBoostClassifier at 0x13537aa20>

In [281]:
y_preds = c.predict(X_test)

In [282]:
from sklearn.metrics import classification_report

In [283]:
print(classification_report(y_test, y_preds))

              precision    recall  f1-score   support

           0       0.63      0.75      0.68      7248
           1       0.78      0.66      0.71      9443

   micro avg       0.70      0.70      0.70     16691
   macro avg       0.70      0.71      0.70     16691
weighted avg       0.71      0.70      0.70     16691



In [275]:
pickle.dump(c, open('good_at_adoption.pkl', 'wb'))

## XGBoost