#### Imports

In [16]:
import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
from catboost import CatBoostClassifier, Pool 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

#### Read in Dataset

In [2]:
dogs = pd.read_csv('../data/dogs.csv')

#### Feature Selection

In [3]:
dogs = dogs.dropna(subset=['outcome_type', 'intake_sex'])

In [5]:
features = [
    'intake_type',
    'intake_condition',
    'intake_sex',
    'intake_spay_neuter',
    'intake_age_in_years',
    'breed',
    'color',
    'found_city',
]

target = ['outcome_type']

In [7]:
X = dogs[features]
y = dogs[target]

In [8]:
#CatBoost requires the specification of categorical columns. This function gets the list of categorical indicies

def get_categorical_indicies(X):
    cats = []
    for col in X.columns:
        if is_numeric_dtype(X[col]):
            pass
        else:
            cats.append(col)
    cat_indicies = []
    for col in cats:
        cat_indicies.append(X.columns.get_loc(col))
    return cat_indicies

categorical_indicies = get_categorical_indicies(X)

In [9]:
categorical_indicies

[0, 1, 2, 3, 5, 6, 7]

In [10]:
#CatBoost needs the categorical columns to be converted to the categorical datatype

def convert_cats(X):
    cats = []
    for col in X.columns:
        if is_numeric_dtype(X[col]):
            pass
        else:
            cats.append(col)
    cat_indicies = []
    for col in cats:
        X[col] = X[col].astype('category')

convert_cats(X)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')


#### Train, Test, Split and Pooling

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=63)

In [13]:
train_dataset = Pool(X_train, 
                     y_train,
                     cat_features=categorical_indicies
                    )
test_dataset = Pool(X_test, 
                    y_test,
                    cat_features=categorical_indicies
                   )

#### Model Training and Prediction

In [14]:
clf = CatBoostClassifier()

clf.fit(train_dataset)

preds = clf.predict(test_dataset)

print(preds)

Learning rate set to 0.096902
0:	learn: 1.8555478	total: 300ms	remaining: 4m 59s
1:	learn: 1.7098396	total: 578ms	remaining: 4m 48s
2:	learn: 1.6043622	total: 746ms	remaining: 4m 7s
3:	learn: 1.5191823	total: 1.03s	remaining: 4m 17s
4:	learn: 1.4518051	total: 1.33s	remaining: 4m 24s
5:	learn: 1.3920731	total: 1.61s	remaining: 4m 26s
6:	learn: 1.3428217	total: 1.95s	remaining: 4m 36s
7:	learn: 1.3002339	total: 2.28s	remaining: 4m 43s
8:	learn: 1.2639335	total: 2.65s	remaining: 4m 51s
9:	learn: 1.2326635	total: 2.95s	remaining: 4m 51s
10:	learn: 1.2052330	total: 3.26s	remaining: 4m 53s
11:	learn: 1.1816727	total: 3.6s	remaining: 4m 56s
12:	learn: 1.1605156	total: 3.92s	remaining: 4m 57s
13:	learn: 1.1428122	total: 4.22s	remaining: 4m 57s
14:	learn: 1.1262489	total: 4.54s	remaining: 4m 57s
15:	learn: 1.1120943	total: 4.8s	remaining: 4m 55s
16:	learn: 1.0997139	total: 5.14s	remaining: 4m 57s
17:	learn: 1.0878600	total: 5.52s	remaining: 5m 1s
18:	learn: 1.0774542	total: 5.84s	remaining: 5m 

157:	learn: 0.9492298	total: 50.2s	remaining: 4m 27s
158:	learn: 0.9489842	total: 50.5s	remaining: 4m 27s
159:	learn: 0.9487259	total: 50.9s	remaining: 4m 27s
160:	learn: 0.9485288	total: 51.2s	remaining: 4m 26s
161:	learn: 0.9483082	total: 51.5s	remaining: 4m 26s
162:	learn: 0.9481015	total: 51.9s	remaining: 4m 26s
163:	learn: 0.9477940	total: 52.2s	remaining: 4m 25s
164:	learn: 0.9475788	total: 52.5s	remaining: 4m 25s
165:	learn: 0.9473954	total: 52.8s	remaining: 4m 25s
166:	learn: 0.9473738	total: 53.1s	remaining: 4m 25s
167:	learn: 0.9472477	total: 53.4s	remaining: 4m 24s
168:	learn: 0.9471603	total: 53.7s	remaining: 4m 24s
169:	learn: 0.9469831	total: 54s	remaining: 4m 23s
170:	learn: 0.9468068	total: 54.3s	remaining: 4m 23s
171:	learn: 0.9463813	total: 54.6s	remaining: 4m 23s
172:	learn: 0.9462019	total: 54.9s	remaining: 4m 22s
173:	learn: 0.9459814	total: 55.3s	remaining: 4m 22s
174:	learn: 0.9457054	total: 55.6s	remaining: 4m 21s
175:	learn: 0.9454902	total: 55.9s	remaining: 4m

311:	learn: 0.9263634	total: 1m 38s	remaining: 3m 37s
312:	learn: 0.9262347	total: 1m 39s	remaining: 3m 37s
313:	learn: 0.9260323	total: 1m 39s	remaining: 3m 37s
314:	learn: 0.9259210	total: 1m 39s	remaining: 3m 36s
315:	learn: 0.9258148	total: 1m 39s	remaining: 3m 36s
316:	learn: 0.9256420	total: 1m 40s	remaining: 3m 35s
317:	learn: 0.9254616	total: 1m 40s	remaining: 3m 35s
318:	learn: 0.9253652	total: 1m 40s	remaining: 3m 35s
319:	learn: 0.9252669	total: 1m 41s	remaining: 3m 34s
320:	learn: 0.9250965	total: 1m 41s	remaining: 3m 34s
321:	learn: 0.9249672	total: 1m 41s	remaining: 3m 34s
322:	learn: 0.9247865	total: 1m 42s	remaining: 3m 34s
323:	learn: 0.9247249	total: 1m 42s	remaining: 3m 33s
324:	learn: 0.9246502	total: 1m 42s	remaining: 3m 33s
325:	learn: 0.9245886	total: 1m 43s	remaining: 3m 33s
326:	learn: 0.9243364	total: 1m 43s	remaining: 3m 32s
327:	learn: 0.9242738	total: 1m 43s	remaining: 3m 32s
328:	learn: 0.9240767	total: 1m 44s	remaining: 3m 32s
329:	learn: 0.9238269	total:

465:	learn: 0.9074469	total: 2m 23s	remaining: 2m 44s
466:	learn: 0.9073483	total: 2m 23s	remaining: 2m 44s
467:	learn: 0.9072848	total: 2m 24s	remaining: 2m 43s
468:	learn: 0.9071948	total: 2m 24s	remaining: 2m 43s
469:	learn: 0.9070726	total: 2m 24s	remaining: 2m 43s
470:	learn: 0.9069571	total: 2m 24s	remaining: 2m 42s
471:	learn: 0.9069241	total: 2m 25s	remaining: 2m 42s
472:	learn: 0.9068221	total: 2m 25s	remaining: 2m 42s
473:	learn: 0.9067500	total: 2m 25s	remaining: 2m 41s
474:	learn: 0.9065695	total: 2m 26s	remaining: 2m 41s
475:	learn: 0.9064932	total: 2m 26s	remaining: 2m 41s
476:	learn: 0.9064529	total: 2m 26s	remaining: 2m 40s
477:	learn: 0.9063818	total: 2m 26s	remaining: 2m 40s
478:	learn: 0.9062387	total: 2m 27s	remaining: 2m 40s
479:	learn: 0.9060529	total: 2m 27s	remaining: 2m 39s
480:	learn: 0.9059633	total: 2m 27s	remaining: 2m 39s
481:	learn: 0.9059002	total: 2m 28s	remaining: 2m 39s
482:	learn: 0.9058231	total: 2m 28s	remaining: 2m 38s
483:	learn: 0.9057429	total:

619:	learn: 0.8904352	total: 3m 8s	remaining: 1m 55s
620:	learn: 0.8903024	total: 3m 8s	remaining: 1m 54s
621:	learn: 0.8901965	total: 3m 8s	remaining: 1m 54s
622:	learn: 0.8901366	total: 3m 8s	remaining: 1m 54s
623:	learn: 0.8899845	total: 3m 9s	remaining: 1m 54s
624:	learn: 0.8899032	total: 3m 9s	remaining: 1m 53s
625:	learn: 0.8898359	total: 3m 9s	remaining: 1m 53s
626:	learn: 0.8897681	total: 3m 10s	remaining: 1m 53s
627:	learn: 0.8896235	total: 3m 10s	remaining: 1m 52s
628:	learn: 0.8895224	total: 3m 10s	remaining: 1m 52s
629:	learn: 0.8894367	total: 3m 11s	remaining: 1m 52s
630:	learn: 0.8892381	total: 3m 11s	remaining: 1m 51s
631:	learn: 0.8890796	total: 3m 11s	remaining: 1m 51s
632:	learn: 0.8889603	total: 3m 11s	remaining: 1m 51s
633:	learn: 0.8888871	total: 3m 12s	remaining: 1m 50s
634:	learn: 0.8887412	total: 3m 12s	remaining: 1m 50s
635:	learn: 0.8885738	total: 3m 12s	remaining: 1m 50s
636:	learn: 0.8884736	total: 3m 13s	remaining: 1m 50s
637:	learn: 0.8883304	total: 3m 13s

771:	learn: 0.8733597	total: 3m 51s	remaining: 1m 8s
772:	learn: 0.8733006	total: 3m 52s	remaining: 1m 8s
773:	learn: 0.8731343	total: 3m 52s	remaining: 1m 7s
774:	learn: 0.8729792	total: 3m 52s	remaining: 1m 7s
775:	learn: 0.8728958	total: 3m 52s	remaining: 1m 7s
776:	learn: 0.8726430	total: 3m 53s	remaining: 1m 6s
777:	learn: 0.8725010	total: 3m 53s	remaining: 1m 6s
778:	learn: 0.8723677	total: 3m 53s	remaining: 1m 6s
779:	learn: 0.8723214	total: 3m 54s	remaining: 1m 6s
780:	learn: 0.8722744	total: 3m 54s	remaining: 1m 5s
781:	learn: 0.8721754	total: 3m 54s	remaining: 1m 5s
782:	learn: 0.8721178	total: 3m 54s	remaining: 1m 5s
783:	learn: 0.8720526	total: 3m 55s	remaining: 1m 4s
784:	learn: 0.8719260	total: 3m 55s	remaining: 1m 4s
785:	learn: 0.8718306	total: 3m 55s	remaining: 1m 4s
786:	learn: 0.8717087	total: 3m 56s	remaining: 1m 3s
787:	learn: 0.8716431	total: 3m 56s	remaining: 1m 3s
788:	learn: 0.8715482	total: 3m 56s	remaining: 1m 3s
789:	learn: 0.8714710	total: 3m 56s	remaining:

928:	learn: 0.8576814	total: 4m 36s	remaining: 21.1s
929:	learn: 0.8575595	total: 4m 36s	remaining: 20.8s
930:	learn: 0.8575075	total: 4m 37s	remaining: 20.5s
931:	learn: 0.8574650	total: 4m 37s	remaining: 20.2s
932:	learn: 0.8573146	total: 4m 37s	remaining: 19.9s
933:	learn: 0.8572767	total: 4m 37s	remaining: 19.6s
934:	learn: 0.8571728	total: 4m 38s	remaining: 19.3s
935:	learn: 0.8570433	total: 4m 38s	remaining: 19s
936:	learn: 0.8568915	total: 4m 38s	remaining: 18.8s
937:	learn: 0.8567705	total: 4m 39s	remaining: 18.5s
938:	learn: 0.8566190	total: 4m 39s	remaining: 18.2s
939:	learn: 0.8565223	total: 4m 39s	remaining: 17.9s
940:	learn: 0.8563639	total: 4m 40s	remaining: 17.6s
941:	learn: 0.8563109	total: 4m 40s	remaining: 17.3s
942:	learn: 0.8562102	total: 4m 40s	remaining: 17s
943:	learn: 0.8561345	total: 4m 40s	remaining: 16.7s
944:	learn: 0.8560119	total: 4m 41s	remaining: 16.4s
945:	learn: 0.8559486	total: 4m 41s	remaining: 16.1s
946:	learn: 0.8559093	total: 4m 41s	remaining: 15.

#### Model Performance

In [17]:
print(classification_report(y_test, preds))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                 precision    recall  f1-score   support

       Adoption       0.62      0.82      0.71      8453
           Died       0.50      0.02      0.03        61
       Disposal       0.00      0.00      0.00         9
     Euthanasia       0.51      0.13      0.21       444
        Missing       0.00      0.00      0.00        10
Return to Owner       0.63      0.67      0.65      4847
      Rto-Adopt       0.00      0.00      0.00       140
       Transfer       0.60      0.18      0.28      3762

       accuracy                           0.62     17726
      macro avg       0.36      0.23      0.23     17726
   weighted avg       0.61      0.62      0.58     17726



  _warn_prf(average, modifier, msg_start, len(result))
