# Score: 11.92998

### Imports

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
%matplotlib inline
sns.set()

### Plotting helper function

In [2]:
def pretty_bar(data, xlabel=None, ylabel=None, title=None):
    plt.figure(figsize=(15,12))
    sns.barplot(data.values, data.index)
    for i, v in enumerate(data.values):
        plt.text(0.8,i,v,color='k',fontsize=19)
    ylabel != None and plt.ylabel(ylabel, fontsize=12)
    xlabel != None and plt.xlabel(xlabel, fontsize=12)
    title != None and plt.title(title, fontsize=16)
    plt.show()

### Load data

In [3]:
dateparse = lambda x: pd.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")

train = pd.read_csv("../../data/shelter/train.csv", parse_dates=['DateTime'], date_parser=dateparse)
test  = pd.read_csv("../../data/shelter/test.csv", parse_dates=['DateTime'], date_parser=dateparse)

print("train shape:", train.shape)
print("test shape:", test.shape)

print("Earliest train time:", min(train.DateTime), "- Latest train time:", max(train.DateTime))
print("Earliest test time:",  min(test.DateTime),  "- Latest test time:",  max(test.DateTime))

train shape: (26729, 10)
test shape: (11456, 8)
Earliest train time: 2013-10-01 09:31:00 - Latest train time: 2016-02-21 19:17:00
Earliest test time: 2013-10-01 10:44:00 - Latest test time: 2016-02-21 18:37:00


### First few rows of the train data

- I wonder why the name is included?  Is there anything we can do with it?  I don't think so, but missing names might indicate something (going off 5 data points, NaN is correlated with transfers)

- What use can I make out of OutcomeSubtype?  I wonder if the model would learn the subtypes easier, then I could map those to the OutcomeType

In [4]:
train.head()

Unnamed: 0,AnimalID,Name,DateTime,OutcomeType,OutcomeSubtype,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color
0,A671945,Hambone,2014-02-12 18:22:00,Return_to_owner,,Dog,Neutered Male,1 year,Shetland Sheepdog Mix,Brown/White
1,A656520,Emily,2013-10-13 12:44:00,Euthanasia,Suffering,Cat,Spayed Female,1 year,Domestic Shorthair Mix,Cream Tabby
2,A686464,Pearce,2015-01-31 12:28:00,Adoption,Foster,Dog,Neutered Male,2 years,Pit Bull Mix,Blue/White
3,A683430,,2014-07-11 19:09:00,Transfer,Partner,Cat,Intact Male,3 weeks,Domestic Shorthair Mix,Blue Cream
4,A667013,,2013-11-15 12:52:00,Transfer,Partner,Dog,Neutered Male,2 years,Lhasa Apso/Miniature Poodle,Tan


### First few rows of the test data

In [5]:
test.head()

Unnamed: 0,ID,Name,DateTime,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color
0,1,Summer,2015-10-12 12:15:00,Dog,Intact Female,10 months,Labrador Retriever Mix,Red/White
1,2,Cheyenne,2014-07-26 17:59:00,Dog,Spayed Female,2 years,German Shepherd/Siberian Husky,Black/Tan
2,3,Gus,2016-01-13 12:20:00,Cat,Neutered Male,1 year,Domestic Shorthair Mix,Brown Tabby
3,4,Pongo,2013-12-28 18:12:00,Dog,Intact Male,4 months,Collie Smooth Mix,Tricolor
4,5,Skooter,2015-09-24 17:59:00,Dog,Neutered Male,2 years,Miniature Poodle Mix,White


### Missing data

In [6]:
pd.DataFrame({
    "Train Missing": train.isnull().sum().astype(int),
    "Test Missing": test.isnull().sum().astype(int),
})

Unnamed: 0,Test Missing,Train Missing
AgeuponOutcome,6.0,18.0
AnimalID,,0.0
AnimalType,0.0,0.0
Breed,0.0,0.0
Color,0.0,0.0
DateTime,0.0,0.0
ID,0.0,
Name,3225.0,7691.0
OutcomeSubtype,,13612.0
OutcomeType,,0.0


### Is it possible to guess the Sex?

Not really, you can't guess a sex and whether a pet is neutered.  Since there's one missing value it doesn't make since to replace all NaN values with None.

I'm just going to drop this as an outlier

In [7]:
train[train.SexuponOutcome.isnull()]

Unnamed: 0,AnimalID,Name,DateTime,OutcomeType,OutcomeSubtype,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color
3174,A667395,Diego,2013-11-27 16:11:00,Return_to_owner,,Dog,,7 years,Dachshund,Brown Merle


In [8]:
train = train.drop(train[train.SexuponOutcome.isnull()].index)

### Merge train and test data

In [9]:
full = pd.concat([train, test])
train_N = len(train)

### Time

In [10]:
full["weekday"] = full.DateTime.map(lambda x: x.weekday())
full["is_weekend"] = full.weekday.map(lambda x: int(x in [5, 6]))
full["month"] = full.DateTime.map(lambda x: x.month)
full["hour"] = full.DateTime.map(lambda x: x.hour)
print(full.OutcomeType.unique())

['Return_to_owner' 'Euthanasia' 'Adoption' 'Transfer' 'Died' nan]


### Create column: time of day

In [11]:
full["night"]   = full.hour.map(lambda x: int(x > 5  and x < 11))
full["morning"] = full.hour.map(lambda x: int(x > 10 and x < 16))
full["midday"]  = full.hour.map(lambda x: int(x > 15 and x < 20))
full["lateday"] = full.hour.map(lambda x: int(x > 19 or  x < 6))
full.head(5)

Unnamed: 0,AgeuponOutcome,AnimalID,AnimalType,Breed,Color,DateTime,ID,Name,OutcomeSubtype,OutcomeType,SexuponOutcome,weekday,is_weekend,month,hour,night,morning,midday,lateday
0,1 year,A671945,Dog,Shetland Sheepdog Mix,Brown/White,2014-02-12 18:22:00,,Hambone,,Return_to_owner,Neutered Male,2,0,2,18,0,0,1,0
1,1 year,A656520,Cat,Domestic Shorthair Mix,Cream Tabby,2013-10-13 12:44:00,,Emily,Suffering,Euthanasia,Spayed Female,6,1,10,12,0,1,0,0
2,2 years,A686464,Dog,Pit Bull Mix,Blue/White,2015-01-31 12:28:00,,Pearce,Foster,Adoption,Neutered Male,5,1,1,12,0,1,0,0
3,3 weeks,A683430,Cat,Domestic Shorthair Mix,Blue Cream,2014-07-11 19:09:00,,,Partner,Transfer,Intact Male,4,0,7,19,0,0,1,0
4,2 years,A667013,Dog,Lhasa Apso/Miniature Poodle,Tan,2013-11-15 12:52:00,,,Partner,Transfer,Neutered Male,4,0,11,12,0,1,0,0


### Create columns sex and is_netured

In [12]:
def get_neut(x):
    if x is np.nan:
        return np.nan
    elif x == "Unknown":
        return 2
    return int("Neutered" in x or "Sprayed" in x)

def get_sex(x):
    if x is np.nan:
        return np.nan
    elif x == "Unknown":
        return 2
    return int("Male" in x)

full["Sex"] = full.SexuponOutcome.map(get_sex)
full["isNetured"] = full.SexuponOutcome.map(get_neut)

full = full.drop(["SexuponOutcome"], axis=1)

### AgeuponOutcome

In [13]:
def format_age(x):
    if x is np.nan:
        return None
    
    num, scale = x.split(" ")
    if scale in ["day", "days"]:
        return int(num)
    elif scale in ["week", "weeks"]:
        return 7 * int(num)
    elif scale in ["month", "months"]:
        return 30 * int(num)
    elif scale in ["year", "years"]:
        return 365 * int(num)

# This may seem like double variables, but people are weird and may think 100 weeks is a lot younger than 2 years
def human_age(x, timescale):
    if x is np.nan:
        return 0
    num, scale = x.split(" ")
    if scale in [timescale, timescale+"s"]:
        return int(num)
    return 0

full["age_year"]  = full.AgeuponOutcome.map(lambda x: human_age(x, "year"))
full["age_month"] = full.AgeuponOutcome.map(lambda x: human_age(x, "month"))
full["age_week"]  = full.AgeuponOutcome.map(lambda x: human_age(x, "week"))
full["age_day"]   = full.AgeuponOutcome.map(lambda x: human_age(x, "day"))
full.AgeuponOutcome = full.AgeuponOutcome.map(format_age)

### Fill in AgeuponOutcome values

In [14]:
full.AgeuponOutcome.fillna(
    full.groupby("Breed")["AgeuponOutcome"].transform("median"), inplace=True)

### Create column: is_baby

In [15]:
full["is_baby"] = full.AgeuponOutcome.map(lambda x: int(x < 365))

### AnimalType mapping

In [16]:
full.AnimalType = full.AnimalType.map({"Dog": 0, "Cat": 1})

## Breed

### Hair type and isMix

Looks at the breed and determines whether the breed is a mix or determine the length of its hair

In [17]:
shorthair_map  = lambda x: 1 if "Shorthair" in x else 0
mediumhair_map = lambda x: 1 if "Medium Hair" in x else 0
longhair_map   = lambda x: 1 if "Longhair" in x else 0
mix_map        = lambda x: 1 if "Mix" in x else 0

full["Shorthair"]  = full.Breed.map(shorthair_map)
full["Mediumhair"] = full.Breed.map(mediumhair_map)
full["Longhair"]   = full.Breed.map(longhair_map)
full["Mix"]        = full.Breed.map(mix_map)

### Create column first breed

In [18]:
full["first_breed"] = full.Breed.map(lambda x: x.split("/")[0].replace(" Mix", ""))
full.head(3)

Unnamed: 0,AgeuponOutcome,AnimalID,AnimalType,Breed,Color,DateTime,ID,Name,OutcomeSubtype,OutcomeType,...,age_year,age_month,age_week,age_day,is_baby,Shorthair,Mediumhair,Longhair,Mix,first_breed
0,365.0,A671945,0,Shetland Sheepdog Mix,Brown/White,2014-02-12 18:22:00,,Hambone,,Return_to_owner,...,1,0,0,0,0,0,0,0,1,Shetland Sheepdog
1,365.0,A656520,1,Domestic Shorthair Mix,Cream Tabby,2013-10-13 12:44:00,,Emily,Suffering,Euthanasia,...,1,0,0,0,0,1,0,0,1,Domestic Shorthair
2,730.0,A686464,0,Pit Bull Mix,Blue/White,2015-01-31 12:28:00,,Pearce,Foster,Adoption,...,2,0,0,0,0,0,0,0,1,Pit Bull


### Breed mapping

In [19]:
def get_col_map(df, col, cutoff):
    val_counts = df[col].value_counts()
    ret = dict()
    counter = 0
    
    for index in val_counts.index:
        count = val_counts[index]
        if count > cutoff:
            ret[index] = counter
            counter += 1
        else:
            ret[index] = counter
    return ret

breed_map = get_col_map(full, "Breed", 200)
first_breed_map = get_col_map(full, "first_breed", 200)

full.Breed       = full.Breed.map(breed_map)
full.first_breed = full.first_breed.map(first_breed_map)

### Color mapping

In [20]:
blue_map  = lambda x: 1 if "Blue"      in x else 0
black_map = lambda x: 1 if "White"     in x else 0
white_map = lambda x: 1 if "White"     in x else 0
brown_map = lambda x: 1 if "Brown"     in x else 0
tabby_map = lambda x: 1 if "Tabby"     in x else 0
tan_map   = lambda x: 1 if "Tan"       in x else 0
red_map   = lambda x: 1 if "Red"       in x else 0
choc_map  = lambda x: 1 if "Chocolate" in x else 0


full["Blue"]  = full.Color.map(blue_map)
full["Black"] = full.Color.map(black_map)
full["White"] = full.Color.map(white_map)
full["Brown"] = full.Color.map(brown_map)
full["Tabby"] = full.Color.map(tabby_map)
full["Tan"]   = full.Color.map(tan_map)
full["Red"]   = full.Color.map(red_map)
full["Choc"]  = full.Color.map(choc_map)

In [21]:
color_map = get_col_map(full, "Color", 200)

full.Color = full.Color.map(color_map)

### Name

I'm wondering if a missing name is indicative of anything?  Maybe the name is missing because the dog was transfered quickly or spent a little time at the shelter.  Also some outcome's may rely on having documentation of the name

In [22]:
name_map = lambda x: 0 if x is np.nan else 1

full.Name = full.Name.map(name_map)

### Split full back into train and test

In [23]:
train = full[:train_N]
test = full[train_N:]
full.head()

Unnamed: 0,AgeuponOutcome,AnimalID,AnimalType,Breed,Color,DateTime,ID,Name,OutcomeSubtype,OutcomeType,...,Mix,first_breed,Blue,Black,White,Brown,Tabby,Tan,Red,Choc
0,365.0,A671945,0,19,5,2014-02-12 18:22:00,,1,,Return_to_owner,...,1,26,0,1,1,1,0,0,0,0
1,365.0,A656520,1,0,30,2013-10-13 12:44:00,,1,Suffering,Euthanasia,...,1,0,0,0,0,0,1,0,0,0
2,730.0,A686464,0,1,9,2015-01-31 12:28:00,,1,Foster,Adoption,...,1,1,1,1,1,0,0,0,0,0
3,21.0,A683430,1,0,39,2014-07-11 19:09:00,,0,Partner,Transfer,...,1,0,1,0,0,0,0,0,0,0
4,730.0,A667013,0,19,12,2013-11-15 12:52:00,,0,Partner,Transfer,...,0,26,0,0,0,0,0,1,0,0


### Separate target from predictors

In [25]:
train_y = train.OutcomeType.map({"Adoption": 0, "Transfer": 1, "Return_to_owner": 2, "Euthanasia": 3, "Died": 4})
train_x = train.drop(["DateTime", "OutcomeType", "OutcomeSubtype", "AnimalID", "ID"], axis=1)

test_x = test.drop(["DateTime", "OutcomeType", "OutcomeSubtype", "AnimalID", "ID"], axis=1)

## Construct the model

Let's try out a few models and see which works best through cross validation

In [26]:
# Massive amounts of model imports
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, StratifiedKFold, learning_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Imputer
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

kfold = StratifiedKFold()

def score_model(model):
    score = cross_val_score(model, train_x, train_y, cv=kfold, n_jobs=1, scoring="accuracy")
    print(score)
    print("\nAverage is ...")
    print(sum(score) / len(score))

 ---

## Extra Trees Classifier

### Parameter Search

Tried

{"max_depth": [None],
  "max_features": [1, 3, 10],
  "min_samples_split": [2, 3, 10],
  "min_samples_leaf": [1, 3, 10],
  "bootstrap": [False],
  "n_estimators" :[100,300],
  "criterion": ["gini"]}

Best

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 10,
 'min_samples_leaf': 10,
 'min_samples_split': 2,
 'n_estimators': 300}

In [30]:
# ExtC = ExtraTreesClassifier()


# ## Search grid for optimal parameters
# ex_param_grid = {
#                   "max_depth": [None],
#                   "max_features": [1, 3, 10],
#                   "min_samples_split": [2, 3, 10],
#                   "min_samples_leaf": [1, 3, 10],
#                   "n_estimators" :[100,300]
#                 }


# gsExtC = GridSearchCV(ExtC, param_grid=ex_param_grid, cv=kfold, scoring="accuracy", verbose=1)

# gsExtC.fit(train_x, train_y)

# ExtC_best = gsExtC.best_estimator_

# # Best score
# print(gsExtC.best_score_)
# print(gsExtC.best_params_)

### Model fitting

In [31]:
etc_model = ExtraTreesClassifier(
                                    bootstrap=False,
                                    criterion="gini",
                                    max_depth=None,
                                    max_features=10,
                                    min_samples_leaf=10,
                                    min_samples_split=2,
                                    n_estimators=300
                                )
etc_model.fit(train_x, train_y)
score_model(etc_model)

[ 0.65536977  0.64769921  0.64679466]

Average is ...
0.649954545986


 ---

## Random Forest Classifier

### Parameter Search

Tried

{"max_depth": [None],
  "max_features": [1, 3, 10],
  "min_samples_split": [2, 3, 10],
  "min_samples_leaf": [1, 3, 10],
  "bootstrap": [False],
  "n_estimators" :[100,300],
  "criterion": ["gini"]}

Best

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 10,
 'min_samples_split': 10,
 'n_estimators': 300}

In [32]:
# RFC = RandomForestClassifier()

# ## Search grid for optimal parameters
# rf_param_grid = {"max_depth": [None],
#               "max_features": [1, 3, 10],
#               "min_samples_split": [2, 3, 10],
#               "min_samples_leaf": [1, 3, 10],
#               "bootstrap": [False],
#               "n_estimators" :[100,300],
#               "criterion": ["gini"]}


# gsRFC = GridSearchCV(RFC,param_grid = rf_param_grid, cv=kfold, scoring="accuracy", verbose = 1)

# gsRFC.fit(train_x, train_y)

# RFC_best = gsRFC.best_estimator_

# # Best score
# print(gsRFC.best_score_)
# print(gsRFC.best_params_)

In [33]:
rfc_model = RandomForestClassifier(
                                    bootstrap=False,
                                    criterion="gini",
                                    max_depth=None,
                                    max_features=3,
                                    min_samples_leaf=10,
                                    min_samples_split=10,
                                    n_estimators=300
                                  )
rfc_model.fit(train_x, train_y)
score_model(rfc_model)

[ 0.64728987  0.64320988  0.64264062]

Average is ...
0.644380120913


---

## Gradient Boost Classifier

### Parameter Search

Tried

{'loss' : ["deviance"],
  'n_estimators' : [100,200,300],
  'learning_rate': [0.1, 0.05, 0.01],
  'max_depth': [4, 8],
  'min_samples_leaf': [100,150],
  'max_features': [0.3, 0.1] 
  }

Best

{'learning_rate': 0.05,
 'loss': 'deviance',
 'max_depth': 8,
 'max_features': 0.3,
 'min_samples_leaf': 100,
 'n_estimators': 100}

In [34]:
# GBC = GradientBoostingClassifier()
# gb_param_grid = {'loss' : ["deviance"],
#               'n_estimators' : [100,200,300],
#               'learning_rate': [0.1, 0.05, 0.01],
#               'max_depth': [4, 8],
#               'min_samples_leaf': [100,150],
#               'max_features': [0.3, 0.1] 
#               }

# gsGBC = GridSearchCV(GBC,param_grid = gb_param_grid, cv=kfold, scoring="accuracy", verbose = 1)

# gsGBC.fit(train_x, train_y)

# GBC_best = gsGBC.best_estimator_

# # Best score
# print(gsGBC.best_score_)
# print(gsGBC.best_params_)

In [35]:
gbc_model = GradientBoostingClassifier(
                                          loss = "deviance",
                                          n_estimators = 100,
                                          learning_rate = 0.05,
                                          max_depth = 8,
                                          min_samples_leaf = 100,
                                          max_features = 0.3
                                      )
gbc_model.fit(train_x, train_y)
score_model(gbc_model)

[ 0.66389855  0.66262626  0.66284944]

Average is ...
0.663124753078


---

## SVC

### Parameter Search

WARNING: GridSearchCV takes forever!!  (Like 2 hrs)

tried

{'kernel': ['rbf'], 
  'gamma': [ 0.001, 0.01, 0.1, 1],
  'C': [1, 10, 50, 100,200,300, 1000]}

best

{'C': 200, 'gamma': 0.001, 'kernel': 'rbf'}

In [29]:
# SVMC = SVC(probability=True)
# svc_param_grid = {'kernel': ['rbf'], 
#                   'gamma': [ 0.001, 0.01, 0.1, 1],
#                   'C': [1, 10, 50, 100,200,300, 1000]}

# gsSVMC = GridSearchCV(SVMC,param_grid = svc_param_grid, cv=kfold, scoring="accuracy", verbose = 1)

# gsSVMC.fit(train_x, train_y)

# SVMC_best = gsSVMC.best_estimator_

# # Best score
# print(gsSVMC.best_score_)
# print(gsSVMC.best_params_)

### Model fitting

In [30]:
svc_model = SVC()
svc_model.fit(train_x, train_y)
score_model(svc_model)

[ 0.57468298  0.56700337  0.56977658]

Average is ...
0.570487641106


---

## XGB

### Model fitting

In [36]:
xgb_model = XGBClassifier()
xgb_model.fit(train_x, train_y)
score_model(xgb_model)

[ 0.6539109   0.65173962  0.65173459]

Average is ...
0.652461701941


---

## LGBM

### Model fitting

In [37]:
lgbm_model = LGBMClassifier()
lgbm_model.fit(train_x, train_y)
score_model(lgbm_model)

[ 0.66704074  0.6650954   0.66790165]

Average is ...
0.666679261662


### Ada Boost Classifier

#### Attempt 1:

tried:

{
"n_estimators": [1, 50, 100],
"learning_rate": [0.05, 0.1, 0.2],
"base_estimator\__max_depth": [None, 2, 4],
"base_estimator\__splitter" :   ["best", "random"],
"base_estimator\__criterion" : ["gini", "entropy"]
}

best:

{'base_estimator\__criterion': 'entropy', 'base_estimator\__max_depth': 4, 'base_estimator\__splitter': 'random', 'learning_rate': 0.1, 'n_estimators': 100}

#### Attempt 2:

tried:

{
"n_estimators": [50, 100, 120, 200],
"base_estimator\__max_depth": [3, 4, 6],
}

best:

{'base_estimator\__max_depth': 3, 'n_estimators': 120}


In [34]:
## Search grid for optimal parameters
ada_param_grid = {
                    "n_estimators": [50, 100, 120, 200],
                    "base_estimator__max_depth": [3, 4, 6],
                 }

DTC = DecisionTreeClassifier(random_state=17, splitter="random", criterion="entropy")
ADA = AdaBoostClassifier(base_estimator=DTC, learning_rate=0.08)

gsADA = GridSearchCV(ADA, param_grid=ada_param_grid, cv=kfold, scoring="accuracy", verbose=1)

gsADA.fit(train_x, train_y)

# Best score
print(gsADA.best_score_)
print(gsADA.best_params_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits


[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:  2.2min finished


0.645203531877
{'base_estimator__max_depth': 3, 'n_estimators': 120}


In [None]:
decision_tree = DecisionTreeClassifier(
                                        random_state=17,
                                        max_depth=4,
                                        splitter="random",
                                        criterion="entropy"
                                      )

ada_model = AdaBoostClassifier(
                                base_estimator=decision_tree,
                                learning_rate=0.1,
                                n_estimators=100
                              )

ada_model.fit(train_x, train_y)
score_model(ada_model)

### Averaging all the models

In [35]:
# model = VotingClassifier(estimators=[
#                                         ("ETC", etc_model),                                    
#                                         ("RFC", rfc_model),
#                                         ("GBC",gbc_model),
#                                         ("XGB", xgb_model),
#                                         ("ADA", ada_model),
#                                         ("LGBM", lgbm_model)
#                                       ], voting='hard')

model = gsADA.best_estimator_

#### Fit and score model
model = model.fit(train_x, train_y)
score_model(model)

[ 0.64605544  0.63995511  0.64657011]

Average is ...
0.644193552372


### Predicting

In [36]:
predictions = model.predict(test_x)

### Save predictions to CSV

In [37]:
pd.DataFrame({
    "ID": test.ID.astype(int),
    "Adoption": [1 if pred == 0 else 0 for pred in predictions],
    "Died": [1 if pred == 4 else 0 for pred in predictions],
    "Euthanasia": [1 if pred == 3 else 0 for pred in predictions],
    "Return_to_owner": [1 if pred == 2 else 0 for pred in predictions],
    "Transfer": [1 if pred == 1 else 0 for pred in predictions]
}).to_csv('../../submissions/shelter_ada_2.csv',index=False)