In [1]:
import numpy as np
import pandas as pd
import math
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score


In [7]:
# Load the opioid-related deaths CSV.
# The file's first column is an unnamed, saved row index (it shows up as
# "Unnamed: 0" when read with default options). Use it as the DataFrame
# index so it cannot leak into the feature matrix as a meaningless column.
df = pd.read_csv("Data/Opioid_Related_Deaths_2012-2018.csv", index_col=0)
df.head(10)

Unnamed: 0,ID,Date,Age,Sex,Race,ResidenceCity,ResidenceCounty,ResidenceState,DeathCity,DeathCounty,...,InjuryCounty,InjuryState,COD,Heroin,Synthetic Opioids,Prescription Opioids,DeathCityGeo,DeathCityLat,DeathCityLong,Drug_Category
0,14-0273,06/28/2014 12:00:00 AM,,,,,,,,,...,,,"Acute fent, hydrocod, benzodiazepine",False,True,True,"41.575155, -72.738288",41.575155,-72.738288,Synthetic Opioids
1,16-0165,03/13/2016 12:00:00 AM,30.0,Female,White,SANDY HOOK,FAIRFIELD,CT,DANBURY,,...,,,Acute Heroin and Cocaine Intoxication,True,False,False,"41.393666, -73.451539",41.393666,-73.451539,Heroin
2,16-0208,03/31/2016 12:00:00 AM,23.0,Male,White,RYE,WESTCHESTER,NY,GREENWICH,,...,,,Acute Fentanyl and Morphine Intoxication,True,True,False,"41.026526, -73.628549",41.026526,-73.628549,Heroin
3,13-0052,02/13/2013 12:00:00 AM,22.0,Male,Asian,FLUSHING,QUEENS,,GREENWICH,FAIRFIELD,...,,,Fentanyl Intoxication,False,True,False,"41.026526, -73.628549",41.026526,-73.628549,Synthetic Opioids
4,14-0277,06/29/2014 12:00:00 AM,23.0,Male,White,BRISTOL,,,BRISTOL,HARTFORD,...,,,Heroin Intoxication,True,False,False,"41.673037, -72.945791",41.673037,-72.945791,Heroin
5,12-0205,08/12/2012 12:00:00 AM,21.0,Female,White,WEST HARTFORD,HARTFORD,,WEST HARTFORD,HARTFORD,...,,,Heroin Toxicity,True,False,False,"41.762008, -72.741807",41.762008,-72.741807,Heroin
6,12-0107,04/25/2012 12:00:00 AM,40.0,Male,White,EAST HARTFORD,HARTFORD,,EAST HARTFORD,HARTFORD,...,,,Heroin Toxicity,True,False,False,"41.769319, -72.643785",41.769319,-72.643785,Heroin
7,13-0161,05/15/2013 12:00:00 AM,50.0,Male,White,MONTVILLE,NEW LONDON,,Other,NEW LONDON,...,,,Oxycodone Intoxication,False,False,True,"41.45303, -72.136336",41.45303,-72.136336,Prescription Opioids
8,12-0218,08/23/2012 12:00:00 AM,26.0,Female,Hispanic,,,,STRATFORD,FAIRFIELD,...,,,Multiple Drug Toxicity,True,False,False,"41.200888, -73.131323",41.200888,-73.131323,Heroin
9,15-0334,07/05/2015 12:00:00 AM,49.0,Female,White,,,,NEW HAVEN,NEW HAVEN,...,,,Acute intoxication from the combined effects o...,True,False,True,"41.308252, -72.924161",41.308252,-72.924161,Heroin


In [12]:
# Build the feature matrix X (every column except the target) and the
# 1-D label vector y (the target column's values).
# NOTE(review): the preview of the loaded CSV does not show this column —
# confirm that the dataset and the target name actually belong together.
target_col = 'whether he/she donated blood in March 2007'
if target_col not in df.columns:
    # Same exception type pandas would raise from drop(), but with a message
    # that says what was expected and what is actually available.
    raise KeyError(
        f"target column {target_col!r} not found; "
        f"available columns: {list(df.columns)}"
    )
X = df.drop(columns=[target_col])
y = df[target_col].values


In [13]:
# Hold out 20% of the rows as the test set. Stratifying on y keeps the label
# proportions the same in both partitions; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)


Recency (months)                              0
Frequency (times)                             0
Monetary (c.c. blood)                         0
Time (months)                                 0
whether he/she donated blood in March 2007    0
dtype: int64

In [14]:
# Run TPOT's pipeline search on the training split, scoring candidates by F1.
# Search settings: 5 generations, population of 50, 2-minute time budget,
# all available cores, progress output enabled.
# random_state seeds the stochastic search so repeated runs are comparable
# (same seed value as the train/test split). The wall-clock budget may still
# stop runs at different points on different machines.
tpot_clf = TPOTClassifier(
    generations=5,
    population_size=50,
    scoring='f1',
    max_time_mins=2,
    n_jobs=-1,
    verbosity=2,
    random_state=1,
)
tpot_clf.fit(X_train, y_train)

In [15]:
# Score the fitted TPOT model on the held-out test split.
y_hat_test = tpot_clf.predict(X_test)

# Compute both metrics first, then report them in the same order and format.
test_f1 = f1_score(y_test, y_hat_test)
test_acc = accuracy_score(y_test, y_hat_test)
print(f'F1: {test_f1}')
print(f'Acc: {test_acc}')

Training result: f1: 0.201, acc: 0.774
Test result: f1: 0.186, acc: 0.767





In [17]:
# Save the pipeline found by TPOT as a Python source file.
tpot_clf.export(output_file_name='tpot_model.py')


Training result: f1: 0.528, acc: 0.829
Test result: f1: 0.516, acc: 0.800





Training result: f1: 0.803, acc: 0.918
Test result: f1: 0.345, acc: 0.747

Training result: f1: 0.589, acc: 0.846
Test result: f1: 0.467, acc: 0.787



Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.4s


Training result: f1: 0.575, acc: 0.839
Test result: f1: 0.484, acc: 0.787



[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:   17.6s finished
