In [1]:
# necesary imports

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# open data sheet
data = pd.read_csv('../data/reworked1.csv')
data = data.drop(['Unnamed: 0'], axis=1)
y_cols = ['foot', 'lower_leg', 'upper_leg', 'upper_body', 'arms', 'head', 'minor', 'injury']
y_data = data[y_cols]
x_data = data.drop(y_cols, axis=1)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.25, random_state=3)

In [3]:
# testing cell really, to make sure sizes worked
print(data.info())
print(x_data.info())
input_size = len(x_data.columns)
# print(input_size)
cols = list(x_data.columns)
phys_cols = cols[:16]
inj_hist = cols[16:]
len(phys_cols), len(inj_hist)
entries_per_injury = 11
# y_data.columns
# inj_hist

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19980 entries, 0 to 19979
Columns: 178 entries, Age to injury
dtypes: float64(178)
memory usage: 27.1 MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19980 entries, 0 to 19979
Columns: 170 entries, Age to isVeryShort.13
dtypes: float64(170)
memory usage: 25.9 MB
None


In [121]:
# logistic classifier
def logistic_classifier(x_train, y_train, x_test, y_test, params={'max_iter':1000}, verbose=False):
    clf = {}
    stats = {}
    for label in y_cols:
        y_train_label = y_train[label]
        y_test_label = y_test[label]
        clf[label] = LogisticRegression(max_iter=params['max_iter'], solver='liblinear').fit(x_train, y_train_label)
        label_predictions = clf[label].predict(x_test)
        label_score = accuracy_score(y_test_label, label_predictions)
        label_f1_score = f1_score(y_test_label, label_predictions)
        label_auc = roc_auc_score(y_test_label, label_predictions)
        stats[label] = {'accuracy': label_score, 'f1': label_f1_score, 'auc': label_auc}
    
    if verbose:
        print("---------- Logistic Regression Model ----------")
        for label in y_cols:
            print("{:12s} \t| accuracy: {:10f}\tf1: {:10f}\t auc: {:10f}".format(label, stats[label]['accuracy'], stats[label]['f1'], stats[label]['auc']))
        
    return clf, stats

In [120]:
# knn classifier
def knn_classifier(x_train, y_train, x_test, y_test, params={'n_neighbors':7}, verbose=False):
    clf = {}
    stats = {}
    for label in y_cols:
        y_train_label = y_train[label]
        y_test_label = y_test[label]
        clf[label] = KNeighborsClassifier(n_neighbors=params['n_neighbors']).fit(x_train, y_train_label)
        label_predictions = clf[label].predict(x_test)
        label_score = accuracy_score(y_test_label, label_predictions)
        label_f1_score = f1_score(y_test_label, label_predictions)
        label_auc = roc_auc_score(y_test_label, label_predictions)
        stats[label] = {'accuracy': label_score, 'f1': label_f1_score, 'auc': label_auc}
    
    if verbose:
        print("---------- KNN Model ----------")
        for label in y_cols:
            print("{:12s} \t| accuracy: {:10f}\tf1: {:10f}\t auc: {:10f}".format(label, stats[label]['accuracy'], stats[label]['f1'], stats[label]['auc']))
        
    return clf, stats

In [119]:
# random forest classifier
def random_forest_classifier(x_train, y_train, x_test, y_test, params={'n_estimators':15}, verbose=False):
    clf = {}
    stats = {}
    for label in y_cols:
        y_train_label = y_train[label]
        y_test_label = y_test[label]
        clf[label] = RandomForestClassifier(n_estimators=params['n_estimators'], n_jobs=12).fit(x_train, y_train_label)
        label_predictions = clf[label].predict(x_test)
        label_score = accuracy_score(y_test_label, label_predictions)
        label_f1_score = f1_score(y_test_label, label_predictions)
        label_auc = roc_auc_score(y_test_label, label_predictions)
        stats[label] = {'accuracy': label_score, 'f1': label_f1_score, 'auc': label_auc}
    
    if verbose:
        print("---------- Random Forest Model ----------")
        for label in y_cols:
            print("{:12s} \t| accuracy: {:10f}\tf1: {:10f}\t auc: {:10f}".format(label, stats[label]['accuracy'], stats[label]['f1'], stats[label]['auc']))
        
    return clf, stats

In [7]:
# tried to make a custom nn classifier with a lstm DIDNT WORK
def nn_classifier(verbose=True):
    # input layers
    phys_traits_input = keras.Input(
        shape=(len(phys_cols),), name='physical_input'
    )
    inj_input = keras.Input(
        shape=(len(inj_hist),1,), name='inj_hist_input'
    )
    
    # lstm
    phys_features = layers.LSTM(11, return_sequences=False)(inj_input)
    
    # merge into 1 combined layer
    x = layers.concatenate([phys_traits_input, phys_features])
    
    # a few fully connected layers
    seq_model = keras.Sequential(
        [
            layers.Dense(64, activation='relu'),
            layers.Dense(32, activation='relu')
        ]
    )(x)
    
    # a seperate 1 node layer for each output
    
    foot_layer = layers.Dense(1, activation='softmax')(seq_model)
    lower_leg_layer = layers.Dense(1, activation='softmax')(seq_model)
    upper_leg_layer = layers.Dense(1, activation='softmax')(seq_model)
    upper_body_layer = layers.Dense(1, activation='softmax')(seq_model)
    arms_layer = layers.Dense(1, activation='softmax')(seq_model)
    head_layer = layers.Dense(1, activation='softmax')(seq_model)
    minor_layer = layers.Dense(1, activation='softmax')(seq_model)
    injury_layer = layers.Dense(1, activation='softmax')(seq_model)
    
    
    # create model
    model = keras.Model(
        inputs=[phys_traits_input, inj_input],
        outputs=[foot_layer, lower_leg_layer, upper_leg_layer, upper_body_layer, arms_layer, head_layer, minor_layer, injury_layer]
    )
    
    model.compile(
        optimizer='adam',
        loss='BinaryCrossentropy',
        metrics=['accuracy']
    )
    
    phys_data = x_train[phys_cols]
    inj_data = x_train[inj_hist[::-1]]
    print(y_train)
    model.fit([phys_data, inj_data], [y_train.to_numpy()[:]], epochs=1)
    
    phys_data_test = x_test[phys_cols]
    inj_data_test = x_test[inj_hist[::-1]]
    predictions = model.predict([phys_data_test, inj_data_test])
    
    print(perdictions)
#         label_score = accuracy_score(y_test_label, label_predictions)
#         label_f1_score = f1_score(y_test_label, label_predictions)
#         label_auc = roc_auc_score(y_test_label, label_predictions)
#         stats[label] = {'accuracy': label_score, 'f1': label_f1_score, 'auc': label_auc}

#     if verbose:
#         print("---------- Custom Neural Network Model ----------")
#         for label in y_cols:
#             print("{:12s} \t| accuracy: {:10f}\tf1: {:10f}\t auc: {:10f}".format(label))
    
nn_classifier()                                                                                

       foot  lower_leg  upper_leg  upper_body  arms  head  minor  injury
17904   0.0        1.0        1.0         0.0   1.0   0.0    0.0     1.0
17395   0.0        0.0        0.0         0.0   0.0   0.0    0.0     0.0
15507   0.0        0.0        0.0         0.0   0.0   0.0    0.0     0.0
3591    0.0        0.0        1.0         0.0   0.0   1.0    1.0     1.0
6368    1.0        0.0        0.0         1.0   0.0   0.0    1.0     1.0
...     ...        ...        ...         ...   ...   ...    ...     ...
6400    0.0        0.0        0.0         0.0   0.0   0.0    0.0     0.0
15288   0.0        0.0        1.0         1.0   0.0   1.0    1.0     1.0
11513   0.0        0.0        0.0         0.0   0.0   0.0    0.0     0.0
1688    1.0        0.0        1.0         1.0   0.0   1.0    0.0     1.0
5994    0.0        0.0        0.0         0.0   0.0   0.0    0.0     0.0

[14985 rows x 8 columns]


ValueError: in user code:

    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\engine\training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\engine\training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\engine\training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\engine\training.py", line 810, in train_step
        y, y_pred, sample_weight, regularization_losses=self.losses)
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\losses.py", line 1807, in binary_crossentropy
        backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
    File "C:\Users\rvtay\miniconda3\envs\dvahw4\lib\site-packages\keras\backend.py", line 5158, in binary_crossentropy
        return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)

    ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs (None, 8)).


In [9]:
rf_clf, rf_stats = random_forest_classifier(x_train, y_train, x_test, y_test, verbose=True)
lr_clf, lr_stats = logistic_classifier(x_train, y_train, x_test, y_test, verbose=True)
knn_clf, knn_stats = knn_classifier(x_train, y_train, x_test, y_test, verbose=True)

---------- Random Forest Model ----------
foot         	| accuracy:   0.891091	f1:   0.821288	 auc:   0.869607
lower_leg    	| accuracy:   0.920721	f1:   0.814607	 auc:   0.880801
upper_leg    	| accuracy:   0.909710	f1:   0.869010	 auc:   0.898191
upper_body   	| accuracy:   0.922923	f1:   0.883014	 auc:   0.911578
arms         	| accuracy:   0.911311	f1:   0.824832	 auc:   0.883635
head         	| accuracy:   0.960160	f1:   0.787166	 auc:   0.882228
minor        	| accuracy:   0.932132	f1:   0.928466	 auc:   0.931829
injury       	| accuracy:   0.924925	f1:   0.939330	 auc:   0.923091
---------- Logistic Regression Model ----------
foot         	| accuracy:   0.915516	f1:   0.862182	 auc:   0.899910
lower_leg    	| accuracy:   0.924525	f1:   0.811594	 auc:   0.863590
upper_leg    	| accuracy:   0.951151	f1:   0.930995	 auc:   0.950073
upper_body   	| accuracy:   0.939940	f1:   0.911609	 auc:   0.938629
arms         	| accuracy:   0.915716	f1:   0.823184	 auc:   0.870013
head         

In [82]:
# a method we can call that will use the dictionary structure above and output probabilities
def classifier_predict(clf, x_data):
    return clf.predict_proba(x_data)[:,1]

# only predicts one label, useful for tabpy
# x_data should be one row
# def classifier_predict_field(clf, field, x_data):
#     x_data = x_data.reshape((1,-1))
#     label_classifier = clf[field]
#     pred = label_classifier.predict_proba(x_data)[0,1]
#     return pred

In [101]:
from joblib import dump, load
import os
def save_classifier(clf, filepath):
    if not os.path.isdir(filepath):
        os.mkdir(filepath)
    for k in clf.keys():
        new_path = os.path.join(filepath, k + ".joblib")
        dump(clf[k], new_path)
    
def load_classifier(filepath):
    clf = load(filepath)
    return clf

def load_label_classifier(filepath, label):
    full_path = os.path.join(filepath, label + ".joblib")
    return load(full_path)

In [12]:
save_classifier(lr_clf, "..\\models\\lr.joblib")

In [40]:
lr2_clf = load_classifier("..\\models\\lr.joblib")
print(classifier_predict(lr2_clf, x_test))
row = x_test.iloc[0]
print(classifier_predict_field(lr2_clf, 'foot', row))

          foot  lower_leg  upper_leg  upper_body      arms          head  \
0     0.994831   0.013657   0.944021    0.955111  0.892417  1.961292e-03   
1     0.435758   0.497008   0.514070    0.530234  0.353820  7.408421e-02   
2     0.019451   0.029781   0.009835    0.003557  0.037421  6.039563e-03   
3     0.992493   0.000114   0.006709    0.996847  0.007090  1.494897e-03   
4     0.000112   0.000123   0.000031    0.000004  0.000176  1.301877e-04   
...        ...        ...        ...         ...       ...           ...   
4990  0.934162   0.006899   0.979093    0.978625  0.005681  1.501599e-07   
4991  0.000053   0.003427   0.940232    0.005521  0.010435  1.170729e-06   
4992  0.330864   0.228490   0.997374    0.768057  0.688141  1.443452e-01   
4993  0.001463   0.000013   0.003532    0.023448  0.000056  2.129177e-08   
4994  0.363903   0.241510   0.420040    0.155238  0.427890  1.243741e-01   

         minor    injury  
0     0.958936  0.940679  
1     0.647787  0.745509  
2     

In [14]:
# trying with no injury history
data2 = pd.read_csv('../data/data_nohist.csv')
y_cols = ['foot', 'lower_leg', 'upper_leg', 'upper_body', 'arms', 'head', 'minor', 'injury']
y_data2 = data2[y_cols]
x_data2 = data2.drop(y_cols, axis=1)
x_train2, x_test2, y_train2, y_test2 = train_test_split(x_data2, y_data2, test_size=0.25, random_state=3)

In [17]:
rf_clf, rf_stats = random_forest_classifier(x_train2, y_train2, x_test2, y_test2, verbose=True)
lr_clf, lr_stats = logistic_classifier(x_train2, y_train2, x_test2, y_test2, verbose=True)
knn_clf, knn_stats = knn_classifier(x_train2, y_train2, x_test2, y_test2, verbose=True)

---------- Random Forest Model ----------
foot         	| accuracy:   0.480480	f1:   0.507123	 auc:   0.479409
lower_leg    	| accuracy:   0.441441	f1:   0.388158	 auc:   0.437122
upper_leg    	| accuracy:   0.552553	f1:   0.639225	 auc:   0.525123
upper_body   	| accuracy:   0.543544	f1:   0.597884	 auc:   0.535016
arms         	| accuracy:   0.441441	f1:   0.384106	 auc:   0.437713
head         	| accuracy:   0.723724	f1:   0.041667	 auc:   0.435325
minor        	| accuracy:   0.633634	f1:   0.755020	 auc:   0.514554
injury       	| accuracy:   0.729730	f1:   0.835766	 auc:   0.540024
---------- Logistic Regression Model ----------
foot         	| accuracy:   0.552553	f1:   0.620865	 auc:   0.540964
lower_leg    	| accuracy:   0.510511	f1:   0.248848	 auc:   0.482261
upper_leg    	| accuracy:   0.609610	f1:   0.757463	 auc:   0.500000
upper_body   	| accuracy:   0.555556	f1:   0.702811	 auc:   0.502161
arms         	| accuracy:   0.447447	f1:   0.386667	 auc:   0.442894
head         

In [27]:
# trying with no injury history
data3 = pd.read_csv('../data/final_dataset1.csv')
data3 = data3.fillna(0)
y_cols = ['foot', 'lower_leg', 'upper_leg', 'upper_body', 'arms', 'head', 'minor']
y_data3 = data3[y_cols]
x_data3 = data3.drop(y_cols, axis=1)
x_train3, x_test3, y_train3, y_test3 = train_test_split(x_data3, y_data3, test_size=0.25, random_state=3)

In [28]:
rf_clf, rf_stats = random_forest_classifier(x_train3, y_train3, x_test3, y_test3, verbose=True)
lr_clf, lr_stats = logistic_classifier(x_train3, y_train3, x_test3, y_test3, verbose=True)
knn_clf, knn_stats = knn_classifier(x_train3, y_train3, x_test3, y_test3, verbose=True)

---------- Random Forest Model ----------
foot         	| accuracy:   0.820189	f1:   0.095238	 auc:   0.503862
lower_leg    	| accuracy:   0.864353	f1:   0.085106	 auc:   0.505753
upper_leg    	| accuracy:   0.785489	f1:   0.209302	 auc:   0.544539
upper_body   	| accuracy:   0.788644	f1:   0.151899	 auc:   0.518338
arms         	| accuracy:   0.744479	f1:   0.164948	 auc:   0.509784
head         	| accuracy:   0.971609	f1:   0.000000	 auc:   0.493590
minor        	| accuracy:   0.615142	f1:   0.460177	 auc:   0.580579
---------- Logistic Regression Model ----------
foot         	| accuracy:   0.835962	f1:   0.000000	 auc:   0.483577
lower_leg    	| accuracy:   0.908517	f1:   0.064516	 auc:   0.511438
upper_leg    	| accuracy:   0.826498	f1:   0.035088	 auc:   0.485021
upper_body   	| accuracy:   0.876972	f1:   0.048780	 auc:   0.505420
arms         	| accuracy:   0.839117	f1:   0.105263	 auc:   0.513896
head         	| accuracy:   0.984227	f1:   0.000000	 auc:   0.500000
minor        

In [95]:
# trying my new strat (counts of injuries in the history)
data4 = pd.read_csv('../data/new_strat.csv')
data4 = data4.drop(columns=['Unnamed: 0'])
y_cols = ['foot', 'lower_leg', 'upper_leg', 'upper_body', 'arms', 'head', 'minor', 'injury']
y_data4 = data4[y_cols]
x_data4 = data4.drop(y_cols, axis=1)
x_train4, x_test4, y_train4, y_test4 = train_test_split(x_data4, y_data4, test_size=0.25, random_state=3)


In [107]:
rf_clf, rf_stats = random_forest_classifier(x_train4, y_train4, x_test4, y_test4, verbose=True)
lr_clf, lr_stats = logistic_classifier(x_train4, y_train4, x_test4, y_test4, verbose=True)
knn_clf, knn_stats = knn_classifier(x_train4, y_train4, x_test4, y_test4, verbose=True)

---------- Random Forest Model ----------
foot         	| accuracy:   0.963964	f1:   0.940984	 auc:   0.955703
lower_leg    	| accuracy:   0.968969	f1:   0.926505	 auc:   0.947744
upper_leg    	| accuracy:   0.967167	f1:   0.952874	 auc:   0.963965
upper_body   	| accuracy:   0.969369	f1:   0.953510	 auc:   0.963988
arms         	| accuracy:   0.968569	f1:   0.936257	 auc:   0.950846
head         	| accuracy:   0.985185	f1:   0.918142	 auc:   0.941057
minor        	| accuracy:   0.876877	f1:   0.871473	 auc:   0.876890
injury       	| accuracy:   0.949149	f1:   0.958726	 auc:   0.950356
---------- Logistic Regression Model ----------
foot         	| accuracy:   0.730330	f1:   0.382393	 auc:   0.602785
lower_leg    	| accuracy:   0.788789	f1:   0.057194	 auc:   0.513127
upper_leg    	| accuracy:   0.651652	f1:   0.153696	 auc:   0.521012
upper_body   	| accuracy:   0.726126	f1:   0.469356	 auc:   0.635175
arms         	| accuracy:   0.744745	f1:   0.061810	 auc:   0.509036
head         

In [108]:
save_classifier(rf_clf, "../models/numbered_history_rf")

In [116]:
my_clf = load_label_classifier("../models/numbered_history_rf", 'injury')
print(my_clf)
print(x_test4.iloc[0].to_numpy().reshape((1,-1)), x_test4.iloc[0].to_numpy().reshape((1,-1)).dtype)
out = classifier_predict(my_clf, x_test4.iloc[0].to_numpy().reshape((1,-1)))
print(out)
# out = classifier_predict_field(my_clf, 'foot', x_test4.iloc[0].to_numpy().reshape((1,-1)))
# print(out)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=31, n_jobs=12,
                       oob_score=False, random_state=None, verbose=0,
                       warm_start=False)
[[ 28. 169.  63.   0.   0.   1.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   1.   0.]] float64
[1.]


In [130]:
def my_cv(classifier_fn, params, data):
    for p in params:
        clf, stats = classifier_fn(data['xtrain'], data['ytrain'], data['xtest'], data['ytest'], params=p, verbose=False)
        mean_acc = sum([stats[k]['accuracy'] for k in stats.keys()])/len(stats.keys())
        mean_f1 = sum([stats[k]['f1'] for k in stats.keys()])/len(stats.keys())
        mean_auc = sum([stats[k]['auc'] for k in stats.keys()])/len(stats.keys())
        print("params: {}\tacc: {}\tf1: {}\tauc: {}".format(p, mean_acc, mean_f1, mean_auc))
 

params: {'n_estimators': 1}	acc: 0.9301801801801802	f1: 0.8764532385323078	auc: 0.9040787081449783
params: {'n_estimators': 11}	acc: 0.9536786786786788	f1: 0.9285408526657065	auc: 0.9404172508970666
params: {'n_estimators': 21}	acc: 0.9552802802802802	f1: 0.9313156781005593	auc: 0.9422824732146599
params: {'n_estimators': 31}	acc: 0.9555055055055056	f1: 0.9312417330619667	auc: 0.9420295523105695
params: {'n_estimators': 41}	acc: 0.9564064064064064	f1: 0.9329490615422648	auc: 0.9432750425266119
params: {'n_estimators': 51}	acc: 0.955880880880881	f1: 0.9319295844460371	auc: 0.9424753336797849
params: {'n_estimators': 61}	acc: 0.956006006006006	f1: 0.9320339352504233	auc: 0.9430879415146577
params: {'n_estimators': 71}	acc: 0.9555555555555555	f1: 0.9311540412770823	auc: 0.9421461868319518
params: {'n_estimators': 81}	acc: 0.9567067067067067	f1: 0.9329686761296578	auc: 0.9438150453723774
params: {'n_estimators': 91}	acc: 0.9561811811811811	f1: 0.9332197348176179	auc: 0.9433153797631697


In [131]:
print("Random Forest")
param = [{'n_estimators': e} for e in range(1,101,10)]
my_cv(random_forest_classifier, param, {'xtrain':x_train4, 'ytrain':y_train4, 'xtest':x_test4, 'ytest':y_test4})

Random Forest
params: {'n_estimators': 1}	acc: 0.929004004004004	f1: 0.8809527175647099	auc: 0.9065776734898292
params: {'n_estimators': 11}	acc: 0.9525775775775776	f1: 0.924895296391141	auc: 0.9380161974929353
params: {'n_estimators': 21}	acc: 0.9548798798798799	f1: 0.9301393127351308	auc: 0.941957275587916
params: {'n_estimators': 31}	acc: 0.9556056056056056	f1: 0.9310979357970022	auc: 0.941958103661168
params: {'n_estimators': 41}	acc: 0.9554054054054053	f1: 0.9315646150370753	auc: 0.9424638077537845
params: {'n_estimators': 51}	acc: 0.9558808808808809	f1: 0.9324792108744793	auc: 0.9429778057264926
params: {'n_estimators': 61}	acc: 0.956006006006006	f1: 0.9319641856206518	auc: 0.9427750388872451
params: {'n_estimators': 71}	acc: 0.9566816816816817	f1: 0.9335141041888567	auc: 0.9439795909420018
params: {'n_estimators': 81}	acc: 0.9568568568568568	f1: 0.9347732434219717	auc: 0.9444343160996009
params: {'n_estimators': 91}	acc: 0.9558558558558559	f1: 0.9324922479271126	auc: 0.942741311

In [133]:
print("Logistic Regression")
param = [{'max_iter': e} for e in range(500,5001,500)]
my_cv(logistic_classifier, param, {'xtrain':x_train4, 'ytrain':y_train4, 'xtest':x_test4, 'ytest':y_test4})

Logistic Regression
params: {'max_iter': 500}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 1000}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 1500}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 2000}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 2500}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 3000}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 3500}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 4000}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 4500}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201
params: {'max_iter': 5000}	acc: 0.737012012012012	f1: 0.3592971440984902	auc: 0.5813029785489201


In [134]:
print("Knn")
param = [{'n_neighbors': e} for e in range(1,31,5)]
my_cv(knn_classifier, param, {'xtrain':x_train4, 'ytrain':y_train4, 'xtest':x_test4, 'ytest':y_test4})

Knn
params: {'n_neighbors': 1}	acc: 0.9493243243243242	f1: 0.9281352658629635	auc: 0.9374494315796731
params: {'n_neighbors': 6}	acc: 0.9023773773773774	f1: 0.8271193233913913	auc: 0.8580776632388594
params: {'n_neighbors': 11}	acc: 0.8668418418418419	f1: 0.759693202630126	auc: 0.8118395882811436
params: {'n_neighbors': 16}	acc: 0.8356606606606607	f1: 0.6704916013048536	auc: 0.7495289958118946
params: {'n_neighbors': 21}	acc: 0.8071821821821822	f1: 0.6178324333379189	auc: 0.7131673058809669
params: {'n_neighbors': 26}	acc: 0.7918418418418418	f1: 0.5595632124229357	auc: 0.6799586457287361
