# Kaggle Dataset: https://www.kaggle.com/mlg-ulb/creditcardfraud 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# The bare 'seaborn' style sheet was renamed 'seaborn-v0_8' in matplotlib 3.6 and the
# old name was later removed; use whichever name this matplotlib install provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

In [2]:
import time
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split,GridSearchCV,RepeatedStratifiedKFold,cross_val_score
from sklearn.metrics import classification_report,confusion_matrix,precision_recall_curve,auc,average_precision_score
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from sklearn.pipeline import Pipeline as skPipeline
from sklearn.linear_model import SGDClassifier
from imblearn.over_sampling import RandomOverSampler,SMOTE,KMeansSMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as imPipeline

In [3]:
# Read the credit-card transactions CSV (path is relative to the working directory).
df = pd.read_csv('data/creditcard.csv')
# Preview the first rows as the cell's output.
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [4]:
# Keep the bound method so the same callable yields raw counts or proportions.
counts = df['Class'].value_counts
absolute_counts = counts()
class_shares = counts(normalize=True)
print(f'Absolute class count\n{absolute_counts}\n')
print(f'Class proportion\n{class_shares}\n')

Absolute class count
0    284315
1       492
Name: Class, dtype: int64

Class proportion
0    0.998273
1    0.001727
Name: Class, dtype: float64



In [5]:
def auc_score(model,X_test,y_test):
    """Scorer with the (clf, X, y_true) signature: area under the precision-recall curve.

    Column 1 of ``model.predict_proba(X_test)`` is used as the per-sample score.
    """
    positive_scores = model.predict_proba(X_test)[:, 1]
    precision, recall, _ = precision_recall_curve(y_test, positive_scores)
    return auc(recall, precision)

In [6]:
def auc_score_2(model,X_test,y_test):
    """Scorer with the (clf, X, y_true) signature: average precision.

    Scores come from ``model.decision_function(X_test)``, so this works for
    estimators that do not expose ``predict_proba``.
    """
    return average_precision_score(y_test, model.decision_function(X_test))

# Original Data

In [4]:
# Target is the 'Class' column; features are everything except 'Time' and 'Class'.
y = df['Class']
X = df.drop(columns=['Time', 'Class'])
# Hold out 33% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [131]:
t0 = time.time()

# Scaler -> SGD pipeline shared by both searches.
scaler = StandardScaler()
model = SGDClassifier(n_jobs=3, random_state=42, fit_intercept=False)
pipe = skPipeline(steps=[('scaler', scaler), ('model', model)])

# Both searches sweep the same alpha/tol axes; only the candidate losses differ.
parameters = ['model__loss', 'model__alpha', 'model__tol']
alpha_grid = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
tol_grid = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
values = [['log', 'modified_huber'], alpha_grid, tol_grid]
values2 = [['hinge', 'squared_hinge', 'perceptron'], alpha_grid, tol_grid]
dict_parameters = dict(zip(parameters, values))
dict_parameters2 = dict(zip(parameters, values2))

search_options = dict(cv=5, verbose=1, n_jobs=3)

# First search: losses scored with auc_score (which calls predict_proba).
grid_search = GridSearchCV(pipe, param_grid=dict_parameters, scoring=auc_score, **search_options)
grid_search.fit(X_train, y_train)

# Second search: losses scored with auc_score_2 (which calls decision_function).
grid_search2 = GridSearchCV(pipe, param_grid=dict_parameters2, scoring=auc_score_2, **search_options)
grid_search2.fit(X_train, y_train)

print('total time taken:',time.time()-t0)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:   31.8s
[Parallel(n_jobs=3)]: Done 194 tasks      | elapsed:  1.3min
[Parallel(n_jobs=3)]: Done 300 out of 300 | elapsed:  1.5min finished


Fitting 5 folds for each of 90 candidates, totalling 450 fits


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:   25.3s
[Parallel(n_jobs=3)]: Done 194 tasks      | elapsed:  2.6min
[Parallel(n_jobs=3)]: Done 444 tasks      | elapsed: 11.6min
[Parallel(n_jobs=3)]: Done 450 out of 450 | elapsed: 11.8min finished


total time taken: 801.143303155899


In [132]:
# Report the winning configuration of each grid search, in the order they were run.
for search in (grid_search, grid_search2):
    print('best_params=', search.best_params_)
    print('best_score=', search.best_score_)

best_params= {'model__alpha': 0.1, 'model__loss': 'modified_huber', 'model__tol': 1e-05}
best_score= 0.7684675247886243
best_params= {'model__alpha': 1, 'model__loss': 'hinge', 'model__tol': 1e-05}
best_score= 0.7413171360967379


In [133]:
# Tabulate the per-candidate cross-validation results of the first grid search.
grid_df=pd.DataFrame(grid_search.cv_results_)
# Preview the first rows as the cell's output.
grid_df.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__alpha,param_model__loss,param_model__tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,2.962408,0.733873,0.039324,0.018972,1e-05,log,1e-05,"{'model__alpha': 1e-05, 'model__loss': 'log', ...",0.723162,0.689621,0.752917,0.74053,0.781425,0.737531,0.030575,26
1,2.889008,0.599454,0.017092,0.002301,1e-05,log,0.0001,"{'model__alpha': 1e-05, 'model__loss': 'log', ...",0.723162,0.689621,0.752917,0.744996,0.781425,0.738424,0.030715,13
2,1.659236,0.364532,0.014532,0.000741,1e-05,log,0.001,"{'model__alpha': 1e-05, 'model__loss': 'log', ...",0.71156,0.690196,0.755587,0.740775,0.779577,0.735539,0.031622,32
3,0.918824,0.07309,0.015797,0.001585,1e-05,log,0.01,"{'model__alpha': 1e-05, 'model__loss': 'log', ...",0.713367,0.685543,0.772272,0.735826,0.779473,0.737296,0.03537,27
4,0.562186,0.006511,0.014978,0.001162,1e-05,log,0.1,"{'model__alpha': 1e-05, 'model__loss': 'log', ...",0.711159,0.682976,0.753253,0.740611,0.783943,0.734388,0.034736,39


In [135]:
# Candidates of the first search whose mean CV score reaches 0.75.
high_scoring = grid_df['mean_test_score'] >= 0.75
grid_df.loc[high_scoring, ['params', 'mean_test_score']]

Unnamed: 0,params,mean_test_score
45,"{'model__alpha': 0.1, 'model__loss': 'modified...",0.768468
46,"{'model__alpha': 0.1, 'model__loss': 'modified...",0.762536
47,"{'model__alpha': 0.1, 'model__loss': 'modified...",0.762184
48,"{'model__alpha': 0.1, 'model__loss': 'modified...",0.760828
49,"{'model__alpha': 0.1, 'model__loss': 'modified...",0.750005


In [136]:
# Tabulate the per-candidate cross-validation results of the second grid search.
grid2_df=pd.DataFrame(grid_search2.cv_results_)
# Preview the first rows as the cell's output.
grid2_df.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__alpha,param_model__loss,param_model__tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,2.562335,0.530736,0.014956,0.000151,1e-05,hinge,1e-05,"{'model__alpha': 1e-05, 'model__loss': 'hinge'...",0.69163,0.624965,0.740981,0.705827,0.720854,0.696851,0.039495,48
1,2.296561,0.295696,0.014884,0.000124,1e-05,hinge,0.0001,"{'model__alpha': 1e-05, 'model__loss': 'hinge'...",0.685863,0.624965,0.740981,0.701796,0.720854,0.694892,0.039541,55
2,1.698176,0.235762,0.015186,0.000453,1e-05,hinge,0.001,"{'model__alpha': 1e-05, 'model__loss': 'hinge'...",0.698158,0.626006,0.734987,0.708829,0.719252,0.697446,0.037732,46
3,0.764775,0.018324,0.016168,0.00115,1e-05,hinge,0.01,"{'model__alpha': 1e-05, 'model__loss': 'hinge'...",0.689997,0.625271,0.737056,0.703352,0.720088,0.695153,0.038353,54
4,0.44451,0.009891,0.015397,0.00174,1e-05,hinge,0.1,"{'model__alpha': 1e-05, 'model__loss': 'hinge'...",0.689057,0.644712,0.744664,0.703886,0.72482,0.701428,0.034036,26


In [139]:
# Candidates of the second search whose mean CV score reaches 0.74.
high_scoring2 = grid2_df['mean_test_score'] >= 0.74
grid2_df.loc[high_scoring2, ['params', 'mean_test_score']]

Unnamed: 0,params,mean_test_score
60,"{'model__alpha': 0.1, 'model__loss': 'hinge', ...",0.740048
75,"{'model__alpha': 1, 'model__loss': 'hinge', 'm...",0.741317
76,"{'model__alpha': 1, 'model__loss': 'hinge', 'm...",0.741295
77,"{'model__alpha': 1, 'model__loss': 'hinge', 'm...",0.741295
78,"{'model__alpha': 1, 'model__loss': 'hinge', 'm...",0.741295
79,"{'model__alpha': 1, 'model__loss': 'hinge', 'm...",0.741295


# Resampling

## Random over-sampling for SGDClassifier

In [148]:
# Each loss is paired positionally with a scorer: the first two use auc_score
# (predict_proba), the remaining three use auc_score_2 (decision_function).
# NOTE: scikit-learn 1.1 renamed the 'log' loss to 'log_loss'; adjust to the installed version.
loss_parameters = ['log', 'modified_huber','hinge','squared_hinge','perceptron']
scoring_parameters = [auc_score] *2 + [auc_score_2] *3
strategy_parameters = np.linspace(0.3,1.,8)

# The splitter is seeded, so one instance yields the same folds for every run.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=42)

t0=time.time()
for loss,scoring in zip(loss_parameters,scoring_parameters):
    t1=time.time()
    print('----'*8+f'loss={loss}'+'----'*8)

    for strategy in strategy_parameters:
        print('strategy =',strategy)
        scaler = StandardScaler()
        # Keyword form: positional sampler arguments are deprecated/rejected by
        # newer imbalanced-learn releases.
        over = RandomOverSampler(sampling_strategy=strategy, random_state=42)
        model = SGDClassifier(loss=loss,n_jobs=3, random_state=42,fit_intercept=False)
        pipe = imPipeline([('scaler',scaler),
                           ('over',over),
                           ('model',model)])

        scores = cross_val_score(pipe, X_train, y_train, scoring=scoring, cv=cv, n_jobs=3)
        score = np.mean(scores)
        print(f'Score = {score:.5f} \n')

    print('time taken:',time.time() - t1)

print('total time taken:',time.time()-t0)

--------------------------------loss=log--------------------------------
strategy = 0.3
Score = 0.76891 

strategy = 0.39999999999999997
Score = 0.76987 

strategy = 0.5
Score = 0.77297 

strategy = 0.6
Score = 0.77148 

strategy = 0.7
Score = 0.77504 

strategy = 0.7999999999999999
Score = 0.77396 

strategy = 0.8999999999999999
Score = 0.77428 

strategy = 1.0
Score = 0.77460 

time taken: 119.4126307964325
--------------------------------loss=modified_huber--------------------------------
strategy = 0.3
Score = 0.67052 

strategy = 0.39999999999999997
Score = 0.65343 

strategy = 0.5
Score = 0.64480 

strategy = 0.6
Score = 0.62657 

strategy = 0.7
Score = 0.62310 

strategy = 0.7999999999999999
Score = 0.61376 

strategy = 0.8999999999999999
Score = 0.59960 

strategy = 1.0
Score = 0.58913 

time taken: 217.7218942642212
--------------------------------loss=hinge--------------------------------
strategy = 0.3
Score = 0.71870 

strategy = 0.39999999999999997
Score = 0.72415 

strate

## Random under-sampling for SGDClassifier

In [158]:
# Each loss is paired positionally with a scorer: the first two use auc_score
# (predict_proba), the remaining three use auc_score_2 (decision_function).
# NOTE: scikit-learn 1.1 renamed the 'log' loss to 'log_loss'; adjust to the installed version.
loss_parameters = ['log', 'modified_huber','hinge','squared_hinge','perceptron']
scoring_parameters = [auc_score] *2 + [auc_score_2] *3
strategy_parameters = np.linspace(0.3,1.,8)

# The splitter is seeded, so one instance yields the same folds for every run.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=42)

t0=time.time()
for loss,scoring in zip(loss_parameters,scoring_parameters):
    t1=time.time()
    print('----'*8+f'loss={loss}'+'----'*8)

    for strategy in strategy_parameters:
        print('strategy =',strategy)
        scaler = StandardScaler()
        # Keyword form: positional sampler arguments are deprecated/rejected by
        # newer imbalanced-learn releases.
        under = RandomUnderSampler(sampling_strategy=strategy, random_state=42)
        model = SGDClassifier(loss=loss,n_jobs=3, random_state=42,fit_intercept=False)
        # Step is labelled 'under' so the pipeline describes what it actually does.
        pipe = imPipeline([('scaler',scaler),
                           ('under',under),
                           ('model',model)])

        scores = cross_val_score(pipe, X_train, y_train, scoring=scoring, cv=cv, n_jobs=3)
        score = np.mean(scores)
        print(f'Score = {score:.5f} \n')

    print('time taken:',time.time() - t1)

print('\ntotal time taken:',time.time()-t0)

--------------------------------loss=log--------------------------------
strategy = 0.3
Score = 0.66440 

strategy = 0.39999999999999997
Score = 0.64825 

strategy = 0.5
Score = 0.61886 

strategy = 0.6
Score = 0.58150 

strategy = 0.7
Score = 0.55335 

strategy = 0.7999999999999999
Score = 0.55124 

strategy = 0.8999999999999999
Score = 0.52618 

strategy = 1.0
Score = 0.52316 

time taken: 22.402328968048096
--------------------------------loss=modified_huber--------------------------------
strategy = 0.3
Score = 0.44597 

strategy = 0.39999999999999997
Score = 0.46043 

strategy = 0.5
Score = 0.46685 

strategy = 0.6
Score = 0.46754 

strategy = 0.7
Score = 0.46936 

strategy = 0.7999999999999999
Score = 0.47247 

strategy = 0.8999999999999999
Score = 0.47135 

strategy = 1.0
Score = 0.47658 

time taken: 21.572594165802002
--------------------------------loss=hinge--------------------------------
strategy = 0.3
Score = 0.70537 

strategy = 0.39999999999999997
Score = 0.70902 

stra

## SMOTE over-sampling for SGDClassifier

In [151]:
# Each loss is paired positionally with a scorer: the first two use auc_score
# (predict_proba), the remaining three use auc_score_2 (decision_function).
# NOTE: scikit-learn 1.1 renamed the 'log' loss to 'log_loss'; adjust to the installed version.
loss_parameters = ['log', 'modified_huber','hinge','squared_hinge','perceptron']
scoring_parameters = [auc_score] *2 + [auc_score_2] *3
strategy_parameters = np.linspace(0.1,1.,10)

# The splitter is seeded, so one instance yields the same folds for every run.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=42)

t0=time.time()
for loss,scoring in zip(loss_parameters,scoring_parameters):
    t1=time.time()
    print('----'*8+f'loss={loss}'+'----'*8)

    for strategy in strategy_parameters:
        print('Strategy =',strategy)
        scaler = StandardScaler()
        # Keyword form: positional sampler arguments are deprecated/rejected by
        # newer imbalanced-learn releases.
        over = SMOTE(sampling_strategy=strategy, random_state=42)
        model = SGDClassifier(loss=loss,n_jobs=3, random_state=42,fit_intercept=False)
        pipe = imPipeline([('scaler',scaler),
                           ('over',over),
                           ('model',model)])

        scores = cross_val_score(pipe, X_train, y_train, scoring=scoring, cv=cv, n_jobs=3)
        score = np.mean(scores)
        print(f'Score = {score:.5f} \n')

    print('time taken:',time.time() - t1)

print('\ntotal time taken:',time.time()-t0)

--------------------------------loss=log--------------------------------
strategy = 0.1
Score = 0.76102 

strategy = 0.2
Score = 0.76620 

strategy = 0.30000000000000004
Score = 0.77175 

strategy = 0.4
Score = 0.77095 

strategy = 0.5
Score = 0.77357 

strategy = 0.6
Score = 0.77303 

strategy = 0.7000000000000001
Score = 0.77497 

strategy = 0.8
Score = 0.77328 

strategy = 0.9
Score = 0.77551 

strategy = 1.0
Score = 0.77533 

time taken: 140.7603189945221
--------------------------------loss=modified_huber--------------------------------
strategy = 0.1
Score = 0.70968 

strategy = 0.2
Score = 0.69575 

strategy = 0.30000000000000004
Score = 0.67188 

strategy = 0.4
Score = 0.65290 

strategy = 0.5
Score = 0.64845 

strategy = 0.6
Score = 0.62877 

strategy = 0.7000000000000001
Score = 0.63176 

strategy = 0.8
Score = 0.61291 

strategy = 0.9
Score = 0.58748 

strategy = 1.0
Score = 0.57110 

time taken: 244.32001876831055
--------------------------------loss=hinge------------------

In [24]:
# Insertion order matters: the comparison cells zip these classifiers with a
# positional list of scorers.
dict_classifiers = {}
dict_classifiers['KNeighbors'] = KNeighborsClassifier(n_jobs=3)
dict_classifiers['Random Forest'] = RandomForestClassifier(random_state=42, n_jobs=3)
dict_classifiers['Logistic'] = LogisticRegression(solver='liblinear')
dict_classifiers['Stochastic GD'] = SGDClassifier(loss='log', n_jobs=3, random_state=42, fit_intercept=False)
dict_classifiers['LinearSVM'] = LinearSVC(dual=False)

# Parallel lists of display names and estimator instances.
classifiers_names = [*dict_classifiers]
classifiers_values = [*dict_classifiers.values()]

## SMOTE for multiple classifiers

In [174]:
# Scorers are paired positionally with the classifiers: the first four are scored
# with auc_score (predict_proba), the last one with auc_score_2 (decision_function).
scoring_parameters = [auc_score] *4 + [auc_score_2]
strategy_parameters = np.linspace(0.1,1.,10)[2:]

# The splitter is seeded, so one instance yields the same folds for every run.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=42)

t0=time.time()

for strategy in strategy_parameters:
    print('----'*8+f'Strategy={strategy}'+'----'*8)
    t1=time.time()
    for name,model,scoring in zip(classifiers_names,classifiers_values,scoring_parameters):
        print(f'Classifier: {name}')
        scaler = StandardScaler()
        # Keyword form: positional sampler arguments are deprecated/rejected by
        # newer imbalanced-learn releases.
        over = SMOTE(sampling_strategy=strategy, random_state=42)
        pipe = imPipeline([('scaler',scaler),
                           ('over',over),
                           ('model',model)])

        scores = cross_val_score(pipe, X_train, y_train, scoring=scoring, cv=cv, n_jobs=3)
        score = np.mean(scores)
        print(f'Score = {score:.5f} \n')

    print('time taken:',time.time() - t1)

print('\ntotal time taken:',time.time()-t0)

--------------------------------Strategy=0.30000000000000004--------------------------------
Classifier: Random Forest
Score = 0.83840 

Classifier: Logistic
Score = 0.73420 

Classifier: KNeighbors
Score = 0.74557 

Classifier: Stochastic GD
Score = 0.76967 

Classifier: LinearSVM
Score = 0.72440 

time taken: 1925.9603674411774
--------------------------------Strategy=0.4--------------------------------
Classifier: Random Forest
Score = 0.83643 

Classifier: Logistic
Score = 0.73803 

Classifier: KNeighbors
Score = 0.74171 

Classifier: Stochastic GD
Score = 0.77289 

Classifier: LinearSVM
Score = 0.71831 

time taken: 2031.151977300644
--------------------------------Strategy=0.5--------------------------------
Classifier: Random Forest
Score = 0.83560 

Classifier: Logistic
Score = 0.74318 

Classifier: KNeighbors
Score = 0.73982 

Classifier: Stochastic GD
Score = 0.77366 

Classifier: LinearSVM
Score = 0.71437 

time taken: 1985.008231639862
--------------------------------Strate



Score = 0.83523 

Classifier: Logistic
Score = 0.75408 

Classifier: KNeighbors
Score = 0.73665 

Classifier: Stochastic GD
Score = 0.77217 

Classifier: LinearSVM
Score = 0.70665 

time taken: 1797.7109515666962
--------------------------------Strategy=0.9--------------------------------
Classifier: Random Forest




Score = 0.83332 

Classifier: Logistic
Score = 0.75722 

Classifier: KNeighbors
Score = 0.73568 

Classifier: Stochastic GD
Score = 0.77369 

Classifier: LinearSVM
Score = 0.70563 

time taken: 1868.0858430862427
--------------------------------Strategy=1.0--------------------------------
Classifier: Random Forest




Score = 0.83155 

Classifier: Logistic
Score = 0.75896 

Classifier: KNeighbors
Score = 0.73507 

Classifier: Stochastic GD
Score = 0.77259 

Classifier: LinearSVM
Score = 0.70482 

time taken: 1943.1592214107513

total time taken: 15287.715933799744


## SMOTE for multiple classifiers on a feature-selected subset of columns

In [12]:
# Scorers are paired positionally with the classifiers: the first four are scored
# with auc_score (predict_proba), the last one with auc_score_2 (decision_function).
scoring_parameters = [auc_score] *4 + [auc_score_2]
strategy_parameters = np.linspace(0.1,1.,10)

t0=time.time()

# The feature subset and the seeded split do not depend on the sampling strategy,
# so they are built once instead of on every loop iteration.
# NOTE: this rebinds X / X_train / X_test / y_train / y_test used by earlier cells;
# re-running those cells afterwards will see the reduced feature set.
cols = ['V1', 'V4', 'V9', 'V10', 'V14', 'V16', 'V17', 'V20', 'V21', 'V27','V28']
X = df[cols]
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.33,random_state=42)

# The splitter is seeded, so one instance yields the same folds for every run.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=42)

for strategy in strategy_parameters:
    print('----'*8+f'Strategy={strategy}'+'----'*8)
    t1=time.time()
    for name,model,scoring in zip(classifiers_names,classifiers_values,scoring_parameters):
        print(f'Classifier: {name}')
        scaler = StandardScaler()
        # Keyword form: positional sampler arguments are deprecated/rejected by
        # newer imbalanced-learn releases.
        over = SMOTE(sampling_strategy=strategy, random_state=42)
        pipe = imPipeline([('scaler',scaler),
                           ('over',over),
                           ('model',model)])

        scores = cross_val_score(pipe, X_train, y_train, scoring=scoring, cv=cv, n_jobs=3)
        score = np.mean(scores)
        print(f'Score = {score:.5f} \n')

    print('time taken:',time.time() - t1)

print('\ntotal time taken:',time.time()-t0)

--------------------------------Strategy=0.30000000000000004--------------------------------
Classifier: Random Forest
Score = 0.83377 

Classifier: Logistic
Score = 0.72019 

Classifier: KNeighbors
Score = 0.68452 

Classifier: Stochastic GD
Score = 0.67590 

Classifier: LinearSVM
Score = 0.72705 

time taken: 427.9682981967926
--------------------------------Strategy=0.4--------------------------------
Classifier: Random Forest




Score = 0.83117 

Classifier: Logistic
Score = 0.71045 

Classifier: KNeighbors
Score = 0.67750 

Classifier: Stochastic GD
Score = 0.68140 

Classifier: LinearSVM
Score = 0.71828 

time taken: 459.96603441238403
--------------------------------Strategy=0.5--------------------------------
Classifier: Random Forest




Score = 0.82934 

Classifier: Logistic
Score = 0.70139 

Classifier: KNeighbors
Score = 0.67184 

Classifier: Stochastic GD
Score = 0.67475 

Classifier: LinearSVM
Score = 0.71100 

time taken: 478.4833297729492
--------------------------------Strategy=0.6--------------------------------
Classifier: Random Forest




Score = 0.82918 

Classifier: Logistic
Score = 0.69212 

Classifier: KNeighbors
Score = 0.66957 

Classifier: Stochastic GD
Score = 0.67217 

Classifier: LinearSVM
Score = 0.70396 

time taken: 503.93258333206177
--------------------------------Strategy=0.7000000000000001--------------------------------
Classifier: Random Forest




Score = 0.82696 

Classifier: Logistic
Score = 0.68469 

Classifier: KNeighbors
Score = 0.66599 

Classifier: Stochastic GD
Score = 0.68200 

Classifier: LinearSVM
Score = 0.69706 

time taken: 519.141352891922
--------------------------------Strategy=0.8--------------------------------
Classifier: Random Forest
Score = 0.82637 

Classifier: Logistic
Score = 0.67856 

Classifier: KNeighbors
Score = 0.66587 

Classifier: Stochastic GD
Score = 0.66484 

Classifier: LinearSVM
Score = 0.69176 

time taken: 515.7370555400848
--------------------------------Strategy=0.9--------------------------------
Classifier: Random Forest




Score = 0.82547 

Classifier: Logistic
Score = 0.67302 

Classifier: KNeighbors
Score = 0.66405 

Classifier: Stochastic GD
Score = 0.68791 

Classifier: LinearSVM
Score = 0.68660 

time taken: 552.740181684494
--------------------------------Strategy=1.0--------------------------------
Classifier: Random Forest




Score = 0.82482 

Classifier: Logistic
Score = 0.66845 

Classifier: KNeighbors
Score = 0.66309 

Classifier: Stochastic GD
Score = 0.67077 

Classifier: LinearSVM
Score = 0.68216 

time taken: 589.2217519283295

total time taken: 4047.192403078079
