In [6]:
import pandas as pd
import numpy as np

# Notebook display config: a limit of None tells pandas not to truncate
# the columns or rows of any DataFrame rendered below.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
from sklearn.preprocessing import LabelEncoder

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import OneClassSVM

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, cross_val_score

In [7]:
# Single StandardScaler instance shared by the data-preparation cells below.
scaler = StandardScaler()

In [8]:
testing_data = pd.read_csv('data/with_indicators_testing.csv')

testing_data_features = testing_data.drop('class', axis=1)
testing_data_labels = testing_data['class']

testing_data_features = scaler.fit_transform(testing_data_features)

In [9]:
training = pd.read_csv('data/with_indicators_training.csv')
training_features = training.drop('class', axis=1)
training_labels = training['class']

# scale the features 
training_features = scaler.fit_transform(training_features)

In [12]:
%%time

# uses hyperparams from tuning notebook
svm_model = SVC(C=10, gamma=0.1, kernel='rbf')

svm_model.fit(training_features, training_labels)

svm_predicted_labels = svm_model.predict(testing_data_features)
 
accuracy = accuracy_score(testing_data_labels, svm_predicted_labels)
precision = precision_score(testing_data_labels, svm_predicted_labels)
f1 = f1_score(testing_data_labels, svm_predicted_labels)
auc = roc_auc_score(testing_data_labels, svm_predicted_labels)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"F1 Score: {f1}")
print(f"AUC Score: {auc}")

Accuracy: 0.993
Precision: 0.9930222416048844
F1 Score: 0.9923730660274569
AUC Score: 0.9929037894563223
CPU times: total: 203 ms
Wall time: 199 ms


In [11]:
%%time

knn_model = KNeighborsClassifier(n_neighbors=3, weights='distance')

knn_model.fit(training_features, training_labels)

knn_predicted_labels = knn_model.predict(testing_data_features)

knn_accuracy = accuracy_score(testing_data_labels, knn_predicted_labels)
knn_precision = precision_score(testing_data_labels, knn_predicted_labels)
knn_f1 = f1_score(testing_data_labels, knn_predicted_labels)
knn_auc = roc_auc_score(testing_data_labels, knn_predicted_labels)

print(f"Accuracy: {knn_accuracy}")
print(f"Precision: {knn_precision}")
print(f"F1 Score: {knn_f1}")
print(f"AUC Score: {knn_auc}")

Accuracy: 0.9924
Precision: 0.9912967798085291
F1 Score: 0.9917283413147584
AUC Score: 0.9923819145207512
CPU times: total: 1.95 s
Wall time: 121 ms


In [13]:
%%time

dt_model = DecisionTreeClassifier(random_state=42,
                            criterion='gini',
                            max_depth=9,
                            min_samples_split=10,
                            min_samples_leaf=1)

dt_model.fit(training_features, training_labels)

dt_predicted_labels = dt_model.predict(testing_data_features)

dt_accuracy = accuracy_score(testing_data_labels, dt_predicted_labels)
dt_precision = precision_score(testing_data_labels, dt_predicted_labels)
dt_f1 = f1_score(testing_data_labels, dt_predicted_labels)
dt_auc = roc_auc_score(testing_data_labels, dt_predicted_labels)

print(f"Accuracy: {dt_accuracy}")
print(f"Precision: {dt_precision}")
print(f"F1 Score: {dt_f1}")
print(f"AUC Score: {dt_auc}")

Accuracy: 0.981
Precision: 0.968098681412165
F1 Score: 0.9795567032494082
AUC Score: 0.9817762561078697
CPU times: total: 15.6 ms
Wall time: 18.4 ms


In [14]:
%%time

rf_model = RandomForestClassifier(random_state=42,
                                  n_estimators= 300,
                                  max_depth=15,
                                  min_samples_split=2,
                                  min_samples_leaf=1,
                                  max_features='sqrt')

rf_model.fit(training_features, training_labels)

rf_predicted_labels = rf_model.predict(testing_data_features)

rf_accuracy = accuracy_score(testing_data_labels, rf_predicted_labels)
rf_precision = precision_score(testing_data_labels, rf_predicted_labels)
rf_f1 = f1_score(testing_data_labels, rf_predicted_labels)
rf_auc = roc_auc_score(testing_data_labels, rf_predicted_labels)

print(f"Accuracy: {rf_accuracy}")
print(f"Precision: {rf_precision}")
print(f"F1 Score: {rf_f1}")
print(f"AUC Score: {rf_auc}")

Accuracy: 0.9916
Precision: 0.9969135802469136
F1 Score: 0.99079754601227
AUC Score: 0.9910836700822631
CPU times: total: 938 ms
Wall time: 938 ms
