In [32]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
import time
import pickle

# Load the labels and the feature object from the .npy files in the working
# directory.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load
# files produced by a trusted step of this project.
Y = np.load("Y.npy", allow_pickle=True)

# .item() unwraps the single Python object stored in the saved array
# (presumably the TF-IDF feature matrix -- TODO confirm against the notebook
# that wrote X_tfid.npy). nan_to_num then replaces NaN/inf entries with finite
# numbers where the input is a floating-point array.
X_tfid = np.nan_to_num(np.load("X_tfid.npy", allow_pickle=True).item())

In [8]:
# Seed NumPy's global random number generator so stochastic steps are repeatable.
# seed must be CALLED: assigning to it (np.random.seed = 42) replaces the
# function with an int and seeds nothing.
np.random.seed(42)

# SVM

In [28]:
# Hyper-parameter grid for the SVM (a single candidate: RBF kernel, C=1, gamma=0.6).
C = [1]
gamma = [0.6]
kernel = ['rbf']
param_grid = dict(C=C, gamma=gamma, kernel=kernel)

# 5-fold cross-validated grid search over the grid above, using all CPU cores.
model = SVC()
CV = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=3)

# perf_counter is a monotonic clock intended for measuring elapsed intervals;
# stop the timer right after fit so only the search itself is measured.
start_time = time.perf_counter()
result = CV.fit(X_tfid, Y)
elapsed = time.perf_counter() - start_time

# Summarize results
print("Best: %f using %s" % (result.best_score_, result.best_params_))
# The elapsed value is in seconds (it was previously mislabelled as 'ms').
print(f"Execution time: {elapsed} s")

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  7.4min finished


Best: 0.613595 using {'C': 1, 'gamma': 0.6, 'kernel': 'rbf'}
Execution time: 551.9437947273254 ms


# KNN

In [21]:
# Hyper-parameter grid for k-nearest neighbours (a single candidate:
# 85 neighbours, uniform weights, automatic search algorithm).
n_neighbors = [85]
algorithm = ['auto']
weights = ['uniform']
param_grid = dict(n_neighbors=n_neighbors, algorithm=algorithm, weights=weights)

# 5-fold cross-validated grid search over the grid above, using all CPU cores.
model = KNeighborsClassifier()
CV = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)

# perf_counter is a monotonic clock intended for measuring elapsed intervals;
# stop the timer right after fit so only the search itself is measured.
start_time = time.perf_counter()
result = CV.fit(X_tfid, Y)
elapsed = time.perf_counter() - start_time

# Summarize results
print("Best: %f using %s" % (result.best_score_, result.best_params_))
# The elapsed value is in seconds (it was previously mislabelled as 'ms').
print(f"Execution time: {elapsed} s")

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Best: 0.527974 using {'algorithm': 'auto', 'n_neighbors': 85, 'weights': 'uniform'}
Execution time: 20.584548234939575 ms


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:   20.5s finished


# Random Forest

In [53]:
# Hyper-parameter grid for the random forest (a single candidate: 168 trees).
n_estimators = [168]

param_grid = dict(n_estimators=n_estimators)

# A random forest is stochastic (bootstrap samples, feature sub-sampling), so
# pin random_state on the estimator itself to make the CV score repeatable.
model = RandomForestClassifier(random_state=42)
# 5-fold cross-validated grid search over the grid above, using all CPU cores.
CV = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)

# perf_counter is a monotonic clock intended for measuring elapsed intervals;
# stop the timer right after fit so only the search itself is measured.
start_time = time.perf_counter()
result = CV.fit(X_tfid, Y)
elapsed = time.perf_counter() - start_time

# Summarize results
print("Best: %f using %s" % (result.best_score_, result.best_params_))
# The elapsed value is in seconds (it was previously mislabelled as 'ms').
print(f"Execution time: {elapsed} s")

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  4.6min finished


Best: 0.553399 using {'n_estimators': 168}
Execution time: 347.9830174446106 ms


# Logistic Regression

In [46]:
# Hyper-parameter grid for logistic regression (a single candidate:
# L2 penalty with C=2.6).
penalty = ['l2']
C = [2.6]

param_grid = dict(penalty=penalty, C=C)

# 5-fold cross-validated grid search over the grid above, using all CPU cores.
model = LogisticRegression()
CV = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)

# perf_counter is a monotonic clock intended for measuring elapsed intervals;
# stop the timer right after fit so only the search itself is measured.
start_time = time.perf_counter()
result = CV.fit(X_tfid, Y)
elapsed = time.perf_counter() - start_time

# Summarize results
print("Best: %f using %s" % (result.best_score_, result.best_params_))
# The elapsed value is in seconds (it was previously mislabelled as 'ms').
print(f"Execution time: {elapsed} s")

Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    3.8s finished


Best: 0.610327 using {'C': 2.6, 'penalty': 'l2'}
Execution time: 5.231013774871826 ms
