In [29]:
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function

import numpy as np
import scipy
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
%matplotlib inline

In [7]:
X_50_open = np.load('./Np_files/open_50.npy')
X_25_open = np.load('./Np_files/open_25.npy')
X_13_open = np.load('./Np_files/open_25.npy')
open_target = np.ones(X_50_open.shape[0])

X_50_close = np.load('./Np_files/close_50.npy')
X_25_close = np.load('./Np_files/close_25.npy')
X_13_close = np.load('./Np_files/close_25.npy')
close_target = np.zeros(X_50_close.shape[0])

targets = np.concatenate((open_target, close_target))

X_50 = np.concatenate((X_50_open, X_50_close))
X_25 = np.concatenate((X_25_open, X_25_close))
X_13 = np.concatenate((X_13_open, X_13_close))

In [8]:
from sklearn.model_selection import train_test_split

X_50_train, X_50_test, y_50_train, y_50_test = train_test_split(X_50, targets)
X_25_train, X_25_test, y_25_train, y_25_test = train_test_split(X_25, targets)
X_13_train, X_13_test, y_13_train, y_13_test = train_test_split(X_13, targets)

# Logistic Regression

In [14]:
clf = LogisticRegression()
params = {'penalty':['l1', 'l2'], 'C':np.arange(1,10)}
cv = GridSearchCV(clf, params)
cv.fit(X_50_train, y_50_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_50_test, y_50_test))

0.777594728171


In [15]:
clf = LogisticRegression()
params = {'penalty':['l1', 'l2'], 'C':np.arange(1,10)}
cv = GridSearchCV(clf, params)
cv.fit(X_25_train, y_25_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_25_test, y_25_test))

0.831960461285


In [16]:
clf = LogisticRegression()
params = {'penalty':['l1', 'l2'], 'C':np.arange(1,10)}
cv = GridSearchCV(clf, params)
cv.fit(X_13_train, y_13_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_13_test, y_13_test))

0.846787479407


# Adaboost

In [22]:
clf = AdaBoostClassifier()
params = {'n_estimators':np.arange(50,100,10)}
cv = GridSearchCV(clf, params)
cv.fit(X_50_train, y_50_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_50_test, y_50_test))

0.771004942339


In [23]:
clf = AdaBoostClassifier()
params = {'n_estimators':np.arange(50,100,10)}
cv = GridSearchCV(clf, params)
cv.fit(X_25_train, y_25_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_25_test, y_25_test))

0.812191103789


In [24]:
clf = AdaBoostClassifier()
params = {'n_estimators':np.arange(50,100,10)}
cv = GridSearchCV(clf, params)
cv.fit(X_13_train, y_13_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_13_test, y_13_test))

Final Accuracy:  0.823723228995


# SVM

In [26]:
clf = SVC()
params = {'C':np.arange(1,2)}#, 'kernel':['rbf', 'poly', 'sigmoid', 'linear']}
cv = GridSearchCV(clf, params)
cv.fit(X_50_train, y_50_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_50_test, y_50_test))

Final Accuracy:  0.528830313015


In [27]:
clf = SVC()
params = {'C':np.arange(1,5)}#, 'kernel':['rbf', 'poly', 'sigmoid', 'linear']}
cv = GridSearchCV(clf, params)
cv.fit(X_25_train, y_25_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_25_test, y_25_test))

Final Accuracy:  0.494233937397


In [28]:
clf = SVC()
params = {'C':np.arange(1,5)}#, 'kernel':['rbf', 'poly', 'sigmoid', 'linear']}
cv = GridSearchCV(clf, params)
cv.fit(X_13_train, y_13_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_13_test, y_13_test))

Final Accuracy:  0.510708401977


# GradientBoosting

In [31]:
clf = GradientBoostingClassifier()
params = {'n_estimators':[100]}#, 'kernel':['rbf', 'poly', 'sigmoid', 'linear']}
cv = GridSearchCV(clf, params)
cv.fit(X_50_train, y_50_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_50_test, y_50_test))

Final Accuracy:  0.850082372323


In [32]:
clf = GradientBoostingClassifier()
params = {'n_estimators':[100]}#, 'kernel':['rbf', 'poly', 'sigmoid', 'linear']}
cv = GridSearchCV(clf, params)
cv.fit(X_25_train, y_25_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_25_test, y_25_test))

Final Accuracy:  0.859967051071


In [None]:
clf = GradientBoostingClassifier()
params = {'n_estimators':[200]}#, 'kernel':['rbf', 'poly', 'sigmoid', 'linear']}
cv = GridSearchCV(clf, params)
cv.fit(X_13_train, y_13_train)
best_clf = cv.best_estimator_
print('Final Accuracy: ',best_clf.score(X_13_test, y_13_test))