In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

In [21]:
data = pd.read_csv('clean_data.csv', sep='\t')
data = data[['Date','HGS','HGA','HYC','HRC','HWW','AGS','AGA','AYC','ARC','AWW','watch']]
data = data.reindex(index=data.index[::-1])
data.reset_index(inplace=True)
data['Date'] =  pd.to_datetime(data['Date'])
data[['HGS','HGA','HYC',
      'HRC','HWW','AGS',
      'AGA','AYC','ARC',
      'AWW','watch']] = data[['HGS','HGA',
                              'HYC','HRC',
                              'HWW','AGS',
                              'AGA','AYC',
                              'ARC','AWW',
                              'watch']].apply(pd.to_numeric)
data.drop('index',axis=1,inplace=True)

In [22]:
recent = data[data['Date'] > '2010-08-01']
recent = data[data['Date'] < '2018-05-13']

In [23]:
X = recent[['HGS','HGA','HYC',
      'HRC','HWW','AGS',
      'AGA','AYC','ARC',
      'AWW']]
y = recent['watch']

In [19]:
X_train, X_vt, y_train, y_vt = train_test_split(X, y, test_size=0.3, random_state=101)
X_validate, X_test, y_validate, y_test = train_test_split(X_vt, y_vt, test_size=0.5, random_state=101)

In [43]:
dtc_clf = DecisionTreeClassifier(random_state=101)
rnd_clf = RandomForestClassifier(random_state=101, 
                                 n_estimators=100)
svm_clf = SVC(random_state=101, 
              gamma=1, 
              C=100, 
              kernel='rbf')

In [44]:
voting_clf = VotingClassifier(
    estimators=[('dt', dtc_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard')

In [45]:
for clf in (dtc_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_validate)
    print(clf.__class__.__name__)
    cf = confusion_matrix(y_validate, y_pred)
    print(cf)
    print(cf[1][1] / (cf[0][1] + cf[1][1]))

DecisionTreeClassifier
[[394 206]
 [217 119]]
0.36615384615384616
RandomForestClassifier
[[576  24]
 [322  14]]
0.3684210526315789
SVC
[[499 101]
 [276  60]]
0.37267080745341613
VotingClassifier
[[548  52]
 [303  33]]
0.38823529411764707


In [46]:
voting_clf.fit(X_train.append(X_validate), y_train.append(y_validate))
y_pred = voting_clf.predict(X_test)
cf = confusion_matrix(y_validate, y_pred)
print(cf)
print(cf[1][1] / (cf[0][1] + cf[1][1]))

[[537  63]
 [307  29]]
0.31521739130434784
