In [1]:
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [4]:
# Load the Weekly data set; index_col=0 makes the first CSV column the row index.
weekly = pd.read_csv(
    "../datasets/Weekly.csv",
    index_col=0,
)

In [7]:
# Encode the response as integers: 'Up' -> 1, 'Down' -> 0.
# .map() is used rather than .replace(): swapping strings for ints via
# .replace() relies on implicit object->int downcasting, which pandas >= 2.2
# deprecates (FutureWarning). The explicit astype("int64") raises if any label
# was left unmapped (NaN) instead of silently producing a float target.
y = weekly.Direction.map({'Up': 1, 'Down': 0}).astype("int64").to_numpy()

# Predictors are selected by position: columns 1 through 6 of `weekly`.
X = weekly.iloc[:, 1:7].to_numpy()

# Fit a logistic regression on the full data set and show its coefficients
# (one per predictor column, in the order selected above).
clf_b = LogisticRegression(solver='lbfgs')
clf_b.fit(X, y)
print("Coef:{}".format(clf_b.coef_))
# NOTE(review): this conclusion is hard-coded, presumably read off the
# largest-magnitude coefficient printed above; no significance test is run
# in this cell -- confirm before relying on it.
print("The most significant predictor is Lag2")

Coef:[[-0.04123854  0.05840384 -0.01605138 -0.02776243 -0.01446302 -0.02270963]]
The most significant predictor is Lag2


In [10]:
# In-sample predictions from clf_b, the logistic regression fitted on (X, y).
# Using clf_b explicitly keeps this cell runnable on a fresh kernel; no
# estimator named `clf` is defined in the cells shown in this notebook.
y_pred = clf_b.predict(X)

# Confusion matrix: rows are the true classes, columns the predicted classes.
matrix = confusion_matrix(y, y_pred)
matrix

array([[ 54, 430],
       [ 48, 557]])

In [11]:
def summary(clf, y_true, y_pred):
    """Print the Down misclassification rate and the overall accuracy.

    Parameters
    ----------
    clf : object
        Not used by the function; kept so existing call sites keep working.
    y_true : array-like
        True class labels, expected to be encoded 0 (Down) / 1 (Up) as done
        when `y` is built earlier in this notebook.
    y_pred : array-like
        Predicted class labels, using the same encoding.

    Returns
    -------
    None
        The two percentages are printed, not returned.
    """
    # labels=[0, 1] pins row/column 0 to class 0 (Down) and row/column 1 to
    # class 1 (Up), even if one class happens to be absent from the inputs.
    matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # Row 0 holds the observations whose TRUE class is 0 (Down); matrix[0, 1]
    # counts those predicted as 1 (Up). The ratio is therefore the share of
    # Down observations that were mislabelled, not of Up observations.
    print("% of Down is mislabelled: {}".format(matrix[0,1]/matrix.sum(axis=1)[0]*100))
    # Diagonal entries are the correctly classified observations.
    print("% of correct prediction: {}".format(matrix.diagonal().sum()/matrix.sum()*100))

summary(clf_b, y, y_pred)

% of Up is mislabelled: 88.84297520661157
% of correct prediction: 56.10651974288338


In [19]:
# Boolean mask: True for rows whose Year is in 1990..2008 (training period);
# the remaining rows form the held-out test set.
train_idx = weekly["Year"].isin(range(1990, 2009))

# Single predictor, taken by position (column 2), split with the mask.
X_train, X_test = (
    weekly.loc[mask].iloc[:, [2]].to_numpy()
    for mask in (train_idx, ~train_idx)
)
y_train, y_test = y[train_idx], y[~train_idx]

# fit() returns the estimator itself, so construction and fitting are chained.
clf_d = LogisticRegression(solver='lbfgs').fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = clf_d.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[ 9, 34],
       [ 5, 56]])

In [20]:
# Print the summary percentages for the current y_pred against the held-out labels.
summary(clf_d, y_true=y_test, y_pred=y_pred)

% of Up is mislabelled: 79.06976744186046
% of correct prediction: 62.5


In [24]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Linear discriminant analysis on the same train/test split;
# fit() returns the fitted estimator, so it is chained onto the constructor.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)

# Confusion matrix on the held-out rows (rows: true class, columns: predicted).
y_pred = lda.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[ 9, 34],
       [ 5, 56]])

In [26]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Quadratic discriminant analysis on the same train/test split;
# fit() returns the fitted estimator, so it is chained onto the constructor.
qda = QuadraticDiscriminantAnalysis().fit(X_train, y_train)

# Confusion matrix on the held-out rows (rows: true class, columns: predicted).
y_pred = qda.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[ 0, 43],
       [ 0, 61]])

In [27]:
from sklearn.neighbors import KNeighborsClassifier

# Nearest-neighbour classifier with a single neighbour (n_neighbors=1) on the
# same train/test split; fit() returns the fitted estimator, so it is chained.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Confusion matrix on the held-out rows (rows: true class, columns: predicted).
y_pred = knn.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[21, 22],
       [31, 30]])