## Import Smarket data

In [None]:
import pandas as pd
# Read the Smarket CSV (path is relative to the notebook's working directory).
df = pd.read_csv('Smarket.csv')

# Bare last expression so the notebook renders the frame as a table.
df

Unnamed: 0,Year,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today,Direction
0,2001,0.381,-0.192,-2.624,-1.055,5.010,1.19130,0.959,Up
1,2001,0.959,0.381,-0.192,-2.624,-1.055,1.29650,1.032,Up
2,2001,1.032,0.959,0.381,-0.192,-2.624,1.41120,-0.623,Down
3,2001,-0.623,1.032,0.959,0.381,-0.192,1.27600,0.614,Up
4,2001,0.614,-0.623,1.032,0.959,0.381,1.20570,0.213,Up
...,...,...,...,...,...,...,...,...,...
1245,2005,0.422,0.252,-0.024,-0.584,-0.285,1.88850,0.043,Up
1246,2005,0.043,0.422,0.252,-0.024,-0.584,1.28581,-0.955,Down
1247,2005,-0.955,0.043,0.422,0.252,-0.024,1.54047,0.130,Up
1248,2005,0.130,-0.955,0.043,0.422,0.252,1.42236,-0.298,Down


## Check the Correlation Matrix

In [None]:
# Pairwise Pearson correlations between the numeric columns only.
# `Direction` holds the strings 'Up'/'Down'; pandas >= 2.0 no longer drops
# non-numeric columns inside `corr()` by default and raises instead, so the
# numeric columns are selected explicitly. `select_dtypes` is used rather than
# `corr(numeric_only=True)` because it also works on older pandas releases
# that do not accept that argument.
df.select_dtypes(include='number').corr()

Unnamed: 0,Year,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today
Year,1.0,0.0297,0.030596,0.033195,0.035689,0.029788,0.539006,0.030095
Lag1,0.0297,1.0,-0.026294,-0.010803,-0.002986,-0.005675,0.04091,-0.026155
Lag2,0.030596,-0.026294,1.0,-0.025897,-0.010854,-0.003558,-0.043383,-0.01025
Lag3,0.033195,-0.010803,-0.025897,1.0,-0.024051,-0.018808,-0.041824,-0.002448
Lag4,0.035689,-0.002986,-0.010854,-0.024051,1.0,-0.027084,-0.048414,-0.0069
Lag5,0.029788,-0.005675,-0.003558,-0.018808,-0.027084,1.0,-0.022002,-0.03486
Volume,0.539006,0.04091,-0.043383,-0.041824,-0.048414,-0.022002,1.0,0.014592
Today,0.030095,-0.026155,-0.01025,-0.002448,-0.0069,-0.03486,0.014592,1.0


## Encoding Target Variable

In [None]:
# Binary target: 'Up' -> 1, 'Down' -> 0 (any other label would map to NaN).
y = df['Direction'].map({'Down': 0, 'Up': 1})

# Predictors: the five lagged returns plus Volume. Year and Today are not
# used as predictors.
x = df.loc[:, ['Lag1', 'Lag2', 'Lag3', 'Lag4', 'Lag5', 'Volume']]

## Fit a Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with scikit-learn's default settings, fitted on the
# full data set (all rows of x / y).
reg = LogisticRegression()

# Kept as the last expression so the fitted estimator's repr is displayed.
reg.fit(X=x, y=y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Check the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
# In-sample predictions: the model is scored on the same rows it was fitted on.
y_pred = reg.predict(X=x)

# scikit-learn convention: rows are the true class, columns the predicted
# class, both ordered 0 (Down), 1 (Up).
confusion_matrix(y_true=y, y_pred=y_pred)

array([[144, 458],
       [141, 507]])

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
import numpy as np
# In-sample accuracy, precision and recall, with 'Up' (1) as the positive class.
print(accuracy_score(y_true=y, y_pred=y_pred))
print(precision_score(y_true=y, y_pred=y_pred))
print(recall_score(y_true=y, y_pred=y_pred))

# Share of 'Up' days in the data: the precision of always predicting 'Up'.
y.mean()

0.5208
0.5253886010362694
0.7824074074074074


0.5184

## Split the training and testing dataset

In [None]:
# Only the two most recent lags are used as predictors from here on.
features = ['Lag1', 'Lag2']

# Split by year: rows before 2005 are used for training, rows from 2005 are
# held out for testing.
train = df.loc[df['Year'] < 2005]
test = df.loc[df['Year'] == 2005]

# Predictor matrices.
x_train = train.loc[:, features]
x_test = test.loc[:, features]

# Binary targets: 'Up' -> 1, 'Down' -> 0.
y_train = train['Direction'].map({'Down': 0, 'Up': 1})
y_test = test['Direction'].map({'Down': 0, 'Up': 1})

In [None]:
# Fresh logistic regression fitted on the training split only. This rebinds
# `reg`, replacing the model fitted on the full data set earlier.
reg = LogisticRegression()

# Kept as the last expression so the fitted estimator's repr is displayed.
reg.fit(X=x_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
# Predictions on the held-out 2005 rows.
y_test_pred = reg.predict(X=x_test)

# Rows are the true class, columns the predicted class (0 = Down, 1 = Up).
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[ 35,  76],
       [ 35, 106]])

In [None]:
# Test-set precision for the 'Up' (1) class.
print(precision_score(y_true=y_test, y_pred=y_test_pred))

# Share of 'Up' days in the test set: the precision of always predicting 'Up'.
y_test.mean()

0.5824175824175825


0.5595238095238095

## Do the same thing with LDA

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier

# Unfitted classifiers; each is fitted on x_train / y_train in its own cell below.
LDA, QDA = LinearDiscriminantAnalysis(), QuadraticDiscriminantAnalysis()

# k-nearest neighbours with k = 3.
KNN = KNeighborsClassifier(n_neighbors=3)

In [None]:
# Fit LDA on the training split and predict the held-out 2005 rows
# (`fit` returns the estimator itself, so the calls can be chained).
y_test_pred = LDA.fit(x_train, y_train).predict(x_test)

# Confusion matrix (rows = true class, columns = predicted class), precision
# for the 'Up' class, and the share of 'Up' days in the test set as a baseline.
print(confusion_matrix(y_true=y_test, y_pred=y_test_pred))
print(precision_score(y_true=y_test, y_pred=y_test_pred))
print(y_test.mean())

[[ 35  76]
 [ 35 106]]
0.5824175824175825
0.5595238095238095


## Do the same thing with QDA

In [None]:
# Fit QDA on the training split and predict the held-out 2005 rows
# (`fit` returns the estimator itself, so the calls can be chained).
y_test_pred = QDA.fit(x_train, y_train).predict(x_test)

# Confusion matrix (rows = true class, columns = predicted class), precision
# for the 'Up' class, and the share of 'Up' days in the test set as a baseline.
print(confusion_matrix(y_true=y_test, y_pred=y_test_pred))
print(precision_score(y_true=y_test, y_pred=y_test_pred))
print(y_test.mean())

[[ 30  81]
 [ 20 121]]
0.599009900990099
0.5595238095238095


## Do the same thing with KNN

In [None]:
# Fit KNN on the training split and predict the held-out 2005 rows
# (`fit` returns the estimator itself, so the calls can be chained).
y_test_pred = KNN.fit(x_train, y_train).predict(x_test)

# Confusion matrix (rows = true class, columns = predicted class), precision
# for the 'Up' class, and the share of 'Up' days in the test set as a baseline.
print(confusion_matrix(y_true=y_test, y_pred=y_test_pred))
print(precision_score(y_true=y_test, y_pred=y_test_pred))
print(y_test.mean())

[[48 63]
 [55 86]]
0.5771812080536913
0.5595238095238095
