# Stochastic Gradient Descent Classification

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# yahoo finance is used to fetch data 
import yfinance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2014-01-01'
end = '2018-08-27'

# Read data 
dataset = yf.download(symbol,start,end)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,3.85,3.98,3.84,3.95,3.95,20548400
2014-01-03,3.98,4.0,3.88,4.0,4.0,22887200
2014-01-06,4.01,4.18,3.99,4.13,4.13,42398300
2014-01-07,4.19,4.25,4.11,4.18,4.18,42932100
2014-01-08,4.23,4.26,4.14,4.18,4.18,30678700


In [3]:
dataset = dataset.reset_index()

In [4]:
dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],'Increase','Decrease')
dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)
dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)
dataset['Return'] = dataset['Adj Close'].pct_change()
dataset = dataset.dropna()
dataset.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Increase_Decrease,Buy_Sell_on_Open,Buy_Sell,Return
1,2014-01-03,3.98,4.0,3.88,4.0,4.0,22887200,Increase,1,1,0.012658
2,2014-01-06,4.01,4.18,3.99,4.13,4.13,42398300,Increase,1,1,0.0325
3,2014-01-07,4.19,4.25,4.11,4.18,4.18,42932100,Decrease,1,0,0.012106
4,2014-01-08,4.23,4.26,4.14,4.18,4.18,30678700,Decrease,0,0,0.0
5,2014-01-09,4.2,4.23,4.05,4.09,4.09,30667600,Decrease,0,1,-0.021531


In [5]:
dataset.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Increase_Decrease,Buy_Sell_on_Open,Buy_Sell,Return
1166,2018-08-20,19.790001,20.08,19.35,19.98,19.98,62983200,Decrease,1,1,0.010622
1167,2018-08-21,19.98,20.42,19.860001,20.4,20.4,55629000,Increase,1,1,0.021021
1168,2018-08-22,20.280001,20.92,20.209999,20.9,20.9,62002700,Increase,1,1,0.02451
1169,2018-08-23,21.190001,22.32,21.139999,22.290001,22.290001,113444100,Increase,1,1,0.066507
1170,2018-08-24,22.91,24.0,22.67,23.98,23.98,164328200,Decrease,0,0,0.075819


In [6]:
dataset.shape

(1170, 11)

In [7]:
from sklearn.linear_model import SGDClassifier

In [8]:
X = np.array(dataset['Adj Close']).reshape(1170,-1)
Y = np.array(dataset['Buy_Sell']).reshape(1170,-1)

In [9]:
sgd = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
sgd.fit(X, Y)

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=5, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False)

In [10]:
sgd.coef_

array([[-6.05928235]])

In [11]:
sgd.intercept_ 

array([-6.65650695])

In [12]:
sgd.predict(Y)

array([0, 0, 0, ..., 0, 0, 0])

In [13]:
clf = SGDClassifier(loss="log", max_iter=5).fit(X, Y)
clf.predict_proba(Y)

array([[9.97605461e-01, 2.39453929e-03],
       [9.97605461e-01, 2.39453929e-03],
       [9.99859984e-01, 1.40015727e-04],
       ...,
       [9.97605461e-01, 2.39453929e-03],
       [9.97605461e-01, 2.39453929e-03],
       [9.99859984e-01, 1.40015727e-04]])

In [14]:
from sklearn import model_selection # Binary Number

kfold = model_selection.KFold(n_splits=10, random_state=7, shuffle=True)
results = model_selection.cross_val_score(clf, X, Y, cv=kfold, scoring='accuracy')
print("Accuracy: %.3f (%.3f)" % (results.mean(), results.std()))

Accuracy: 0.491 (0.026)


In [15]:
results = model_selection.cross_val_score(clf, X, Y, cv=kfold, scoring='neg_log_loss')
print("Logloss: %.3f (%.3f)" % (results.mean(), results.std()))

Logloss: -12.688 (3.612)


In [16]:
results = model_selection.cross_val_score(clf, X, Y, cv=kfold, scoring='roc_auc')
print("AUC: %.3f (%.3f)" % (results.mean(), results.std()))

AUC: 0.484 (0.083)


In [19]:
results = model_selection.cross_val_score(clf, X, Y, cv=kfold, scoring='neg_mean_absolute_error')
print("MAE: %.3f (%.3f)" % (results.mean(), results.std()))

MAE: -0.513 (0.019)


In [20]:
results = model_selection.cross_val_score(clf, X, Y, cv=kfold, scoring='neg_mean_squared_error')
print("MSE: %.3f (%.3f)" % (results.mean(), results.std()))

MSE: -0.503 (0.022)


In [21]:
results = model_selection.cross_val_score(clf, X, Y, cv=kfold, scoring='r2')
print("R^2: %.3f (%.3f)" % (results.mean(), results.std()))

R^2: -0.967 (0.096)


In [22]:
from sklearn.metrics import r2_score # Continous and Binary
r2_score(X,Y)

-1.7917824267171234