# Stepwise Regression Backward

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# yahoo finance is used to fetch data 
import yfinance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2014-01-01'
end = '2018-08-27'

# Read data 
dataset = yf.download(symbol,start,end)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,3.85,3.98,3.84,3.95,3.95,20548400
2014-01-03,3.98,4.0,3.88,4.0,4.0,22887200
2014-01-06,4.01,4.18,3.99,4.13,4.13,42398300
2014-01-07,4.19,4.25,4.11,4.18,4.18,42932100
2014-01-08,4.23,4.26,4.14,4.18,4.18,30678700


In [3]:
# Create more data
dataset['Increase/Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)
dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,-1)
dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,-1)
dataset['Return'] = dataset['Adj Close'].pct_change()
dataset = dataset.dropna()
dataset.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Increase/Decrease,Buy_Sell_on_Open,Buy_Sell,Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-03,3.98,4.0,3.88,4.0,4.0,22887200,1,1,1,0.012658
2014-01-06,4.01,4.18,3.99,4.13,4.13,42398300,1,1,1,0.0325
2014-01-07,4.19,4.25,4.11,4.18,4.18,42932100,0,1,-1,0.012107
2014-01-08,4.23,4.26,4.14,4.18,4.18,30678700,0,-1,-1,0.0
2014-01-09,4.2,4.23,4.05,4.09,4.09,30667600,0,-1,1,-0.021531


In [4]:
dataset.shape

(1171, 10)

In [7]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

In [10]:
print(X.shape)
print(y.shape)

(1171, 646)
(1171,)


In [11]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder = LabelEncoder()
X[:, 0] = labelencoder.fit_transform(X[:, 0])
onehotencoder = OneHotEncoder(categorical_features = [0])
X = onehotencoder.fit_transform(X).toarray()
 
# Avoiding the Dummy Variable Trap
X = X[:, 1:]
 
# Splitting the dataset into the Training set and Test set
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)
 
# Fitting Multiple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)
 
# Predicting the Test set results
y_pred = regressor.predict(X_test)

In [16]:
import statsmodels.formula.api as sm
X = np.append ( arr = np.ones([1171,1]).astype(int), values = X, axis = 1)

In [17]:
X_opt = X[:,[0,1,2,3,4,5]]
regressor_OLS = sm.OLS(endog = y, exog = X_opt).fit()
regressor_OLS.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,
Date:,"Sun, 14 Oct 2018",Prob (F-statistic):,
Time:,20:36:48,Log-Likelihood:,-3519.4
No. Observations:,1171,AIC:,7041.0
Df Residuals:,1170,BIC:,7046.0
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.1697,0.024,49.127,0.000,1.123,1.216
x1,1.1697,0.024,49.127,0.000,1.123,1.216
x2,1.1697,0.024,49.127,0.000,1.123,1.216
x3,1.1697,0.024,49.127,0.000,1.123,1.216
x4,1.1697,0.024,49.127,0.000,1.123,1.216
x5,1.1697,0.024,49.127,0.000,1.123,1.216

0,1,2,3
Omnibus:,137.68,Durbin-Watson:,0.004
Prob(Omnibus):,0.0,Jarque-Bera (JB):,115.619
Skew:,0.684,Prob(JB):,7.830000000000001e-26
Kurtosis:,2.296,Cond. No.,1.6e+83


In [21]:
X_Opt = X[:,[0,1,3,4,5]]

In [22]:
# Splitting the dataset into the Training set and Test set
X_opt_train, X_opt_test, y_opt_train, y_opt_test = train_test_split(X_Opt, y, test_size = 1/3, random_state = 0)
regressor_opt = LinearRegression()
regressor_opt.fit(X_opt_train, y_opt_train)
 
y_opt_pred = regressor_opt.predict(X_opt_test)

In [23]:
y_opt_pred

array([6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718, 6.9558718,
       6.9558718, 6.9558718, 6.9558718, 6.9558718, 