# Stock Prediction with Linear Regression Model

## Import Libraries

In [39]:
import pandas as pd
import numpy as np

In [109]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [42]:
import datetime
from pandas_datareader import data, wb

In [85]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
%matplotlib inline

In [86]:
import warnings
warnings.filterwarnings("ignore") 

## Collecting Data

In [118]:
start_date = datetime.datetime(2010, 1, 1)
end_date = datetime.datetime.today()         

#### Create DataFrame

In [119]:
SBI = data.DataReader("SBIN.NS", data_source='yahoo', start=start_date, end=end_date)
HDFC = data.DataReader("HDFCBANK.NS", data_source='yahoo', start=start_date, end=end_date)
AXIS = data.DataReader("AXISBANK.NS", data_source='yahoo', start=start_date, end=end_date)
BOB = data.DataReader("BANKBARODA.NS", data_source='yahoo', start=start_date, end=end_date)
PNB = data.DataReader("PNB.NS", data_source='yahoo', start=start_date, end=end_date)
ICICI = data.DataReader("ICICIBANK.NS", data_source='yahoo', start=start_date, end=end_date)
YESBANK = data.DataReader("YESBANK.NS", data_source='yahoo', start=start_date, end=end_date)
KOTAK = data.DataReader("KOTAKBANK.NS", data_source='yahoo', start=start_date, end=end_date)

## Getting Data Ready

In [120]:
# Get the Adj Close Price
sbi_df = SBI[["Adj Close"]]
hdfc_df = HDFC[["Adj Close"]]
axis_df = AXIS[["Adj Close"]]
bob_df = BOB[["Adj Close"]]
pnb_df = PNB[["Adj Close"]]
icici_df = ICICI[["Adj Close"]]
yesbank_df = YESBANK[["Adj Close"]]
kotak_df = KOTAK[["Adj Close"]]

In [121]:
# A variable for predicting 'n' days out into the future 
forecast_out = 30

# Create another column (the target or dependent variable) shifted 'n' units up
sbi_df['Prediction'] = sbi_df[['Adj Close']].shift(-forecast_out)
hdfc_df['Prediction'] = hdfc_df[['Adj Close']].shift(-forecast_out)
axis_df['Prediction'] = axis_df[['Adj Close']].shift(-forecast_out)
bob_df['Prediction'] = bob_df[['Adj Close']].shift(-forecast_out)
pnb_df['Prediction'] = pnb_df[['Adj Close']].shift(-forecast_out)
icici_df['Prediction'] = icici_df[['Adj Close']].shift(-forecast_out)
yesbank_df['Prediction'] = yesbank_df[['Adj Close']].shift(-forecast_out)
kotak_df['Prediction'] = kotak_df[['Adj Close']].shift(-forecast_out)

In [122]:
# List of the ticker symbols (as strings) in alphabetical order
tickers = 'SBI HDFCBANK AXISBANK BANKBARODA PNB ICICIBANK YESBANK KOTAKBANK'.split()
tickers.sort()

In [132]:
# Create a Single DataFrame by Concatenating all the above DataFrames
bank_stocks = pd.concat([axis_df, bob_df, hdfc_df, icici_df, kotak_df, pnb_df, sbi_df, yesbank_df], axis=1, keys=tickers)
bank_stocks = bank_stocks[:-30]
bank_stocks.head()

Unnamed: 0_level_0,AXISBANK,AXISBANK,BANKBARODA,BANKBARODA,HDFCBANK,HDFCBANK,ICICIBANK,ICICIBANK,KOTAKBANK,KOTAKBANK,PNB,PNB,SBI,SBI,YESBANK,YESBANK
Unnamed: 0_level_1,Adj Close,Prediction,Adj Close,Prediction,Adj Close,Prediction,Adj Close,Prediction,Adj Close,Prediction,Adj Close,Prediction,Adj Close,Prediction,Adj Close,Prediction
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2010-01-04,140.712021,151.377823,51.206989,57.293846,134.245209,130.329666,85.319977,81.561913,202.426041,186.522232,84.943443,86.630409,95.066895,85.587837,34.458496,31.044094
2010-01-05,143.647934,155.802994,52.686508,58.495327,134.363205,132.517624,86.130264,81.474457,204.430389,184.850494,86.255913,87.150787,95.10218,84.955849,34.965523,30.293184
2010-01-06,141.236801,155.235626,53.009216,56.6087,134.453751,133.316467,86.789574,80.844452,206.915863,179.573486,85.847481,86.114899,95.672699,83.333183,34.471333,29.439579
2010-01-07,142.236725,157.072403,53.326965,56.161865,134.80397,133.528961,85.970222,80.655098,209.240005,176.166992,84.668106,85.934959,95.137444,83.816475,33.887291,30.107061
2010-01-08,144.094727,155.753357,53.540455,56.971127,134.981079,133.875275,84.762535,82.22126,207.948059,173.558487,84.792007,85.910637,94.853218,83.621841,33.726841,30.312439


# Creating and Evaluating the Model

In [136]:
lr_accuracy = {}

for i in range(len(tickers)):
    
    bank = bank_stocks[tickers[i]]
    
    # X and y
    X = np.array(bank.drop(['Prediction'], axis=1))
    y = np.array(bank["Prediction"])
    
    # Training and Testing Data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    
    # Instantiate Linear Regression
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    
    # Evaluate Linear Regression
    lr_confidence = lr.score(X_test, y_test)
    lr_accuracy[tickers[i]] = lr_confidence
    print(tickers[i])
    print("Linear Regression Confidence: {:.3f}%".format(lr_confidence*100))
    print()

AXISBANK
Linear Regression Confidence: 94.211%

BANKBARODA
Linear Regression Confidence: 84.805%

HDFCBANK
Linear Regression Confidence: 97.071%

ICICIBANK
Linear Regression Confidence: 93.494%

KOTAKBANK
Linear Regression Confidence: 97.481%

PNB
Linear Regression Confidence: 81.958%

SBI
Linear Regression Confidence: 87.380%

YESBANK
Linear Regression Confidence: 94.227%

