## Stock Prediction Program

This program predicts stock prices using machine learning algorithms.

In [1]:
pip install sklearn

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install quandl

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Importing the dependencies used in this project
import quandl as ql
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [4]:
# Fetch the "WIKI/AMZN" dataset from Quandl; the resulting frame is stored
# in `data` and reused by the cells below
data = ql.get("WIKI/AMZN")

# Preview the first five rows to check which columns were returned
print(data.head(n=5))

             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1997-05-16  22.38  23.75  20.50  20.75  1225000.0          0.0          1.0   
1997-05-19  20.50  21.25  19.50  20.50   508900.0          0.0          1.0   
1997-05-20  20.75  21.00  19.63  19.63   455600.0          0.0          1.0   
1997-05-21  19.25  19.75  16.50  17.13  1571100.0          0.0          1.0   
1997-05-22  17.25  17.38  15.75  16.75   981400.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1997-05-16   1.865000   1.979167  1.708333    1.729167   14700000.0  
1997-05-19   1.708333   1.770833  1.625000    1.708333    6106800.0  
1997-05-20   1.729167   1.750000  1.635833    1.635833    5467200.0  
1997-05-21   1.604167   1.645833  1.375000    1.427500   18853200.0  
1997-05-22   1.437500   1.

In [5]:
# Keep only the adjusted closing price; all other columns are discarded.
# .copy() makes `data` an independent DataFrame instead of a selection of the
# downloaded frame, so adding a new column to it afterwards does not trigger
# pandas' SettingWithCopyWarning.
data = data[['Adj. Close']].copy()

In [6]:
# Forecast horizon: how many rows (days) into the future we want to predict
forecast_num_days = 10

# Build the target column by shifting 'Adj. Close' up by the horizon, so each
# row holds the adjusted close found 'forecast_num_days' rows later.
# The last 'forecast_num_days' rows have no future value and become NaN.
data['Prediction'] = data['Adj. Close'].shift(periods=-forecast_num_days)


In [7]:
# Build the input dataset X.
# 1. Drop the target column by name and convert what remains ('Adj. Close')
#    to a NumPy array. `columns=` replaces the positional axis argument
#    (`drop([...], 1)`), which pandas deprecated and which raises a TypeError
#    on pandas 2.x.
X = np.array(data.drop(columns=['Prediction']))

# 2. Remove the last 'forecast_num_days' rows: their target value is NaN
#    because no future price exists for them.
X = X[:-forecast_num_days]

print("Input Dataset:", X)

Input Dataset: [[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1578.89      ]
 [1598.39      ]
 [1588.18      ]]


  X = np.array(data.drop(['Prediction'],1))


In [8]:
# Build the output dataset y from the 'Prediction' column.
prediction_column = data['Prediction']

# Convert to a NumPy array and, in the same step, cut off the last
# 'forecast_num_days' entries, which are NaN after the shift.
y = np.array(prediction_column)[:-forecast_num_days]

print("Output Dataset:", y)

Output Dataset: [1.51083333e+00 1.47916667e+00 1.41666667e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


In [9]:
# Split the data into 80% training and 20% testing.
# random_state pins the shuffle so the split, and every score computed from
# it, is the same on each run.
# NOTE(review): the rows are a time series and are shuffled before splitting,
# so test rows are interleaved in time with training rows; the reported scores
# may therefore be optimistic. Consider a chronological split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Support Vector Regressor (SVR) with an RBF kernel: construct the estimator
# and fit it on the training split in one step (fit returns the estimator)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(X_train, y_train)

# Evaluate on the held-out split.
# score() returns the coefficient of determination (R^2) of the prediction.
svr_confidence = svr_rbf.score(X_test, y_test)
print("SVR score:", svr_confidence)

SVR score: 0.9424579201695173


In [11]:
# Ordinary Linear Regression: construct the estimator and fit it on the
# training split in one step (fit returns the estimator)
lr = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out split.
# score() returns the coefficient of determination (R^2) of the prediction.
lr_confidence = lr.score(X_test, y_test)
print("Linear Regression score:", lr_confidence)

Linear Regression score: 0.9963015663540676


In [12]:
# Set x_forecast to the last 'forecast_num_days' rows of the 'Adj. Close'
# column, i.e. the rows that have no known target value.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated and which raises a TypeError on pandas 2.x.
x_forecast = np.array(data.drop(columns=['Prediction']))[-forecast_num_days:]
print(x_forecast)

[[1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


  x_forecast = np.array(data.drop(['Prediction'], 1))[-forecast_num_days:]


In [13]:
# Run both fitted models on x_forecast to predict the next 'n' days
svr_pred = svr_rbf.predict(x_forecast)  # Support Vector Regression (SVR)
lr_pred = lr.predict(x_forecast)        # Linear Regression

# Report the two forecasts, separated by a blank line
print("Support Vector Regression Prediction:", svr_pred)
print()
print("Linear Regression Prediction:", lr_pred)

Support Vector Regression Prediction: [1061.34653626  968.50581555  700.71302944 1581.0582435  1304.36700443
 1041.77176807 1517.92215613 1263.72793138  865.85764375  938.63285461]

Linear Regression Prediction: [1619.74620494 1610.9041642  1600.06553361 1572.81615689 1615.17238433
 1610.43557678 1571.97066221 1522.52450348 1583.95020128 1524.04231923]
