<a href="https://colab.research.google.com/github/rks-69/Stock-Market-Prediction/blob/main/Stock_Market_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install quandl

Collecting quandl
  Downloading https://files.pythonhosted.org/packages/1f/37/2d908ea6de71a46f4bb2faf5ff0032cefe56117b715c7f22d1d0d2e7527d/Quandl-3.5.3-py2.py3-none-any.whl
Collecting inflection>=0.3.1
  Downloading https://files.pythonhosted.org/packages/59/91/aa6bde563e0085a02a435aa99b49ef75b0a4b062635e606dab23ce18d720/inflection-0.5.1-py2.py3-none-any.whl
Installing collected packages: inflection, quandl
Successfully installed inflection-0.5.1 quandl-3.5.3


In [3]:
#Install the dependencies
import quandl
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [4]:
# Get the stock data
df = quandl.get("WIKI/AMZN")
# Take a look at the data
print(df.head())

             Open   High    Low  ...  Adj. Low  Adj. Close  Adj. Volume
Date                             ...                                   
1997-05-16  22.38  23.75  20.50  ...  1.708333    1.729167   14700000.0
1997-05-19  20.50  21.25  19.50  ...  1.625000    1.708333    6106800.0
1997-05-20  20.75  21.00  19.63  ...  1.635833    1.635833    5467200.0
1997-05-21  19.25  19.75  16.50  ...  1.375000    1.427500   18853200.0
1997-05-22  17.25  17.38  15.75  ...  1.312500    1.395833   11776800.0

[5 rows x 12 columns]


In [5]:
# Get the Adjusted Close Price 
df = df[['Adj. Close']] 
# Take a look at the new data 
print(df.head())

            Adj. Close
Date                  
1997-05-16    1.729167
1997-05-19    1.708333
1997-05-20    1.635833
1997-05-21    1.427500
1997-05-22    1.395833


In [6]:
# A variable for predicting 'n' days out into the future
forecast_out = 30 #'n=30' days
#Create another column (the target ) shifted 'n' units up
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
#print the new data set
print(df.tail())

            Adj. Close  Prediction
Date                              
2018-03-21     1581.86         NaN
2018-03-22     1544.10         NaN
2018-03-23     1495.56         NaN
2018-03-26     1555.86         NaN
2018-03-27     1497.05         NaN


In [7]:
### Create the independent data set (X)  #######
# Convert the dataframe to a numpy array
X = np.array(df.drop(['Prediction'],1))

#Remove the last '30' rows
X = X[:-forecast_out]
print(X)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1350.47      ]
 [1338.99      ]
 [1386.23      ]]


In [8]:
### Create the dependent data set (y)  #####
# Convert the dataframe to a numpy array 
y = np.array(df['Prediction'])
# Get all of the y values except the last '30' rows
y = y[:-forecast_out]
print(y)

[1.54166667e+00 1.51583333e+00 1.58833333e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


In [10]:
# Split the data into 80% training and 20% testing
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [11]:
# Create and train the Support Vector Machine (Regressor) 
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) 
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [12]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction. 
# The best possible score is 1.0
svm_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.9502283035238286


In [13]:
# Create and train the Linear Regression  Model
lr = LinearRegression()
# Train the model
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [14]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction. 
# The best possible score is 1.0
lr_confidence = lr.score(x_test, y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9874873357587036


In [15]:
# Set x_forecast equal to the last 30 rows of the original data set from Adj. Close column
x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]
print(x_forecast)

[[1414.51]
 [1451.05]
 [1461.76]
 [1448.69]
 [1468.35]
 [1482.92]
 [1484.76]
 [1500.  ]
 [1521.95]
 [1511.98]
 [1512.45]
 [1493.45]
 [1500.25]
 [1523.61]
 [1537.64]
 [1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


In [16]:
# Print linear regression model predictions for the next '30' days
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)
# Print support vector regressor model predictions for the next '30' days
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

[1498.40609918 1537.19680359 1548.56649281 1534.6914379  1555.56240803
 1571.0298564  1572.98319236 1589.1619098  1612.46393393 1601.87982547
 1602.37877542 1582.20845841 1589.42730871 1614.22618268 1629.1203694
 1636.93371326 1644.2162593  1672.91118924 1693.61230407 1682.77341267
 1685.76711235 1676.55246226 1665.25708473 1636.85940156 1681.00054796
 1676.06412827 1635.97827719 1584.44842519 1648.46264183 1586.03020268]
[1057.02681421 1550.20728627  676.9104984  1053.84286987  676.87755278
  676.87755278  676.87755278  676.87755278  676.87755278  676.87755278
  676.87755278  676.87755278  676.87755278  676.87755278  676.87755278
  676.87755278  676.87755278  676.87755278  676.87755278  676.87755278
  676.87755278  676.87755278  676.87755278  676.87755278  676.87755278
  676.87755278  676.87755278  676.87755278  676.87755278  676.87755278]
