In [None]:
# https://www.youtube.com/watch?v=EYnC4ACIt2g

In [1]:
pip install quandl

Collecting quandl
  Using cached Quandl-3.5.3-py2.py3-none-any.whl (25 kB)
Collecting inflection>=0.3.1
  Using cached inflection-0.5.1-py2.py3-none-any.whl (9.5 kB)
Installing collected packages: inflection, quandl
Successfully installed inflection-0.5.1 quandl-3.5.3
Note: you may need to restart the kernel to use updated packages.


In [2]:
# this program predicts stock prices by using machine learning models
# install the dependencies
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [3]:
# get the stock data
df = quandl.get("WIKI/FB")
# take a look at the data
print(df.head())

             Open   High    Low    Close       Volume  Ex-Dividend  \
Date                                                                 
2012-05-18  42.05  45.00  38.00  38.2318  573576400.0          0.0   
2012-05-21  36.53  36.66  33.00  34.0300  168192700.0          0.0   
2012-05-22  32.61  33.59  30.94  31.0000  101786600.0          0.0   
2012-05-23  31.37  32.50  31.36  32.0000   73600000.0          0.0   
2012-05-24  32.95  33.21  31.77  33.0300   50237200.0          0.0   

            Split Ratio  Adj. Open  Adj. High  Adj. Low  Adj. Close  \
Date                                                                  
2012-05-18          1.0      42.05      45.00     38.00     38.2318   
2012-05-21          1.0      36.53      36.66     33.00     34.0300   
2012-05-22          1.0      32.61      33.59     30.94     31.0000   
2012-05-23          1.0      31.37      32.50     31.36     32.0000   
2012-05-24          1.0      32.95      33.21     31.77     33.0300   

           

In [4]:
# get the adjusted close price
df = df[['Adj. Close']]
# take a look at the new data
print(df.head())

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300


In [9]:
# a variable for predicting 'n' days out into the future
forecast = 1
# create the target/dependent column, shifted 'n' units; this is a 2d, 1-column matrix
df['Prediction'] = df[['Adj. Close']].shift(-forecast)
# print the new data set
print(df.head())
print(df.tail())

            Adj. Close  Prediction
Date                              
2012-05-18     38.2318       34.03
2012-05-21     34.0300       31.00
2012-05-22     31.0000       32.00
2012-05-23     32.0000       33.03
2012-05-24     33.0300       31.91
            Adj. Close  Prediction
Date                              
2018-03-21      169.39      164.89
2018-03-22      164.89      159.39
2018-03-23      159.39      160.06
2018-03-26      160.06      152.19
2018-03-27      152.19         NaN


In [16]:
### create the independent data set (X)
# convert the dataframe to a numpy array; this is a list
X = np.array(df['Adj. Close']).reshape(-1, 1)
# remove the last 'n' rows
X = X[:-forecast]
print(X)

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [164.89  ]
 [159.39  ]
 [160.06  ]]


In [20]:
### create the dependent data set (y)
# convert the dataframe to a numpy array
y = np.array(df['Prediction'])
y = y[:-forecast]
print(y)

[ 34.03  31.    32.   ... 159.39 160.06 152.19]


In [21]:
# split the data into 80% training and 20% testing
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [25]:
# try with Support Vector Machine
svr = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [26]:
# testing model: score returns the coefficient of determination R^2 of the prediction
# the best possible score is 1.0
svr_score = svr.score(x_test, y_test)
print("SVR score:", svr_score)

SVR score: 0.9947228505654913


In [27]:
# try with Linear Regression
lr = LinearRegression()
# train the model
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [28]:
lr_score = lr.score(x_test, y_test)
print("Linear Regression score:", lr_score)

Linear Regression score: 0.998410625869784


In [37]:
# set x_forecast to the last 30 rows of the original data set from the adjusted close price
x_forecast = np.array(df['Adj. Close'])[-30:].reshape(-1, 1)
print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [39]:
# print the linear regression predictions for the next 'n' days
lr_predict = lr.predict(x_forecast)
print(lr_predict)

[173.30290934 179.67507888 180.11522874 177.51434322 176.16388342
 178.06453054 179.14489837 183.4463629  185.08692146 181.61573962
 178.47467018 176.09385958 176.77409118 180.5553786  179.93516744
 183.86650595 182.49603934 185.38702364 184.91686356 182.03588267
 184.34666943 184.01655703 185.24697596 172.7127084  168.30120641
 169.54162874 165.04009609 159.53822286 160.20845106 152.33577063]


In [40]:
# print the support vector machine predictions for the next 'n' days
svm_predict = svr.predict(x_forecast)
print(svm_predict)

[172.7966825  179.38102383 179.86557962 177.24204771 177.35968465
 177.71958791 178.88664469 183.02024849 184.98876201 182.4032513
 178.18235931 177.37868457 177.14019006 180.50835154 179.65150891
 183.10892509 183.16425172 185.64986289 184.61926217 182.92406316
 183.57919977 183.21026961 185.34289554 172.09802541 169.29023626
 169.81182997 165.64026399 159.95991232 159.755057   152.36216827]
