In [2]:
!pip install quandl

# Quandl is a platform that provides its users with economic, financial and alternative datasets.

Collecting quandl
  Downloading Quandl-3.7.0-py2.py3-none-any.whl (26 kB)
Collecting more-itertools
  Downloading more_itertools-8.13.0-py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 877 kB/s eta 0:00:01
Collecting inflection>=0.3.1
  Downloading inflection-0.5.1-py2.py3-none-any.whl (9.5 kB)
Installing collected packages: more-itertools, inflection, quandl
Successfully installed inflection-0.5.1 more-itertools-8.13.0 quandl-3.7.0


In [45]:
import warnings
warnings.simplefilter('ignore')

In [4]:
import quandl 
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [14]:
# Get stock data
df = quandl.get("WIKI/FB")

In [6]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2012-05-18,42.05,45.0,38.0,38.2318,573576400.0,0.0,1.0,42.05,45.0,38.0,38.2318,573576400.0
2012-05-21,36.53,36.66,33.0,34.03,168192700.0,0.0,1.0,36.53,36.66,33.0,34.03,168192700.0
2012-05-22,32.61,33.59,30.94,31.0,101786600.0,0.0,1.0,32.61,33.59,30.94,31.0,101786600.0
2012-05-23,31.37,32.5,31.36,32.0,73600000.0,0.0,1.0,31.37,32.5,31.36,32.0,73600000.0
2012-05-24,32.95,33.21,31.77,33.03,50237200.0,0.0,1.0,32.95,33.21,31.77,33.03,50237200.0


In [10]:
# Get Adjusted Close price
df = df['Adj. Close']
df.head()

Date
2012-05-18    38.2318
2012-05-21    34.0300
2012-05-22    31.0000
2012-05-23    32.0000
2012-05-24    33.0300
Name: Adj. Close, dtype: float64

In [15]:
df = df[["Adj. Close"]]
df.head()

Unnamed: 0_level_0,Adj. Close
Date,Unnamed: 1_level_1
2012-05-18,38.2318
2012-05-21,34.03
2012-05-22,31.0
2012-05-23,32.0
2012-05-24,33.03


In [19]:
# A variable for predicting 'n' days out into the future
forecast_out = 30

# create a new column : the target or dependent variable. shifted 'n' units up

df ['Prediction'] = df[['Adj. Close']].shift(- forecast_out)

# DataFrame.shift(periods=1, freq=None, axis=0,
# Shift index by desired number of periods with an optional time freq.
# periods: positive: use the previous rows, negative: use the following rows

df.head()

Unnamed: 0_level_0,Adj. Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-05-18,38.2318,30.771
2012-05-21,34.03,31.2
2012-05-22,31.0,31.47
2012-05-23,32.0,31.73
2012-05-24,33.03,32.17


In [20]:
df.tail()

Unnamed: 0_level_0,Adj. Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-03-21,169.39,
2018-03-22,164.89,
2018-03-23,159.39,
2018-03-26,160.06,
2018-03-27,152.19,


In [46]:
# convert the dataframe to a numpy array
X = np.array(df.drop(['Prediction'], 1))

# same as X = np.array(df['Adj. Close'])

# remove the last 30 (forecase_out) rows
# X now is a numpy array
X = X[:-forecast_out]

X

array([[ 38.2318],
       [ 34.03  ],
       [ 31.    ],
       ...,
       [171.5499],
       [175.98  ],
       [176.41  ]])

In [36]:
# Create the depdent dataset y
y = np.array(df['Prediction'])
y = y[:-forecast_out]
y

array([ 30.771,  31.2  ,  31.47 , ..., 159.39 , 160.06 , 152.19 ])

In [37]:
# split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state = 18)

In [50]:
# Model : SVR
svr_rbf = SVR(kernel ='rbf', C = 1e3, gamma = 0.1)
svr_rbf.fit(X_train,y_train)
ypred = svr_rbf.predict(X_test)

from sklearn.metrics import r2_score
R2_svr = r2_score(ypred,y_test)

In [33]:
R2_svr

0.9816207871192477

In [40]:
# Model: Linear Model

linear = LinearRegression()
linear.fit(X_train,y_train)
ypred = linear.predict(X_test)

R2_linear = r2_score(ypred,y_test)

In [41]:
R2_linear

0.9812538638878047

In [47]:
X_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]

In [48]:
# print the prediction for the next 30 days
lr_prediction = linear.predict(X_forecast)
lr_prediction

array([177.23405068, 183.67404911, 184.11888417, 181.49031338,
       180.12547855, 182.0463572 , 183.13822506, 187.48547675,
       189.14349832, 185.63536731, 182.46086259, 180.05470933,
       180.74218169, 184.56371922, 183.93690619, 187.91009203,
       186.52503742, 189.44679495, 188.97163023, 186.05998259,
       188.39536664, 188.06174034, 189.30525652, 176.63756731,
       172.17910686, 173.43273293, 168.88328349, 163.32284529,
       164.00020776, 156.04372619])

In [52]:
svr_prediction = svr_rbf.predict(X_forecast)
svr_prediction

array([181.89300404, 181.4052896 , 182.11269001, 176.12419312,
       175.22525504, 177.56329031, 180.40932277, 186.49553134,
       179.34441812, 185.75707524, 178.72938215, 175.32218469,
       175.01465056, 182.90227269, 181.82436953, 184.62835498,
       187.80670018, 178.99669929, 179.77192746, 186.96509171,
       182.14791362, 183.86008934, 179.09229616, 180.68551511,
       171.16306229, 172.62614129, 172.27279608, 167.71044499,
       166.16010078, 156.01859293])

In [73]:
import pandas as pd
df_pred = pd.DataFrame(lr_prediction, svr_prediction)
df_pred.head()

Unnamed: 0,0
181.893004,177.234051
181.40529,183.674049
182.11269,184.118884
176.124193,181.490313
175.225255,180.125479


In [78]:
narray = np.transpose( [lr_prediction, svr_prediction])

In [80]:
df_pred = pd.DataFrame(narray,columns=['linear','svr'])
df_pred.head()

Unnamed: 0,linear,svr
0,177.234051,181.893004
1,183.674049,181.40529
2,184.118884,182.11269
3,181.490313,176.124193
4,180.125479,175.225255


In [84]:
ytrue = np.array(df['Adj. Close'])[-forecast_out:]

In [87]:
narray = np.transpose([lr_prediction, svr_prediction, ytrue])
df_pred_true = pd.DataFrame(narray, columns=['linear','svr','true'])
df_pred_true

Unnamed: 0,linear,svr,true
0,177.234051,181.893004,173.15
1,183.674049,181.40529,179.52
2,184.118884,182.11269,179.96
3,181.490313,176.124193,177.36
4,180.125479,175.225255,176.01
5,182.046357,177.56329,177.91
6,183.138225,180.409323,178.99
7,187.485477,186.495531,183.29
8,189.143498,179.344418,184.93
9,185.635367,185.757075,181.46
