In [16]:
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [18]:
# Download the Facebook (FB) price table from Quandl's "WIKI" database.
# NOTE(review): no API key is configured in this notebook, so the request is
# presumably anonymous -- confirm that the rate limits are acceptable.
df = quandl.get("WIKI/FB")
# Preview the first rows to check the available columns.
print(df.head())

             Open   High    Low    Close       Volume  Ex-Dividend  \
Date                                                                 
2012-05-18  42.05  45.00  38.00  38.2318  573576400.0          0.0   
2012-05-21  36.53  36.66  33.00  34.0300  168192700.0          0.0   
2012-05-22  32.61  33.59  30.94  31.0000  101786600.0          0.0   
2012-05-23  31.37  32.50  31.36  32.0000   73600000.0          0.0   
2012-05-24  32.95  33.21  31.77  33.0300   50237200.0          0.0   

            Split Ratio  Adj. Open  Adj. High  Adj. Low  Adj. Close  \
Date                                                                  
2012-05-18          1.0      42.05      45.00     38.00     38.2318   
2012-05-21          1.0      36.53      36.66     33.00     34.0300   
2012-05-22          1.0      32.61      33.59     30.94     31.0000   
2012-05-23          1.0      31.37      32.50     31.36     32.0000   
2012-05-24          1.0      32.95      33.21     31.77     33.0300   

           

In [20]:
# Keep only the adjusted close price.
# .copy() detaches the result from the downloaded frame, so adding the
# 'prediction' column later does not raise pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
df.head()

Unnamed: 0_level_0,Adj. Close
Date,Unnamed: 1_level_1
2012-05-18,38.2318
2012-05-21,34.03
2012-05-22,31.0
2012-05-23,32.0
2012-05-24,33.03


In [31]:
# Forecast horizon: how many rows (trading days) ahead the models predict.
forecast_out = 30
# Target column: the adjusted close found `forecast_out` rows later.
# Shifting upwards leaves NaN in the final `forecast_out` rows.
adj_close = df['Adj. Close']
df['prediction'] = adj_close.shift(-forecast_out)
# Show the first rows of the extended data set.
print(df.head())

            Adj. Close  prediction
Date                              
2012-05-18     38.2318      30.771
2012-05-21     34.0300      31.200
2012-05-22     31.0000      31.470
2012-05-23     32.0000      31.730
2012-05-24     33.0300      32.170


In [34]:
# Create the independent data set (X): every column except the target.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated in 1.3 and rejects from 2.0 onwards.
X = np.array(df.drop(columns=['prediction']))
# Remove the last `forecast_out` rows: their shifted target is NaN.
X = X[:-forecast_out]
print(X)

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [171.5499]
 [175.98  ]
 [176.41  ]]


In [37]:
# Create the dependent data set (y).
# Take every target value except the last `forecast_out` rows (which hold the
# NaNs produced by the shift) and convert the result to a numpy array.
y = np.array(df['prediction'].iloc[:-forecast_out])
print(y)

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


In [40]:
# Split the data into 80% training and 20% test.
# random_state pins the shuffle so the split -- and every score computed from
# it -- is reproducible across kernel restarts.
# NOTE(review): rows are shuffled before splitting, so test days are interleaved
# with training days; for time-ordered prices this likely inflates R^2.
# Consider a chronological split (shuffle=False) -- confirm intent.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [42]:
# Build a support vector regressor with an RBF kernel, then fit it on the
# training split. The fit call stays last so the cell displays the estimator.
svr_rdf = SVR(
    kernel='rbf',
    C=1000.0,
    gamma=0.1,
)
svr_rdf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [44]:
# Evaluate the SVR on the held-out split.
# score() returns the coefficient of determination (R^2); 1.0 is the best
# possible value. Despite the variable name, it is not a statistical confidence.
svm_confidence = svr_rdf.score(X=x_test, y=y_test)
print("svm confidence", svm_confidence)


svm confidence 0.9838197734466927


In [50]:
# Predict the target for each row of the held-out test features with the SVR.
y_pred = svr_rdf.predict(x_test)
print(y_pred)

[ 76.68362892 118.95325339 124.66041817 163.69266774  94.76682685
 151.49497906  62.96498226 185.45593083 128.59419058 172.43641991
 175.15819257  70.98293377 169.48135576  72.70103411  72.56757858
 172.49022047 100.36624856 105.7217922  154.86643297  26.64036424
  77.46252401  77.93253572  48.03958686 126.11870497 150.63859119
 165.2569635  105.12180861  23.03201511  74.91168848 126.96943705
  82.67529002  28.22880705 105.5276147   26.90374645 128.85249837
  60.62085185  27.06005558  74.79538862  98.11090996 122.66862237
 107.64626307 104.27066651  51.75661066 140.78247254  79.41534026
  79.35024567  77.54845771 172.35447015  80.20719329 149.44647646
  32.81859363  79.41534026  51.84356024  27.09920494  26.30658568
  64.43848413 153.04411754 102.03970408 177.45994305  77.7024796
  52.01696721 123.25128419 126.23583407 103.6865482   78.01692757
 117.42977695 121.33300735  77.63663514  68.63280372  20.4173541
  27.6454956   93.58073971  27.11820988  52.31011237  77.51387913
 138.9907289

In [52]:
# Build an ordinary least-squares model with default settings.
lr = LinearRegression()
# Fit it on the same training split used for the SVR; the fit call stays last
# so the cell displays the fitted estimator.
lr.fit(X=x_train, y=y_train)


LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [61]:
# Evaluate the linear regression on the held-out split.
# score() returns the coefficient of determination (R^2); 1.0 is the best
# possible value. Despite the variable name, it is not a statistical confidence.
lr_confidence = lr.score(X=x_test, y=y_test)
print("lr confidence", lr_confidence)

lr confidence 0.9791647640980308


In [57]:
# x_forecast: the last `forecast_out` rows of the feature column(s). These are
# the rows whose shifted target is NaN, i.e. the inputs to forecast from.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated in 1.3 and rejects from 2.0 onwards.
x_forecast = np.array(df.drop(columns=['prediction']))[-forecast_out:]
print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [60]:
# Forecast the target for the last `forecast_out` feature rows with both models.
lr_prediction = lr.predict(x_forecast)
svm_prediction = svr_rdf.predict(x_forecast)

# Print the linear regression forecast first, then the support vector one.
for forecast in (lr_prediction, svm_prediction):
    print(forecast)
 



[177.13007839 183.56246553 184.00677485 181.38131071 180.01808894
 181.93669735 183.02727477 187.36938854 189.02545053 185.5214657
 182.35071285 179.94740337 180.63406322 184.45108416 183.82501195
 187.79350197 186.41008433 189.3283887  188.85378557 185.94557914
 188.27820305 187.94497106 189.18701756 176.53429999 172.08110889
 173.33325333 168.78918078 163.23531433 163.91187624 155.96479825]
[174.4963044  181.28188865 180.54153196 175.8947047  175.18436983
 177.42402924 181.04886441 184.3408653  179.63632236 179.73126154
 178.99387556 175.11935286 175.34816555 179.5207817  180.91724002
 183.3018737  183.10297759 179.5647374  179.87671698 181.24393391
 181.54277907 182.77879656 179.54223794 177.5921479  171.68995734
 171.72442733 172.48931688 167.56541446 166.2609655  159.64881243]
