In [0]:
# import dependencies (the packages must already be installed in the kernel's environment)

import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split


In [57]:
# Load stock data.
# Fetches the Quandl dataset with code "WIKI/FB" and binds the result to `dataFrame`.
# Per the recorded output below, the result is a Date-indexed table of 1472 rows x 12 columns
# (Open/High/Low/... plus "Adj." variants) covering 2012-05-18 through 2018-03-27.
# NOTE(review): no API key is configured anywhere in the visible cells - presumably this relies
# on anonymous access; confirm before re-running.

dataFrame = quandl.get("WIKI/FB")
print(dataFrame)

              Open    High     Low  ...  Adj. Low  Adj. Close  Adj. Volume
Date                                ...                                   
2012-05-18   42.05   45.00   38.00  ...     38.00     38.2318  573576400.0
2012-05-21   36.53   36.66   33.00  ...     33.00     34.0300  168192700.0
2012-05-22   32.61   33.59   30.94  ...     30.94     31.0000  101786600.0
2012-05-23   31.37   32.50   31.36  ...     31.36     32.0000   73600000.0
2012-05-24   32.95   33.21   31.77  ...     31.77     33.0300   50237200.0
...            ...     ...     ...  ...       ...         ...          ...
2018-03-21  164.80  173.40  163.30  ...    163.30    169.3900  105350867.0
2018-03-22  166.13  170.27  163.72  ...    163.72    164.8900   73389988.0
2018-03-23  165.44  167.10  159.02  ...    159.02    159.3900   52306891.0
2018-03-26  160.82  161.10  149.02  ...    149.02    160.0600  125438294.0
2018-03-27  156.31  162.85  150.75  ...    150.75    152.1900   76787884.0

[1472 rows x 12 columns]

In [58]:
# Narrow the frame to a single column: the adjusted closing price.
# The list selector keeps the result a one-column DataFrame (not a Series),
# so the Date index and the "Adj. Close" header are preserved.

dataFrame = dataFrame.loc[:, ["Adj. Close"]]
print(dataFrame)

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300
...                ...
2018-03-21    169.3900
2018-03-22    164.8900
2018-03-23    159.3900
2018-03-26    160.0600
2018-03-27    152.1900

[1472 rows x 1 columns]


In [59]:
# Forecast horizon: how many rows (days of data) ahead each target value lies.

predictOut = 30

# Target column: the "Adj. Close" value `predictOut` rows later.
# The shift uses `predictOut` itself rather than a repeated literal, so the label
# offset always matches the number of rows trimmed from X / Y and used for x_forecast.
# The last `predictOut` rows have no future value yet and are therefore NaN.
dataFrame["Prediction"] = dataFrame["Adj. Close"].shift(-predictOut)

print (dataFrame)

            Adj. Close  Prediction
Date                              
2012-05-18     38.2318      30.771
2012-05-21     34.0300      31.200
2012-05-22     31.0000      31.470
2012-05-23     32.0000      31.730
2012-05-24     33.0300      32.170
...                ...         ...
2018-03-21    169.3900         NaN
2018-03-22    164.8900         NaN
2018-03-23    159.3900         NaN
2018-03-26    160.0600         NaN
2018-03-27    152.1900         NaN

[1472 rows x 2 columns]


In [60]:
# Independent data set (X)

# Convert every column except the target into a 2-D numpy array.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which pandas
# deprecated in 1.3 and no longer accepts in 2.0.
X = np.array(dataFrame.drop(columns=["Prediction"]))

# Remove the last "n" rows: their "Prediction" target is NaN, so they cannot be
# used for training or scoring.
X = X[:-predictOut]

print (X)

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [171.5499]
 [175.98  ]
 [176.41  ]]


In [61]:
# Dependent data set (Y)

# Copy the target column into a 1-D numpy array and, in the same expression,
# cut off the trailing "n" entries, which are NaN because no future price exists for them.
Y = np.array(dataFrame["Prediction"])[:-predictOut]

print(Y)

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


In [0]:
# Split data into 80% train and 20% test.
# A fixed random_state makes the shuffle - and therefore every score and prediction
# computed from this split - identical on each Restart & Run All.
# NOTE(review): the rows are date-ordered prices, so a shuffled split lets the models
# train on days adjacent to the test days; consider a chronological split
# (shuffle=False) if the scores are meant to reflect real forecasting skill.

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [63]:
# Build a support vector regressor with an RBF kernel and fit it on the training split.
# The fit call is the cell's last expression, so the fitted estimator's repr is displayed.

svr_rbf = SVR(C=1e3, gamma=0.1, kernel="rbf")
svr_rbf.fit(X=x_train, y=y_train)


SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [64]:
# Evaluate the SVR on the held-out split.
# For scikit-learn regressors score() is the coefficient of determination (R^2):
# 1.0 is a perfect fit; it is not a probability-style "confidence".

svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svm confidence: ", svm_confidence)


svm confidence:  0.9759257401011224


In [65]:
# Build an ordinary least-squares linear regression and fit it on the same training split
# used for the SVR, so the two models are directly comparable.

lr = LinearRegression()
lr.fit(X=x_train, y=y_train)


LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [66]:
# Evaluate the linear regression on the held-out split.
# As with the SVR, score() here is R^2 (coefficient of determination), not a probability.

lr_confidence = lr.score(X=x_test, y=y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9827541838402234


In [67]:
# x_forecast: the last "n" rows of the feature column(s) - exactly the rows that were
# trimmed from X because their future price (the target) is not known yet.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which pandas
# deprecated in 1.3 and no longer accepts in 2.0.

x_forecast = np.array(dataFrame.drop(columns=["Prediction"]))[-predictOut:]
print(x_forecast)


[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [69]:
# Run both fitted models over x_forecast. Each input row is a recent adjusted close,
# and each output is that model's estimate of the price "n" rows after that input row.
lr_prediction = lr.predict(x_forecast)
svm_prediction = svr_rbf.predict(x_forecast)

# Show the linear regression forecasts first, then the support vector regressor's.
print(lr_prediction)
print(svm_prediction)


[177.15982005 183.60314593 184.04821082 181.41828189 180.05274187
 181.97461301 183.06704503 187.41654288 189.07542112 185.56547751
 182.38933257 179.98193609 180.66976366 184.49327572 183.86613882
 187.84137755 186.45560731 189.37887446 188.90346423 185.99031219
 188.32690289 187.99310422 189.23726291 176.56302848 172.10226441
 173.35653821 168.80473814 163.24142694 163.91913939 155.95854682]
[174.52008447 181.35282921 180.97231548 175.45582666 175.22418311
 177.35875369 181.02887439 179.23567548 178.35264365 180.66197192
 179.04122976 175.30632572 174.65791822 180.47613032 181.16986601
 178.14723051 181.08847284 179.22917855 177.95768265 181.04508026
 177.47620419 177.84730497 178.79857093 175.3526897  171.02102784
 171.90651012 172.22167232 167.73620559 166.62296973 161.13582469]
