In [None]:
pip install quandl




In [None]:
# Stock price prediction using machine-learning models
# Import the dependencies
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split


In [None]:
# Fetch the "WIKI/FB" dataset from Quandl as a DataFrame
df = quandl.get("WIKI/FB")
# Preview the first five rows
print(df.head(5))

             Open   High    Low  ...  Adj. Low  Adj. Close  Adj. Volume
Date                             ...                                   
2012-05-18  42.05  45.00  38.00  ...     38.00     38.2318  573576400.0
2012-05-21  36.53  36.66  33.00  ...     33.00     34.0300  168192700.0
2012-05-22  32.61  33.59  30.94  ...     30.94     31.0000  101786600.0
2012-05-23  31.37  32.50  31.36  ...     31.36     32.0000   73600000.0
2012-05-24  32.95  33.21  31.77  ...     31.77     33.0300   50237200.0

[5 rows x 12 columns]


In [None]:
# Keep only the adjusted close price. The explicit .copy() makes this an
# independent frame, so adding a column to it later does not raise pandas'
# SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
# Take a look at the reduced data
print(df.head())

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300


In [None]:
# Forecast horizon: how many rows ahead the target value lies
forecast_out = 30
# Target (dependent) column: 'Adj. Close' shifted up by `forecast_out` rows,
# so each row holds the price `forecast_out` rows later; the final rows have
# no future value and become NaN
df['Prediction'] = df['Adj. Close'].shift(-forecast_out)
# Show the end of the data set (head() shows the start, tail() the end)
print(df.tail())

            Adj. Close  Prediction
Date                              
2018-03-21      169.39         NaN
2018-03-22      164.89         NaN
2018-03-23      159.39         NaN
2018-03-26      160.06         NaN
2018-03-27      152.19         NaN


In [None]:
### Create the independent data set (X) ###
# Drop the target column and convert what remains to a NumPy array.
# `columns=` replaces the positional axis argument (`drop(..., 1)`), which
# pandas deprecated in 1.3 and no longer accepts from 2.0 on.
X = np.array(df.drop(columns=['Prediction']))
# Remove the last 'n' rows: their 'Prediction' value is NaN
X = X[:-forecast_out]
print(X)
# X is a 2-D array: one row per sample, one column per feature

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [171.5499]
 [175.98  ]
 [176.41  ]]


In [None]:
### Create the dependent data set (y) ###
# Convert the target column to a NumPy array and, in the same step, cut off
# the last 'n' entries (the ones that are NaN)
y = np.array(df['Prediction'])[:-forecast_out]
print(y)
# y is a flat 1-D array: one target value per row of X

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


In [None]:
# Split the data into 80% training and 20% testing.
# random_state pins the shuffle so the split, and every score computed from
# it, is identical on each run.
# NOTE: train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows; the resulting scores are not a
# measure of forecasting on unseen future dates.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Build a Support Vector Regressor with an RBF kernel, then fit it on the
# training split
svr_rbf = SVR(
    kernel='rbf',
    C=1e3,
    gamma=0.1,
)
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [None]:
# Evaluate the SVR on the held-out test split.
# score() returns the coefficient of determination R^2 of the prediction;
# the best possible score is 1.0.
svm_cinfidence = svr_rbf.score(X=x_test, y=y_test)
print("svm confidence", svm_cinfidence)
# The score comes out at about 0.98, which is close to the maximum of 1.0.

svm confidence 0.9818560855507545


In [None]:
# Linear regression model, fitted on the same training split
lr = LinearRegression()
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [None]:
# Evaluate the LinearRegression model on the held-out test split.
# score() returns the coefficient of determination R^2 of the prediction;
# the best possible score is 1.0.
lr_cinfidence = lr.score(X=x_test, y=y_test)
print("lr confidence", lr_cinfidence)
# In the recorded run both models score about 0.98 (LR 0.98187 vs. SVR 0.98186);
# the difference is in the fifth decimal place, so neither is clearly better.

lr confidence 0.981871596882896


In [None]:
# Set x_forecast to the last `forecast_out` rows of the 'Adj. Close' column.
# `columns=` replaces the positional axis argument (`drop(..., 1)`), which
# pandas deprecated in 1.3 and no longer accepts from 2.0 on.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [None]:
# LinearRegression model: predictions for the next 'n' days
lr_prediction = lr.predict(X=x_forecast)
print(lr_prediction)

# Support vector regressor: predictions for the next 'n' days
svm_prediction = svr_rbf.predict(X=x_forecast)
print(svm_prediction)
# The first array printed is the LinearRegression forecast of the adjusted close
# price for the next 30 days; the second is the support vector regressor's
# forecast for the same 30 days.

[176.80574015 183.22371318 183.66702686 181.04744603 179.68727906
 181.60158813 182.68972171 187.02210539 188.67445638 185.1783235
 182.01467588 179.61675189 180.30187303 184.11034054 183.48567127
 187.44526845 186.06495086 188.9767157  188.50317609 185.60148656
 187.92888337 187.59639811 188.83566135 176.2112968  171.7680847
 173.01742325 168.48353335 162.94211236 163.61715819 155.68788852]
[176.9974223  178.95130718 178.27605434 178.49005158 176.10281325
 179.09361673 179.57149488 185.69742314 179.63430838 181.15738665
 179.48582807 175.89236039 177.52621025 178.00085221 178.53619007
 183.99241685 185.56841035 179.57355549 179.87186691 183.45659859
 181.72014888 183.272743   179.54832287 178.99233894 172.65848136
 172.76757291 172.23306442 167.57711435 167.04391669 157.62825306]
