In [28]:
#This program predicts stock prices by using machine learning models

#import packages
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression #machine learning package
from sklearn.svm import SVR #machine learning package
from sklearn.model_selection import train_test_split #ml package to model data


In [29]:
# Step 1: fetch the price history for Apple (dataset code 'WIKI/AAPL')
# through the quandl client (presumably a call to the Quandl web API).
df = quandl.get('WIKI/AAPL')

# Preview the first five rows of the downloaded frame
print(df.head(5))

             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1980-12-12  28.75  28.87  28.75  28.75  2093900.0          0.0          1.0   
1980-12-15  27.38  27.38  27.25  27.25   785200.0          0.0          1.0   
1980-12-16  25.37  25.37  25.25  25.25   472000.0          0.0          1.0   
1980-12-17  25.87  26.00  25.87  25.87   385900.0          0.0          1.0   
1980-12-18  26.63  26.75  26.63  26.63   327900.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1980-12-12   0.422706   0.424470  0.422706    0.422706  117258400.0  
1980-12-15   0.402563   0.402563  0.400652    0.400652   43971200.0  
1980-12-16   0.373010   0.373010  0.371246    0.371246   26432000.0  
1980-12-17   0.380362   0.382273  0.380362    0.380362   21610400.0  
1980-12-18   0.391536   0.

In [30]:
# Keep only the adjusted close price; every later cell works from this one column.
# .copy() makes df an independent frame rather than a column selection of the
# downloaded one, so adding the 'Forecast' column later does not raise pandas'
# SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
print(df.head())

            Adj. Close
Date                  
1980-12-12    0.422706
1980-12-15    0.400652
1980-12-16    0.371246
1980-12-17    0.380362
1980-12-18    0.391536


In [31]:
# Number of rows (trading days) ahead that the models will learn to predict
forecastOutput = 30  # 30 day forecast

# Create the target (dependent variable) column: the Adj. Close value found
# forecastOutput rows later. shift(-n) moves the values up by n rows, so the
# last n rows have no known future price and become NaN.
# assign() builds a new frame instead of writing a column into a selection of
# another frame, which avoids pandas' SettingWithCopyWarning.
df = df.assign(Forecast=df['Adj. Close'].shift(-forecastOutput))
print(df.tail())

            Adj. Close  Forecast
Date                            
2018-03-21     171.270       NaN
2018-03-22     168.845       NaN
2018-03-23     164.940       NaN
2018-03-26     172.770       NaN
2018-03-27     168.340       NaN


In [32]:
# Create the independent data set x (the features): every column except the
# 'Forecast' target, converted to a 2-D numpy array.
# columns= is used because passing the axis positionally to DataFrame.drop
# was deprecated in pandas 1.3 and removed in pandas 2.0.
x = np.array(df.drop(columns=['Forecast']))

# Remove the last forecastOutput rows: their 'Forecast' target is NaN,
# so they cannot be used for training or testing.
x = x[:-forecastOutput]
print(x)

[[  0.42270592]
 [  0.40065169]
 [  0.37124607]
 ...
 [155.32      ]
 [155.97      ]
 [162.71      ]]


In [33]:
# Create the dependent data set y (the target) from the 'Forecast' column.
# The full column still ends with forecastOutput NaN values (no future price
# is known for those rows), so they are sliced off right after the conversion
# to a numpy array; this keeps y the same length as x.
y = np.array(df['Forecast'])[:-forecastOutput]
print(y)

[  0.47049006   0.45578725   0.43917307 ... 164.94       172.77
 168.34      ]


In [34]:
# Split data into 80% training and 20% testing.
# random_state pins the shuffle so the split, and every score and prediction
# computed from it, is the same on each run.
# NOTE(review): the rows are shuffled before splitting, so test rows are
# interleaved in time with training rows; the scores computed from this split
# are not a forward-in-time (out-of-sample) evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [35]:
# Build a Support Vector Regressor with a radial basis function (RBF) kernel,
# then fit it on the training split.
svr_rbf = SVR(C=1000.0, gamma=0.1, kernel='rbf')
svr_rbf.fit(X=x_train, y=y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [36]:
# Evaluate the SVR on the held-out test split.
# score() returns the coefficient of determination (R^2) of its predictions.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print('SVM Confidence = ', svm_confidence)

SVM Confidence =  0.9918808560930717


In [37]:
# Build a linear regression model with default settings
regression = LinearRegression()
# Fit it on the same training split used for the SVR
regression.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [38]:
# Evaluate the linear regression model on the held-out test split
# (same score() call as for the SVR model).
regression_confidence = regression.score(X=x_test, y=y_test)
print('Linear Regression Confidence = ', regression_confidence)

Linear Regression Confidence =  0.9920299750423733


In [39]:
# Take the last forecastOutput rows of the Adj. Close column as model input.
# These are the rows whose 'Forecast' target is NaN, i.e. the most recent
# prices, for which the future value is not in the data set.
# columns= is used because passing the axis positionally to DataFrame.drop
# was deprecated in pandas 1.3 and removed in pandas 2.0.
x_forecast = np.array(df.drop(columns=['Forecast']))[-forecastOutput:]
print(x_forecast)


[[164.34 ]
 [167.37 ]
 [172.99 ]
 [172.43 ]
 [171.85 ]
 [171.07 ]
 [172.6  ]
 [175.555]
 [178.97 ]
 [178.39 ]
 [178.12 ]
 [175.   ]
 [176.21 ]
 [176.82 ]
 [176.67 ]
 [175.03 ]
 [176.94 ]
 [179.98 ]
 [181.72 ]
 [179.97 ]
 [178.44 ]
 [178.65 ]
 [178.02 ]
 [175.3  ]
 [175.24 ]
 [171.27 ]
 [168.845]
 [164.94 ]
 [172.77 ]
 [168.34 ]]


In [40]:
# Predict with both fitted models on the most recent forecastOutput rows.
# Each input row is one Adj. Close value; each output is the model's estimate
# of the price forecastOutput rows later.

# Support Vector Regressor predictions
svr_predict = svr_rbf.predict(x_forecast)
print(svr_predict)

# Linear regression predictions
regression_predict = regression.predict(x_forecast)
print(regression_predict)

[160.3537759  175.84050467 172.31586527 171.9767077  171.5064516
 171.3241875  172.10430229 174.19774122 175.48721883 177.02928709
 177.30739201 173.15684279 175.61812584 176.68377668 176.46095773
 173.20319735 176.84150329 167.64111078 136.72834486 167.75960961
 176.952798   176.52783778 177.36116259 173.67700425 173.56363409
 171.28395909 175.36429178 162.13189412 172.21234072 176.29231188]
[167.90542854 170.99879667 176.73633095 176.16461935 175.57248947
 174.77617689 176.33817465 179.35497426 182.84139412 182.24926424
 181.97361758 178.78836723 180.02367265 180.64642993 180.4932929
 178.81899464 180.76893956 183.87251682 185.64890644 183.86230769
 182.30030992 182.51470177 181.87152622 179.0946413  179.03338649
 174.9803596  172.50464419 168.51797668 176.51172996 171.98908283]
