In [1]:
# This program predicts stonk prices by using Machine Learning models

# Import the dependencies

import numpy as np
import pandas_datareader as web
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

# Get the stock data
stock = 'SPY'
df = web.DataReader(stock, data_source='yahoo', start='2016-11-11')

# Keep only the Adjusted Close Price. Take an explicit copy so that adding the
# 'Prediction' column below writes to an independent frame instead of a slice
# of the downloaded one (avoids pandas' SettingWithCopyWarning).
df = df[['Adj Close']].copy()

# A variable for predicting 'n' days (rows) out into the future
forecast_out = 10
# Create the target (dependent variable): the 'Adj Close' value 'forecast_out'
# rows later. The last 'forecast_out' rows have no future value and become NaN.
df['Prediction'] = df['Adj Close'].shift(-forecast_out)

# Create the independent data set (X) as a 2-D numpy array of the feature
# column, without the last 'forecast_out' rows (their target is NaN).
# `columns=` is used because passing the axis positionally to drop() was
# deprecated in pandas 1.3 and is rejected by pandas 2.0.
X = df.drop(columns=['Prediction']).to_numpy()[:-forecast_out]

# Create the dependent data set (y) as a 1-D numpy array, again without the
# last 'forecast_out' rows (the NaN targets).
y = df['Prediction'].to_numpy()[:-forecast_out]

# Split the data into 85% training and 15% testing (test_size=0.15).
# random_state is fixed so the split, and therefore the scores printed below,
# are reproducible from one run to the next.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows; for a price series this likely makes
# the scores optimistic. Consider shuffle=False for a chronological hold-out.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42
)

# Create and train the Support Vector Machine (Regressor)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(x_train, y_train)

# Testing Model: score() returns the coefficient of determination (R^2) of the
# prediction on the held-out test set. The best possible score is 1.0.
svm_confidence = svr_rbf.score(x_test, y_test)
print(svm_confidence)

# Create and train the Linear Regression Model
lr = LinearRegression()
lr.fit(x_train, y_train)

# Testing Model: score() returns the coefficient of determination (R^2) of the
# prediction on the held-out test set. The best possible score is 1.0.
lr_confidence = lr.score(x_test, y_test)
print(lr_confidence)

# Set x_forecast to the last 'forecast_out' rows of the 'Adj Close' column.
# These are the rows whose 'Prediction' target is NaN, i.e. the ones that were
# excluded from training and for which a future value is still unknown.
# `columns=` is used because passing the axis positionally to drop() was
# deprecated in pandas 1.3 and is rejected by pandas 2.0.
x_forecast = df.drop(columns=['Prediction']).to_numpy()[-forecast_out:]

# Print the Linear Regression predictions for the next 'n' days
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)

# Print Support Vector Regressor model predictions for the next 'n' days
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

0.9257683782701092
0.9387500379683038
[328.27275223 320.98306904 321.80148193 326.7786695  331.97472009
 330.24269354 332.64089308 334.68695435 331.64163359 337.27546827]
[311.32378401 327.10656771 328.44438279 332.28874554 336.96301879
 300.75454411 344.86293865 342.44480901 330.78860271 326.90272807]
