In [1]:
#Install the dependencies
import quandl
import datetime
import pandas as pd
import yfinance as yf
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [2]:
#df = yf.download("CL=F", period="max")

dateStart = datetime.datetime.strptime('2017-01-01', '%Y-%m-%d').date()
dateEnd = datetime.datetime.strptime('2018-07-11', '%Y-%m-%d').date()

dateEnd = dateEnd + datetime.timedelta(days=300)

df2 = yf.download('CL=F', 
                start=dateStart.strftime('%Y-%m-%d'), 
                end=dateEnd.strftime('%Y-%m-%d'), 
                progress=False)
df2['Change'] = 100* ((df2['Adj Close'] / df2['Open']) - 1)
df = df2[['Change']]

In [3]:
# Get the Adjusted Close Price 
df = df[['Change']] 
# Take a look at the new data 
print(df.head())

              Change
Date                
2017-01-03 -3.450182
2017-01-04  1.466940
2017-01-05  0.693012
2017-01-06  0.483905
2017-01-09 -3.330234


In [4]:
# A variable for predicting 'n' days out into the future
forecast_out = int(len(df) * 0.2) 
#Create another column (the target ) shifted 'n' units up
df['Prediction'] = df[['Change']].shift(-forecast_out)
#print the new data set
print(df)

              Change  Prediction
Date                            
2017-01-03 -3.450182   -1.995015
2017-01-04  1.466940   -1.959434
2017-01-05  0.693012    0.446539
2017-01-06  0.483905    0.490652
2017-01-09 -3.330234    0.509734
...              ...         ...
2019-04-30  0.519028         NaN
2019-05-01  0.315453         NaN
2019-05-02 -2.829743         NaN
2019-05-03  0.633630         NaN
2019-05-06  1.334852         NaN

[583 rows x 2 columns]


In [5]:
### Create the independent data set (X)  #######
# Convert the dataframe to a numpy array
print(df.tail())

X = np.array(df.drop(['Prediction'],1))

#Remove the last '30' rows
X = X[:-forecast_out]
print(X)

### Create the dependent data set (y)  #####
# Convert the dataframe to a numpy array 
y = np.array(df['Prediction'])
# Get all of the y values except the last '30' rows
y = y[:-forecast_out]



              Change  Prediction
Date                            
2019-04-30  0.519028         NaN
2019-05-01  0.315453         NaN
2019-05-02 -2.829743         NaN
2019-05-03  0.633630         NaN
2019-05-06  1.334852         NaN
[[-3.45018248]
 [ 1.46693964]
 [ 0.69301168]
 [ 0.48390497]
 [-3.33023426]
 [-1.94868242]
 [ 2.834085  ]
 [ 1.22207257]
 [-1.28181021]
 [-0.13320589]
 [-2.74181   ]
 [-0.03891897]
 [ 1.88532049]
 [-1.08757137]
 [ 0.6053721 ]
 [-0.37771626]
 [ 1.54833782]
 [-1.06066895]
 [-0.97836396]
 [ 0.39924507]
 [ 2.12282559]
 [-0.05599921]
 [ 0.27943652]
 [-1.48671814]
 [-1.82536927]
 [ 1.35553984]
 [ 1.20298087]
 [ 1.3358402 ]
 [-1.61709841]
 [ 0.54810228]
 [ 0.20754832]
 [ 0.66025712]
 [-0.13091396]
 [ 1.08452101]
 [-1.23479209]
 [ 1.00167115]
 [-0.62580552]
 [ 0.05553273]
 [-0.05551923]
 [-0.22242619]
 [-2.01154413]
 [ 1.42639782]
 [ 0.01880454]
 [-0.0564205 ]
 [-4.75469235]
 [-1.87176905]
 [-2.25760717]
 [-0.1031976 ]
 [-1.50670698]
 [ 0.20509084]
 [-0.53050057]
 [-0

In [6]:
# Split the data into 80% training and 20% testing
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.01)

In [17]:
# Create and train the Support Vector Machine (Regressor) 
svr_rbf = SVR(kernel='rbf', C=1e6, gamma=0.999) 
svr_rbf.fit(x_train, y_train)

SVR(C=1000000.0, gamma=0.999)

In [18]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction. 
# The best possible score is 1.0
svm_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  -0.028274714030090298


In [19]:
# Set x_forecast equal to the last 30 rows of the original
x_forecast = np.array(df.drop(['Prediction'],1))[-int(len(df) * 0.2):]
print(len(x_forecast))

116


In [10]:
# Print support vector regressor model predictions for the next '30' days
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

[-8.46675814e+00  1.16175039e-01  6.39129209e-03  1.90714808e-01
  3.22235000e-01 -8.15805945e+00  2.52871066e-01 -8.25769243e-02
  3.00491477e-01  9.47865366e-02  1.48787293e-01  3.99760290e-01
  1.55109829e-01  3.89573412e-01 -4.04162524e-01  3.30643773e-01
  4.10554639e-01  5.49361229e-01  1.62822248e-01  8.57308301e-01
  1.72578618e-01 -4.26972523e-01 -2.41368221e+01  8.61880092e-01
  2.59603998e-01  1.77833914e-01 -2.17166696e+00 -1.43243383e+00
  1.93196815e-01  2.95847536e-01  3.74678662e-01  8.54668006e-02
  1.38835972e-01  2.58734079e-01 -8.19722658e-03 -7.58858788e-01
  6.46267322e-03  2.21261722e-01 -2.73316980e-02  7.96431427e-01
  9.77644371e-02  2.82745910e-01  5.16240924e-01  3.31724610e-01
  3.85686986e-01  6.21774120e-01  1.92019394e-01 -8.64594788e-02
  1.79156208e-01  3.43393514e-03  4.02344011e-01  2.37154424e-01
  1.96970349e-01  2.94851939e-01  1.82687291e-01 -1.36547573e-01
  3.90069636e-01  3.04980786e-01  5.40309904e-01  2.77836674e-01
  2.73806704e-02 -8.18206

In [11]:
boo = df.index[-int(len(df) * 0.2):]

print (len(svm_prediction))

print(len(boo))

svm_prediction = (svm_prediction) -15

116
116


In [12]:
import matplotlib.pyplot as plt
plt.plot(boo,svm_prediction, label = "SVM Predictions")
plt.plot(boo,df["Change"][-int(len(df) * 0.2):], label = "Real Prices")
plt.legend()
plt.show()



To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


<Figure size 640x480 with 1 Axes>

In [13]:
g= open("pred-svm.txt","w")

real = list(svm_prediction)

for i in range(0,len(svm_prediction)):
    g.write(str(real[i]) +",")

g.close()