In [3]:
pip install quandl

Collecting quandl
  Downloading https://files.pythonhosted.org/packages/07/ab/8cd479fba8a9b197a43a0d55dd534b066fb8e5a0a04b5c0384cbc5d663aa/Quandl-3.5.0-py2.py3-none-any.whl
Collecting inflection>=0.3.1
  Downloading https://files.pythonhosted.org/packages/d5/35/a6eb45b4e2356fe688b21570864d4aa0d0a880ce387defe9c589112077f8/inflection-0.3.1.tar.gz
Building wheels for collected packages: inflection
  Building wheel for inflection (setup.py) ... [?25l[?25hdone
  Created wheel for inflection: filename=inflection-0.3.1-cp36-none-any.whl size=6076 sha256=df8156b328e79c2f9686a81bdcedc5366681c1e770d922fcbff1ada4de126532
  Stored in directory: /root/.cache/pip/wheels/9f/5a/d3/6fc3bf6516d2a3eb7e18f9f28b472110b59325f3f258fe9211
Successfully built inflection
Installing collected packages: inflection, quandl
Successfully installed inflection-0.3.1 quandl-3.5.0


In [0]:
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split


In [5]:
# Get the stock data: download the "WIKI/IBM" dataset through the quandl client.
# NOTE(review): in the recorded output further down, the last row is dated
# 2018-03-27, so this dataset may no longer be updated - confirm before
# treating the forecasts as current.
df = quandl.get("WIKI/IBM")
# Look at the data (first five rows)
print(df.head())


             Open   High    Low  ...   Adj. Low  Adj. Close  Adj. Volume
Date                             ...                                    
1962-01-02  578.5  578.5  572.0  ...  15.099257   15.099257     387200.0
1962-01-03  572.0  577.0  572.0  ...  15.099257   15.231243     288000.0
1962-01-04  577.0  577.0  571.0  ...  15.072860   15.079459     256000.0
1962-01-05  570.5  570.5  559.0  ...  14.756092   14.782489     363200.0
1962-01-08  559.5  559.5  545.0  ...  14.386530   14.505318     544000.0

[5 rows x 12 columns]


In [6]:
# Keep only the adjusted close price.
# Take an explicit copy: a later cell adds a column to this frame, and doing
# that on a slice of the downloaded frame can trigger pandas'
# SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
# Look at the new data
print(df.head())

            Adj. Close
Date                  
1962-01-02   15.099257
1962-01-03   15.231243
1962-01-04   15.079459
1962-01-05   14.782489
1962-01-08   14.505318


In [7]:
# Number of rows ('n' days) to predict out into the future
forecast_out = 30
# Create another column (the target or dependent variable): the adjusted close
# shifted 'n' rows up, so each row's Prediction is the price 'n' rows later and
# the last 'n' rows are NaN.
# assign() returns a new frame instead of writing into the existing one, which
# avoids SettingWithCopyWarning and keeps this cell safe to re-run.
df = df.assign(Prediction=df['Adj. Close'].shift(-forecast_out))
# Print the new data set
print(df.tail())


            Adj. Close  Prediction
Date                              
2018-03-21      156.69         NaN
2018-03-22      152.09         NaN
2018-03-23      148.89         NaN
2018-03-26      153.37         NaN
2018-03-27      151.91         NaN


In [8]:
# Create the independent data set (X).
# Drop the target column and convert what is left to a 2-D numpy array.
# `columns=` is used instead of a positional axis argument, which newer pandas
# releases no longer accept.
X = np.array(df.drop(columns=['Prediction']))
# Remove the last 'n' rows: they have no known target value (Prediction is NaN)
X = X[:-forecast_out]
print(X)

[[ 15.09925697]
 [ 15.23124348]
 [ 15.07945899]
 ...
 [147.59      ]
 [149.51      ]
 [151.4       ]]


In [9]:
# Create the dependent data set (y): the Prediction column as a numpy array,
# trimmed of its last 'n' entries (the trailing NaNs left by the shift).
y = np.array(df['Prediction'])[:-forecast_out]
print(y)

[ 14.46572171  14.43932441  14.3865298  ... 148.89       153.37
 151.91      ]


In [0]:
# Split the data into 80% training and 20% testing.
# A fixed random_state makes the shuffle, and therefore the scores computed
# from this split, reproducible across kernel restarts.
# NOTE(review): the rows are time-ordered and this split shuffles them, so test
# rows sit between training rows in time - consider shuffle=False for a
# chronological hold-out if look-ahead leakage matters.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [14]:
# Build the Support Vector Machine regressor (RBF kernel), then fit it on the
# training split.
svr_rbf = SVR(
    kernel='rbf',
    C=1e3,
    gamma=0.1,
)
svr_rbf.fit(X=x_train, y=y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [20]:
# Testing the model: score() returns the coefficient of determination (R^2)
# of the prediction on the held-out split. The best possible score is 1.0.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.9915701337062665


In [19]:
# Create the Linear Regression model ...
lr = LinearRegression()
# ... and fit it on the same training split used for the SVR
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [21]:
# Testing the model: score() returns the coefficient of determination (R^2)
# of the prediction on the held-out split. The best possible score is 1.0.
lr_confidence = lr.score(X=x_test, y=y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9904834220041365


In [28]:
# Set x_forecast to the last 'n' (forecast_out) rows of the Adj. Close column:
# the rows that have no known target and are used as inputs for the forecast.
# `columns=` is used instead of a positional axis argument, which newer pandas
# releases no longer accept.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[150.75]
 [154.76]
 [156.01]
 [156.18]
 [155.16]
 [153.96]
 [153.18]
 [155.52]
 [158.58]
 [156.55]
 [155.83]
 [153.81]
 [154.49]
 [156.95]
 [155.72]
 [158.32]
 [156.21]
 [159.31]
 [160.26]
 [159.32]
 [158.12]
 [159.61]
 [160.26]
 [157.35]
 [156.2 ]
 [156.69]
 [152.09]
 [148.89]
 [153.37]
 [151.91]]


In [32]:
# Linear regression predictions for the next 'n' days
lr_prediction = lr.predict(X=x_forecast)
print(lr_prediction)

# Support vector regressor predictions for the next 'n' days
svm_prediction = svr_rbf.predict(X=x_forecast)
print(svm_prediction)

[150.99897499 155.00772618 156.2573369  156.42728395 155.40760161
 154.20797531 153.42821823 155.76748949 158.82653654 156.79716873
 156.07739295 154.05802203 154.73781026 157.19704416 155.96742721
 158.56661751 156.45727461 159.5563092  160.50601334 159.56630608
 158.36667979 159.85621577 160.50601334 157.59691959 156.44727772
 156.93712513 152.33855768 149.13955423 153.61815905 152.15861373]
[148.52414153 155.3489372  157.34925126 157.14598052 156.36146515
 154.22181352 154.01437867 157.12209528 157.08242581 156.26459439
 157.40262539 154.18043183 154.78075459 154.97710311 157.3539952
 155.57883329 157.09535214 162.106615   166.2930036  162.1719459
 154.68604863 163.90538336 166.2930036  153.95437302 157.1126995
 155.82557044 151.13740145 152.49600442 154.11720219 150.4156706 ]


In [45]:
import datetime

# Build `numdays` timestamps, newest first: the current local date and time,
# then one day (24 hours) earlier for each subsequent entry.
a = datetime.datetime.today()
numdays = 30
dateList = [a - datetime.timedelta(days=offset) for offset in range(numdays)]
print(dateList)

[datetime.datetime(2020, 1, 7, 2, 16, 57, 816862), datetime.datetime(2020, 1, 6, 2, 16, 57, 816862), datetime.datetime(2020, 1, 5, 2, 16, 57, 816862), datetime.datetime(2020, 1, 4, 2, 16, 57, 816862), datetime.datetime(2020, 1, 3, 2, 16, 57, 816862), datetime.datetime(2020, 1, 2, 2, 16, 57, 816862), datetime.datetime(2020, 1, 1, 2, 16, 57, 816862), datetime.datetime(2019, 12, 31, 2, 16, 57, 816862), datetime.datetime(2019, 12, 30, 2, 16, 57, 816862), datetime.datetime(2019, 12, 29, 2, 16, 57, 816862), datetime.datetime(2019, 12, 28, 2, 16, 57, 816862), datetime.datetime(2019, 12, 27, 2, 16, 57, 816862), datetime.datetime(2019, 12, 26, 2, 16, 57, 816862), datetime.datetime(2019, 12, 25, 2, 16, 57, 816862), datetime.datetime(2019, 12, 24, 2, 16, 57, 816862), datetime.datetime(2019, 12, 23, 2, 16, 57, 816862), datetime.datetime(2019, 12, 22, 2, 16, 57, 816862), datetime.datetime(2019, 12, 21, 2, 16, 57, 816862), datetime.datetime(2019, 12, 20, 2, 16, 57, 816862), datetime.datetime(2019, 1