In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

In [2]:
## read the dataset
df = pd.read_csv('bitcoin.csv')

In [3]:
df.head()

Unnamed: 0,Date,Price
0,5/23/2019,7881.84668
1,5/24/2019,7987.371582
2,5/25/2019,8052.543945
3,5/26/2019,8673.21582
4,5/27/2019,8805.77832


In [11]:
## remove date column
df.drop(['Date'], axis=1, inplace=True)

In [12]:
df.head()

Unnamed: 0,Price
0,7881.84668
1,7987.371582
2,8052.543945
3,8673.21582
4,8805.77832


In [13]:
predictionDays = 30
# Create another column shifted 'n'  units up
df['Prediction'] = df[['Price']].shift(-predictionDays)
# show the first 5 rows
df.head()

Unnamed: 0,Price,Prediction
0,7881.84668,10701.69141
1,7987.371582,10855.37109
2,8052.543945,11011.10254
3,8673.21582,11790.91699
4,8805.77832,13016.23145


In [14]:
df.tail()

Unnamed: 0,Price,Prediction
362,9729.038086,
363,9522.981445,
364,9081.761719,
365,9182.577148,
366,9180.045898,


In [16]:
# Create the independent dada set
# Here we will convert the data frame into a numpy array and drp the prediction column
x = np.array(df.drop(['Prediction'],axis=1))
# Remove the last 'n' rows where 'n' is the predictionDays
x = x[:len(df)-predictionDays]
print(x)

[[ 7881.84668 ]
 [ 7987.371582]
 [ 8052.543945]
 [ 8673.21582 ]
 [ 8805.77832 ]
 [ 8719.961914]
 [ 8659.487305]
 [ 8319.472656]
 [ 8574.501953]
 [ 8564.016602]
 [ 8742.958008]
 [ 8208.995117]
 [ 7707.770996]
 [ 7824.231445]
 [ 7822.023438]
 [ 8043.951172]
 [ 7954.12793 ]
 [ 7688.077148]
 [ 8000.32959 ]
 [ 7927.714355]
 [ 8145.857422]
 [ 8230.923828]
 [ 8693.833008]
 [ 8838.375   ]
 [ 8994.488281]
 [ 9320.352539]
 [ 9081.762695]
 [ 9273.521484]
 [ 9527.160156]
 [10144.55664 ]
 [10701.69141 ]
 [10855.37109 ]
 [11011.10254 ]
 [11790.91699 ]
 [13016.23145 ]
 [11182.80664 ]
 [12407.33203 ]
 [11959.37109 ]
 [10817.15527 ]
 [10583.13477 ]
 [10801.67773 ]
 [11961.26953 ]
 [11215.4375  ]
 [10978.45996 ]
 [11208.55078 ]
 [11450.84668 ]
 [12285.95801 ]
 [12573.8125  ]
 [12156.5127  ]
 [11358.66211 ]
 [11815.98633 ]
 [11392.37891 ]
 [10256.05859 ]
 [10895.08984 ]
 [ 9477.641602]
 [ 9693.802734]
 [10666.48242 ]
 [10530.73242 ]
 [10767.13965 ]
 [10599.10547 ]
 [10343.10645 ]
 [ 9900.767578]
 [ 9811.

In [17]:
# Create the dependent data set
# convert the data frame into a numpy array
y = np.array(df['Prediction'])
# Get all the values except last 'n' rows
y = y[:-predictionDays]
print(y)

[10701.69141  10855.37109  11011.10254  11790.91699  13016.23145
 11182.80664  12407.33203  11959.37109  10817.15527  10583.13477
 10801.67773  11961.26953  11215.4375   10978.45996  11208.55078
 11450.84668  12285.95801  12573.8125   12156.5127   11358.66211
 11815.98633  11392.37891  10256.05859  10895.08984   9477.641602
  9693.802734 10666.48242  10530.73242  10767.13965  10599.10547
 10343.10645   9900.767578  9811.925781  9911.841797  9870.303711
  9477.677734  9552.860352  9519.145508  9607.423828 10085.62793
 10399.66895  10518.17481  10821.72656  10970.18457  11805.65332
 11478.16895  11941.96875  11966.40723  11862.93652  11354.02441
 11523.5791   11382.61621  10895.83008  10051.7041   10311.5459
 10374.33887  10231.74414  10345.81055  10916.05371  10763.23242
 10138.04981  10131.05566  10407.96484  10159.96094  10138.51758
 10370.82031  10185.5       9754.422852  9510.200195  9598.173828
  9630.664063  9757.970703 10346.76074  10623.54004  10594.49316
 10575.5332   10353.302

In [19]:
## train test split
xtrain, xtest, ytrain, ytest = train_test_split(x,y, test_size = 0.2)

# set the predictionDays array equal to last 30 rows from the original data set
predictionDays_array = np.array(df.drop(['Prediction'],axis=1))[-predictionDays:]
print(predictionDays_array)

[[7550.900879]
 [7569.936035]
 [7679.867188]
 [7795.601074]
 [7807.058594]
 [8801.038086]
 [8658.553711]
 [8864.766602]
 [8988.59668 ]
 [8897.46875 ]
 [8912.654297]
 [9003.070313]
 [9268.761719]
 [9951.518555]
 [9842.666016]
 [9593.896484]
 [8756.430664]
 [8601.795898]
 [8804.477539]
 [9269.987305]
 [9733.72168 ]
 [9328.197266]
 [9377.013672]
 [9670.739258]
 [9726.575195]
 [9729.038086]
 [9522.981445]
 [9081.761719]
 [9182.577148]
 [9180.045898]]


In [20]:
## SVR
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.00001)
svr_rbf.fit(xtrain, ytrain)

In [21]:
## testing
svr_rbf_confidence = svr_rbf.score(xtest,ytest)
print('SVR_RBF accuracy :',svr_rbf_confidence)

SVR_RBF accuracy : 0.06714839010117024


In [22]:
# print the predicted values
svm_prediction = svr_rbf.predict(xtest)
print(svm_prediction)
print()
print(ytest)

[ 7947.23326155  8478.86951313  7224.73542047  9109.70165028
  9457.16325245  8811.10927342  8429.32693939  6866.70046899
  8087.54133982  9302.9525339   9910.99650161  9787.45048323
  9842.16266651  9404.45236844  8803.54666917  8388.14522018
  8888.24846488  9669.11553519  8443.7823064   8841.6485231
  9225.09462863 10843.87443756  9412.71891944 10411.87323615
  9062.90449111  8875.12449717  7565.98912703  9356.21796592
  8057.59590952 10053.4150461   8637.535033    9908.19455123
  8991.10652324  9058.01090541  8245.32462016  9268.5617241
  9414.60032875  9096.18817503  9372.01339926  8425.66023119
 10497.80934224  9622.34242472  8408.72128428  8905.53067299
  8560.51409141  8033.87706841 10256.58751998  8463.37537927
  8041.08006087  9622.66804405  9169.84080304  8928.61752527
  8233.67167812  9908.01536554  7667.88109585  9456.48083526
  8113.21837389  8126.22880442  8618.2776009   8417.22107458
  9332.88970491  9339.63078889  8093.74256888  7189.78581256
  7978.82668318  9738.1983

In [23]:
# Print the model predictions for the next 30 days
svm_prediction = svr_rbf.predict(predictionDays_array)
print(svm_prediction)
print()
#Print the actual price for bitcoin for last 30 days
print(df.tail(predictionDays))

[8053.62792945 8125.39868321 8795.62675209 9547.46656086 9603.24848465
 9314.18824648 9334.83498632 9063.46386299 8340.11178484 8891.10862262
 8804.11355288 8252.29294455 7558.20941019 8438.39415427 8186.18529286
 8106.05241296 9403.15744095 9176.9819119  9304.1573534  7560.15854771
 8058.72033195 7688.36357089 7824.72838912 8066.31757944 8057.03985392
 8057.5328121  8103.01540517 7829.63827519 7534.87382892 7537.98450928]

           Price  Prediction
337  7550.900879         NaN
338  7569.936035         NaN
339  7679.867188         NaN
340  7795.601074         NaN
341  7807.058594         NaN
342  8801.038086         NaN
343  8658.553711         NaN
344  8864.766602         NaN
345  8988.596680         NaN
346  8897.468750         NaN
347  8912.654297         NaN
348  9003.070313         NaN
349  9268.761719         NaN
350  9951.518555         NaN
351  9842.666016         NaN
352  9593.896484         NaN
353  8756.430664         NaN
354  8601.795898         NaN
355  8804.477539     