In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

plt.rcParams['figure.figsize'] = [10, 7]
%config InlineBackend.figure_format = 'retina'

In [2]:
df = pd.read_csv('../datasets/BTC-USD.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-12-09,7561.79541,7618.091797,7365.985352,7400.899414,7400.899414,17872021272
1,2019-12-10,7397.134277,7424.022949,7246.043945,7278.119629,7278.119629,18249031194
2,2019-12-11,7277.197754,7324.15625,7195.527344,7217.427246,7217.427246,16350490689
3,2019-12-12,7216.73877,7266.639648,7164.741211,7243.134277,7243.134277,18927080224
4,2019-12-13,7244.662109,7293.560547,7227.122559,7269.68457,7269.68457,17125736940


In [3]:
df_subset = df[['Date', 'Close']]

In [4]:
df_subset.rename(columns = {'Close':'Price',}, inplace = True)

In [5]:
df_subset.head()

Unnamed: 0,Date,Price
0,2019-12-09,7400.899414
1,2019-12-10,7278.119629
2,2019-12-11,7217.427246
3,2019-12-12,7243.134277
4,2019-12-13,7269.68457


In [6]:
prediction_days = 30

In [7]:
df_subset['Prediction'] = df_subset[['Price']].shift(-prediction_days)

In [8]:
df_subset.head()

Unnamed: 0,Date,Price,Prediction
0,2019-12-09,7400.899414,8079.862793
1,2019-12-10,7278.119629,7879.071289
2,2019-12-11,7217.427246,8166.554199
3,2019-12-12,7243.134277,8037.537598
4,2019-12-13,7269.68457,8192.494141


In [9]:
df_subset.tail()

Unnamed: 0,Date,Price,Prediction
362,2020-12-05,19154.230469,
363,2020-12-06,19345.121094,
364,2020-12-07,19191.630859,
365,2020-12-08,18321.144531,
366,2020-12-09,18200.414063,


In [10]:
x = np.array(df_subset.drop(['Date', 'Prediction'], 1))

In [11]:
x = x[:len(df_subset) - prediction_days]

In [12]:
print(x)

[[ 7400.899414]
 [ 7278.119629]
 [ 7217.427246]
 [ 7243.134277]
 [ 7269.68457 ]
 [ 7124.673828]
 [ 7152.301758]
 [ 6932.480469]
 [ 6640.515137]
 [ 7276.802734]
 [ 7202.844238]
 [ 7218.816406]
 [ 7191.158691]
 [ 7511.588867]
 [ 7355.628418]
 [ 7322.532227]
 [ 7275.155762]
 [ 7238.966797]
 [ 7290.088379]
 [ 7317.990234]
 [ 7422.652832]
 [ 7292.995117]
 [ 7193.599121]
 [ 7200.174316]
 [ 6985.470215]
 [ 7344.884277]
 [ 7410.656738]
 [ 7411.317383]
 [ 7769.219238]
 [ 8163.692383]
 [ 8079.862793]
 [ 7879.071289]
 [ 8166.554199]
 [ 8037.537598]
 [ 8192.494141]
 [ 8144.194336]
 [ 8827.764648]
 [ 8807.010742]
 [ 8723.786133]
 [ 8929.038086]
 [ 8942.808594]
 [ 8706.245117]
 [ 8657.642578]
 [ 8745.894531]
 [ 8680.875977]
 [ 8406.515625]
 [ 8445.43457 ]
 [ 8367.847656]
 [ 8596.830078]
 [ 8909.819336]
 [ 9358.589844]
 [ 9316.629883]
 [ 9508.993164]
 [ 9350.529297]
 [ 9392.875   ]
 [ 9344.365234]
 [ 9293.521484]
 [ 9180.962891]
 [ 9613.423828]
 [ 9729.801758]
 [ 9795.943359]
 [ 9865.119141]
 [10116.

In [13]:
y = np.array(df_subset['Prediction'])

In [14]:
y = y[:-prediction_days]

In [15]:
print(y)

[ 8079.862793  7879.071289  8166.554199  8037.537598  8192.494141
  8144.194336  8827.764648  8807.010742  8723.786133  8929.038086
  8942.808594  8706.245117  8657.642578  8745.894531  8680.875977
  8406.515625  8445.43457   8367.847656  8596.830078  8909.819336
  9358.589844  9316.629883  9508.993164  9350.529297  9392.875
  9344.365234  9293.521484  9180.962891  9613.423828  9729.801758
  9795.943359  9865.119141 10116.673828  9856.611328 10208.236328
 10326.054688 10214.379883 10312.116211  9889.424805  9934.433594
  9690.142578 10141.996094  9633.386719  9608.475586  9686.441406
  9663.181641  9924.515625  9650.174805  9341.705078  8820.522461
  8784.494141  8672.455078  8599.508789  8562.454102  8869.669922
  8787.786133  8755.246094  9078.762695  9122.545898  8909.954102
  8108.116211  7923.644531  7909.729492  7911.430176  4970.788086
  5563.707031  5200.366211  5392.314941  5014.47998   5225.629395
  5238.438477  6191.192871  6198.77832   6185.066406  5830.254883
  6416.314941

In [16]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y, test_size = 0.2)

In [27]:
prediction_days_array = np.array(df_subset.drop(['Date','Prediction'],1))[-prediction_days:]

In [28]:
print(prediction_days_array)

[[15290.902344]
 [15701.339844]
 [16276.34375 ]
 [16317.808594]
 [16068.138672]
 [15955.587891]
 [16716.111328]
 [17645.40625 ]
 [17804.005859]
 [17817.089844]
 [18621.314453]
 [18642.232422]
 [18370.001953]
 [18364.121094]
 [19107.464844]
 [18732.121094]
 [17150.623047]
 [17108.402344]
 [17717.414063]
 [18177.484375]
 [19625.835938]
 [18802.998047]
 [19201.091797]
 [19445.398438]
 [18699.765625]
 [19154.230469]
 [19345.121094]
 [19191.630859]
 [18321.144531]
 [18200.414063]]


In [29]:
# Create and Train the Support Vector Machine (Regression) using radial basis function
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.00001)

In [30]:
svr_rbf.fit(xtrain, ytrain)

SVR(C=1000.0, gamma=1e-05)

In [31]:
score = svr_rbf.score(xtest,ytest)

In [32]:
print('SVR_RBF accuracy :', score)

SVR_RBF accuracy : 0.5175095165455375


In [33]:
svm_prediction = svr_rbf.predict(xtest)

In [34]:
print(svm_prediction)

[11524.49104688  9026.99920117  9246.694786   11863.50420941
 12296.48872858 15413.674998    7893.78284111  9785.85441409
 15149.17815032  9309.92455085 10284.56120607 11424.61733414
 10557.55682332  9830.32804465  8227.63988413  9740.19730661
  7732.41009686 13338.92253334 10373.90497643  9664.41139937
  9224.38236285 11359.42107029  9376.70105126 10730.63720559
 10951.86530348  9023.35810726 10570.81538508  8898.87844333
  8536.97180906 10824.94461767 10519.49228821  8918.64283535
 10120.18363673  9372.04450475  9435.19569605 11884.77341353
 11446.46429594  9955.49241543 10475.20758682  7129.78408637
  9748.70092274  9147.85714213  9140.17460735  8222.52255954
 10470.24031607  9208.2654203   9224.98821591 10547.27772722
  9216.5703216   9374.43659384  9658.85121574 10242.18980253
  9564.64105633 14919.0421761  13383.22347052 13240.14076763
  9026.7706444  10038.53895666  9216.98895992  7721.35460312
  9367.18428866 11600.15076424  9134.37984631 10497.95809578
  9702.56391151  9228.15

In [35]:
print(ytest)

[11711.505859  8166.554199 11747.022461 11358.101563 12823.689453
 18364.121094  7189.424805 11410.525391 19625.835938  8784.494141
 11079.46582  10745.548828 11053.614258  9690.142578  9328.197266
 10208.236328  8658.553711 13031.173828 11991.233398  5922.042969
  9123.410156 10679.136719  9538.024414 15701.339844 16068.138672
  9081.761719  6791.129395  9842.666016  8897.46875   5392.314941
 10369.563477  9269.987305 11323.466797  5238.438477  9164.231445
 11483.359375 16716.111328 11779.773438  9905.166992  7096.18457
  9934.433594  8807.010742 11601.472656  8723.786133 11758.283203
  9613.423828  9392.875     9332.34082   5014.47998   6681.062988
  9656.717773  9648.717773  6865.493164 17108.402344 13950.300781
 19154.230469  8835.052734 11878.111328 11366.134766  8801.038086
  9190.854492 11414.03418   9143.582031 10538.459961  9633.386719
  5225.629395  9137.993164  7807.058594]


In [36]:
svm_prediction = svr_rbf.predict(prediction_days_array)

In [37]:
print(svm_prediction)

[13068.76069149 12492.67244139 10800.37616996 10796.51941787
 10908.06719229 11135.34874409 10792.06920401 10792.06717502
 10792.06717502 10792.06717502 10792.06717502 10792.06717502
 10792.06717502 10792.06717502 10792.06717502 10792.06717502
 10792.06717503 10792.06717507 10792.06717502 10792.06717502
 10792.06717502 10792.06717502 10792.06717502 10792.06717502
 10792.06717502 10792.06717502 10792.06717502 10792.06717502
 10792.06717502 10792.06717502]


In [40]:
print(df_subset.tail(30))

           Date         Price  Prediction
337  2020-11-10  15290.902344         NaN
338  2020-11-11  15701.339844         NaN
339  2020-11-12  16276.343750         NaN
340  2020-11-13  16317.808594         NaN
341  2020-11-14  16068.138672         NaN
342  2020-11-15  15955.587891         NaN
343  2020-11-16  16716.111328         NaN
344  2020-11-17  17645.406250         NaN
345  2020-11-18  17804.005859         NaN
346  2020-11-19  17817.089844         NaN
347  2020-11-20  18621.314453         NaN
348  2020-11-21  18642.232422         NaN
349  2020-11-22  18370.001953         NaN
350  2020-11-23  18364.121094         NaN
351  2020-11-24  19107.464844         NaN
352  2020-11-25  18732.121094         NaN
353  2020-11-26  17150.623047         NaN
354  2020-11-27  17108.402344         NaN
355  2020-11-28  17717.414063         NaN
356  2020-11-29  18177.484375         NaN
357  2020-11-30  19625.835938         NaN
358  2020-12-01  18802.998047         NaN
359  2020-12-02  19201.091797     

In [43]:
import session_info
session_info.show(html=False)

-----
matplotlib          3.5.2
numpy               1.19.5
pandas              1.1.5
session_info        1.0.0
sklearn             1.0.2
-----
IPython             7.31.1
jupyter_client      6.1.12
jupyter_core        4.10.0
jupyterlab          2.1.5
notebook            6.4.12
-----
Python 3.7.7 (default, May  6 2020, 11:45:54) [MSC v.1916 64 bit (AMD64)]
Windows-10-10.0.19041-SP0
-----
Session information updated at 2022-08-30 08:56
