In [2]:
import numpy as np 
import pandas as pd 

In [4]:
# Load the Bitcoin price history from the CSV file into the DataFrame df
df = pd.read_csv(filepath_or_buffer='BitcoinPrice.csv')
# Preview the first 7 rows
df.head(n=7)

Unnamed: 0,Date,Price
0,2018-08-25 00:00:00,6719.429231
1,2018-08-26 00:00:00,6673.274167
2,2018-08-27 00:00:00,6719.266154
3,2018-08-28 00:00:00,7000.04
4,2018-08-29 00:00:00,7054.276429
5,2018-08-30 00:00:00,6932.6625
6,2018-08-31 00:00:00,6981.946154


In [5]:
# Remove the Date column so that only the Price column remains.
# The column is named via the `columns=` keyword: pandas 2.0+ rejects a positional axis
# argument (the old `drop(labels, 1)` form raises TypeError there).
# Rebinding df instead of using inplace=True keeps the operation explicit.
df = df.drop(columns=['Date'])

In [6]:
# Preview the first 7 rows of the data set after the Date column was removed
df.head(n=7)

Unnamed: 0,Price
0,6719.429231
1,6673.274167
2,6719.266154
3,7000.04
4,7054.276429
5,6932.6625
6,6981.946154


In [7]:
# Forecast horizon: how many days ('n') into the future the model should predict
prediction_days = 30  # n = 30 days

# Build the target (dependent variable) column: each row receives the Price found
# 'n' rows further down, so the final 'n' rows have no target and become NaN
df['Prediction'] = df['Price'].shift(periods=-prediction_days)

In [8]:
# Preview the first 7 rows, now including the shifted Prediction column
df.head(n=7)

Unnamed: 0,Price,Prediction
0,6719.429231,6639.304167
1,6673.274167,6412.459167
2,6719.266154,6468.631667
3,7000.04,6535.476667
4,7054.276429,6677.3425
5,6932.6625,6550.474167
6,6981.946154,6593.135


In [9]:
# Preview the last 7 rows; their Prediction values are NaN because the shift
# reaches past the end of the data
df.tail(n=7)

Unnamed: 0,Price,Prediction
358,10295.1175,
359,10605.825833,
360,10746.507692,
361,10169.094167,
362,10030.746667,
363,10255.9775,
364,10158.540833,


In [10]:
#CREATE THE INDEPENDENT DATA SET (X)

# Drop the target column and convert the remaining feature column(s) to a 2-D numpy array.
# The column is named via `columns=`: pandas 2.0+ no longer accepts the axis as a
# positional argument (the old `drop(labels, 1)` form raises TypeError there).
X = np.array(df.drop(columns=['Prediction']))

# Remove the last 'n' rows (n = prediction_days): those rows have no known target value
X = X[:len(df)-prediction_days]
print(X)

[[ 6719.42923077]
 [ 6673.27416667]
 [ 6719.26615385]
 [ 7000.04      ]
 [ 7054.27642857]
 [ 6932.6625    ]
 [ 6981.94615385]
 [ 7100.94666667]
 [ 7247.93538462]
 [ 7260.94923077]
 [ 7326.8525    ]
 [ 7113.06923077]
 [ 6433.27166667]
 [ 6444.80416667]
 [ 6366.1075    ]
 [ 6286.42583333]
 [ 6297.87769231]
 [ 6296.32083333]
 [ 6273.1375    ]
 [ 6450.17923077]
 [ 6499.0625    ]
 [ 6518.655     ]
 [ 6480.64416667]
 [ 6400.60083333]
 [ 6296.63166667]
 [ 6335.82666667]
 [ 6418.56266667]
 [ 6669.99083333]
 [ 6709.3125    ]
 [ 6710.445     ]
 [ 6639.30416667]
 [ 6412.45916667]
 [ 6468.63166667]
 [ 6535.47666667]
 [ 6677.3425    ]
 [ 6550.47416667]
 [ 6593.135     ]
 [ 6590.96833333]
 [ 6562.64166667]
 [ 6470.4025    ]
 [ 6563.62833333]
 [ 6568.54916667]
 [ 6581.48666667]
 [ 6558.5375    ]
 [ 6618.56769231]
 [ 6621.71166667]
 [ 6563.00916667]
 [ 6248.63583333]
 [ 6260.53083333]
 [ 6260.64583333]
 [ 6299.39916667]
 [ 6452.57166667]
 [ 6596.61833333]
 [ 6596.27615385]
 [ 6568.04076923]
 [ 6487.44

In [11]:
#CREATE THE DEPENDENT DATA SET (y)

# Convert the Prediction column to a numpy array (NaN's included) and, in the same step,
# keep everything except the last 'n' rows (n = prediction_days), which hold the NaN targets
y = np.array(df['Prediction'])[:-prediction_days]
print(y)

[ 6639.30416667  6412.45916667  6468.63166667  6535.47666667
  6677.3425      6550.47416667  6593.135       6590.96833333
  6562.64166667  6470.4025      6563.62833333  6568.54916667
  6581.48666667  6558.5375      6618.56769231  6621.71166667
  6563.00916667  6248.63583333  6260.53083333  6260.64583333
  6299.39916667  6452.57166667  6596.61833333  6596.27615385
  6568.04076923  6487.44416667  6488.82583333  6531.60166667
  6498.48583333  6481.426       6508.31        6478.0825
  6473.75333333  6465.9175      6448.22166667  6382.66833333
  6309.45285714  6310.28416667  6342.28083333  6387.67416667
  6363.79583333  6391.87333333  6436.965       6445.35416667
  6538.79        6486.25166667  6411.28083333  6399.03333333
  6378.26833333  6401.93666667  6372.06333333  6176.155
  5615.18        5596.1925      5558.24333333  5606.04416667
  5303.9425      4671.97        4533.68083333  4548.7975
  4309.3375      4293.84083333  3823.51166667  3920.53666667
  3751.66833333  4103.45384615  4263.

In [12]:
# Split the data into 80% training and 20% testing
from sklearn.model_selection import train_test_split
# random_state pins the shuffle so the split -- and therefore the score and the predictions
# computed from it -- are identical on every Restart & Run All.
# NOTE(review): train_test_split shuffles rows by default; for time-ordered prices a
# chronological split (shuffle=False) may be more appropriate -- confirm intent.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
# Set prediction_days_array equal to the last 'n' rows (n = prediction_days) of the
# feature column(s), i.e. the most recent prices, which have no known target yet.
# The target column is dropped via `columns=`: pandas 2.0+ no longer accepts the axis as a
# positional argument (the old `drop(labels, 1)` form raises TypeError there).
prediction_days_array = np.array(df.drop(columns=['Prediction']))[-prediction_days:]
print(prediction_days_array)

[[ 9774.2575    ]
 [ 9725.4025    ]
 [ 9500.32416667]
 [ 9533.97933333]
 [ 9539.7125    ]
 [ 9873.81166667]
 [10088.8       ]
 [10478.90166667]
 [10790.63      ]
 [10826.275     ]
 [11713.16166667]
 [11759.01916667]
 [11703.73833333]
 [11803.88833333]
 [11816.9125    ]
 [11586.1725    ]
 [11377.80416667]
 [11397.80166667]
 [11144.38916667]
 [10450.81333333]
 [ 9988.9475    ]
 [10230.73333333]
 [10292.38333333]
 [10295.1175    ]
 [10605.82583333]
 [10746.50769231]
 [10169.09416667]
 [10030.74666667]
 [10255.9775    ]
 [10158.54083333]]


In [14]:
from sklearn.svm import SVR
# Build a Support Vector Regressor with a radial basis function (RBF) kernel
svr_rbf = SVR(
    kernel='rbf',
    C=1000.0,
    gamma=1e-5,
)
# Fit it on the training split; the fitted estimator is the cell's displayed result
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=1e-05,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [15]:
# Testing Model: evaluate the fitted regressor on the held-out test split.
# For a regressor, score() reports the coefficient of determination (R^2) rather than a
# classification accuracy. The best possible score is 1.0
svr_rbf_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svr_rbf accuracy: ", svr_rbf_confidence)

svr_rbf accuracy:  0.8120977092577375


In [16]:
# Predict the target for every row of the test split and print the predictions,
# followed by one blank separator line
svm_prediction = svr_rbf.predict(X=x_test)
print(svm_prediction, end='\n\n')

# Print the actual target values of the test split for comparison
print(y_test)

[ 4559.27598333  3818.67989359 10568.45897475  5151.1530661
  4863.97103351  7965.74610033  8021.55654248  6644.32754955
  6599.47661954  6103.47828233  6396.82164445  3912.59785467
  6643.18406502  7972.76891505  6326.06826031  6127.56769452
  6285.28220421  3818.64875789  6931.9609636   9855.4661402
  6206.54242993  7930.97494088  6892.82144165  4317.42999987
  5167.27973975  6615.49917955  6536.4277762   6278.50870946
  3909.39016582  4244.13203853 10902.41917998  5160.91330013
  7134.19653039  3924.96950341 10252.53983244  4551.80479504
  4904.10877503  3923.52872182  9979.28694827  4847.60579372
  3901.55015203  6558.79959637 10546.82314933 10828.01893951
  6373.33498045  3820.05090135  6245.72099164  7633.94235137
  3987.57843098  4028.53319179  6100.60745639  3892.28291328
  7632.22772496  6576.66437174  6591.98571885  9617.41623476
  7209.89752889  6119.22295523 10565.0375113   9976.22341067
  6100.7333915   5115.96814723  6100.92686717  3899.24046305
  6109.35369421  3843.0503

In [17]:
# Feed the last 'n' known prices (n = prediction_days = 30) to the model and print
# its predictions for the following 'n' days
svm_prediction = svr_rbf.predict(X=prediction_days_array)
print(svm_prediction)

[10257.66572481 10260.06908189  9996.54532167 10054.05100244
 10063.88470651 10203.56136976  9983.14048108 10330.4457072
  9931.80511059  9847.9983643  10165.13691112 10011.33338888
 10195.09786995  9850.18570051  9801.82017701 10501.9597533
 10543.32582816 10571.81702497  9976.57980318 10286.64535379
 10092.56564631  9931.66352547  9990.11788116  9993.91694453
 10352.80669064 10048.28001824  9926.92611175 10045.65388507
  9949.06107324  9930.95193263]


In [18]:
# Show the last 'n' rows of the data set (n = prediction_days = 30). These are the most
# recent known prices -- the same rows that were passed to the model as input -- and their
# Prediction column is NaN because the shifted target reaches past the end of the data.
df.tail(n=prediction_days)

Unnamed: 0,Price,Prediction
335,9774.2575,
336,9725.4025,
337,9500.324167,
338,9533.979333,
339,9539.7125,
340,9873.811667,
341,10088.8,
342,10478.901667,
343,10790.63,
344,10826.275,
