# Bitcoin Price Prediction

## 1. Importing libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
import warnings
warnings.filterwarnings('ignore')

## 2. Importind data

In [5]:
# Read the data set
df = pd.read_csv('bitcoin.csv')

In [6]:
# First 5 rows
df.head()

Unnamed: 0,Date,Price
0,5/23/2019,7881.84668
1,5/24/2019,7987.371582
2,5/25/2019,8052.543945
3,5/26/2019,8673.21582
4,5/27/2019,8805.77832


In [7]:
# last 5 rows
df.tail()

Unnamed: 0,Date,Price
362,5/19/2020,9729.038086
363,5/20/2020,9522.981445
364,5/21/2020,9081.761719
365,5/22/2020,9182.577148
366,5/23/2020,9180.045898


In [8]:
# Remove the data column
df.drop('Date',axis=1,inplace=True)
df[:3]

Unnamed: 0,Price
0,7881.84668
1,7987.371582
2,8052.543945


In [9]:
# How many days the predict the price
prediction_days = 30

In [10]:
# Creating another column shifted n units up
df['Prediction'] = df[['Price']].shift(-prediction_days)

In [11]:
# Check first 5 rows
df.head()

Unnamed: 0,Price,Prediction
0,7881.84668,10701.69141
1,7987.371582,10855.37109
2,8052.543945,11011.10254
3,8673.21582,11790.91699
4,8805.77832,13016.23145


In [12]:
# Check last 5 rows
df.tail()

Unnamed: 0,Price,Prediction
362,9729.038086,
363,9522.981445,
364,9081.761719,
365,9182.577148,
366,9180.045898,


## 3. Creaing label

In [13]:
# create an independent dataset
#Here we will convert data into np.array and drop the prediction column
X = np.array(df.drop('Prediction',axis=1))

In [14]:
# remove last n rows where n is the prediction days
X = X[:len(df)-prediction_days]
print(X)

[[ 7881.84668 ]
 [ 7987.371582]
 [ 8052.543945]
 [ 8673.21582 ]
 [ 8805.77832 ]
 [ 8719.961914]
 [ 8659.487305]
 [ 8319.472656]
 [ 8574.501953]
 [ 8564.016602]
 [ 8742.958008]
 [ 8208.995117]
 [ 7707.770996]
 [ 7824.231445]
 [ 7822.023438]
 [ 8043.951172]
 [ 7954.12793 ]
 [ 7688.077148]
 [ 8000.32959 ]
 [ 7927.714355]
 [ 8145.857422]
 [ 8230.923828]
 [ 8693.833008]
 [ 8838.375   ]
 [ 8994.488281]
 [ 9320.352539]
 [ 9081.762695]
 [ 9273.521484]
 [ 9527.160156]
 [10144.55664 ]
 [10701.69141 ]
 [10855.37109 ]
 [11011.10254 ]
 [11790.91699 ]
 [13016.23145 ]
 [11182.80664 ]
 [12407.33203 ]
 [11959.37109 ]
 [10817.15527 ]
 [10583.13477 ]
 [10801.67773 ]
 [11961.26953 ]
 [11215.4375  ]
 [10978.45996 ]
 [11208.55078 ]
 [11450.84668 ]
 [12285.95801 ]
 [12573.8125  ]
 [12156.5127  ]
 [11358.66211 ]
 [11815.98633 ]
 [11392.37891 ]
 [10256.05859 ]
 [10895.08984 ]
 [ 9477.641602]
 [ 9693.802734]
 [10666.48242 ]
 [10530.73242 ]
 [10767.13965 ]
 [10599.10547 ]
 [10343.10645 ]
 [ 9900.767578]
 [ 9811.

In [15]:
# create dependent dataset
y = np.array(df.drop('Price',axis=1))
# get all the values except last n rows
y = y[:-prediction_days]

In [16]:
print(y)

[[10701.69141 ]
 [10855.37109 ]
 [11011.10254 ]
 [11790.91699 ]
 [13016.23145 ]
 [11182.80664 ]
 [12407.33203 ]
 [11959.37109 ]
 [10817.15527 ]
 [10583.13477 ]
 [10801.67773 ]
 [11961.26953 ]
 [11215.4375  ]
 [10978.45996 ]
 [11208.55078 ]
 [11450.84668 ]
 [12285.95801 ]
 [12573.8125  ]
 [12156.5127  ]
 [11358.66211 ]
 [11815.98633 ]
 [11392.37891 ]
 [10256.05859 ]
 [10895.08984 ]
 [ 9477.641602]
 [ 9693.802734]
 [10666.48242 ]
 [10530.73242 ]
 [10767.13965 ]
 [10599.10547 ]
 [10343.10645 ]
 [ 9900.767578]
 [ 9811.925781]
 [ 9911.841797]
 [ 9870.303711]
 [ 9477.677734]
 [ 9552.860352]
 [ 9519.145508]
 [ 9607.423828]
 [10085.62793 ]
 [10399.66895 ]
 [10518.17481 ]
 [10821.72656 ]
 [10970.18457 ]
 [11805.65332 ]
 [11478.16895 ]
 [11941.96875 ]
 [11966.40723 ]
 [11862.93652 ]
 [11354.02441 ]
 [11523.5791  ]
 [11382.61621 ]
 [10895.83008 ]
 [10051.7041  ]
 [10311.5459  ]
 [10374.33887 ]
 [10231.74414 ]
 [10345.81055 ]
 [10916.05371 ]
 [10763.23242 ]
 [10138.04981 ]
 [10131.05566 ]
 [10407.

## 4. Splitting data

In [19]:
from sklearn.model_selection import train_test_split

In [20]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [21]:
# Set the predictiondays array is equal to last 30 days from the original dataset
predictiondays_array = np.array(df.drop(['Prediction'],1))[-prediction_days:]
predictiondays_array

array([[7550.900879],
       [7569.936035],
       [7679.867188],
       [7795.601074],
       [7807.058594],
       [8801.038086],
       [8658.553711],
       [8864.766602],
       [8988.59668 ],
       [8897.46875 ],
       [8912.654297],
       [9003.070313],
       [9268.761719],
       [9951.518555],
       [9842.666016],
       [9593.896484],
       [8756.430664],
       [8601.795898],
       [8804.477539],
       [9269.987305],
       [9733.72168 ],
       [9328.197266],
       [9377.013672],
       [9670.739258],
       [9726.575195],
       [9729.038086],
       [9522.981445],
       [9081.761719],
       [9182.577148],
       [9180.045898]])

## 5. Imporing ML algorithm

In [22]:
from sklearn.svm import SVR

In [23]:
svr_rbf = SVR(kernel='rbf',C=1e3,gamma=0.00001)
svr_rbf.fit(X_train,y_train)

SVR(C=1000.0, gamma=1e-05)

In [25]:
# taring score
svr_rbf_train = svr_rbf.score(X_train,y_train)*100
print('The training score is :',round(svr_rbf_train,2))

The training score is : 27.37


In [26]:
# testing score
svr_rbf_test = svr_rbf.score(X_test,y_test)*100
print('The testing score is :',round(svr_rbf_test,2))

The testing score is : 8.13


In [27]:
# Print the predicted values
svr_pred = svr_rbf.predict(X_test)
svr_pred

array([ 7716.36796842,  8891.63923935,  8464.33006262,  9802.31253479,
        8281.42478239,  9060.88459202,  9173.18459597,  9207.94289241,
        9111.48711377,  8625.70780539,  8149.86766761,  9685.76558954,
        8549.00805673,  8578.62241451,  9118.25811831,  8056.47099026,
        8394.8739886 ,  9846.82111252,  9063.99826219,  9456.75332626,
        9255.66480648,  8804.19242546,  9119.17555829,  9892.20517464,
       10081.81674865,  9796.30397858,  8358.02136765,  8163.51188519,
        8819.9403434 , 10492.32117353,  8331.68268478,  8817.35891041,
        7986.663601  ,  8468.37965935,  9682.6559803 ,  8100.25724166,
        9119.74696623,  9876.85416373,  8862.83284755,  8732.16158943,
        8584.42504939,  8241.45199589,  9590.8579885 ,  8407.40229399,
       10346.73943987,  9514.72435759,  8522.81021879,  8381.98813238,
        8441.64182824,  7719.66107107,  9911.3651927 ,  9110.1157694 ,
        8040.38649147,  9944.48257821,  8786.31916745,  8643.24213938,
      

In [28]:
print(y_test)

[[ 7257.665039]
 [ 8897.46875 ]
 [ 9593.896484]
 [10518.17481 ]
 [ 8047.526855]
 [ 7269.68457 ]
 [ 8808.262695]
 [11959.37109 ]
 [ 8804.880859]
 [ 9726.575195]
 [10185.5     ]
 [10855.37109 ]
 [ 8486.993164]
 [ 9268.761719]
 [ 7047.916992]
 [ 8909.954102]
 [ 7911.430176]
 [ 7302.089355]
 [13016.23145 ]
 [ 9795.943359]
 [ 9256.148438]
 [ 9951.518555]
 [10141.99609 ]
 [10441.27637 ]
 [11478.16895 ]
 [10517.25488 ]
 [ 7973.20752 ]
 [ 9754.422852]
 [ 7271.78125 ]
 [ 9811.925781]
 [10358.04883 ]
 [ 9522.981445]
 [ 9358.589844]
 [ 7988.155762]
 [ 8309.286133]
 [ 8343.276367]
 [10256.05859 ]
 [ 8151.500488]
 [ 9003.070313]
 [ 7238.966797]
 [ 7909.729492]
 [10407.96484 ]
 [11862.93652 ]
 [ 8321.756836]
 [ 9607.423828]
 [ 7422.652832]
 [10181.6416  ]
 [ 7200.174316]
 [ 8393.041992]
 [ 9693.802734]
 [10594.49316 ]
 [ 6438.644531]
 [ 6416.314941]
 [10115.97559 ]
 [ 9729.038086]
 [ 7344.884277]
 [ 7550.900879]
 [ 8660.700195]
 [ 9856.611328]
 [ 8912.654297]
 [ 9663.181641]
 [10575.5332  ]
 [ 5014.

In [31]:
# print the model prediction for next 30 days
svr_prediction30 = svr_rbf.predict(predictiondays_array)
print(svr_prediction30)
print()
# print actual bitcoin price for last 30 days
print(df.tail(prediction_days))

[8033.89832549 8122.29297945 8916.02522332 9691.63095291 9737.1362752
 9069.63613669 9118.06125878 8956.5029197  8469.59818954 8862.6983156
 8810.1884842  8393.24135643 7593.98115122 8687.31071278 8345.28421554
 8109.06282905 9105.6299861  9111.99707114 9065.58460871 7596.08947433
 8061.19789471 7743.76877906 7903.44259103 8036.73394653 8052.91430847
 8055.60194373 8163.23169163 7967.77003119 7594.91674876 7599.73347791]

           Price  Prediction
337  7550.900879         NaN
338  7569.936035         NaN
339  7679.867188         NaN
340  7795.601074         NaN
341  7807.058594         NaN
342  8801.038086         NaN
343  8658.553711         NaN
344  8864.766602         NaN
345  8988.596680         NaN
346  8897.468750         NaN
347  8912.654297         NaN
348  9003.070313         NaN
349  9268.761719         NaN
350  9951.518555         NaN
351  9842.666016         NaN
352  9593.896484         NaN
353  8756.430664         NaN
354  8601.795898         NaN
355  8804.477539       