# Bitcoin Historical Dataset

In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error,r2_score

from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()

import warnings
warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv("BTC-Hourly.csv") #https://www.kaggle.com/datasets/prasoonkottarathil/btcinusd?select=BTC-Hourly.csv

### EDA

In [3]:
df

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USD
0,1646092800,2022-03-01 00:00:00,BTC/USD,43221.71,43626.49,43185.48,43312.27,5.205632e+01,2.254677e+06
1,1646089200,2022-02-28 23:00:00,BTC/USD,43085.30,43364.81,42892.37,43178.98,1.068161e+02,4.612210e+06
2,1646085600,2022-02-28 22:00:00,BTC/USD,41657.23,44256.08,41650.29,42907.32,5.275406e+02,2.263535e+07
3,1646082000,2022-02-28 21:00:00,BTC/USD,41917.09,41917.09,41542.60,41659.53,6.975168e+01,2.905822e+06
4,1646078400,2022-02-28 20:00:00,BTC/USD,41361.99,41971.00,41284.11,41914.97,2.471517e+02,1.035935e+07
...,...,...,...,...,...,...,...,...,...
33254,1526378400,2018-05-15 10:00:00,BTC/USD,8708.32,8865.00,8695.11,8795.90,1.110127e+07,1.260690e+03
33255,1526374800,2018-05-15 09:00:00,BTC/USD,8728.49,8754.40,8701.35,8708.32,1.593992e+06,1.826200e+02
33256,1526371200,2018-05-15 08:00:00,BTC/USD,8739.00,8750.27,8660.53,8728.49,7.986063e+06,9.177900e+02
33257,1526367600,2018-05-15 07:00:00,BTC/USD,8740.99,8766.00,8721.11,8739.00,2.390399e+06,2.735800e+02


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33259 entries, 0 to 33258
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   unix        33259 non-null  int64  
 1   date        33259 non-null  object 
 2   symbol      33259 non-null  object 
 3   open        33259 non-null  float64
 4   high        33259 non-null  float64
 5   low         33259 non-null  float64
 6   close       33259 non-null  float64
 7   Volume BTC  33259 non-null  float64
 8   Volume USD  33259 non-null  float64
dtypes: float64(6), int64(1), object(2)
memory usage: 2.3+ MB


In [5]:
df.isnull().sum() #We examine the empty lines.

unix          0
date          0
symbol        0
open          0
high          0
low           0
close         0
Volume BTC    0
Volume USD    0
dtype: int64

### Feature Engineering

In [10]:
df["date"]=pd.to_datetime(df["date"])
df["day"]=(df["date"]).dt.day
df["month"]=(df["date"]).dt.month
df["year"]=(df["date"]).dt.year
df["time"]=(df["date"]).dt.time
del df["date"]
#We divide date into 4 as day, month, year and hour.

In [19]:
df['time']=df['time'].astype(str) #We change the type of time to string.
df["time"]=df.time.str.replace(':00','') #We delete the minute part of time.
df['day']=df['day'].astype(int)
df['month']=df['month'].astype(int)
df['year']=df['year'].astype(int)
df['time']=df['time'].astype(int)

In [20]:
df

Unnamed: 0,unix,symbol,open,high,low,close,Volume BTC,Volume USD,day,month,year,time
0,1646092800,BTC/USD,43221.71,43626.49,43185.48,43312.27,5.205632e+01,2.254677e+06,1,3,2022,0
1,1646089200,BTC/USD,43085.30,43364.81,42892.37,43178.98,1.068161e+02,4.612210e+06,28,2,2022,23
2,1646085600,BTC/USD,41657.23,44256.08,41650.29,42907.32,5.275406e+02,2.263535e+07,28,2,2022,22
3,1646082000,BTC/USD,41917.09,41917.09,41542.60,41659.53,6.975168e+01,2.905822e+06,28,2,2022,21
4,1646078400,BTC/USD,41361.99,41971.00,41284.11,41914.97,2.471517e+02,1.035935e+07,28,2,2022,20
...,...,...,...,...,...,...,...,...,...,...,...,...
33254,1526378400,BTC/USD,8708.32,8865.00,8695.11,8795.90,1.110127e+07,1.260690e+03,15,5,2018,10
33255,1526374800,BTC/USD,8728.49,8754.40,8701.35,8708.32,1.593992e+06,1.826200e+02,15,5,2018,9
33256,1526371200,BTC/USD,8739.00,8750.27,8660.53,8728.49,7.986063e+06,9.177900e+02,15,5,2018,8
33257,1526367600,BTC/USD,8740.99,8766.00,8721.11,8739.00,2.390399e+06,2.735800e+02,15,5,2018,7


In [21]:
abs(df.corr()["high"].sort_values(ascending=False)) #We look at their correlations.

high          1.000000
open          0.999958
close         0.999958
low           0.999905
unix          0.824581
year          0.801393
Volume USD    0.482761
time          0.000503
day           0.012292
month         0.039328
Volume BTC    0.324982
Name: high, dtype: float64

### Regression

In [22]:
x,y=df.drop(["low","close","open","high","symbol"],axis=1),df[["high"]]
x=scaler.fit_transform(x)
x.shape

(33259, 7)

In [23]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.20,random_state=42)

In [28]:
model=Sequential()
model.add(Dense(84,activation="relu"))
model.add(Dense(84,activation="relu"))
model.add(Dense(84,activation="relu"))
model.add(Dense(84,activation="relu"))
model.add(Dense(84,activation="relu"))
model.add(Dense(1))
model.compile(optimizer="adam",loss="mse")

In [29]:
model.fit(x_train,y_train,validation_data=(x_test,y_test),batch_size=128,epochs=100)
model.summary()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [30]:
tahmin=model.predict(x_test)



In [31]:
r2_score(tahmin,y_test)

0.9826427527846779

In [32]:
(mean_squared_error(tahmin,y_test))**0.5

2366.210763868902

In [None]:
tahmin=model.predict([])