# Load the database

In [7]:
import pandas as pd
import numpy as np

datafile = 'Binance_BTCUSDT_1h.csv'
df = pd.read_csv(datafile,header=(1))
df.head()

Unnamed: 0,Unix,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USDT,tradecount
0,1677884400000,2023-03-03 23:00:00,BTCUSDT,22320.99,22377.26,22313.17,22354.34,6800.13647,151941800.0,225360
1,1677880800000,2023-03-03 22:00:00,BTCUSDT,22239.92,22338.75,22224.68,22320.99,5433.81752,121037900.0,212620
2,1677877200000,2023-03-03 21:00:00,BTCUSDT,22307.08,22316.15,22147.0,22239.92,14773.22101,328693800.0,374089
3,1677873600000,2023-03-03 20:00:00,BTCUSDT,22357.83,22378.22,22274.0,22308.16,8238.69603,183999000.0,251722
4,1677870000000,2023-03-03 19:00:00,BTCUSDT,22313.2,22397.76,22289.84,22358.48,8452.36394,188740900.0,270570


In [8]:
df.shape

(48398, 10)

In [9]:
# We will predict the next hour close price
predictionHours = 1
# Create another column shifted 'n'  units up
df['NextHourClose'] = df[['Close']].shift(predictionHours)
# show the first 5 rows
df.head()

Unnamed: 0,Unix,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USDT,tradecount,NextHourClose
0,1677884400000,2023-03-03 23:00:00,BTCUSDT,22320.99,22377.26,22313.17,22354.34,6800.13647,151941800.0,225360,
1,1677880800000,2023-03-03 22:00:00,BTCUSDT,22239.92,22338.75,22224.68,22320.99,5433.81752,121037900.0,212620,22354.34
2,1677877200000,2023-03-03 21:00:00,BTCUSDT,22307.08,22316.15,22147.0,22239.92,14773.22101,328693800.0,374089,22320.99
3,1677873600000,2023-03-03 20:00:00,BTCUSDT,22357.83,22378.22,22274.0,22308.16,8238.69603,183999000.0,251722,22239.92
4,1677870000000,2023-03-03 19:00:00,BTCUSDT,22313.2,22397.76,22289.84,22358.48,8452.36394,188740900.0,270570,22308.16


In [10]:
#Removing the first row and the columns that we will not use
df.dropna(inplace = True)
df.drop(['Date','Symbol','Volume USDT'],axis = 1,inplace = True)
df.head()

Unnamed: 0,Unix,Open,High,Low,Close,Volume BTC,tradecount,NextHourClose
1,1677880800000,22239.92,22338.75,22224.68,22320.99,5433.81752,212620,22354.34
2,1677877200000,22307.08,22316.15,22147.0,22239.92,14773.22101,374089,22320.99
3,1677873600000,22357.83,22378.22,22274.0,22308.16,8238.69603,251722,22239.92
4,1677870000000,22313.2,22397.76,22289.84,22358.48,8452.36394,270570,22308.16
5,1677866400000,22413.38,22422.91,22229.25,22313.2,14044.11611,366559,22358.48


In [11]:
conditions = [
    (df['NextHourClose'] >= df['Close']),
    (df['NextHourClose'] < df['Close'])]

# create a list of the values we want to assign for each condition
values = ['Up', 'Down']

# create a new column and use np.select to assign values to it using our lists as arguments
df['UpOrDown'] = np.select(conditions, values)
df.head()

Unnamed: 0,Unix,Open,High,Low,Close,Volume BTC,tradecount,NextHourClose,UpOrDown
1,1677880800000,22239.92,22338.75,22224.68,22320.99,5433.81752,212620,22354.34,Up
2,1677877200000,22307.08,22316.15,22147.0,22239.92,14773.22101,374089,22320.99,Up
3,1677873600000,22357.83,22378.22,22274.0,22308.16,8238.69603,251722,22239.92,Down
4,1677870000000,22313.2,22397.76,22289.84,22358.48,8452.36394,270570,22308.16,Down
5,1677866400000,22413.38,22422.91,22229.25,22313.2,14044.11611,366559,22358.48,Up


In [12]:
#Drop NextHourClose Column and using the new dataframe
bitcoin_data = df.drop(['NextHourClose'],axis = 1)
bitcoin_data.head()

Unnamed: 0,Unix,Open,High,Low,Close,Volume BTC,tradecount,UpOrDown
1,1677880800000,22239.92,22338.75,22224.68,22320.99,5433.81752,212620,Up
2,1677877200000,22307.08,22316.15,22147.0,22239.92,14773.22101,374089,Up
3,1677873600000,22357.83,22378.22,22274.0,22308.16,8238.69603,251722,Down
4,1677870000000,22313.2,22397.76,22289.84,22358.48,8452.36394,270570,Down
5,1677866400000,22413.38,22422.91,22229.25,22313.2,14044.11611,366559,Up


In [14]:
import numpy as np
import matplotlib.pyplot as plt
from time import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# split X and y data
X = bitcoin_data.iloc[:, :-1].values
y = bitcoin_data.iloc[:, -1].values

In [15]:
# Scale the X data
sc = StandardScaler()
X_sc = sc.fit_transform(X)

In [18]:
#Splitting the data into train and test split
x_train, x_test, y_train, y_test = train_test_split(X_sc, y, test_size=0.2, random_state=42)

# Classification Using Logistic Regression

In [20]:
from sklearn.model_selection import KFold

cv = KFold(n_splits=10) # creates a folds sequence
vacc = []
for train_index, test_index in cv.split(X, y):
    x_train, x_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = LogisticRegression(solver = 'lbfgs', max_iter = 1000)
    model.fit(x_train,y_train)
    y_pred = model.predict(x_test) 
    score = accuracy_score(y_pred, y_test)
    print("Accuracy:",score)
    vacc.append(score)
print('\n Mean accuracy:', np.mean(vacc))
print('Standard deviation accuracy:', np.std(vacc))

Accuracy: 0.5055785123966943
Accuracy: 0.49669421487603305
Accuracy: 0.5024793388429752
Accuracy: 0.5132231404958678
Accuracy: 0.5144628099173554
Accuracy: 0.5119834710743801
Accuracy: 0.5111570247933884
Accuracy: 0.5174622855961976
Accuracy: 0.5058896466212027
Accuracy: 0.5246951849555693

 Mean accuracy: 0.5103625629569664
Standard deviation accuracy: 0.007589114827426654
