In [223]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [224]:
df = pd.read_csv("cryptodata/Binance_BTCUSDT_d.csv")

In [225]:
df = df.drop(['unix','symbol', 'open', 'high', 'Volume USDT', 'tradecount','low'], axis=1)

In [226]:
def label(x):
    if x>0:
        return 'BUY'
    return 'SELL'

In [227]:
df = df.reindex(index=df.index[::-1])
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')

In [228]:
df['MA5'] = df['close'].rolling(window=5).mean()
df['MA30'] = df['close'].rolling(window=30).mean()
df['pct_change'] = df["close"].pct_change(periods=7).shift(-7)
df['action'] = df['pct_change'].apply(label)

In [229]:
df = df.dropna()

In [231]:
df['action'] = df['action'].astype('category')
encode_map = {
    'BUY': 1,
    'SELL': 0
}
df['action'].replace(encode_map, inplace=True)

In [232]:
X = df.drop(['action','close'],axis=1)
y = df['action']

In [233]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)

In [235]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [191]:
EPOCHS = 50
BATCH_SIZE = 64
LEARNING_RATE = 0.001

In [192]:
## train data
class trainData(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)


train_data = trainData(torch.FloatTensor(X_train), 
                       torch.FloatTensor(y_train))
## test data    
class testData(Dataset):
    
    def __init__(self, X_data):
        self.X_data = X_data
        
    def __getitem__(self, index):
        return self.X_data[index]
        
    def __len__ (self):
        return len(self.X_data)
    

test_data = testData(torch.FloatTensor(X_test))

In [193]:
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1)

In [194]:
class binaryClassification(nn.Module):
    def __init__(self):
        super(binaryClassification, self).__init__()
        # Number of input features is 12.
        self.layer_1 = nn.Linear(4, 64) 
        self.layer_2 = nn.Linear(64, 64)
        self.layer_out = nn.Linear(64, 1) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)
        
    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        
        return x

In [195]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [196]:
model = binaryClassification()
model.to(device)
print(model)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

binaryClassification(
  (layer_1): Linear(in_features=4, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [197]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc

In [198]:
model.train()
for e in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        
        y_pred = model(X_batch)
        
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        

    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')

Epoch 001: | Loss: 0.45954 | Acc: 81.812
Epoch 002: | Loss: 0.27633 | Acc: 92.188
Epoch 003: | Loss: 0.20667 | Acc: 93.938
Epoch 004: | Loss: 0.17599 | Acc: 95.000
Epoch 005: | Loss: 0.15891 | Acc: 94.688
Epoch 006: | Loss: 0.13309 | Acc: 95.562
Epoch 007: | Loss: 0.12231 | Acc: 95.750
Epoch 008: | Loss: 0.12707 | Acc: 94.062
Epoch 009: | Loss: 0.10873 | Acc: 96.125
Epoch 010: | Loss: 0.09906 | Acc: 95.750
Epoch 011: | Loss: 0.09489 | Acc: 96.688
Epoch 012: | Loss: 0.10083 | Acc: 96.188
Epoch 013: | Loss: 0.09183 | Acc: 95.812
Epoch 014: | Loss: 0.09531 | Acc: 95.625
Epoch 015: | Loss: 0.08969 | Acc: 95.938
Epoch 016: | Loss: 0.09536 | Acc: 96.438
Epoch 017: | Loss: 0.08858 | Acc: 96.062
Epoch 018: | Loss: 0.11124 | Acc: 94.250
Epoch 019: | Loss: 0.10692 | Acc: 95.312
Epoch 020: | Loss: 0.10237 | Acc: 95.250
Epoch 021: | Loss: 0.07644 | Acc: 97.625
Epoch 022: | Loss: 0.09347 | Acc: 95.875
Epoch 023: | Loss: 0.06596 | Acc: 96.938
Epoch 024: | Loss: 0.10096 | Acc: 95.562
Epoch 025: | Los

In [199]:
y_pred_list = []
model.eval()
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

In [200]:
confusion_matrix(y_test, y_pred_list)

array([[195,   1],
       [ 72, 226]], dtype=int64)

In [201]:
print(classification_report(y_test, y_pred_list))

              precision    recall  f1-score   support

           0       0.73      0.99      0.84       196
           1       1.00      0.76      0.86       298

    accuracy                           0.85       494
   macro avg       0.86      0.88      0.85       494
weighted avg       0.89      0.85      0.85       494



In [207]:
df1 = pd.DataFrame(y_test)

In [210]:
df1['pred'] = y_pred_list

In [238]:
df1.tail(20)

Unnamed: 0_level_0,action,pred
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-27,1,1.0
2021-09-28,1,1.0
2021-09-29,1,1.0
2021-09-30,1,1.0
2021-10-01,1,1.0
2021-10-02,1,1.0
2021-10-03,1,1.0
2021-10-04,1,1.0
2021-10-05,1,1.0
2021-10-06,1,0.0
