In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import random
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

The objective of this notebook is to build an LSTM model that predicts the Binance Coin price and the change in that price.
The model needs X and Y data.
X: [X1, X2, X3], where X1 is the Binance Coin price on day one, X2 is the price on day two, and so on.
Y: [X4], the Binance Coin price on the fourth day.
The idea is that the LSTM model receives a sample X and predicts the price for the next day. For example, following the example above, it receives the prices of the
first three days and predicts the price for the fourth day.


In [None]:
# Load the historic Binance Coin table from the Kaggle input dataset.
data = pd.read_csv(
    '/kaggle/input/binance-coin-data/Binance Coin - Historic data.csv'
)

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Check for missing values: evaluates to True when at least one cell of `data` is null.
data.isnull().values.any()

In [None]:
# Pull out the two columns that are modelled below: the price and its percentage change.
data_price, data_change = data['Price(in dollars)'], data['Change%']

In [None]:
# Plot the raw price column against its row index.
plt.plot(data_price, label='Price Evolution')
plt.title('Binance Coin price')
plt.xlabel('Row index')
plt.ylabel('Price (in dollars)')
plt.legend()  # without this call the `label` above is never displayed
plt.show()

In [None]:
# Re-select the column so this cell can be re-run on its own, then plot it
# against its row index.
data_change = data['Change%']
plt.plot(data_change, label='Changing Evolution')
plt.title('Binance Coin price change')
plt.xlabel('Row index')
plt.ylabel('Change%')
plt.legend()  # without this call the `label` above is never displayed
plt.show()

In [None]:
# Turn each series into an (n_samples, 1) column array, the 2-D layout that
# MinMaxScaler.fit_transform is given further down.
# reshape(-1, 1) is vectorised and idempotent: re-running this cell keeps the
# (n, 1) shape, whereas a per-element comprehension would nest the already
# reshaped array one level deeper.
data_price = np.array(data_price).reshape(-1, 1)
data_change = np.array(data_change).reshape(-1, 1)

In [None]:
def sliding_windows(data, seq_length):
    """Split a sequence into overlapping input windows and next-step targets.

    For every start position ``i`` the input sample is
    ``data[i : i + seq_length]`` and its target is ``data[i + seq_length]``,
    i.e. the element that immediately follows the window.

    Parameters
    ----------
    data : sequence or np.ndarray
        Ordered observations; indexed and sliced along the first axis.
    seq_length : int
        Number of consecutive observations in each input window.

    Returns
    -------
    tuple of np.ndarray
        ``(x, y)`` where ``x`` stacks the windows and ``y`` stacks the
        matching targets. Both hold ``len(data) - seq_length`` samples, or
        are empty when ``data`` is not longer than ``seq_length``.
    """
    # The last valid start index is len(data) - seq_length - 1, so there are
    # len(data) - seq_length windows. (Subtracting an extra 1 here would
    # silently drop the final window/target pair.)
    n_windows = len(data) - seq_length

    x = [data[start:start + seq_length] for start in range(n_windows)]
    y = [data[start + seq_length] for start in range(n_windows)]

    return np.array(x), np.array(y)

In [None]:
# Normalise the price column with MinMaxScaler.
# NOTE(review): the scaler is fitted on the whole series, so the held-out
# samples influence the scaling; fit on the training rows only if a strict
# evaluation is needed.
sc = MinMaxScaler()
training_data = sc.fit_transform(data_price)

# Hyper-parameter: how many consecutive observations are fed to the LSTM.
# Example: with seq_length = 2 a sample is X = (X1, X2) and its target Y is X3.
# The X/Y separation itself happens inside sliding_windows.
seq_length = 6
x, y = sliding_windows(training_data, seq_length)

train_size = int(len(y) * 0.70)  # 70% train
test_size = len(y) - train_size

# Full data set as tensors (used later to plot predictions over every window).
dataX = torch.Tensor(x)
dataY = torch.Tensor(y)

# Seed the split so that Restart & Run All selects the same samples every time.
random.seed(42)
random_indexs_train = random.sample(range(0, len(x)), train_size)
# Membership tests against a set are O(1); testing against the list made
# building the test indices quadratic in the number of windows.
train_index_lookup = set(random_indexs_train)
random_indexs_test = [i for i in range(len(x)) if i not in train_index_lookup]
assert len(random_indexs_train) + len(random_indexs_test) == len(x)

# NOTE(review): windows overlap, so a random split places near-duplicate
# neighbours in both sets; a chronological split would be a stricter test.
trainX = torch.Tensor(x[random_indexs_train])
trainY = torch.Tensor(y[random_indexs_train])

testX = torch.Tensor(x[random_indexs_test])
testY = torch.Tensor(y[random_indexs_test])


Model


In [None]:
class LSTM(nn.Module):
    """LSTM followed by a linear layer that maps the final hidden state to the output.

    Parameters
    ----------
    num_classes : int
        Size of the output produced for each input sequence.
    input_size : int
        Number of features per time step.
    hidden_size : int
        Number of features in the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    seq_length : int, optional
        Stored on the instance for reference only; ``forward`` does not use it.
        It is an explicit argument so that building the model no longer depends
        on a notebook-level ``seq_length`` variable already being defined.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length=None):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one prediction per sequence in ``x``.

        ``x`` is expected as ``(batch, seq, input_size)``; the result has
        shape ``(batch, num_classes)``.
        """
        # Zero initial hidden and cell states, created with the input's dtype
        # and device so the module also works after .to(device) / .double().
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        # Propagate the input through the LSTM; only the final hidden state is used.
        _, (h_n, _) = self.lstm(x, (h_0, c_0))

        # h_n is (num_layers, batch, hidden). Take the top layer only:
        # flattening every layer with view(-1, hidden) would emit
        # num_layers * batch rows whenever num_layers > 1.
        last_hidden = h_n[-1]

        return self.fc(last_hidden)

Training

In [None]:
# Hyper-parameters for the price model.
num_epochs = 1500
learning_rate = 0.0075

input_size = 1
hidden_size = 8
num_layers = 1
num_classes = 1

# Seed torch before building the model so the weight initialisation (and
# therefore the printed loss curve) is the same on every Restart & Run All.
torch.manual_seed(42)
lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

# Loss: mean squared error. Optimiser: Adam.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

lstm.train()  # make training mode explicit
for epoch in range(num_epochs):
    # Full-batch training: every epoch is one pass over all of trainX.
    optimizer.zero_grad()
    outputs = lstm(trainX)

    loss = criterion(outputs, trainY)
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

Evaluation

In [None]:
lstm.eval()
# Inference only: no_grad() avoids building an autograd graph for these passes.
with torch.no_grad():
    # Predictions over every window (training and held-out samples together).
    train_predict = lstm(dataX)
    # Error on the samples the model was not trained on, in scaled units.
    test_loss = criterion(lstm(testX), testY).item()
print("Held-out MSE (scaled): %1.5f" % test_loss)

data_predict = train_predict.detach().numpy()
dataY_plot = dataY.detach().numpy()

# Undo the min-max scaling so both curves are in the original price units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

plt.plot(dataY_plot, label='Actual')
plt.plot(data_predict, label='Predicted')
plt.suptitle('Price prediction')
plt.xlabel('Window index')
plt.ylabel('Price (in dollars)')
plt.legend()
plt.show()

Training the model to predict the change

In [None]:
# Normalise the change column with MinMaxScaler.
# NOTE(review): the scaler is fitted on the whole series, so the held-out
# samples influence the scaling; fit on the training rows only if a strict
# evaluation is needed.
sc = MinMaxScaler()
training_data = sc.fit_transform(data_change)

# Hyper-parameter: how many consecutive observations are fed to the LSTM.
# Example: with seq_length = 2 a sample is X = (X1, X2), the change values of
# two consecutive rows, and its target Y is the following value X3.
# The X/Y separation itself happens inside sliding_windows.
seq_length = 6
x, y = sliding_windows(training_data, seq_length)

train_size = int(len(y) * 0.70)  # 70% train
test_size = len(y) - train_size

# Full data set as tensors (used later to plot predictions over every window).
dataX = torch.Tensor(x)
dataY = torch.Tensor(y)

# Seed the split so that Restart & Run All selects the same samples every time.
random.seed(42)
random_indexs_train = random.sample(range(0, len(x)), train_size)
# Membership tests against a set are O(1); testing against the list made
# building the test indices quadratic in the number of windows.
train_index_lookup = set(random_indexs_train)
random_indexs_test = [i for i in range(len(x)) if i not in train_index_lookup]
assert len(random_indexs_train) + len(random_indexs_test) == len(x)

# NOTE(review): windows overlap, so a random split places near-duplicate
# neighbours in both sets; a chronological split would be a stricter test.
trainX = torch.Tensor(x[random_indexs_train])
trainY = torch.Tensor(y[random_indexs_train])

testX = torch.Tensor(x[random_indexs_test])
testY = torch.Tensor(y[random_indexs_test])

In [None]:
# Hyper-parameters for the change model.
num_epochs = 2500
learning_rate = 0.01

input_size = 1
hidden_size = 10
num_layers = 1
num_classes = 1

# Seed torch before building the model so the weight initialisation (and
# therefore the printed loss curve) is the same on every Restart & Run All.
torch.manual_seed(42)
lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

# Loss: mean squared error. Optimiser: Adam.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

lstm.train()  # make training mode explicit
for epoch in range(num_epochs):
    # Full-batch training: every epoch is one pass over all of trainX.
    optimizer.zero_grad()
    outputs = lstm(trainX)

    loss = criterion(outputs, trainY)
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

In [None]:
lstm.eval()
# Inference only: no_grad() avoids building an autograd graph for these passes.
with torch.no_grad():
    # Predictions over every window (training and held-out samples together).
    train_predict = lstm(dataX)
    # Error on the samples the model was not trained on, in scaled units.
    test_loss = criterion(lstm(testX), testY).item()
print("Held-out MSE (scaled): %1.5f" % test_loss)

data_predict = train_predict.detach().numpy()
dataY_plot = dataY.detach().numpy()

# Undo the min-max scaling so both curves are in the original Change% units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

plt.plot(dataY_plot, label='Actual')
plt.plot(data_predict, label='Predicted')
plt.suptitle('Change prediction')
plt.xlabel('Window index')
plt.ylabel('Change%')
plt.legend()
plt.show()