In [None]:
import seaborn as sns
from datetime import datetime
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    for file_name in names_in_folder:
        full_path = os.path.join(folder, file_name)
        print(full_path)



# 1. Read the Data

Read the datasets from the `g-research-crypto-forecasting` competition data.

In [None]:
# Locations of the competition files inside the Kaggle input directory.
train_csv_path = '/kaggle/input/g-research-crypto-forecasting/train.csv'
asset_details_csv_path = '/kaggle/input/g-research-crypto-forecasting/asset_details.csv'

# Load the training rows and the asset lookup table.
train_data = pd.read_csv(train_csv_path)
asset_ID = pd.read_csv(asset_details_csv_path)

In [None]:
# Display the asset lookup table loaded from asset_details.csv.
asset_ID

# **2. Data exploration**
## **Take the Bitcoin data**
Extract the rows for each Asset_ID; Bitcoin is Asset_ID == 1.

In [None]:
def _select_asset(asset_id):
    """Return the rows of ``train_data`` whose ``Asset_ID`` equals ``asset_id``."""
    return train_data[train_data['Asset_ID'] == asset_id]


# One frame per asset, listed in the same order as asset_details.csv.
Bitcoin_Cash = _select_asset(2)
Binance_Coin = _select_asset(0)
Bitcoin = _select_asset(1)
EOS_IO = _select_asset(5)
# Fixed: this previously filtered on Asset_ID 6, which duplicated Ethereum.
# Ethereum Classic is Asset_ID 7 in asset_details.csv.
Ethereum_Classic = _select_asset(7)
Ethereum = _select_asset(6)
Litecoin = _select_asset(9)
Monero = _select_asset(11)
TRON = _select_asset(13)
Stellar = _select_asset(12)
Cardano = _select_asset(3)
IOTA = _select_asset(8)
Maker = _select_asset(10)
Dogecoin = _select_asset(4)

# **3. Data Clean**
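Fill in the missing rows of each asset by reindexing it at a fixed step of 60 and padding (forward-filling) the gaps. More information can be found in the G-Research Crypto Forecasting tutorial notebook.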

In [None]:
def _fill_missing_minutes(asset_frame, step=60):
    """Reindex one asset's rows onto a regular timestamp grid and forward-fill gaps.

    Parameters
    ----------
    asset_frame : pandas.DataFrame
        Rows of a single asset. Either still carries a ``timestamp`` column
        (first run) or is already indexed by timestamp (cell re-run).
    step : int, default 60
        Spacing of the regular grid, in the same unit as ``timestamp``.

    Returns
    -------
    pandas.DataFrame
        A frame indexed by every ``step``-spaced timestamp between the first and
        last observed timestamp (inclusive); grid points with no original row
        are filled with the previous row's values (``method='pad'``).
    """
    # Fixed: the original reindexed on the default row-number index, so the
    # step of 60 walked over row positions rather than over timestamps.
    if 'timestamp' in asset_frame.columns:
        asset_frame = asset_frame.set_index('timestamp')
    # `+ step` makes the last observed timestamp part of the grid
    # (range() excludes its stop value).
    full_grid = range(asset_frame.index[0], asset_frame.index[-1] + step, step)
    return asset_frame.reindex(full_grid, method = 'pad')


Bitcoin_Cash = _fill_missing_minutes(Bitcoin_Cash)
Binance_Coin = _fill_missing_minutes(Binance_Coin)
Bitcoin = _fill_missing_minutes(Bitcoin)
EOS_IO = _fill_missing_minutes(EOS_IO)
Ethereum_Classic = _fill_missing_minutes(Ethereum_Classic)
Ethereum = _fill_missing_minutes(Ethereum)
Litecoin = _fill_missing_minutes(Litecoin)
Monero = _fill_missing_minutes(Monero)
TRON = _fill_missing_minutes(TRON)
Stellar = _fill_missing_minutes(Stellar)
Cardano = _fill_missing_minutes(Cardano)
IOTA = _fill_missing_minutes(IOTA)
Maker = _fill_missing_minutes(Maker)
Dogecoin = _fill_missing_minutes(Dogecoin)

In [None]:
# Keep an independent (deep) copy of the Bitcoin frame before `Bitcoin` is rebound below.
Bit_coin = Bitcoin.copy(deep=True)

In [None]:
# Reduce Bitcoin to its forward-filled Close price series.
# Derived from the untouched `Bit_coin` copy so the cell can be re-run safely
# (the original rebound `Bitcoin` from itself and failed on a second run, because
# a Series has no `.Close`). `.ffill()` replaces the deprecated
# `fillna(method='pad')` and fills the same way.
Bitcoin = Bit_coin['Close'].ffill()

# **4. Data Preprocessing for Deep Learning Model**
## Split the train_data and test_data
I prepare bit_train_data and bit_test_data.
The first 80% of the data is bit_train_data; the remaining 20% is bit_test_data.


In [None]:
import math
# Raw Close prices as a NumPy array.
Bitcoin_Target = Bitcoin.to_numpy()
# Number of leading samples that belong to the training split (80%, rounded up).
n_samples = len(Bitcoin_Target)
train_len = math.ceil(n_samples * 0.8)

In [None]:
# Turn the price array into a single-column 2-D array, the layout the scaler is fed below.
Bitcoin_Target = np.reshape(Bitcoin_Target, (-1, 1))

## Scale the train and test data

I use the MinMaxScaler method to scale the data

In [None]:
from sklearn.preprocessing import MinMaxScaler
Normalize = MinMaxScaler(feature_range=(0, 1))
# Fixed: fit the scaler on the training portion only, then apply it to the whole
# series. Fitting on all rows (as before) leaks the test set's min/max into the
# training data. Test values outside the training range now map outside [0, 1].
Normalize.fit(Bitcoin_Target[:train_len])
Bitcoin_Target = Normalize.transform(Bitcoin_Target)

In [None]:
# First `train_len` rows form the training split; everything after is the test split.
bit_train_data = Bitcoin_Target[:train_len, :]
bit_test_data = Bitcoin_Target[train_len:, :1]

## Prepare the Time series data
Each x_train sample is a window of 100 consecutive values (No.0 to No.99), and its y_train target is the value that follows the window (No.100).

In [None]:
# Every position from 100 onward is a target; the 100 values before it are its input window.
train_targets = range(100, len(bit_train_data))
x_train = np.array([bit_train_data[end - 100:end, 0] for end in train_targets])
y_train = np.array([bit_train_data[end, 0] for end in train_targets])

In [None]:
# Append a trailing axis of size 1: (samples, window) -> (samples, window, 1).
n_train_windows, train_window_len = x_train.shape[0], x_train.shape[1]
x_train = x_train.reshape((n_train_windows, train_window_len, 1))

Each x_test sample is a window of 100 consecutive values (No.0 to No.99), and its y_test target is the value that follows the window (No.100).

In [None]:
# Every position from 100 onward is a target; the 100 values before it are its input window.
test_targets = range(100, len(bit_test_data))
x_test = np.array([bit_test_data[end - 100:end, 0] for end in test_targets])
y_test = np.array([bit_test_data[end, 0] for end in test_targets])

In [None]:
# Append a trailing axis of size 1: (samples, window) -> (samples, window, 1).
n_test_windows, test_window_len = x_test.shape[0], x_test.shape[1]
x_test = x_test.reshape((n_test_windows, test_window_len, 1))

In [None]:
# Display the shape of the training input array after the reshape above.
x_train.shape

# **5. Deep Learning LSTM Model**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Conv1D, MaxPooling1D, TimeDistributed, Flatten
from tensorflow.keras.utils import plot_model

# Three stacked 50-unit LSTM layers followed by two dense layers; the last
# dense layer emits a single value per input window.
window_length = x_train.shape[1]
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(window_length, 1)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    Dense(25),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Train and keep the per-epoch history for the loss plot.
history = model.fit(x_train, y_train, epochs=30, batch_size=512)

In [None]:
# Plot the training loss recorded for each epoch.
sns.set()
df_history = pd.DataFrame(history.history)
epoch_axis = df_history.index
loss_per_epoch = df_history['loss']
sns.lineplot(x=epoch_axis, y=loss_per_epoch)

# **6. The Comparison between Reality and Prediction data**

#### Comparison between prediction (y_pred) and reality (y_test) after MinMaxScaler scaling

In [None]:
import matplotlib.pyplot as plt

# Predict on the test windows and compare against the (still scaled) targets.
y_pred = model.predict(x_test)
plt.scatter(y_test, y_pred)

# Red y = x reference line from 0 to 1.
unit_diagonal = [0, 1]
plt.plot(unit_diagonal, unit_diagonal, color='r')

plt.xlabel("Reality MinMax")
plt.ylabel("Predicted MinMax")
plt.title('Bitcoin')
plt.show()
plt.clf()

#### Comparison between predicted (y_pred) and real (y_test) prices, with the MinMaxScaler scaling inverted

In [None]:
# Convert predictions and targets from scaled values back to prices.
# Both results are (n, 1) column arrays, as before.
y_pred = Normalize.inverse_transform(model.predict(x_test))
# Fixed: take the targets from the still-scaled `bit_test_data` (rows 100 onward
# are exactly the y_test targets) instead of inverse-transforming `y_test` in
# place, so re-running this cell no longer un-scales the prices a second time.
y_test = Normalize.inverse_transform(bit_test_data[100:, :1])
# The two arrays must line up row for row for the plots and the frame below.
assert y_test.shape == y_pred.shape

In [None]:
# Scatter of real vs. predicted prices with a red y = x reference line.
plt.scatter(y_test, y_pred)
# Fixed: span the reference line over the observed price range instead of the
# hard-coded 20000..70000 interval, so it stays correct for any price level.
price_low = float(min(np.nanmin(y_test), np.nanmin(y_pred)))
price_high = float(max(np.nanmax(y_test), np.nanmax(y_pred)))
plt.plot([price_low, price_high], [price_low, price_high], color = 'r')
plt.xlabel("Reality Prices")
plt.ylabel("Predicted prices")
plt.title('Bitcoin')
plt.show()
plt.clf()

In [None]:
# The first prediction corresponds to position train_len + 100 of the Bitcoin
# series (the test split starts at train_len and the first window uses 100 values).
prediction_index = Bitcoin.index[train_len + 100:]
df_pred = pd.DataFrame(data=y_pred, index=prediction_index, columns=['Prediction'])
df_pred['Reality'] = y_test

In [None]:
# Display the frame holding the 'Prediction' and 'Reality' columns.
df_pred

In [None]:
def log_return(close, periods=15):
    """Return the log return of a price series over a fixed lag.

    Computes ``log(close[t]) - log(close[t - periods])`` for every position.

    Parameters
    ----------
    close : pandas.Series
        Price series (must support ``.diff`` after ``np.log``).
    periods : int, default 15
        Lag, in rows, over which the return is measured. The default keeps the
        original hard-coded behaviour.

    Returns
    -------
    pandas.Series
        Series with the same index as ``close``; the first ``periods`` entries
        are NaN because they have no earlier value to compare against.
    """
    return np.log(close).diff(periods = periods)

In [None]:
# Display the first column of the scaled series from train_len onward (the test portion).
Bitcoin_Target[train_len:, :1]

In [None]:
# Add a 15-step log-return column for the predicted and for the real prices.
# The first 15 rows are skipped and therefore stay NaN after index alignment.
for price_col, return_col in (('Prediction', 'Log_return_prediction'),
                              ('Reality', 'Log_return_reality')):
    df_pred[return_col] = log_return(df_pred[price_col]).iloc[15:]

In [None]:
# Overlay the predicted and real log returns on one wide figure.
plt.figure(figsize=(20, 10))
log_return_series = (
    ('Log_return_prediction', 'r', 'Prediction'),
    ('Log_return_reality', 'b', 'Reality'),
)
for column_name, line_color, line_label in log_return_series:
    plt.plot(df_pred[column_name], color=line_color, label=line_label, alpha=0.5)
plt.title('Log_Return Between the Prediction and Reality')
plt.legend()
plt.show()

### Comparison of the Bitcoin prediction and reality

In [None]:
# Plot the last 100000 Close values together with the predicted and real test prices.
plt.figure(figsize=(30, 15))
recent_close = Bitcoin.iloc[-100000:]
sns.lineplot(x=recent_close.index, y=recent_close.values, label='The Bitcoin Close')
sns.lineplot(x=df_pred.index, y=df_pred['Prediction'], label='Prediction', color='r')
sns.lineplot(x=df_pred.index, y=df_pred['Reality'], label='Reality', color='g')
plt.legend()
plt.show()