In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.models import Sequential
from sklearn.preprocessing import RobustScaler
# from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.model_selection import train_test_split
# import tensorflow_probability as tfp
import gresearch_crypto
import gc


In [None]:
# Folder that holds the competition CSV files.
dir_path = '/kaggle/input/g-research-crypto-forecasting/'
# Training rows, indexed by their `timestamp` column.
train = pd.read_csv(dir_path + 'train.csv').set_index('timestamp')
assets = pd.read_csv(dir_path + 'asset_details.csv')
# Asset_ID -> rank, where rank is the position of the asset among the first
# 14 rows of supplemental_train.csv. A dict keyed by Asset_ID is required
# because `.map(assets_order)` looks its input up as a key: with the raw
# Series the key would be the row position, so an id would be mapped to
# "the id stored at that position" instead of to its own rank.
# Only the 14 needed rows of the single needed column are read.
first_asset_ids = pd.read_csv(
    dir_path + 'supplemental_train.csv', usecols=['Asset_ID'], nrows=14
).Asset_ID
assets_order = {asset_id: rank for rank, asset_id in enumerate(first_asset_ids)}

## Training data set: cleaning, feature engineering and model fitting

In [None]:
# Fix the NumPy and TensorFlow global random seeds to 1.
for seed_setter in (np.random.seed, tf.random.set_seed):
    seed_setter(1)

In [None]:
# Keep only the first 1,000,000 rows (by position) of the training frame.
train = train.iloc[:1_000_000]

In [None]:
# Largest and smallest finite VWAP value; NaN and +/-inf entries are ignored.
finite_vwap = train.loc[np.isfinite(train.VWAP), 'VWAP']
VWAP_MAX = np.max(finite_vwap)
VWAP_MIN = np.min(finite_vwap)
print(VWAP_MAX, VWAP_MIN, sep='\n')

In [None]:
# Replace +inf / -inf in VWAP with the finite max / min found earlier
# (np.nan_to_num also turns NaN into 0.0 by default). Popping the column and
# assigning it again places VWAP as the last column of the frame.
cleaned_vwap = np.nan_to_num(train.pop('VWAP'), posinf=VWAP_MAX, neginf=VWAP_MIN)
train['VWAP'] = cleaned_vwap
train.shape

In [None]:
# print("Data NULL: \n", train.isnull().sum())

In [None]:
# Fill missing targets with the mean target of the same asset.
# `transform` always returns a result aligned row-for-row with `train`;
# `groupby(...).apply(...)` does not guarantee that (newer pandas versions
# prepend the group key to the index), which breaks the assignment.
asset_target_mean = train.groupby('Asset_ID', sort=False)['Target'].transform('mean')
train['Target'] = train['Target'].fillna(asset_target_mean)

In [None]:
# Working copy holding only the columns needed to build per-row keys.
df = train[['Asset_ID', 'Target']].copy()
# timestamp -> running number, in order of first appearance in the index.
time = {timestamp: position for position, timestamp in enumerate(df.index.unique())}
df['id'] = df.index.map(time)

In [None]:
# Build the row key "<timestamp number>_<Asset_ID>" as a string.
group_part = df['id'].astype(str)
asset_part = df['Asset_ID'].astype(str)
df['id'] = group_part + '_' + asset_part

In [None]:
# Keep only the row keys of the rows that exist in the data, then drop the
# working copy.
ids = df['id'].copy()
del df

In [None]:
def add_features(df):
    """Add five derived candle features to ``df`` and return it.

    The frame is modified in place (the returned object is ``df`` itself).
    It must provide the columns ``Open``, ``High``, ``Low``, ``Close``,
    ``Volume`` and ``Count``. New columns, appended in this order:

    * ``upper``: ``High`` minus the larger of ``Close`` and ``Open``
    * ``lower``: the smaller of ``Close`` and ``Open`` minus ``Low``
    * ``range``: ``High - Low``
    * ``mean_trade``: ``Volume / Count``
    * ``log_price_change``: ``log(Close / Open)``
    """
    open_price, close_price = df['Open'], df['Close']
    high_price, low_price = df['High'], df['Low']

    derived = {
        'upper': high_price - np.maximum(close_price, open_price),
        'lower': np.minimum(close_price, open_price) - low_price,
        'range': high_price - low_price,
        'mean_trade': df['Volume'] / df['Count'],
        'log_price_change': np.log(close_price / open_price),
    }
    # Insert one column at a time so the column order stays fixed.
    for column_name, values in derived.items():
        df[column_name] = values
    return df
# Append the derived candle features to the training frame.
train = train.pipe(add_features)
train.shape

In [None]:
# train.head()

In [None]:
# Scale every column except Asset_ID and Target with a RobustScaler fitted on
# this frame.
# NOTE(review): the scaler is fitted on all rows, before the train/validation
# split made later in the notebook - confirm that is acceptable.
features = train.columns.drop(['Asset_ID', 'Target'])
feature_scaler = RobustScaler()
scaled_values = feature_scaler.fit_transform(train[features])
train[features] = scaled_values
train.head()

In [None]:
# Distinct timestamps of the training frame, in order of first appearance.
# `reindex` reads this module-level name to get the grid's first/last value.
index = train.index.unique()


def reindex(df):
    """Put one asset's rows on a regular grid with a step of 60.

    The grid runs from ``index[0]`` to ``index[-1]`` inclusive, where
    ``index`` is the module-level name. Grid points missing from ``df`` take
    the values of the nearest existing row; any NaN that remains is
    forward-filled and then backward-filled.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single asset (it is called once per ``Asset_ID`` group).

    Returns
    -------
    pandas.DataFrame
        The re-gridded frame.
    """
    full_grid = range(index[0], index[-1] + 60, 60)
    df = df.reindex(full_grid, method='nearest')
    # ffill()/bfill() are the supported spelling of the deprecated
    # fillna(method="ffill") / fillna(method="bfill").
    return df.ffill().bfill()


# Re-grid each asset separately, drop the group level that apply adds to the
# index, and restore timestamp order.
train = train.groupby('Asset_ID').apply(reindex).reset_index(0, drop=True).sort_index()
train.shape

In [None]:
# Attach the running timestamp number (group_num) from `time` to every row,
# drop rows whose timestamp has no entry in `time`, and store it as an integer.
train = train.assign(group_num=train.index.map(time)).dropna(subset=['group_num'])
train = train.astype({'group_num': 'int'})

# Flag rows: 1 when the key "<group_num>_<Asset_ID>" is present in `ids`,
# 0 for rows that were generated by the reindexing step.
row_keys = train['group_num'].astype(str) + '_' + train['Asset_ID'].astype(str)
train['is_real'] = row_keys.isin(ids) * 1

In [None]:
# Zero out every column (Target included) of the generated, non-real rows;
# only Asset_ID, group_num and is_real keep their values.
generated_rows = train['is_real'] == 0
features = train.columns.drop(['Asset_ID', 'group_num', 'is_real'])
train.loc[generated_rows, features] = 0

In [None]:
# Look up each row's Asset_ID in `assets_order` (derived from
# supplemental_train.csv), store the result as `asset_order`, then sort by
# timestamp number and by that key.
train = (
    train
    .assign(asset_order=train['Asset_ID'].map(assets_order))
    .sort_values(by=['group_num', 'asset_order'])
)
train.head(5)

In [None]:
# Regression target, copied so later changes to `train` do not affect it.
y_train = train['Target'].copy()
y_train.shape

In [None]:
# Columns that are not fed to the model: the target, bookkeeping columns and
# the raw / derived price columns listed here.
non_model_columns = ['Target', 'Asset_ID', 'Open', 'High', 'Low', 'is_real',
                     'group_num', 'asset_order', 'Close', 'upper', 'lower']
x_train = train.drop(columns=non_model_columns)
print(x_train.shape)
print(x_train.head(5))

In [None]:
# Turn the feature table into a 3-D array of shape (n, 6, 1), matching the
# model's input_shape of (6, 1).
x_train = np.reshape(x_train.to_numpy(), (-1, 6, 1))
x_train.shape

In [None]:
# Random 75 % / 25 % split into training and validation parts (fixed seed 2).
split_parts = train_test_split(x_train, y_train, test_size=0.25, random_state=2)
train_x, val_x, train_y, val_y = split_parts

In [None]:
# Three stacked LSTM layers (64, 32, 32 units) with dropout in between and a
# single linear output unit. Input samples have shape (6, 1).
model = keras.models.Sequential()
model.add(keras.layers.LSTM(64, return_sequences=True, input_shape=(6, 1)))
model.add(keras.layers.ReLU())
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(32, return_sequences=True, activation='relu'))
model.add(keras.layers.Dropout(0.5))
# Last recurrent layer returns only its final state.
model.add(keras.layers.LSTM(32))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(1))
model.summary()

In [None]:
# from keras.layers import Dense, LSTM, Dropout, GRU
# from keras.models import Sequential
# from tensorflow.keras.optimizers import SGD
# # The GRU architecture
# regressorGRU = Sequential()
# # First GRU layer with Dropout regularisation
# regressorGRU.add(GRU(units=64, return_sequences=True, input_shape=(train_x.shape[1],1), activation='ReLU'))
# regressorGRU.add(Dropout(0.2))
# # Second GRU layer
# regressorGRU.add(GRU(units=32, input_shape=(train_x.shape[1],1)))
# regressorGRU.add(Dropout(0.5))
# # # Third GRU layer
# # regressorGRU.add(GRU(units=50, return_sequences=True, input_shape=(train_x.shape[1],1), activation='tanh'))
# # regressorGRU.add(Dropout(0.2))
# # # Fourth GRU layer
# # regressorGRU.add(GRU(units=50, activation='tanh'))
# # regressorGRU.add(Dropout(0.2))
# # The output layer
# regressorGRU.add(Dense(units=1))


In [None]:
from keras.metrics import accuracy

# Adam optimiser, mean-squared-error loss, mean absolute error as a metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
compile_options = {'optimizer': optimizer, 'loss': 'mse', 'metrics': ['mae']}
model.compile(**compile_options)

# Stop once val_loss has not decreased for 5 consecutive epochs.
# NOTE(review): the fit call in this notebook runs for 5 epochs, so with
# patience=5 this callback cannot trigger - confirm the intended values.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=5,
    mode="min",
    monitor="val_loss",
)
model.summary()

In [None]:
# from keras.metrics import accuracy
# from tensorflow.keras import layers, Model
# # optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# # model = LSTM_model()
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# model.compile(optimizer = optimizer, loss='mse', metrics = ['mae'])

# early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
#                           mode="min",
#                           patience=5)
# # model.summary()

In [None]:
# Train on the training split only. Fitting on the full x_train / y_train
# would include the rows of (val_x, val_y) in the training data, so val_loss
# and val_mae would be measured on samples the model has already seen.
history = model.fit(
    train_x,
    train_y,
    batch_size=64,
    epochs=5,
    validation_data=(val_x, val_y),
    callbacks=[early_stopping],
    shuffle=True,
)

In [None]:
# Plot the training history: MAE on the left, loss on the right.
import matplotlib.pyplot as plt
figure = plt.figure(figsize=(20,7))

# (subplot position, training-history key, validation-history key, title, y label)
panels = (
    (121, 'mae', 'val_mae', "MAE Curve", "MAE"),
    (122, 'loss', 'val_loss', "Loss Curve", "Loss"),
)
for position, train_key, val_key, title, y_label in panels:
    axis = figure.add_subplot(position)
    # One curve for the training data, one for the validation data.
    axis.plot(history.epoch, history.history[train_key], label=train_key)
    axis.plot(history.epoch, history.history[val_key], label=val_key)
    axis.set_title(title, fontsize=18)
    axis.set_xlabel("Epochs", fontsize=15)
    axis.set_ylabel(y_label, fontsize=15)
    axis.grid(alpha=0.3)
    axis.legend()

plt.show()

## Test data set: feature preparation and submission loop

In [None]:
def add_features_test(df):
    """Add the derived candle features to a test batch.

    The test data must receive exactly the same features as the training
    data, so this delegates to ``add_features`` rather than keeping a second
    copy of the formulas. ``df`` is modified in place and returned.
    """
    return add_features(df)

def reindex_test(df):
    """Put one asset's test rows on a regular grid with a step of 60.

    The grid runs from ``index[0]`` to ``index[-1]`` inclusive, where
    ``index`` is the module-level name current at call time. Missing grid
    points take the values of the nearest existing row; any NaN that remains
    is forward-filled and then backward-filled.
    """
    df = df.reindex(range(index[0], index[-1] + 60, 60), method='nearest')
    # ffill()/bfill() are the supported spelling of the deprecated
    # fillna(method="ffill") / fillna(method="bfill").
    return df.ffill().bfill()

In [None]:
# NOTE(review): gresearch_crypto is already imported in the first cell of the
# notebook, so this import is redundant.
import gresearch_crypto
# Create the competition environment and get the iterator that the submission
# loop consumes.
env = gresearch_crypto.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test set and sample submission

In [None]:
# Submission loop: each iteration receives one batch (`test`) and the matching
# `submission` frame, repeats the training-time preparation on the batch,
# predicts with `model`, and registers the result with `env.predict`.
# The loop rebinds the notebook-level names VWAP_MAX, VWAP_MIN, tmp_VWAP, time,
# ids, features and index that were set while preparing the training data.
for i, (test, submission) in enumerate(iter_test): 
    # Replace +/-inf in VWAP by the finite max/min of this batch
    # (np.nan_to_num also turns NaN into 0.0 by default).
    VWAP_MAX =np.max(test[np.isfinite(test.VWAP)].VWAP)
    VWAP_MIN =np.min(test[np.isfinite(test.VWAP)].VWAP)
    tmp_VWAP = np.nan_to_num(test.VWAP, posinf=VWAP_MAX, neginf=VWAP_MIN)
    del test['VWAP']
    test['VWAP'] = tmp_VWAP
    # Build the "<index number>_<Asset_ID>" keys of the rows present in the batch.
    # NOTE(review): `timestamp` is deleted as a column further down, so
    # `test.index` is presumably not the timestamp here - confirm that `time`,
    # `index` and reindex_test behave as intended on this index.
    df = test[['Asset_ID']].copy()
    time = dict((i,j) for j,i in enumerate(df.index.unique()))
    df['id'] = df.index.map(time)

    df['id'] = df['id'].astype(str) + '_' + df['Asset_ID'].astype(str)
    ids = df.id.copy()
    del df
    
    test=add_features_test(test)
    
    # Scale every column except Asset_ID and row_id.
    # NOTE(review): a new RobustScaler is fitted on each batch, so the batch is
    # not scaled with the statistics used for the training data; if `timestamp`
    # is a column it is scaled as well - confirm this is intended.
    features = test.columns.drop(['Asset_ID','row_id'])
    test[features] = RobustScaler().fit_transform(test[features])
    
    # Re-grid every asset of the batch; reindex_test reads the `index` set here.
    index = test.index.unique()
    test=test.groupby('Asset_ID').apply(reindex_test).reset_index(0, drop=True).sort_index()
    
    # Attach the running index number (group_num) and drop rows without one.
    test['group_num'] = test.index.map(time)
    test = test.dropna(subset=['group_num'])
    test['group_num'] = test['group_num'].astype('int')

    # Flag rows created by the re-gridding step as non-real (is_real == 0).
    test['id'] = test['group_num'].astype(str) + '_' + test['Asset_ID'].astype(str)
    test['is_real'] = test.id.isin(ids)*1
    test = test.drop('id', axis=1)
    
    # Set all other columns of the non-real rows to 0.
    features_test = test.columns.drop(['Asset_ID','group_num','is_real'])
    test.loc[test.is_real==0, features_test]=0
    
    # Look up each Asset_ID in `assets_order` and sort the batch by the result.
    test['asset_order'] = test.Asset_ID.map(assets_order)
    test=test.sort_values(by=['asset_order'])
    
    # Remove every column that is not a model input, then reshape to (n, 6, 1).
    del test['row_id']
    del test['timestamp']
    test = test.drop(['Asset_ID','Open','High','Low', 'Close','group_num','is_real','asset_order','upper','lower'],axis = 1)
    test = test.to_numpy().reshape(-1,6,1)

    # Average the prediction array over axis 1 to get one value per row.
    # NOTE(review): test_y is assigned to `submission` by position after the
    # batch was re-gridded and re-sorted - confirm that the row count and row
    # order still match `submission`.
    test_y = np.mean(model.predict(test),axis =1)
    submission['Target'] = test_y
    print(submission)
    env.predict(submission)   # register your predictions