In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.models import Sequential
from sklearn.preprocessing import RobustScaler
# from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.model_selection import train_test_split
# import tensorflow_probability as tfp
# import gresearch_crypto
import gc

In [None]:
dir_path = '/kaggle/input/g-research-crypto-forecasting/'
# Training rows, keyed by their timestamp.
train = pd.read_csv(dir_path + 'train.csv').set_index('timestamp')
assets = pd.read_csv(dir_path + 'asset_details.csv' )
# Asset ordering taken from the first 14 rows of the supplemental file
# (presumably one row per asset -- TODO confirm). It is stored as
# {Asset_ID: position} so that `Asset_ID.map(assets_order)` returns the rank of
# each asset. Mapping through the raw Series instead would look the Asset_ID up
# in the Series *index* and return the asset sitting at that position, i.e. the
# inverse permutation of the intended order.
first_rows = pd.read_csv(dir_path + 'supplemental_train.csv', usecols=['Asset_ID'], nrows=14)
assets_order = {asset_id: position for position, asset_id in enumerate(first_rows['Asset_ID'])}
submission = pd.read_csv(dir_path + 'example_sample_submission.csv' )

## train data set

In [None]:
# train.info()
# assets.info()

In [None]:
# train.describe()

In [None]:
# Work on the first 1,000,000 rows only (positional slice).
train = train.iloc[:1000000]

In [None]:
# Largest and smallest finite VWAP values; used below to clip +/-inf.
finite_vwap = train.VWAP[np.isfinite(train.VWAP)]
VWAP_MAX = finite_vwap.max()
VWAP_MIN = finite_vwap.min()
print(VWAP_MAX)
print(VWAP_MIN)

In [None]:
# NaN -> 0.0, +inf -> VWAP_MAX, -inf -> VWAP_MIN. Popping the column and
# re-adding it moves VWAP to the last column position.
raw_vwap = train.pop('VWAP')
train['VWAP'] = np.nan_to_num(raw_vwap, posinf=VWAP_MAX, neginf=VWAP_MIN)
train.shape

In [None]:
# print("Data NULL: \n", train.isnull().sum())

In [None]:
# Missing targets are replaced by 0.
train['Target'] = train['Target'].where(train['Target'].notna(), 0)

In [None]:
# Slim working copy used only to build per-row identifiers.
df = train[['Asset_ID', 'Target']].copy()
# Timestamp -> ordinal position, in order of first appearance.
time = {stamp: position for position, stamp in enumerate(df.index.unique())}
df['id'] = df.index.map(time)

In [None]:
# Row key of the form "<timestamp ordinal>_<Asset_ID>".
ordinal_part = df['id'].astype(str)
asset_part = df['Asset_ID'].astype(str)
df['id'] = ordinal_part + '_' + asset_part

In [None]:
# Keep only the keys of the rows that really exist; the helper frame is no longer needed.
ids = df['id'].copy()
del df

In [None]:
def add_features(df):
    """Add candle-shape features to ``df`` in place and return it.

    Reads the ``Open``, ``High``, ``Low``, ``Close``, ``Volume`` and ``Count``
    columns and appends, in this order:

    * ``upper``  - ``High`` minus the larger of ``Close`` and ``Open``
    * ``lower``  - the smaller of ``Close`` and ``Open`` minus ``Low``
    * ``range``  - ``High`` minus ``Low``
    * ``mean_trade`` - ``Volume`` divided by ``Count``
    * ``log_price_change`` - natural log of ``Close / Open``
    """
    open_, close = df['Open'], df['Close']
    high, low = df['High'], df['Low']

    body_top = np.maximum(close, open_)
    body_bottom = np.minimum(close, open_)

    df['upper'] = high - body_top
    df['lower'] = body_bottom - low
    df['range'] = high - low
    df['mean_trade'] = df['Volume'] / df['Count']
    df['log_price_change'] = np.log(close / open_)
    return df
# Engineer the candle features on the training frame.
train = add_features(train)
train.shape

In [None]:
# train.head()

In [None]:
# Scale every column except the asset identifier and the label.
features = train.columns.drop(['Asset_ID', 'Target'])
scaler = RobustScaler()
train[features] = scaler.fit_transform(train[features])
train.head()

In [None]:
# Distinct timestamps of the training frame, in order of first appearance.
index = train.index.drop_duplicates()
def reindex(df, timestamps=None, step=60):
    """Put ``df`` on a regular timestamp grid and fill the gaps.

    The grid runs from ``timestamps[0]`` to ``timestamps[-1]`` inclusive in
    increments of ``step``. Grid points missing from ``df`` take the values of
    the nearest existing row; any NaN still left is forward-filled and then
    back-filled.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by timestamp (rows of a single asset in this notebook).
    timestamps : sequence, optional
        Ordered timestamps whose first and last entries bound the grid.
        Defaults to the notebook-level ``index`` variable, which keeps the
        one-argument call made by ``groupby(...).apply(reindex)`` working.
    step : int, optional
        Grid spacing; 60 by default.

    Returns
    -------
    pandas.DataFrame
        The reindexed, gap-filled frame.
    """
    if timestamps is None:
        timestamps = index
    grid = range(timestamps[0], timestamps[-1] + step, step)
    df = df.reindex(grid, method='nearest')
    # .ffill()/.bfill() replace the deprecated fillna(method=...) spelling.
    return df.ffill().bfill()
# Regularise every asset separately, drop the Asset_ID level that groupby
# adds to the index, then order the rows by timestamp.
train = (
    train.groupby('Asset_ID')
    .apply(reindex)
    .reset_index(0, drop=True)
    .sort_index()
)
train.shape

In [None]:
# Attach the ordinal of each timestamp; rows whose timestamp is not in the
# lookup get NaN there and are dropped.
train['group_num'] = train.index.map(time)
train = train.dropna(subset=['group_num'])
train['group_num'] = train['group_num'].astype('int')

# Flag the rows generated by reindexing as non-real: a row is real only if its
# "<group_num>_<Asset_ID>" key was present before reindexing.
group_part = train['group_num'].astype(str)
asset_part = train['Asset_ID'].astype(str)
train['id'] = group_part + '_' + asset_part
train['is_real'] = train['id'].isin(ids) * 1
train = train.drop(columns='id')

In [None]:
# Set every column of the non-real rows to 0, except the bookkeeping columns.
features = train.columns.drop(['Asset_ID', 'group_num', 'is_real'])
synthetic_rows = train['is_real'] == 0
train.loc[synthetic_rows, features] = 0

In [None]:
# Merge in the asset order taken from 'supplemental_train.csv', then sort the
# rows by timestamp ordinal and, within it, by that order.
train['asset_order'] = train['Asset_ID'].map(assets_order)
train = train.sort_values(by=['group_num', 'asset_order'])
train.head(5)

In [None]:
# train.shape

In [None]:
# Labels: an independent copy of the Target column.
y_train = train['Target'].copy()
y_train.shape

In [None]:
# Model inputs: every column except the label.
x_train = train.drop(columns=['Target'])
print(x_train.shape)
print(x_train.head(5))

In [None]:
# One sample per row, one timestep per feature column, one channel.
# The width is read from the frame instead of being hard-coded, so adding or
# removing a feature cannot silently spread one row across several samples.
n_features = x_train.shape[1]
x_train = x_train.to_numpy().reshape(-1, n_features, 1)
x_train.shape

In [None]:
# Random 75 % / 25 % split with a fixed seed.
train_x, val_x, train_y, val_y = train_test_split(
    x_train,
    y_train,
    test_size=0.25,
    random_state=2,
)

In [None]:
# model = keras.models.Sequential([
#       keras.layers.LSTM(64,return_sequences=True,input_shape=(16,1)),
#       keras.layers.ReLU(),
#       keras.layers.Dropout(0.2),
#       # keras.layers.LSTM(40,return_sequences=True, activation='relu'),
#       # keras.layers.Dropout(0.5),
#       keras.layers.LSTM(32),

#       keras.layers.Dropout(0.5),
#       keras.layers.Dense(1,activation = 'linear' )#activation = "linear"
# ])
# model.summary()

In [None]:
from keras.layers import Dense, LSTM, Dropout, GRU
from tensorflow.keras.optimizers import RMSprop

# Stacked GRU regressor: four GRU(50, tanh) layers, each followed by
# Dropout(0.2), and a single-unit output layer.
regressorGRU = Sequential()
# Only the first layer declares the input shape; later layers take theirs from
# the previous layer's output, so repeating `input_shape` there has no effect.
regressorGRU.add(GRU(units=50, return_sequences=True,
                     input_shape=(train_x.shape[1], train_x.shape[2]), activation='tanh'))
regressorGRU.add(Dropout(0.2))
# Second and third GRU layers still return full sequences for the next GRU.
for _ in range(2):
    regressorGRU.add(GRU(units=50, return_sequences=True, activation='tanh'))
    regressorGRU.add(Dropout(0.2))
# Fourth GRU layer returns only its final state.
regressorGRU.add(GRU(units=50, activation='tanh'))
regressorGRU.add(Dropout(0.2))
# The output layer
regressorGRU.add(Dense(units=1))
In [None]:
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# in TensorFlow 2.x and not accepted by newer Keras releases.
regressorGRU.compile(optimizer=RMSprop(learning_rate=0.001), loss='mse', metrics=['mae'])
# Stop training once val_loss has failed to improve for 5 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=5)

In [None]:
# Fit on the training split only. Fitting on x_train/y_train would include the
# rows held out in val_x/val_y, so val_loss (which early stopping monitors)
# would be measured on data the model has already seen.
gru = regressorGRU.fit(
    train_x,
    train_y,
    batch_size=64,
    epochs=5,
    validation_data=(val_x, val_y),
    callbacks=[early_stopping],
    shuffle=True,
)

## test data set

In [None]:
# Example test rows, keyed by timestamp; keep handles on the two id columns.
test = pd.read_csv(dir_path + 'example_test.csv', index_col='timestamp')
Row_Id, Group_Num = test['row_id'], test['group_num']

In [None]:
# Same VWAP clean-up as for the training frame, using the finite range of the
# test data. Note that this overwrites VWAP_MAX / VWAP_MIN.
finite_test_vwap = test.VWAP[np.isfinite(test.VWAP)]
VWAP_MAX = finite_test_vwap.max()
VWAP_MIN = finite_test_vwap.min()
# NaN -> 0.0, +inf -> VWAP_MAX, -inf -> VWAP_MIN; VWAP becomes the last column.
test['VWAP'] = np.nan_to_num(test.pop('VWAP'), posinf=VWAP_MAX, neginf=VWAP_MIN)
test.shape

In [None]:
df = test[['Asset_ID']].copy()
# Timestamp -> ordinal position for the test frame (replaces the training lookup).
time = {stamp: position for position, stamp in enumerate(df.index.unique())}
# Rebuild the "<timestamp ordinal>_<Asset_ID>" keys from the *test* rows.
# Without this, the later `test.id.isin(ids)` check compares test rows against
# the keys collected from the training data and mislabels real test rows.
df['id'] = df.index.map(time)
df['id'] = df['id'].astype(str) + '_' + df['Asset_ID'].astype(str)
ids = df['id'].copy()

In [None]:
def add_features_test(df):
    """Add the candle-shape feature columns to the test frame.

    Thin wrapper around ``add_features`` so that training and test rows are
    always engineered by the same code rather than by a copy that can drift.
    Like ``add_features``, it adds ``upper``, ``lower``, ``range``,
    ``mean_trade`` and ``log_price_change`` to ``df`` in place and returns it.
    """
    return add_features(df)
# Engineer the candle features on the test frame.
test = add_features_test(test)
test.shape

In [None]:
# Scale every column except the identifier columns.
# NOTE(review): this fits a new RobustScaler on the test rows, so test features
# are not on the scale the model was trained on -- consider reusing the scaler
# fitted on the training data.
features = test.columns.drop(['Asset_ID', 'row_id', 'group_num'])
test_scaler = RobustScaler()
test[features] = test_scaler.fit_transform(test[features])

In [None]:
# Distinct timestamps of the test frame, in order of first appearance.
index = test.index.drop_duplicates()
def reindex_test(df, timestamps=None, step=60):
    """Put a test frame on a regular timestamp grid and fill the gaps.

    Grid points between ``timestamps[0]`` and ``timestamps[-1]`` (inclusive,
    spaced ``step`` apart) that are missing from ``df`` are filled from the
    nearest existing row; remaining NaN values are forward- then back-filled.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by timestamp.
    timestamps : sequence, optional
        Ordered timestamps bounding the grid. When omitted, the notebook-level
        ``index`` variable is used, so ``groupby(...).apply(reindex_test)``
        keeps working unchanged.
    step : int, optional
        Grid spacing; 60 by default.

    Returns
    -------
    pandas.DataFrame
        The reindexed, gap-filled frame.
    """
    bounds = index if timestamps is None else timestamps
    first, last = bounds[0], bounds[-1]
    regular = df.reindex(range(first, last + step, step), method='nearest')
    # fillna(method=...) is deprecated in pandas; use the dedicated methods.
    return regular.ffill().bfill()
# Regularise every asset separately, drop the Asset_ID level that groupby
# adds to the index, then order the rows by timestamp.
test = (
    test.groupby('Asset_ID')
    .apply(reindex_test)
    .reset_index(0, drop=True)
    .sort_index()
)
test.shape

In [None]:
# Attach the ordinal of each timestamp; rows whose timestamp is not in the
# lookup get NaN there and are dropped.
test['group_num'] = test.index.map(time)
test = test.dropna(subset=['group_num'])
test['group_num'] = test['group_num'].astype('int')

# Flag the rows generated by reindexing as non-real, based on whether their
# "<group_num>_<Asset_ID>" key appears in `ids`.
group_part = test['group_num'].astype(str)
asset_part = test['Asset_ID'].astype(str)
test['id'] = group_part + '_' + asset_part
test['is_real'] = test['id'].isin(ids) * 1
test = test.drop(columns='id')

In [None]:
# Set every column of the non-real rows to 0, except the bookkeeping columns.
features_test = test.columns.drop(['Asset_ID', 'group_num', 'is_real'])
synthetic_test_rows = test['is_real'] == 0
test.loc[synthetic_test_rows, features_test] = 0

In [None]:
test['asset_order'] = test['Asset_ID'].map(assets_order)
# Sort exactly like the training frame: by timestamp ordinal first, then by
# asset order. Sorting on asset_order alone groups all timestamps of an asset
# together and leaves their relative order unspecified, while the predictions
# are later written back by position.
test = test.sort_values(by=['group_num', 'asset_order'])
print(test.shape)

In [None]:
# print(test.head(20))

In [None]:
# row_id is an identifier, not a model input.
test = test.drop(columns='row_id')
print(test.shape)

In [None]:
# Lay the columns out exactly as in the training matrix (every column of
# `train` except the label). In the test frame `group_num` comes from the CSV
# and therefore sits before the engineered features, whereas in training it was
# appended after them; without reordering, the model would receive features in
# the wrong positions.
feature_columns = train.columns.drop('Target')
test = test[feature_columns].to_numpy().reshape(-1, len(feature_columns), 1)
test.shape

In [None]:
# Average the model output over axis 1 to obtain one value per sample.
# NOTE(review): the Series is named "sales" although it holds Target
# predictions; the name is not read anywhere in this notebook.
raw_predictions = regressorGRU.predict(test)
test_y = pd.Series(raw_predictions.mean(axis=1), name="sales")
test_y

In [None]:
# print(Group_Num)
# print(Row_Id)

In [None]:
# Group_Num=Group_Num.reset_index(drop=True)
# Group_Num

In [None]:
# Row_Id = Row_Id.reset_index(drop = True)
# Row_Id

In [None]:
# Competition submission API. The import at the top of the notebook is
# commented out -- presumably the module is only available inside the Kaggle
# competition environment (TODO confirm).
import gresearch_crypto
env = gresearch_crypto.make_env()   # initialize the environment
iter_test = env.iter_test()    # iterator whose items unpack into (test_df, sample_prediction_df)

In [None]:
# Take the first item from the iterator, fill its Target column with the
# predictions computed earlier and hand it back to the environment.
# NOTE(review): only one batch is consumed, and test_y was predicted from
# example_test.csv rather than from test_df -- presumably the API expects one
# predict() call per batch; confirm against the competition documentation.
(test_df,sample_prediction_df) = next(iter_test)
sample_prediction_df['Target']= test_y
env.predict(sample_prediction_df)

In [None]:
# Copy the predictions into the sample submission frame and save it.
predictions = test_y
submission.Target = predictions
print(submission)
submission.to_csv("submission.csv", index=False)