In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle

In [None]:
# Load the raw training prices shipped with the competition data.
stock_prices = pd.read_csv(
    '../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv',
    index_col=False,
)
print(f"Original stock prices: {len(stock_prices)}")

# Keep only the rows that actually carry a closing price.
has_close = stock_prices['Close'].notna()
cleaned_stock_prices = stock_prices[has_close]
print(f"Cleaned stock prices have: {len(cleaned_stock_prices)}")

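Load the windowed training data and compute its mean and standard deviation. These statistics are used below to normalize closing prices before inference and to convert the model's predictions back to price scale.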

In [None]:
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('../input/jpx-gru-tensorflow-baseline-no-leak/windowed_train_data90', 'rb') as fp:
    train_windowed_data = pickle.load(fp)

# Global (scalar) statistics over every value in the windowed training data.
train_d = np.array(train_windowed_data)
t_mean, t_std = train_d.mean(), train_d.std()

print(f"Train mean: {t_mean}")
print(f"Train std: {t_std}")

In [None]:
# Sequence geometry: the model reads `window_size` consecutive closing prices
# and emits `OUT_STEPS` future values.
window_size = 90
OUT_STEPS = 2   # predict next 2 days

# Batch size constant.
batch_size = 2048

# tf.data autotune sentinel.
AUTO = tf.data.experimental.AUTOTUNE

In [None]:
# Layer widths, listed in the order the layers are stacked. The order must match
# the saved checkpoint, so do not rearrange these lists.
_gru_sequence_units = [256, 256, 512, 512, 256, 256, 128]   # GRUs that return full sequences
_dense_units = [512, 256, 128, 64, 32]                      # ReLU head

# Input is a flat window of `window_size` values; add a trailing feature axis for the GRUs.
_layers = [
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[window_size]),
]
for _units in _gru_sequence_units:
    _layers.append(tf.keras.layers.GRU(_units, return_sequences=True))
# The last recurrent layer collapses the sequence to a single vector.
_layers.append(tf.keras.layers.GRU(128))
for _units in _dense_units:
    _layers.append(tf.keras.layers.Dense(_units, activation='relu'))
# One output value per forecast step, reshaped to (OUT_STEPS, 1).
_layers.append(tf.keras.layers.Dense(OUT_STEPS*1))
_layers.append(tf.keras.layers.Reshape([OUT_STEPS, 1]))

model = tf.keras.Sequential(_layers)
model.summary()

# Restore the pre-trained weights, then compile with MSE loss and MAE as a metric.
model.load_weights('../input/jpx-gru-tensorflow-baseline-no-leak/model/mymodel')
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mae'],
)

In [None]:
def windowed_test(series, batch_size, shift=1):
    """Turn a 1-D series into a batched dataset of sliding windows.

    Each window holds `window_size` (module-level constant) consecutive
    elements; consecutive windows start `shift` elements apart, and trailing
    windows that would be shorter than `window_size` are dropped.

    Args:
        series: Sequence of values to slice into windows.
        batch_size: Number of windows per emitted batch.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A `tf.data.Dataset` yielding batches of windows, with a prefetch
        buffer of one batch.
    """
    windows = tf.data.Dataset.from_tensor_slices(series).window(
        window_size, shift=shift, drop_remainder=True
    )
    # Each window is itself a small dataset; batch it into a single tensor.
    flat_windows = windows.flat_map(lambda w: w.batch(window_size))
    return flat_windows.batch(batch_size).prefetch(1)

In [None]:
# Competition API: `env` hands out the test data one step at a time and receives
# the predictions for each step through `env.predict(...)` in the loop below.
import jpx_tokyo_market_prediction
env = jpx_tokyo_market_prediction.make_env()   # initialize the environment
# Each item yielded by `iter_test` is unpacked below as
# (prices, options, financials, trades, secondary_prices, sample_prediction).
iter_test = env.iter_test()    # an iterator which loops over the test files

In [None]:
def _latest_window(closes, size):
    """Return the most recent `size` values of `closes` as a float array.

    Shorter histories are left-padded with zeros. The values are already
    normalized, so a zero corresponds to the training mean price.

    Args:
        closes: List of normalized closing prices, oldest first.
        size: Required window length.

    Returns:
        A 1-D float numpy array of length `size`.
    """
    window = np.zeros(size)
    recent = np.asarray(closes[-size:], dtype=float)
    if len(recent):  # guard: `window[-0:]` would address the whole array
        window[-len(recent):] = recent
    return window


def _ranks_from_returns(codes, predicted_return):
    """Rank every code in `codes` from 0 (highest predicted return) to N-1.

    Args:
        codes: Series of security codes to rank (one row per security).
        predicted_return: Series of predicted returns indexed by security code.

    Returns:
        An integer Series aligned with `codes`. Codes without a usable
        prediction are treated as a neutral (zero) return so that every row
        still receives a unique, valid rank.
    """
    target = codes.map(predicted_return).fillna(0.0)
    return target.rank(ascending=False, method='first').astype(int) - 1


# Seed the rolling price history with the recent part of the training data.
# `.copy()` makes this an independent frame, so normalizing 'Close' neither
# touches `cleaned_stock_prices` nor raises SettingWithCopyWarning.
target_series = cleaned_stock_prices[cleaned_stock_prices['Date'] > '2021-01-01'].copy()
target_series['Close'] = (target_series['Close'] - t_mean) / t_std

for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    # Normalize the new day's closing prices and add them to the history.
    cleaned_prices = prices[prices['Close'].notna()].copy()
    cleaned_prices['Close'] = (cleaned_prices['Close'] - t_mean) / t_std
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    target_series = pd.concat([target_series, cleaned_prices])

    # One model input row per security: its latest `window_size` normalized closes.
    closes_by_security = target_series.groupby('SecuritiesCode')['Close'].apply(list)
    model_inputs = np.stack(
        [_latest_window(closes, window_size) for closes in closes_by_security]
    )

    # Predict all securities in one batched call instead of one call per security.
    preds = model.predict(model_inputs, batch_size=batch_size, verbose=0)
    # Convert to float64 before de-normalizing so the return below is not
    # computed in float32 precision.
    preds = preds.reshape(len(closes_by_security), OUT_STEPS).astype(np.float64)
    preds = preds * t_std + t_mean
    next_day, next2_day = preds[:, 0], preds[:, 1]

    # Predicted return from the next day to the day after, per security code.
    predicted_return = pd.Series(
        (next2_day - next_day) / next_day, index=closes_by_security.index
    )

    # Rank directly on the submission frame so every row gets a unique integer rank.
    sample_prediction['Rank'] = _ranks_from_returns(
        sample_prediction['SecuritiesCode'], predicted_return
    )
    env.predict(sample_prediction)