In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the whole '/kaggle/input' tree and print the full path of every file in it,
# so the available data files are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the bare file name.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
def split_seq(a,split_length):
    """Cut a 2-D table into overlapping windows of ``split_length`` rows.

    ``split_length - 1`` rows of zeros are stacked in front of ``a`` first, so
    one window is produced for every row of ``a``; the earliest windows are
    therefore partly made of zero padding.

    Args:
        a: 2-D array-like exposing ``.shape`` (rows, columns).
        split_length: number of rows in each window.

    Returns:
        np.ndarray built from the list of windows; for non-empty ``a`` its
        shape is (rows of ``a``, ``split_length``, columns of ``a``).
    """
    zero_rows = np.zeros((split_length - 1, a.shape[1]))
    padded = np.vstack([zero_rows, a])

    # One window per possible start row of the padded table.
    window_count = padded.shape[0] - split_length + 1
    windows = [padded[start:start + split_length] for start in range(window_count)]

    return np.array(windows)

In [None]:
# Number of consecutive rows (dates) in each model input window.
split_len=8

stocks_lists = pd.read_csv("/kaggle/input/jpx-tokyo-stock-exchange-prediction/stock_list.csv")
stock_prices = pd.read_csv("/kaggle/input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv")

# Long -> wide: one row per Date, one column per (field, SecuritiesCode) pair.
train_x = stock_prices.pivot(index=['Date'], columns=['SecuritiesCode'], values=['Open', 'High', 'Low', 'Close', 'Volume'])
train_y = stock_prices.pivot(index=['Date'], columns=['SecuritiesCode'], values=['Target'])

# Fill gaps along the date axis: backward fill first, then forward fill for
# whatever is still missing at the end of a column.
# DataFrame.bfill()/ffill() replace the deprecated fillna(method=...) form and
# handle every column in one vectorised call instead of a Python loop per column.
# NOTE(review): back-filling copies later values into earlier dates; confirm
# this look-ahead is acceptable for training.
train_x = train_x.bfill().ffill()
train_y = train_y.bfill().ffill()

# Sanity check: number of target cells that are still missing after filling.
print(train_y.isnull().sum().sum())

display(train_y)
display(train_x)

In [None]:
display(train_x)

# Standardise every (field, SecuritiesCode) column independently.
# The same StandardScaler instance is re-fitted on each column in turn, so when
# the loop ends `scaler` only holds the statistics of the last column processed.
scaler = StandardScaler()
for column in train_x.columns:
    column_values = train_x[column].values.reshape(-1, 1)
    train_x[column] = scaler.fit_transform(column_values)

# Scaler intended for the targets; it is created but not applied in this cell,
# so the targets stay in their original units.
scaler2 = MinMaxScaler(feature_range=(-1, 1))

# Turn the (dates, features) table into overlapping windows of `split_len` rows.
# split_seq already returns a NumPy array, so no further conversion is needed.
train_x = split_seq(train_x, split_len)

# Number of distinct securities; used to size the model input and output.
stock_number = len(stock_prices["SecuritiesCode"].unique())
print(stock_number)
display(train_x.shape)



In [None]:

def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> float:
    """Sharpe ratio of the daily long/short spread return implied by a ranking.

    Args:
        df (pd.DataFrame): predicted results; must provide 'Date', 'Rank' and 'Target' columns
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (float): mean of the daily spread returns divided by their standard deviation
    """
    def _calc_spread_return_per_day(df, portfolio_size, toprank_weight_ratio):
        """Weighted return of the top-ranked names minus that of the bottom-ranked names for one date.

        Args:
            df (pd.DataFrame): predicted results for a single date
            portfolio_size (int): # of equities to buy/sell
            toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
        Returns:
            (float): spread return
        """
        ranks = df['Rank']
        # Ranks are expected to start at 0 and end at (number of rows - 1).
        assert ranks.min() == 0
        assert ranks.max() == len(ranks) - 1

        # Linearly decaying weights: `toprank_weight_ratio` for the first name, 1 for the last.
        weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
        weight_mean = weights.mean()

        best_targets = df.sort_values(by='Rank')['Target'][:portfolio_size]
        worst_targets = df.sort_values(by='Rank', ascending=False)['Target'][:portfolio_size]

        purchase = (best_targets * weights).sum() / weight_mean
        short = (worst_targets * weights).sum() / weight_mean
        return purchase - short

    daily_spread = df.groupby('Date').apply(_calc_spread_return_per_day, portfolio_size, toprank_weight_ratio)
    return daily_spread.mean() / daily_spread.std()

In [None]:
def transformer_encoder(inp):
    """One encoder block: self-attention followed by a position-wise feed-forward part.

    Both halves are wrapped in a residual connection, so the output has the
    same shape as the input.

    Args:
        inp: Keras tensor whose last axis is the feature axis
             (assumed (batch, time, features), as built by `network()`).

    Returns:
        Keras tensor with the same shape as `inp`.
    """
    # The feed-forward part must project back to the incoming feature width so
    # the residual additions line up. Reading the width from the tensor keeps
    # the block independent of the notebook-level `stock_number` variable.
    feature_dim = inp.shape[-1]

    # Self-attention: the input serves as both query and value.
    x=layers.MultiHeadAttention(key_dim=1024, num_heads=3, dropout=0.25)(inp, inp)

    x=keras.layers.Dropout(0.25)(x)

    x = layers.LayerNormalization(epsilon=1e-6)(x)

    res = x + inp

    # Feed Forward Part

    x = layers.Conv1D(filters=64, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(0.25)(x)
    x = layers.Conv1D(filters=feature_dim, kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)

    return x + res

def network():
    """Assemble the forecasting model.

    Input windows of shape (split_len, stock_number*5) pass through eight
    `transformer_encoder` blocks, are averaged over the time axis, and then go
    through four Dense -> LayerNormalization -> Dropout stages before a final
    linear layer with one output per security.

    Returns:
        keras.Model: the uncompiled model.
    """
    inp = layers.Input(shape=(split_len, stock_number*5,))

    x = inp
    for _ in range(8):
        x = transformer_encoder(x)

    # Collapse the time axis to a single feature vector per sample.
    x = layers.GlobalAveragePooling1D()(x)

    # Fully connected head: each stage is Dense(relu) -> LayerNorm -> Dropout.
    for units in (4096, 6144, 8192, 2048):
        x = keras.layers.Dense(units, activation="relu")(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        x = keras.layers.Dropout(0.3)(x)

    # Linear output, one value per security.
    out = keras.layers.Dense(stock_number)(x)

    return keras.Model(inputs=inp, outputs=out)


print(train_x.shape)

print(train_y.shape)

model=network()
# The model is trained as a regression (MSE loss on continuous targets), so the
# monitored metric is mean absolute error; a categorical-accuracy metric would
# compare argmax positions and says nothing about regression quality.
model.compile(loss=tf.losses.MeanSquaredError(),
                optimizer=tf.optimizers.Adam(learning_rate=0.0005),metrics=[tf.keras.metrics.MeanAbsoluteError()])


# Hold out the last 2% of rows as a test set; shuffle=False keeps the rows in
# their original order, so the hold-out is the tail of the data.
train_x, ttest_x, train_y, ttest_y = train_test_split(train_x, train_y, test_size=0.02,shuffle=False)
# summary() prints the table itself and returns None, so it is called directly
# instead of being wrapped in display() (which would render a stray "None").
model.summary()
history=model.fit(x=train_x, y=train_y, batch_size=64 ,epochs=1000 ,validation_split=0.1)

In [None]:
class Evaluator(object):
    """JPX sharpe ratio evaluator.

    The evaluator is used to evaluate performance of the model on a
    single dataset (e.g., training set, validation set). Moreover, to
    facilitate the prediction analysis, rolling sharpe ratio can be
    derived. For running evaluation (e.g., evaluation on one epoch),
    rolling sharpe ratio derivation can be disabled.

    Parameters:
        derive_rolling_sr: whether to derive rolling sharpe ratio
    """

    # Linearly decaying position weights: 2 for the first-ranked name down to 1.
    # The length of WTS is also the number of names taken from each end of the ranking.
    WTS: np.ndarray = np.linspace(2, stop=1, num=200)
    WTS_MEAN: float = np.mean(WTS)
    # Results of the latest evaluate() call, indexed by date (None before the first call).
    ddsr: pd.Series
    rolling_sr: pd.Series

    def __init__(self, derive_rolling_sr: bool = False):
        self.derive_rolling_sr = derive_rolling_sr
        self.ddsr = None
        self.rolling_sr = None

    def evaluate(self, pred: pd.DataFrame, window: int = 60) -> float:
        """Run evaluation.

        Parameters:
            pred: prediction results on different datasets following
                  the format of `sample_submission.csv`, with an extra
                  `Target` column; `Date`, `Rank` and `Target` are read
            window: size of sliding window to derive rolling sr

        Return:
            sr: sharpe ratio
        """
        pred = pred.sort_values(["Date"])
        self.ddsr = pred.groupby("Date").apply(self._derive_daily_spread_return)
        sr = self._derive_overall_sr()
        if self.derive_rolling_sr:
            self.rolling_sr = self._derive_rolling_sr(window)

        return sr

    def get_daily_spread_return(self) -> pd.Series:
        """Return daily spread return for current fold.

        Return:
            self.ddsr: daily spread return
        """
        return self.ddsr

    def get_rolling_sr(self) -> pd.Series:
        """Return rolling sharpe ratio for current fold.

        Return:
            self.rolling_sr: rolling sharpe ratio
        """
        return self.rolling_sr

    def _derive_daily_spread_return(self, pred_date: pd.DataFrame) -> float:
        """Derive daily spread return for one date.

        The frame passed in is left untouched: sorting is done on a copy,
        because mutating the group handed over by `groupby(...).apply` would
        also reorder the caller's data.

        Parameters:
            pred_date: prediction for one date

        Return:
            daily_spread_return: daily spread return

        Raises:
            ValueError: if `pred_date` has fewer rows than there are weights
        """
        portfolio_size = len(Evaluator.WTS)
        if len(pred_date) < portfolio_size:
            raise ValueError(
                f"need at least {portfolio_size} rows per date, got {len(pred_date)}"
            )

        targets_by_rank = pred_date.sort_values("Rank")["Target"].to_numpy()
        s_up = np.dot(targets_by_rank[:portfolio_size], Evaluator.WTS) / Evaluator.WTS_MEAN
        # Reverse the tail so the worst-ranked name receives the largest weight.
        s_down = (
            np.dot(targets_by_rank[-portfolio_size:][::-1], Evaluator.WTS)
            / Evaluator.WTS_MEAN
        )
        daily_spread_return = s_up - s_down

        return daily_spread_return

    def _derive_rolling_sr(self, window: int = 60) -> pd.Series:
        """Derive rolling sharpe ratio.

        Parameters:
            window: size of sliding window to derive rolling sr

        Return:
            rolling_sr: rolling sharpe ratio
        """
        rolling_sr = self.ddsr.rolling(window).mean() / self.ddsr.rolling(window).std()

        return rolling_sr

    def _derive_overall_sr(self) -> float:
        """Derive overall sharpe ratio for different datasets.

        Return:
            sr: overall sharpe ratio
        """
        sr = self.ddsr.mean() / self.ddsr.std()

        return sr


In [None]:
# Running price history, seeded with the raw training pivot so that the first
# test day already has enough past rows for a full window.
test_x = stock_prices.pivot(index=['Date'], columns=['SecuritiesCode'], values=['Open', 'High', 'Low', 'Close', 'Volume'])

# Column layout of the training features; every test-day pivot is aligned to it
# so the model always receives the same columns in the same order.
feature_columns = test_x.columns

# Fit one scaler over ALL feature columns at once, so each column keeps its own
# mean/std. A scaler that was re-fitted column by column only remembers the last
# column and would apply that single mean/std to every feature at inference.
# The history is gap-filled the same way as the training table before fitting.
feature_scaler = StandardScaler().fit(test_x.bfill().ffill().to_numpy())


import jpx_tokyo_market_prediction
env = jpx_tokyo_market_prediction.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test files


for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    prices = prices.pivot(index=['Date'], columns=['SecuritiesCode'], values=['Open', 'High', 'Low', 'Close', 'Volume'])
    # Align to the training layout: securities absent today become NaN (filled
    # below); columns the model has never seen are dropped.
    prices = prices.reindex(columns=feature_columns)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    test_x = pd.concat([test_x, prices])

    # Same gap filling as for training, done on the whole frame in one call.
    filled = test_x.bfill().ffill()

    # Only the most recent `split_len` rows can appear in the last window, so
    # scaling and windowing just that tail yields the same final window as
    # processing the full history.
    recent = feature_scaler.transform(filled.iloc[-split_len:].to_numpy())
    test_xtmp = split_seq(recent, split_len)

    # Keep the latest window and add the batch axis expected by the model.
    test_xtmp = np.expand_dims(test_xtmp[-1],axis=0)
    print(test_xtmp.shape)
    predictions=model.predict(test_xtmp)

    # Output positions ordered from highest to lowest predicted value.
    sorted_arr = predictions.argsort(axis=-1)
    sorted_arr=np.fliplr(sorted_arr).reshape(-1)

    # Invert the ordering: rank_idx[i] is the rank of output i (0 = highest prediction).
    rank_idx = np.empty_like(sorted_arr)
    rank_idx[sorted_arr] = np.arange(len(rank_idx))

    rank_sec=rank_idx

    # NOTE(review): ranks are assigned by position, which assumes the rows of
    # `sample_prediction` are in the same order as the model outputs - confirm.
    sample_prediction['Rank'] = rank_sec

    scr=env.predict(sample_prediction)   # register your predictions
    print(scr)

In [None]:
# Attach the predictions of the last processed test day to its submission frame.
# reshape(-1) flattens whatever number of outputs the model has instead of
# assuming exactly 2000.
# NOTE(review): 'Target' here is the model's own prediction, not a realised
# return, and the frame comes from a single loop iteration; with only one date
# the standard deviation of the daily spread is NaN, so both sharpe figures
# below are presumably NaN - confirm what this check is meant to show.
sample_prediction['Target'] = predictions.reshape(-1)
evaluator = Evaluator(derive_rolling_sr=True)

sr = evaluator.evaluate(sample_prediction, window=1)
print(sample_prediction.isna().sum())
print(sr)
display(sample_prediction)
print(calc_spread_return_sharpe(sample_prediction))
print(sample_prediction.dtypes)

from matplotlib import pyplot as plt
# Training loss per epoch.
plt.plot(history.history['loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.show()

# Validation loss per epoch.
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['val'], loc='upper left')
plt.show()

# Loss/metric on the held-out tail of the data.
testResults=model.evaluate(ttest_x, ttest_y)

test_pred_matrix = model.predict(ttest_x)
testpredicts=test_pred_matrix.reshape(-1)

# Real targets of security 1301 over the hold-out rows.
print(ttest_y['Target', 1301].index)
plt.plot(ttest_y['Target', 1301].values,'o-')
plt.legend(['real'], loc='upper left')
plt.show()
# Stepping through the flattened predictions by the number of model outputs
# picks the first output column for every hold-out row; the step is taken from
# the prediction array itself rather than hard-coded.
# NOTE(review): assumes the first output column corresponds to security 1301 - confirm.
plt.plot(testpredicts[0::test_pred_matrix.shape[-1]],'o-')
plt.show()
