# <div style="padding:20px;color:white;margin:0;font-size:100%;text-align:left;display:fill;border-radius:5px;background-color:#504EFF;overflow:hidden"> Demo on Submission for the JPX Stock Prediction Competition </div>



Check the source data path

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found,
# one path per line, in the order os.walk yields them.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# <div style="padding:20px;color:white;margin:0;font-size:100%;text-align:left;display:fill;border-radius:5px;background-color:#504EFF;overflow:hidden"> Submission </div>

## Load the Raw Data


The local data files are assumed to be available during the testing period.

In [None]:
# Read the two stock_prices.csv files (train_files first, then supplemental_files)
# with pandas' default parsing options.
old_price_total_df, new_price_total_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv',
        '../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/stock_prices.csv',
    )
)


In [None]:
# Stack both price tables (original row labels are kept) and keep only rows whose
# 'Date' value compares greater than the string '2021'.
price_total_df = (
    pd.concat([old_price_total_df, new_price_total_df])
    .loc[lambda stacked: stacked['Date'] > '2021']
)

In [None]:
# Quick look at the last five rows of the combined, date-filtered price table.
price_total_df.tail(n=5)

In [None]:
# Sorted axis labels for the stock-by-date matrices built from price_total_df.
unique_dates = price_total_df['Date'].unique()
unique_codes = price_total_df['SecuritiesCode'].unique()
trading_dates = np.array(sorted(unique_dates))
stock_ids = np.array(sorted(unique_codes))

# All-NaN template: one row per security code, one column per date.
temp_mat = pd.DataFrame(data=np.nan, index=stock_ids, columns=trading_dates)

def create_factor(item, temp_mat=temp_mat):
    """Build a stock-by-date matrix for one column of ``price_total_df``.

    Args:
        item: name of the ``price_total_df`` column to spread into a matrix.
        temp_mat: template DataFrame (rows = security codes, columns = dates).
            The default is the module-level template as it existed when this
            function was defined.

    Returns:
        A copy of ``temp_mat`` in which every cell covered by the pivot table
        (``SecuritiesCode`` x ``Date``, default ``pivot_table`` aggregation)
        is overwritten with the pivoted value; other cells keep the
        template's value.
    """
    pivoted = pd.pivot_table(
        price_total_df,
        index='SecuritiesCode',
        columns='Date',
        values=item,
    )

    # Work on a copy so the shared template is never modified.
    filled = temp_mat.copy()
    filled.loc[pivoted.index, pivoted.columns] = pivoted.to_numpy()
    return filled

# Raw close prices as a stock-by-date matrix.
close_factor = create_factor('Close')

# Share-split adjustment factors in the same layout.
adj_factor = create_factor('AdjustmentFactor')

# Reverse cumulative product along the date axis: each cell becomes the product of
# its own and all later adjustment factors. cumprod leaves missing cells as NaN;
# those are then back-filled from the next later date.
# `.bfill(axis=1)` replaces the deprecated `fillna(method='bfill', axis=1)`.
adjusted_factor = adj_factor.iloc[:, ::-1].cumprod(axis=1).iloc[:, ::-1].bfill(axis=1)

# Rescale every row by its value on the last date (row-wise division; equivalent to
# the transpose / divide / transpose form). A row whose last-date value is missing
# becomes all-NaN.
adjusted_factor = adjusted_factor.div(adjusted_factor.iloc[:, -1], axis=0)

close_adj_factor = adjusted_factor * close_factor

# One-step return along the date axis. The forward fill that `pct_change` used to
# apply implicitly (its deprecated default) is made explicit so the result is the
# same on every pandas version and no FutureWarning is raised.
rtn_mat = close_adj_factor.ffill(axis=1).pct_change(1, axis=1, fill_method=None)


## Baseline Model: 5-Day Reversal Factor

In [None]:
# Log return per date (dates on the rows after the transpose), summed over a
# 5-row rolling window, transposed back to stock-by-date and negated.
# An optional `.shift(2, axis=1)` lag was left disabled in the original cell.
log_rtn_by_date = np.log(rtn_mat.T + 1)
reversal_5d = -log_rtn_by_date.rolling(5).sum().T


Access the features from the test set

In [None]:
import jpx_tokyo_market_prediction

env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

# Number of prediction batches submitted so far.
counter = 0

for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    # Positional access: does not rely on the frame's index containing the label 0.
    pred_dt = prices['Date'].iloc[0]
    pred_stocks = sample_prediction['SecuritiesCode'].values

    #### Generating the Ranking Signal ####
    # Look up the precomputed signal for the prediction date. `reindex` (instead of
    # `.loc` with a list of labels) yields NaN for any code missing from the factor
    # matrix rather than raising KeyError; a date missing from the matrix yields an
    # all-NaN signal for the same reason.
    if pred_dt in reversal_5d.columns:
        signal = reversal_5d[pred_dt].reindex(pred_stocks)
    else:
        signal = pd.Series(np.nan, index=pred_stocks)
    # Missing signals are treated as neutral (0).
    signal = signal.fillna(0)

    # Convert to a 0-based rank; ties are broken by order of appearance.
    # NOTE(review): rank() is ascending, so the LOWEST signal receives Rank 0, and the
    # evaluation function in this notebook buys the lowest Rank values. Confirm this
    # direction is what is intended for a reversal signal.
    pred_rank = signal.rank(method='first').astype(int) - 1

    # Assign the rank score: 0 -> (number of stocks - 1).
    sample_prediction['Rank'] = (
        sample_prediction['SecuritiesCode'].map(pred_rank.to_dict()).astype(int)
    )

    assert sample_prediction["Rank"].notna().all()
    assert sample_prediction["Rank"].min() == 0
    assert sample_prediction["Rank"].max() == len(sample_prediction["Rank"]) - 1

    #### upload prediction ####
    env.predict(sample_prediction)
    ########################

    counter += 1
    
    

In [None]:
# ! tail submission.csv

# Evaluation

In [None]:
def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> float:
    """Sharpe ratio of the daily long/short spread return implied by ``Rank``.

    For every ``Date`` group the weighted ``Target`` of the ``portfolio_size``
    lowest-``Rank`` rows (long leg) minus the weighted ``Target`` of the
    ``portfolio_size`` highest-``Rank`` rows (short leg) is computed. The
    result is the mean of those daily spreads divided by their standard
    deviation.

    Args:
        df (pd.DataFrame): predictions with 'Date', 'Rank' and 'Target' columns.
        portfolio_size (int): number of equities on each side of the spread.
        toprank_weight_ratio (float): weight of the first pick on each side
            relative to the last pick, whose weight is 1.
    Returns:
        (float): sharpe ratio
    """
    def _daily_spread(day_df, n_picks, top_ratio):
        """Spread return for a single date.

        Args:
            day_df (pd.DataFrame): rows of one date.
            n_picks (int): number of equities on each side.
            top_ratio (float): weight of the first pick relative to the last.
        Returns:
            (float): weighted long return minus weighted short return.
        """
        ranks = day_df['Rank']
        assert ranks.min() == 0
        assert ranks.max() == len(ranks) - 1

        # Linearly decaying weights, from top_ratio for the first pick down to 1.
        tilt = np.linspace(start=top_ratio, stop=1, num=n_picks)

        by_rank_asc = day_df.sort_values(by='Rank')
        by_rank_desc = day_df.sort_values(by='Rank', ascending=False)

        long_leg = (by_rank_asc['Target'][:n_picks] * tilt).sum() / tilt.mean()
        short_leg = (by_rank_desc['Target'][:n_picks] * tilt).sum() / tilt.mean()
        return long_leg - short_leg

    daily_spreads = df.groupby('Date').apply(_daily_spread, portfolio_size, toprank_weight_ratio)
    return daily_spreads.mean() / daily_spreads.std()
