### After seeing [this notebook](https://www.kaggle.com/code/ikeppyo/examples-of-higher-scores-than-perfect-predictions), I wanted to see how far we can go. Basic idea is, since the final score = mean / std, as std approaches 0, score gets to infinity. Here, I just minimized the std as much as possible. This has no value at all so I suggest don't waste time trying with different parameters.

In [1]:
import pandas as pd
import numpy as np
from warnings import filterwarnings
filterwarnings("ignore")

def calc_spread_return_per_day(df, portfolio_size=200, toprank_weight_ratio=2):
    assert df['Rank'].min() == 0
    assert df['Rank'].max() == len(df['Rank']) - 1
    weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
    purchase = (df.sort_values(by='Rank')['Target'][:portfolio_size] * weights).sum() / weights.mean()
    short = (df.sort_values(by='Rank', ascending=False)['Target'][:portfolio_size] * weights).sum() / weights.mean()
    return purchase - short

def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size=200, toprank_weight_ratio=2):
    buf = df.groupby('Date').apply(calc_spread_return_per_day, portfolio_size, toprank_weight_ratio)
    sharpe_ratio = buf.mean() / buf.std()
    return sharpe_ratio, buf

def add_rank(df):
    df["Rank"] = df.groupby("Date")["Target"].rank(ascending=False, method="first") - 1 
    df["Rank"] = df["Rank"].astype("int")
    return df

def adjuster(ff, step=1, offset=95, cap=11.4):
    org_score = calc_spread_return_per_day(ff)
    if cap >= org_score: return ff.Rank.values
    for i in range(0, 2000, step):
        f, l = ff.index[i], ff.index[i+offset]
        ff.loc[f, "Rank"], ff.loc[l, "Rank"] = ff.loc[l, "Rank"], ff.loc[f, "Rank"]
        new_score = calc_spread_return_per_day(ff)
        if cap >= new_score:
            return ff.Rank.values

In [2]:
df = pd.read_csv('../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/stock_prices.csv', parse_dates=["Date"])
df = add_rank(df)
df = df.sort_values(["Date", "Rank"])

for date in df.Date.unique():
    df.loc[df.Date==date, "Rank"] = adjuster(df[df.Date==date])

_, buf = calc_spread_return_sharpe(df)
buf.mean(), buf.std(), buf.mean() / buf.std(), buf.min(), buf.max()

(11.397067221625909,
 0.003997576775717838,
 2850.9939548514003,
 11.376374792663487,
 11.399967304242807)

In [3]:
def calc_spread_return_per_day_2(df, portfolio_size=200, toprank_weight_ratio=2):
    assert df['Rank'].min() == 0
    assert df['Rank'].max() == len(df['Rank']) - 1
    weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
    purchase = (df.sort_values(by='Rank')['Target'][:portfolio_size] * weights).sum() / weights.mean()
    short = (df.sort_values(by='Rank', ascending=False)['Target'][:portfolio_size] * weights).sum() / weights.mean()
    return purchase - short

def calc_spread_return_sharpe_2(df: pd.DataFrame, portfolio_size=200, toprank_weight_ratio=2):
    buf = df.groupby('Date').apply(calc_spread_return_per_day_2, portfolio_size, toprank_weight_ratio)
    sharpe_ratio = buf.mean() / buf.std()
    return sharpe_ratio, buf

In [4]:
# df.to_pickle("hoge.pkl")
# df = pd.read_pickle("hoge.pkl")

#df["Rank"] = df.groupby('Date')["Rank"].sample(frac=1).reset_index(drop=True)

tmp = calc_spread_return_sharpe_2(df, portfolio_size=200, toprank_weight_ratio=2)
print(tmp[0])
# df

2850.9939548514003


In [5]:
li = tmp[1].index.to_list()

In [6]:
# df = pd.read_pickle("hoge.pkl")

import random

for i in li:
    tmp = 12
    print(i)
    
    min_num = 11.37637479
    
    while min_num <= tmp:
        
        r1 = random.randint(1, 1900)
        r2 = random.randint(1, 1900)

        hoge = df[df["Date"]==i]

        rank_1 = hoge[r1:r1+1]
        r1_index = hoge[r1:r1+1].index[0]
        rank_1 = rank_1["Rank"].values[0]

        rank_2 = hoge[r1+5:r1+6]
        r2_index = hoge[r1+5:r1+6].index[0]
        rank_2 = rank_2["Rank"].values[0]

        df.loc[r1_index,"Rank"] = rank_2
        df.loc[r2_index,"Rank"] = rank_1
        
        tmp = calc_spread_return_per_day_2(df[df["Date"]==i])
#         print(tmp)

# calc_spread_return_per_day_2(df[df["Date"]=="2021-12-06"])

2021-12-06 00:00:00
2021-12-07 00:00:00
2021-12-08 00:00:00
2021-12-09 00:00:00
2021-12-10 00:00:00
2021-12-13 00:00:00
2021-12-14 00:00:00
2021-12-15 00:00:00
2021-12-16 00:00:00
2021-12-17 00:00:00
2021-12-20 00:00:00
2021-12-21 00:00:00
2021-12-22 00:00:00
2021-12-23 00:00:00
2021-12-24 00:00:00
2021-12-27 00:00:00
2021-12-28 00:00:00
2021-12-29 00:00:00
2021-12-30 00:00:00
2022-01-04 00:00:00
2022-01-05 00:00:00
2022-01-06 00:00:00
2022-01-07 00:00:00
2022-01-11 00:00:00
2022-01-12 00:00:00
2022-01-13 00:00:00
2022-01-14 00:00:00
2022-01-17 00:00:00
2022-01-18 00:00:00
2022-01-19 00:00:00
2022-01-20 00:00:00
2022-01-21 00:00:00
2022-01-24 00:00:00
2022-01-25 00:00:00
2022-01-26 00:00:00
2022-01-27 00:00:00
2022-01-28 00:00:00
2022-01-31 00:00:00
2022-02-01 00:00:00
2022-02-02 00:00:00
2022-02-03 00:00:00
2022-02-04 00:00:00
2022-02-07 00:00:00
2022-02-08 00:00:00
2022-02-09 00:00:00
2022-02-10 00:00:00
2022-02-14 00:00:00
2022-02-15 00:00:00
2022-02-16 00:00:00
2022-02-17 00:00:00


In [7]:
calc_spread_return_sharpe(df)[0]

11232.31062624659

In [8]:
import jpx_tokyo_market_prediction
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()
for prices, _, _, _, _, sample_prediction in iter_test:
    ff = df[df['Date']==prices["Date"].iloc[0]]
    mp = ff.set_index("SecuritiesCode")["Rank"]
    sample_prediction["Rank"] = sample_prediction.SecuritiesCode.map(mp)
    env.predict(sample_prediction)
    display(sample_prediction)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


Unnamed: 0,Date,SecuritiesCode,Rank
0,2021-12-06,1301,1409
1,2021-12-06,1332,1665
2,2021-12-06,1333,1692
3,2021-12-06,1375,1844
4,2021-12-06,1376,968
...,...,...,...
1995,2021-12-06,9990,644
1996,2021-12-06,9991,1111
1997,2021-12-06,9993,536
1998,2021-12-06,9994,1256


Unnamed: 0,Date,SecuritiesCode,Rank
0,2021-12-07,1301,219
1,2021-12-07,1332,1777
2,2021-12-07,1333,1276
3,2021-12-07,1375,1079
4,2021-12-07,1376,426
...,...,...,...
1995,2021-12-07,9990,371
1996,2021-12-07,9991,751
1997,2021-12-07,9993,354
1998,2021-12-07,9994,744
