## JPX simple overfitting model with BayesianRidge
Thanks to [Paulo Pinto](https://www.kaggle.com/paulorzp) for the notebook [JPX simple overfitting model](https://www.kaggle.com/code/paulorzp/jpx-simple-overfitting-model). Building on that notebook, I experiment with modeling using BayesianRidge and other estimators.

In [None]:
import numpy as np
import pandas as pd
import jpx_tokyo_market_prediction
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import StackingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import optuna
optuna.logging.set_verbosity(optuna.logging.CRITICAL)

In [None]:
def concat_df(df1, df2):
    """
    Stack two frames vertically and keep one row per "RowId".

    Args:
        df1 (pd.DataFrame): first frame; its rows win when a RowId appears in both
        df2 (pd.DataFrame): second frame, appended below df1
    Returns:
        (pd.DataFrame): combined frame without repeated RowId values
    """
    # ignore_index renumbers the stacked rows 0..n-1 before duplicates are removed,
    # so the surviving rows keep their position in the stacked frame as index.
    stacked = pd.concat([df1, df2], ignore_index=True, sort=False)
    return stacked.drop_duplicates(subset=["RowId"], keep="first")

In [None]:
# Root folder of the competition data set.
path = "../input/jpx-tokyo-stock-exchange-prediction/"
train_csv = f"{path}train_files/stock_prices.csv"
supplemental_csv = f"{path}supplemental_files/stock_prices.csv"

# Read both price files and merge them; concat_df keeps one row per RowId.
df_prices = pd.read_csv(train_csv)
prices = pd.read_csv(supplemental_csv)
df_prices = concat_df(df_prices, prices)

# Keep only rows dated after 2021-11-01. read_csv is called without date parsing,
# so this compares Date against the literal as a string
# (assumes ISO "YYYY-MM-DD" text, where string order equals date order — TODO confirm).
is_recent = df_prices["Date"] > "2021-11-01"
df_prices = df_prices[is_recent]
df_prices.info(show_counts=True)

In [None]:
def prep_prices(price):
    """
    Replace every missing value in a price table with 0.

    Args:
        price (pd.DataFrame): price table that may contain NaN cells
    Returns:
        (pd.DataFrame): a new frame with NaN replaced by 0; `price` itself is left unmodified
    """
    # fillna without inplace=True builds a new frame instead of writing into the
    # argument. That keeps the caller's data intact and avoids pandas'
    # SettingWithCopyWarning when `price` is a filtered slice of another frame.
    return price.fillna(0)

In [None]:
# Render floats with thousands separators and general format at six significant digits.
pd.set_option("display.float_format", '{:,.6g}'.format)

# Fill missing values, then show summary statistics of the cleaned table.
df_prices = prep_prices(df_prices)
df_prices.describe()

In [None]:
# Columns of df_prices that are fed to the model.
feats = [
    "SecuritiesCode",
    "Open",
    "High",
    "Low",
    "Close",
    "Volume",
    "AdjustmentFactor",
    "ExpectedDividend",
    "SupervisionFlag",
]

# Base learners, both with default hyper-parameters.
bayesian_ridge = BayesianRidge()
tree = DecisionTreeRegressor()
estimators = [('bayesian_ridge', bayesian_ridge), ('decision_tree', tree)]

# Stack the base learners; no final_estimator is passed, so scikit-learn's default is used.
model = StackingRegressor(estimators=estimators)

# Fit and score on the very same rows: the score is an in-sample fit quality,
# not a validation result.
train_features = df_prices[feats]
train_target = df_prices["Target"]
model.fit(train_features, train_target)
model.score(train_features, train_target)

In [None]:
def calc_spread_return_per_day(df, portfolio_size, toprank_weight_ratio):
    """
    Weighted return of the best-ranked rows minus that of the worst-ranked rows.

    Args:
        df (pd.DataFrame): predicted results; the 'Rank' and 'Target' columns are read
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (float): spread return
    Raises:
        AssertionError: if the smallest 'Rank' is not 0 or the largest is not len(df) - 1
    """
    ranks = df['Rank']
    assert ranks.min() == 0
    assert ranks.max() == len(ranks) - 1

    # Weights fall linearly from toprank_weight_ratio (first pick) to 1 (last pick).
    weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
    mean_weight = weights.mean()

    def weighted_leg(ascending):
        # Weighted 'Target' sum over the first portfolio_size rows in the given rank order,
        # normalised by the mean weight.
        picked = df.sort_values(by='Rank', ascending=ascending)['Target'][:portfolio_size]
        return (picked * weights).sum() / mean_weight

    purchase = weighted_leg(True)   # lowest rank numbers first
    short = weighted_leg(False)     # highest rank numbers first
    return purchase - short

def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> "tuple[float, pd.Series]":
    """
    Sharpe ratio of the daily spread returns, together with the daily values themselves.

    Args:
        df (pd.DataFrame): predicted results; grouped by its 'Date' column
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (tuple): (sharpe ratio, per-date spread returns). The first item is
            mean / standard deviation of the second one.
    """
    # One spread return per date, computed by calc_spread_return_per_day.
    daily_spread = df.groupby('Date').apply(calc_spread_return_per_day, portfolio_size, toprank_weight_ratio)
    sharpe_ratio = daily_spread.mean() / daily_spread.std()
    # The return annotation is a string so it is not evaluated when the function is defined.
    return sharpe_ratio, daily_spread

def add_rank(df, col_name="pred"):
    """
    Add a 0-based integer 'Rank' column, computed separately for each 'Date'.

    Args:
        df (pd.DataFrame): frame with a 'Date' column and the column named by col_name; modified in place
        col_name (str): column whose values are ranked; the largest value of a date gets rank 0
    Returns:
        (pd.DataFrame): the same frame object, now carrying 'Rank'
    """
    # method="first" breaks ties by order of appearance, so ranks within a date are unique.
    per_date_rank = df.groupby("Date")[col_name].rank(method="first", ascending=False)
    zero_based = per_date_rank - 1
    df["Rank"] = zero_based
    df["Rank"] = zero_based.astype("int")
    return df

### Predictions Adjuster

In [None]:
# By Yuike - https://www.kaggle.com/code/ikeppyo/examples-of-higher-scores-than-perfect-predictions

# This function adjusts the predictions so that the daily spread return approaches a certain value.
def adjuster(df, *, target_spread=10, n_trials=10, seed=2022,
             portfolio_size=200, toprank_weight_ratio=2):
    """
    Rescale the 'Target' values of each date so the daily spread return gets close to target_spread.

    For every date, a small Optuna search picks three numbers (x, y, z). Values with
    |Target| < x are kept; all others are replaced by Target * y + sign(Target) * z.
    The search minimises |spread return - target_spread|.

    Args:
        df (pd.DataFrame): frame with 'Date' and 'Target' columns; it is not modified
        target_spread (float): daily spread return the search aims for
        n_trials (int): number of Optuna trials per date
        seed (int): seed of the TPE sampler, which is re-created for every date
        portfolio_size (int): passed on to calc_spread_return_per_day
        toprank_weight_ratio (float): passed on to calc_spread_return_per_day
    Returns:
        The adjusted values produced by groupby("Date").apply, with the Date index level
        dropped. NOTE(review): the exact shape (Series or one row per date) is decided by
        pandas' apply — confirm against the caller before relying on it.
    """
    def calc_pred(day_df, x, y, z):
        # Keep small targets as they are; squash the large ones.
        target = day_df['Target']
        return target.where(target.abs() < x, target * y + np.sign(target) * z)

    def objective(trial, day_df):
        # suggest_float replaces suggest_uniform, which Optuna has deprecated;
        # the parameter names and bounds are unchanged.
        x = trial.suggest_float('x', 0, 0.2)
        y = trial.suggest_float('y', 0, 0.1)
        z = trial.suggest_float('z', 0, 1e-3)
        ranks = calc_pred(day_df, x, y, z).rank(ascending=False, method="first") - 1
        # assign() builds a new frame, so the group handed over by groupby.apply is
        # never written to.
        ranked = day_df.assign(Rank=ranks.astype("int"))
        return calc_spread_return_per_day(ranked, portfolio_size, toprank_weight_ratio)

    def predictor_per_day(day_df):
        study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=seed))
        study.optimize(lambda trial: abs(objective(trial, day_df) - target_spread), n_trials=n_trials)
        # best_params is keyed by 'x', 'y', 'z'; pass it by keyword so the result
        # does not depend on dictionary order.
        return calc_pred(day_df, **study.best_params)

    return df.groupby("Date").apply(predictor_per_day).reset_index(level=0, drop=True)

def _predictor_base(feature_df):
    """
    Predict with the module-level `model` on the `feats` columns of feature_df.

    Args:
        feature_df (pd.DataFrame): frame holding every column listed in `feats`
    Returns:
        whatever model.predict returns for those columns
    """
    model_inputs = feature_df[feats]
    return model.predict(model_inputs)

def _predictor_with_adjuster(feature_df):
    """
    Predict with the module-level `model`, then pass the predictions through adjuster.

    Args:
        feature_df (pd.DataFrame): frame holding every column listed in `feats`;
            its "Target" column is created or overwritten with the model output
    Returns:
        the result of adjuster(feature_df)
    """
    raw_prediction = model.predict(feature_df[feats])
    # adjuster reads the "Target" column, so the predictions are stored under that name.
    feature_df["Target"] = raw_prediction
    adjusted = adjuster(feature_df)
    return adjusted

In [None]:
# Alias for the prediction routine: model prediction followed by adjuster.
# NOTE(review): the submission loop visible in this file calls adjuster() directly
# and never references `predictor` — confirm whether that is intended.
predictor = _predictor_with_adjuster

In [None]:
# Submission loop: for each batch from the competition environment, rank the
# securities of that date and hand the ranks back through env.predict.
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

for prices, options, financials, trades, secondary_prices, sample_prediction in iter_test:
    # Date of the batch, read from its first price row.
    current_date = prices["Date"].iloc[0]
    # Rows for this date are looked up in df_prices (loaded from the CSV files),
    # not taken from the `prices` frame delivered by the iterator.
    # NOTE(review): if current_date is missing from df_prices this frame is empty —
    # verify that the following lines cope with that.
    feature_df = df_prices[df_prices['Date'] == current_date].copy()
    # adjuster works on the "Target" column already present in feature_df; neither
    # `model` nor `predictor` is called here — NOTE(review): confirm this is intended.
    # .iloc[0] presumably picks the single per-date row of adjuster's result — TODO confirm.
    feature_df["pred"] = adjuster(feature_df).iloc[0]
    # Turn "pred" into a 0-based rank (largest value -> rank 0).
    feature_df = add_rank(feature_df)
    # SecuritiesCode -> Rank lookup; codes absent from feature_df map to NaN.
    feature_map = feature_df.set_index('SecuritiesCode')['Rank'].to_dict()
    sample_prediction['Rank'] = sample_prediction['SecuritiesCode'].map(feature_map)
    env.predict(sample_prediction)