In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from tensorflow.keras import layers
import tensorflow as tf
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor
import jpx_tokyo_market_prediction

In [None]:
# Hyperparameters unpacked into CatBoostRegressor(**params) when the model is built.
params = dict(
    n_estimators=2048,
    max_depth=10,
    learning_rate=0.05,
    subsample=0.95,
    reg_lambda=1.50,
    random_state=42,
)

In [None]:
# Root folder of the competition data; later cells reuse it for the example test files.
file_path = '/kaggle/input/jpx-tokyo-stock-exchange-prediction/'
# Training price table, one CSV read into a DataFrame.
prices = pd.read_csv(Path(file_path) / 'train_files' / 'stock_prices.csv')

In [None]:
# Let pandas render up to 150 columns before truncating wide frames.
pd.options.display.max_columns = 150

In [None]:
# Parse the Date column into datetimes, in place on the training frame.
prices['Date'] = prices['Date'].pipe(pd.to_datetime)
# Earliest training date; the test cells reuse it as the origin for `date_rank`.
min_date = prices['Date'].min()
# Numeric date feature: whole days elapsed since `min_date`.
prices['date_rank'] = prices['Date'].sub(min_date).dt.days

In [None]:
# Columns fed to the model, both for training and for prediction.
features = [
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'date_rank',
    'SecuritiesCode',
]

In [None]:
# Cap rendered columns at 100 (this replaces the value set by the earlier display cell).
pd.options.display.max_columns = 100

In [None]:
# Print the frame summary with per-column non-null counts, to see which columns
# contain missing values before rows are dropped in the next cell.
prices.info(show_counts=True)

In [None]:
# Keep only rows where every model feature is present; any missing feature drops the row.
prices = prices.dropna(axis=0, how='any', subset=features)

In [None]:
# The earlier dropna only checks the feature columns, so rows with a missing
# label can still be present here. Remove them before splitting off the label,
# otherwise NaN targets would be passed to the regressor's fit call.
prices = prices.dropna(subset=['Target'])
# Detach the label column; `target` keeps the same index labels as `prices`,
# which later cells rely on when selecting labels by train/validation index.
target = prices.pop('Target')

In [None]:
# Hold out 20% of the rows as a validation set for early stopping.
# The split is seeded with the model's seed so that Restart & Run All selects
# the same train/validation rows every time.
# NOTE(review): rows are shuffled across dates, so validation rows can precede
# training rows in time - consider a date-based split if the validation score
# is meant to estimate out-of-sample performance.
train_f, valid_f = train_test_split(
    prices[features],
    test_size=0.2,
    random_state=params['random_state'],
)
# Index labels of each part, used to pick the matching rows out of `target`.
train_idx = train_f.index
valid_idx = valid_f.index

In [None]:
# Build the regressor from the shared hyperparameter dict and fit it, watching
# the held-out split so training can stop early when it stops improving.
cbr = CatBoostRegressor(**params)
cbr.fit(
    X=train_f,
    y=target.loc[train_idx],  # labels selected by index label to match train_f
    eval_set=[(valid_f, target.loc[valid_idx])],
    early_stopping_rounds=128,
    verbose=50,
    plot=True,
)

In [None]:
# Load the example test prices and attach the same day-offset feature used in
# training, measured from the training set's first date (`min_date`).
test_prices = pd.read_csv(
    Path(file_path) / 'example_test_files' / 'stock_prices.csv'
).assign(
    date_rank=lambda frame: (pd.to_datetime(frame['Date']) - min_date).dt.days
)

In [None]:
# Trial prediction on the example test file; the bare name on the last line
# makes the notebook display the resulting array.
preds = np.squeeze(cbr.predict(test_prices[features]))
preds

In [None]:
# Mean of the training labels.
# NOTE(review): `target_mean` is not referenced by any other visible cell -
# confirm whether it is still needed.
target_mean = target.mean()

In [None]:
# Submission loop: score every batch served by the competition API and send
# back a rank per security.
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

# The price frame of each batch is bound to `batch_prices` so the loop does not
# overwrite the training frame `prices` built earlier in the notebook.
for (batch_prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    # Same day-offset feature as in training, measured from the training set's first date.
    batch_prices['date_rank'] = (pd.to_datetime(batch_prices['Date']) - min_date).dt.days
    preds = np.squeeze(cbr.predict(batch_prices[features]))

    # NOTE(review): this positional assignment assumes `sample_prediction` rows
    # are in the same order as `batch_prices` rows - confirm against the API.
    sample_prediction["Prediction"] = preds

    # Rank 0 goes to the highest prediction. The rank range is taken from the
    # batch size instead of a fixed 2000, so a batch with a different number of
    # rows no longer raises a length-mismatch error.
    sample_prediction = sample_prediction.sort_values(by="Prediction", ascending=False)
    sample_prediction["Rank"] = np.arange(len(sample_prediction))
    sample_prediction = sample_prediction.sort_values(by="SecuritiesCode", ascending=True)

    # Selecting the three submission columns already leaves the helper
    # "Prediction" column out, so no separate drop is needed.
    submission = sample_prediction[["Date", "SecuritiesCode", "Rank"]]
    env.predict(submission)