# Based on this submission https://www.kaggle.com/code/listeningtounity/random-forest-with-optuna

## import

In [None]:
import numpy as np
import pandas as pd
import jpx_tokyo_market_prediction

## reading the dataset as dataframe

In [None]:
# Root folder of the competition data inside the Kaggle kernel.
path = "../input/jpx-tokyo-stock-exchange-prediction/"

# Load the stock price table from the supplemental files into a DataFrame.
stock_prices_csv = f"{path}supplemental_files/stock_prices.csv"
prices = pd.read_csv(stock_prices_csv)

## preprocessing the dataframe: converting Date to an integer and replacing NaN with zeros (missing Volume is replaced with 1)

In [None]:
def prep_prices(prices):
    """Prepare a price frame for the model, modifying it in place.

    * ``Date`` is parsed as a datetime and stored as integer nanoseconds
      since the Unix epoch.
    * Missing ``Volume`` values become 1.
    * Every other missing value becomes 0.

    Parameters
    ----------
    prices : pandas.DataFrame
        Frame with at least ``Date`` and ``Volume`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``prices`` object, after modification.
    """
    # Series.view() is deprecated in recent pandas, and view(int) depends on
    # the platform's int width. Pin the resolution to nanoseconds and cast
    # explicitly so the encoded value is the same everywhere.
    prices["Date"] = (
        pd.to_datetime(prices["Date"]).astype("datetime64[ns]").astype("int64")
    )
    # Assign the column back instead of calling fillna(inplace=True) on a
    # column selection, which does not reliably update the frame.
    prices["Volume"] = prices["Volume"].fillna(1)
    prices.fillna(0, inplace=True)
    return prices

In [None]:
# Columns fed to the model as input features.
feats = [
    "Date",
    "SecuritiesCode",
    "Open",
    "High",
    "Low",
    "Close",
    "Volume",
]

# Clean the training frame (integer dates, no missing values).
prices = prep_prices(prices)

In [None]:
# Show the first five rows of the cleaned frame as a sanity check.
prices.head(n=5)

## split data

In [None]:
# Feature matrix (the columns listed in `feats`) and the regression target.
X_train, y_train = prices[feats], prices["Target"]

## Importing Optuna & sklearn

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
import optuna

## setting up random forest using optuna
### hyperparameters are tuned following the Optuna FAQ: https://optuna.readthedocs.io/en/stable/faq.html

In [None]:
def objective(trial):
    """Optuna objective: mean 5-fold cross-validated R^2 of a random forest.

    Samples one set of ``RandomForestRegressor`` hyperparameters from
    ``trial``, scores it with ``cross_val_score`` on the module-level
    ``X_train`` / ``y_train``, and returns the mean score.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        Mean R^2 over the 5 folds (higher is better).
    """
    # 'squared_error' / 'absolute_error' are the current names of the
    # criteria formerly spelled 'mse' / 'mae'.
    criterion = trial.suggest_categorical(
        'criterion', ['squared_error', 'absolute_error'])
    # Real booleans: the strings 'True' and 'False' are both truthy, so
    # bootstrapping could never actually be switched off.
    bootstrap = trial.suggest_categorical('bootstrap', [True, False])
    max_depth = trial.suggest_int('max_depth', 1, 10000)
    # 1.0 means "use all features", which is what 'auto' meant for
    # regressors before that alias was removed.
    max_features = trial.suggest_categorical(
        'max_features', [1.0, 'sqrt', 'log2'])
    # A tree needs at least two leaves; scikit-learn rejects 1.
    max_leaf_nodes = trial.suggest_int('max_leaf_nodes', 2, 10000)
    n_estimators = trial.suggest_int('n_estimators', 30, 1000)

    regr = RandomForestRegressor(
        bootstrap=bootstrap,
        criterion=criterion,
        max_depth=max_depth,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        n_estimators=n_estimators,
        n_jobs=2,
    )

    score = cross_val_score(regr, X_train, y_train, cv=5, scoring="r2")
    return score.mean()

## fitting the model

In [None]:
# Search for the hyperparameters that maximise the value returned by `objective`.
study = optuna.create_study(direction='maximize')

# Search budget: at most 600 seconds and at most 1000 trials.
study.optimize(objective, n_trials=1000, timeout=600)

# Build a forest from the best trial's hyperparameters.
best_params = study.best_params
tuned_names = (
    'bootstrap',
    'criterion',
    'max_depth',
    'max_features',
    'max_leaf_nodes',
    'n_estimators',
)
optimised_rf = RandomForestRegressor(
    n_jobs=2,
    **{name: best_params[name] for name in tuned_names},
)

# Fit the tuned forest on the full training set.
optimised_rf.fit(X_train, y_train)

In [None]:
# In-sample check: predict on the training frame and order rows by
# prediction-per-volume.
# The fitted estimator is `optimised_rf`; no `model` name is defined in this
# notebook.
prices["Prediction"] = optimised_rf.predict(prices[feats])
prices["rate"] = prices["Prediction"] / prices["Volume"]
prices.sort_values(by="rate", ascending=False, inplace=True)
# Create a real column (attribute assignment does not add one) sized to the
# frame. NOTE: this is one global ordering over every row, not a per-date rank.
prices["Rank"] = np.arange(len(prices))
prices.sort_values(by="SecuritiesCode", ascending=True, inplace=True)
prices.head()

## calling API for submission

In [None]:
# Create the competition environment and the iterator over the test batches.
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    prices = prep_prices(prices)
    # The fitted estimator is `optimised_rf`; no `model` name is defined in
    # this notebook.
    sample_prediction["Prediction"] = optimised_rf.predict(prices[feats])
    # Divide positionally. The prediction assignment above already assumes
    # sample_prediction rows are in the same order as the prices rows, so
    # index labels must not be allowed to re-align the operands.
    sample_prediction["rate"] = (
        sample_prediction["Prediction"] / prices["Volume"].to_numpy()
    )
    sample_prediction.sort_values(by="rate", ascending=False, inplace=True)
    # Rank 0 is the highest rate. Size the ranks to the batch instead of
    # hard-coding 2000 rows.
    sample_prediction["Rank"] = np.arange(len(sample_prediction))
    sample_prediction.sort_values(by="SecuritiesCode", ascending=True, inplace=True)
    submission = sample_prediction[["Date", "SecuritiesCode", "Rank"]]
    env.predict(submission)