In [None]:
import numpy as np
import pandas as pd
import jpx_tokyo_market_prediction
import lightgbm as lgb
from lightgbm import LGBMRegressor
import optuna.integration.lightgbm as lgb_o
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# Load data

In [None]:
# Load the stock price table from the competition's supplemental files.
# Later cells read its Date, SecuritiesCode and Target columns.
prices = pd.read_csv(
    filepath_or_buffer="../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/stock_prices.csv"
)

In [None]:
# Size of the "recent days" window.
NDAYS = 34

# Date found 2000 * NDAYS rows before the end of the table.
# NOTE(review): 2000 is presumably the number of securities per trading day,
# which would make this the start of the last NDAYS days -- confirm.
window_start = prices["Date"].iat[-2000 * NDAYS]

# Every row dated on or after that date, re-indexed from zero.
lastdays = prices.loc[prices["Date"] >= window_start].reset_index(drop=True)

In [None]:
# Mean Target of every security over the loaded price table, indexed by
# SecuritiesCode (one row per code, single column "Target").
average = pd.DataFrame(prices.groupby("SecuritiesCode").Target.mean())


def get_avg(_id_):
    """Look up the mean Target of a single security.

    Parameters
    ----------
    _id_
        A SecuritiesCode that is present in the index of ``average``.

    Returns
    -------
    pandas.Series
        The row of ``average`` for that code, i.e. a one-element Series whose
        only label is "Target".

    Raises
    ------
    KeyError
        If ``_id_`` is not in the index of ``average``.
    """
    return average.loc[_id_]


# Attach the per-security mean to every price row.
# Series.map does the whole lookup in one vectorised pass; the previous
# ``.apply(get_avg)`` performed one ``.loc`` lookup per row and produced a
# one-column DataFrame that then had to be squeezed into the "Avg" column.
prices["Avg"] = prices["SecuritiesCode"].map(average["Target"])

# Train the model

In [None]:
# Re-encode the Date column as a YYYYMMDD integer (parse to datetime, format
# as a digit string, cast to int).
prices["Date"] = pd.to_datetime(prices["Date"]).dt.strftime("%Y%m%d").astype(int)

# Target column, kept as a one-column DataFrame.
y = prices[["Target"]]

# Model inputs: encoded date, security identifier and per-security mean target.
X = prices[["Date", "SecuritiesCode", "Avg"]]

# Distinct security codes that appear in the feature matrix.
codes = X["SecuritiesCode"].unique()

In [None]:
# Hold out the most recent NDAYS dates for validation.
# Previously the validation Dataset was built from exactly the same rows as
# the training Dataset, so early stopping and the Optuna parameter search were
# scored on data the model had already fitted and could not detect overfitting.
trading_days = np.sort(X["Date"].unique())
if len(trading_days) < 2:
    raise ValueError("At least two distinct dates are required for a train/validation split")
# Always leave at least one date on the training side.
n_valid_days = min(NDAYS, len(trading_days) - 1)
valid_start = trading_days[-n_valid_days]
is_valid = X["Date"] >= valid_start

train = lgb_o.Dataset(X.loc[~is_valid], y.loc[~is_valid])
val = lgb_o.Dataset(X.loc[is_valid], y.loc[is_valid])

# Base parameters; the Optuna LightGBM integration tunes the remaining ones.
params = {'objective': 'regression',
          'metric': 'rmse',
          'random_seed': 0}

# Train with step-wise hyper-parameter tuning; stop a trial when the
# validation RMSE has not improved for 100 rounds, log every 200 rounds.
gbm_o = lgb_o.train(params,
                    train,
                    valid_sets=val,
                    early_stopping_rounds=100,
                    verbose_eval=200,)

# Report the parameters of the returned booster.
best_params = gbm_o.params
print("  Params: ")
for key, value in best_params.items():
    print("    {}: {}".format(key, value))

# Let's take a look at our predictions

In [None]:
# Security whose predictions are inspected visually.
SAMPLE_CODE = 1301

# Model output for that security's feature rows, using the best iteration
# recorded on the booster.
sample_pred = gbm_o.predict(X[X["SecuritiesCode"] == SAMPLE_CODE],
                            num_iteration=gbm_o.best_iteration)
# Actual Target values for the same security.
sample_true = prices.loc[prices["SecuritiesCode"] == SAMPLE_CODE, "Target"].to_numpy()

# Label both curves so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(sample_pred, label="Prediction")
ax.plot(sample_true, label="Target")
ax.set(title="Predicted vs. actual Target, SecuritiesCode {}".format(SAMPLE_CODE),
       xlabel="Observation index",
       ylabel="Target")
ax.legend();

# Submission

In [None]:
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

# Per-security mean Target computed from the training table, as a Series
# indexed by SecuritiesCode, for vectorised lookup.
avg_by_code = average["Target"]

# The first tuple element is named ``day_prices`` so that it does not
# overwrite the notebook-level ``prices`` training frame.
for (day_prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    # Build the model inputs in a separate frame with the same three columns
    # and encodings used for training. ``sample_prediction`` itself is left
    # untouched, so the submitted Date keeps its original form.
    features = pd.DataFrame({
        "Date": pd.to_datetime(sample_prediction["Date"]).dt.strftime("%Y%m%d").astype(int),
        "SecuritiesCode": sample_prediction["SecuritiesCode"],
        # Codes absent from ``avg_by_code`` become NaN instead of raising KeyError.
        "Avg": sample_prediction["SecuritiesCode"].map(avg_by_code),
    })

    predicted = gbm_o.predict(features, num_iteration=gbm_o.best_iteration)

    # Rank 0 goes to the highest prediction. The ranks are derived from the
    # actual number of rows (no hard-coded 2000), and ties are broken by row
    # order so every rank is used exactly once.
    ranks = (
        pd.Series(predicted)
        .rank(method="first", ascending=False)
        .astype(int)
        .to_numpy()
        - 1
    )

    # Assemble the submission frame, ordered by SecuritiesCode.
    submission = (
        sample_prediction[["Date", "SecuritiesCode"]]
        .assign(Rank=ranks)
        .sort_values(by="SecuritiesCode", ascending=True)
    )
    env.predict(submission)