In [None]:
import warnings, gc
import numpy as np 
import pandas as pd
import matplotlib.colors
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode
from datetime import datetime, timedelta
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn import preprocessing
import jpx_tokyo_market_prediction

from lightgbm import LGBMRegressor, LGBMClassifier, Booster
from decimal import ROUND_HALF_UP, Decimal
warnings.filterwarnings("ignore")
import plotly.figure_factory as ff

In [None]:
def adjust_price(price):
    """
    Add CumulativeAdjustmentFactor and AdjustedClose columns to a price table.

    For every SecuritiesCode the AdjustmentFactor values are multiplied
    cumulatively from the newest row back to the oldest one, and Close is
    scaled by that cumulative factor. An AdjustedClose of exactly 0 is treated
    as missing and replaced with the previous available value of the same
    security.

    Args:
        price (pd.DataFrame)  : pd.DataFrame include stock_price. Must contain
            the columns Date, SecuritiesCode, Close and AdjustmentFactor.
            The frame passed in is not modified.
    Returns:
        price DataFrame (pd.DataFrame): copy of the input sorted by
            SecuritiesCode then Date with a fresh 0..n-1 index, Date parsed to
            datetime, plus CumulativeAdjustmentFactor and AdjustedClose.
    """
    def round_half_up(value):
        # Round the printed decimal representation half-up to one decimal
        # place; float round() would use banker's rounding on binary values.
        return float(
            Decimal(str(value)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        )

    # Work on a copy: the original implementation overwrote the caller's
    # Date column as a side effect.
    price = price.copy()
    price["Date"] = pd.to_datetime(price["Date"], format="%Y-%m-%d")

    # Newest row first inside each security so that cumprod accumulates the
    # factors backwards in time.
    price = price.sort_values(["SecuritiesCode", "Date"], ascending=[True, False])
    price["CumulativeAdjustmentFactor"] = (
        price.groupby("SecuritiesCode")["AdjustmentFactor"].cumprod()
    )
    price["AdjustedClose"] = (
        price["CumulativeAdjustmentFactor"] * price["Close"]
    ).map(round_half_up)

    # Back to chronological order before forward filling.
    price = price.sort_values(["SecuritiesCode", "Date"])
    # to fill AdjustedClose, replace 0 into np.nan
    price.loc[price["AdjustedClose"] == 0, "AdjustedClose"] = np.nan
    # Forward fill per security so a value never leaks from one code to the next.
    price["AdjustedClose"] = price.groupby("SecuritiesCode")["AdjustedClose"].ffill()
    return price.reset_index(drop=True)



In [None]:
def create_features(df, col='AdjustedClose', periods=(5, 10, 20, 30, 50)):
    """
    Add per-security return, moving-average and volatility features.

    Args:
        df (pd.DataFrame): price rows with a "SecuritiesCode" column and the
            price column ``col``. Rows of one security must be in chronological
            order relative to each other; rows of different securities may be
            interleaved. The frame passed in is not modified.
        col (str): name of the price column the features are derived from.
        periods (iterable of int): look-back windows, counted in rows per
            security.
    Returns:
        pd.DataFrame: copy of ``df`` with, for every period ``p``, the columns
            Return_{p}Day        - pct_change over p rows,
            MovingAvg_{p}Day     - rolling mean over p rows,
            ExpMovingAvg_{p}Day  - exponentially weighted mean (span=p, adjust=False),
            Volatility_{p}Day    - rolling std over p rows of the log price differences.
    """
    df = df.copy()

    # Positional working copy (0..n-1 index) so results can be mapped back to
    # the original rows regardless of df's index or row order.
    keyed = pd.DataFrame({
        "SecuritiesCode": df["SecuritiesCode"].to_numpy(),
        col: df[col].to_numpy(),
    })
    price_by_code = keyed.groupby("SecuritiesCode")[col]
    log_diff = np.log(keyed[col]).groupby(keyed["SecuritiesCode"]).diff()
    log_diff_by_code = log_diff.groupby(keyed["SecuritiesCode"])

    def _in_row_order(windowed):
        # Grouped window results come back ordered by group with a
        # (group, position) MultiIndex; drop the group level and reindex on
        # position instead of trusting the raw value order.
        return windowed.droplevel(0).reindex(keyed.index).to_numpy()

    for period in periods:
        df["Return_{}Day".format(period)] = price_by_code.pct_change(period).to_numpy()
        df["MovingAvg_{}Day".format(period)] = _in_row_order(
            price_by_code.rolling(window=period).mean()
        )
        df["ExpMovingAvg_{}Day".format(period)] = _in_row_order(
            price_by_code.ewm(span=period, adjust=False).mean()
        )
        # Rolling window is applied per security so it never spans two codes.
        df["Volatility_{}Day".format(period)] = _in_row_order(
            log_diff_by_code.rolling(period).std()
        )
    return df

In [None]:
def upper_shadow(df):
    """Return High minus the larger of Close and Open, element-wise."""
    candle_top = np.maximum(df['Close'], df['Open'])
    return df['High'] - candle_top

In [None]:
def lower_shadow(df):
    """Return the smaller of Close and Open minus Low, element-wise."""
    candle_bottom = np.minimum(df['Close'], df['Open'])
    return candle_bottom - df['Low']

In [None]:
def get_features(df):
    """
    Build the model feature frame from stock_prices-style rows.

    Args:
        df (pd.DataFrame): rows containing Date, RowId, Open, High, Low, Close,
            Volume, SecuritiesCode, AdjustmentFactor and SupervisionFlag.
            Assumed to have a unique index (needed to align MA5/MA20).
            The frame passed in is not modified.
    Returns:
        pd.DataFrame: feature rows in the order produced by adjust_price
            (sorted by SecuritiesCode then Date, fresh 0..n-1 index). Contains
            Open, High, Low, Volume, SecuritiesCode, Upper_Shadow,
            Lower_Shadow, MA5, MA20, AdjustedClose and the columns added by
            create_features.
    """
    base_columns = ['Date', 'RowId', 'Open', 'High', 'Low', 'Close', 'Volume',
                    'SecuritiesCode', 'AdjustmentFactor', 'SupervisionFlag']
    df_feat = df[base_columns].copy()
    df_feat['Upper_Shadow'] = upper_shadow(df_feat)
    df_feat['Lower_Shadow'] = lower_shadow(df_feat)

    # Moving averages are taken per security in date order. A plain
    # df['Close'].rolling(n) runs over the whole table and averages the closes
    # of different securities that merely sit on adjacent rows.
    close_by_code = df_feat.sort_values('Date').groupby('SecuritiesCode')['Close']
    # droplevel(0) removes the group key so the result aligns on df_feat's index.
    df_feat['MA5'] = close_by_code.rolling(5).mean().droplevel(0)
    df_feat['MA20'] = close_by_code.rolling(20).mean().droplevel(0)

    prices = adjust_price(df_feat)
    price_features = create_features(df=prices)
    # Identifier and raw-adjustment columns are not used as model inputs.
    return price_features.drop(
        columns=['Date', 'RowId', 'SupervisionFlag', 'AdjustmentFactor',
                 'CumulativeAdjustmentFactor', 'Close']
    )

In [None]:

def get_Xy_and_model(df_train):
    """
    Build the training matrix and fit a LightGBM regressor on it.

    Args:
        df_train (pd.DataFrame): raw training rows; must contain the columns
            required by get_features plus "Target". (SecuritiesCode, Date)
            pairs are assumed to be unique.
    Returns:
        tuple: (X, y, model) where X is the feature frame without rows that
            contain any NaN, y the matching Target values and model the fitted
            LGBMRegressor.
    Raises:
        ValueError: if get_features returns a different number of rows than
            df_train, in which case targets cannot be matched to features.
    """
    df_proc = get_features(df_train)

    # get_features returns rows re-sorted by (SecuritiesCode, Date) with a
    # fresh 0..n-1 index, so assigning df_train['Target'] by index would pair
    # every feature row with some other row's target. Reorder the targets the
    # same way and attach them positionally instead.
    target_sorted = df_train.sort_values(["SecuritiesCode", "Date"])["Target"].to_numpy()
    if len(target_sorted) != len(df_proc):
        raise ValueError(
            "get_features returned {} rows for {} training rows".format(
                len(df_proc), len(target_sorted)
            )
        )
    df_proc['y'] = target_sorted
    df_proc = df_proc.dropna(how="any")

    X = df_proc.drop("y", axis=1)
    y = df_proc["y"]
    try:
        # Prefer the GPU learner when the installed LightGBM build supports it.
        model = LGBMRegressor(device_type='gpu')
        model.fit(X, y)
    except Exception:
        # No usable GPU build/device: fall back to the default CPU learner.
        model = LGBMRegressor()
        model.fit(X, y)
    return X, y, model

In [None]:
# Load the training prices and fit the model on them.
stock_prices = pd.read_csv(
    "../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv"
)
X, y, model = get_Xy_and_model(stock_prices)
# Keep the training matrix and targets reachable under a second pair of names.
Xs, ys = X, y

In [None]:
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

# NOTE(review): get_features only sees the rows of the current batch. If each
# batch holds a single day per security (TODO confirm against the API), every
# rolling / pct_change feature is NaN at prediction time; consider prepending
# recent history from stock_prices before building features.
for (df_test, options, financials, trades, secondary_prices, df_pred) in iter_test:
    df_pred['row_id'] = (df_pred['Date'].astype(str) + '_' + df_pred['SecuritiesCode'].astype(str))
    # Build the key from df_test's own codes; mixing in df_pred's column relied
    # on both frames sharing index and row order.
    df_test['row_id'] = (df_test['Date'].astype(str) + '_' + df_test['SecuritiesCode'].astype(str))

    x_test = get_features(df_test)
    y_pred = model.predict(x_test)

    # get_features re-sorts rows, so attach predictions by SecuritiesCode
    # rather than by position.
    pred_by_code = pd.Series(y_pred, index=x_test['SecuritiesCode'].to_numpy())
    # If a batch ever carries several rows per security keep the last one.
    pred_by_code = pred_by_code[~pred_by_code.index.duplicated(keep='last')]
    df_pred['Target'] = df_pred['SecuritiesCode'].map(pred_by_code)

    # Rank 0 goes to the highest predicted Target; size follows the batch
    # instead of a hard-coded 2000.
    df_pred = df_pred.sort_values(by="Target", ascending=False)
    df_pred['Rank'] = np.arange(len(df_pred))
    df_pred = df_pred.sort_values(by="SecuritiesCode", ascending=True)
    df_pred = df_pred.drop(columns=["Target"])
    submission = df_pred[["Date", "SecuritiesCode", "Rank"]]
    env.predict(submission)

In [None]:
# Show the first lines of submission.csv via the shell.
# NOTE(review): the file is presumably written by the competition env once the
# prediction loop has finished; confirm it exists before relying on this cell.
! head submission.csv