In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every
# file found, so the attached datasets are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# List the offline package files shipped in the attached "packages" dataset.
!ls ../input/packages/ta/

In [None]:
# Stage the offline package files in a local directory that the later
# `pip install --find-links` call reads from.
!mkdir -p /tmp/pip/cache/
# NOTE(review): the numpy/pandas wheels below are tagged cp39 / win_amd64;
# pip only considers wheels whose tags match the running interpreter and
# platform - confirm these are actually usable in this environment.
!cp ../input/packages/ta/numpy-1.22.4-cp39-cp39-win_amd64.whl /tmp/pip/cache/numpy-1.22.4-cp39-cp39-win_amd64.whl
!cp ../input/packages/ta/pytz-2022.1-py2.py3-none-any.whl /tmp/pip/cache/pytz-2022.1-py2.py3-none-any.whl
!cp ../input/packages/ta/pandas-1.4.2-cp39-cp39-win_amd64.whl /tmp/pip/cache/pandas-1.4.2-cp39-cp39-win_amd64.whl
!cp ../input/packages/ta/six-1.16.0-py2.py3-none-any.whl /tmp/pip/cache/six-1.16.0-py2.py3-none-any.whl
!cp ../input/packages/ta/python_dateutil-2.8.2-py2.py3-none-any.whl /tmp/pip/cache/python_dateutil-2.8.2-py2.py3-none-any.whl
# The ta source archive is stored with a .xyz extension in the dataset; the
# copy renames it to .tar.gz (presumably so pip recognises it as an sdist).
!cp ../input/packages/ta/ta-0.10.1.xyz /tmp/pip/cache/ta-0.10.1.tar.gz

In [None]:
# Install `ta` without network access: --no-index disables PyPI and
# --find-links points pip at the files staged in /tmp/pip/cache/.
!pip install --no-index --find-links /tmp/pip/cache/ ta

In [None]:

import ta
import xgboost as xgb
import warnings
warnings.filterwarnings('ignore')

# Import data

In [None]:
# Root folder of the competition dataset.
root = "../input/jpx-tokyo-stock-exchange-prediction"

# Historical price table; the Target column is removed right after loading.
stock_prices_raw = pd.read_csv(f"{root}/train_files/stock_prices.csv").drop(columns='Target')

# Remaining historical tables, loaded unchanged.
financials_raw = pd.read_csv(f"{root}/train_files/financials.csv")
options_raw = pd.read_csv(f"{root}/train_files/options.csv")
secondary_stock_prices_raw = pd.read_csv(f"{root}/train_files/secondary_stock_prices.csv")
trades_raw = pd.read_csv(f"{root}/train_files/trades.csv")


# Adjust Close price

In [None]:
def prep_prices(price):
    """
    Return a copy of `price` with OHLC prices and Volume adjusted by the
    cumulative AdjustmentFactor of each security.

    For every SecuritiesCode the rows are walked from the newest date to the
    oldest and AdjustmentFactor is multiplied up cumulatively. Open/High/Low/
    Close are multiplied by that cumulative factor and rounded half-up to one
    decimal; Volume is divided by it. Missing ExpectedDividend values are
    replaced by 0.

    Args:
        price (pd.DataFrame): needs the columns Date, SecuritiesCode, Open,
            High, Low, Close, Volume, AdjustmentFactor and ExpectedDividend.
            The frame is not modified.

    Returns:
        pd.DataFrame: same columns as the input, sorted by
        (SecuritiesCode, Date) with a fresh RangeIndex.
    """
    from decimal import ROUND_HALF_UP, Decimal
    pcols = ["Open","High","Low","Close"]

    def qround(x):
        # Round half-up to one decimal via Decimal to avoid binary-float
        # rounding surprises; NaN passes through unchanged.
        return float(Decimal(str(x)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP))

    # Work on a copy: the caller keeps reusing its raw frame between calls.
    price = price.copy()
    price["ExpectedDividend"] = price["ExpectedDividend"].fillna(0)

    # Newest row first inside each security so that cumprod accumulates the
    # adjustment factors backwards in time.
    price = price.sort_values(["SecuritiesCode", "Date"], ascending=[True, False])
    cum_adjust = price.groupby("SecuritiesCode")["AdjustmentFactor"].cumprod()

    # generate adjusted prices
    for p in pcols:
        price[p] = (cum_adjust * price[p]).apply(qround)
    price["Volume"] = price["Volume"] / cum_adjust

    return price.sort_values(["SecuritiesCode", "Date"]).reset_index(drop=True)

In [None]:
# # Adjust prices
# stock_prices_raw_withNaTarget = prep_prices(stock_prices_raw_withNaTarget)

# # For training data: drop columns where Target == nan
# # stock_prices_raw = stock_prices_raw_withNaTarget.dropna(subset=['Target'])
# stock_prices_raw = stock_prices_raw_withNaTarget

In [None]:
# Per-security minimum / maximum tables read from the models dataset,
# indexed by SecuritiesCode.
feature_min, feature_max = (
    pd.read_csv(bounds_path, index_col='SecuritiesCode')
    for bounds_path in (
        "../input/d/junxum/models/feature_min.csv",
        "../input/d/junxum/models/feature_max.csv",
    )
)

# Generate features

In [None]:
def generate_ta_features(stock_prices,istrain):
    """
    Build per-security technical-analysis features with the `ta` library.

    Args:
        stock_prices (pd.DataFrame): price rows with the columns RowId, Date,
            SecuritiesCode, High, Low, Close, Volume, AdjustmentFactor,
            ExpectedDividend and SupervisionFlag. The row index must be
            unique because indicator values are aligned back on it.
        istrain (bool): if True, keep only rows with Date > '2017-02-10' and
            drop every row that still contains a NaN; if False, forward-fill
            remaining NaNs instead.

    Returns:
        pd.DataFrame: Date, SecuritiesCode, Close, volume_log and one column
        per indicator listed in `indicator_specs`; +/-inf replaced by 0.

    NOTE(review): the output column names and values are kept exactly as the
    previous implementation produced them, because the saved model and the
    feature_min / feature_max tables presumably depend on them. In particular:
      * 'momentum_tsi' actually holds ta.momentum.roc (window 8);
      * 'momentum_pvo_hist' actually holds ta.others.daily_log_return. The
        old code computed pvo_hist and daily_return first and then overwrote
        the same column twice; only the last assignment survived, so the two
        discarded computations are no longer performed.
    """
    stock_prices = stock_prices.drop(['RowId', 'AdjustmentFactor', 'ExpectedDividend', 'SupervisionFlag'],axis=1)

    # NOTE(review): this forward-fill is not grouped by SecuritiesCode, so a
    # gap at the start of one security is filled from the row above it, which
    # may belong to a different security. Behaviour kept unchanged.
    stock_prices = stock_prices.ffill()

    features = stock_prices[['Date', 'SecuritiesCode', 'Close']].copy()
    features['volume_log'] = np.log(stock_prices['Volume'])

    # Split once and reuse the per-security frames for every indicator.
    per_security = [group for _, group in stock_prices.groupby('SecuritiesCode')]
    column_of = {'high': 'High', 'low': 'Low', 'close': 'Close', 'volume': 'Volume'}

    def indicator(func, inputs, **params):
        # Evaluate `func` on each security separately and stitch the results
        # together; every result keeps the original row index, so assigning
        # it to `features` aligns by row. Unlike groupby.apply(...).droplevel(0)
        # this also works when the frame contains a single security.
        if not per_security:
            return pd.Series(dtype='float64')
        return pd.concat([
            func(**{name: group[column_of[name]] for name in inputs}, fillna=False, **params)
            for group in per_security
        ])

    HLCV = ('high', 'low', 'close', 'volume')
    HLC = ('high', 'low', 'close')
    HLV = ('high', 'low', 'volume')
    HL = ('high', 'low')
    CV = ('close', 'volume')
    C = ('close',)
    V = ('volume',)

    bollinger = dict(window=5, window_dev=1.8)
    macd = dict(window_slow=20, window_fast=8, window_sign=4)
    pvo = dict(window_slow=20, window_fast=15, window_sign=3)

    # (output column, ta function, price inputs, indicator parameters)
    indicator_specs = [
        # volume
        ('volume_adi', ta.volume.acc_dist_index, HLCV, {}),
        ('volume_obv', ta.volume.on_balance_volume, CV, {}),
        ('volume_cmf', ta.volume.chaikin_money_flow, HLCV, dict(window=2)),
        ('volume_fi', ta.volume.force_index, CV, dict(window=2)),
        ('volume_em', ta.volume.ease_of_movement, HLV, dict(window=2)),
        ('volume_sem', ta.volume.sma_ease_of_movement, HLV, dict(window=4)),
        ('volume_vpt', ta.volume.volume_price_trend, CV, {}),
        ('volume_vwap', ta.volume.volume_weighted_average_price, HLCV, dict(window=5)),
        ('volume_mfi', ta.volume.money_flow_index, HLCV, dict(window=2)),
        ('volume_nvi', ta.volume.negative_volume_index, CV, {}),
        # volatility
        ('volatility_bbw', ta.volatility.bollinger_wband, C, bollinger),
        ('volatility_bbp', ta.volatility.bollinger_pband, C, bollinger),
        ('volatility_bbhi', ta.volatility.bollinger_hband_indicator, C, bollinger),
        ('volatility_bbli', ta.volatility.bollinger_lband_indicator, C, bollinger),
        ('volatility_kcw', ta.volatility.keltner_channel_wband, HLC, dict(window=5)),
        ('volatility_kcp', ta.volatility.keltner_channel_pband, HLC, dict(window=5)),
        ('volatility_kchi', ta.volatility.keltner_channel_hband_indicator, HLC, dict(window=5)),
        ('volatility_kcli', ta.volatility.keltner_channel_lband_indicator, HLC, dict(window=5)),
        ('volatility_dcw', ta.volatility.donchian_channel_wband, HLC, dict(window=2)),
        ('volatility_dcp', ta.volatility.donchian_channel_pband, HLC, dict(window=5)),
        ('volatility_atr', ta.volatility.average_true_range, HLC, dict(window=2)),
        ('volatility_ui', ta.volatility.ulcer_index, C, dict(window=5)),
        # trend
        ('trend_macd_signal', ta.trend.macd_signal, C, macd),
        ('trend_macd_diff', ta.trend.macd_diff, C, macd),
        ('trend_vortex_ind_pos', ta.trend.vortex_indicator_pos, HLC, dict(window=2)),
        ('trend_vortex_ind_neg', ta.trend.vortex_indicator_neg, HLC, dict(window=2)),
        ('trend_mass_index', ta.trend.mass_index, HL, dict(window_fast=10, window_slow=8)),
        ('trend_dpo', ta.trend.dpo, C, dict(window=15)),
        ('trend_kst_sig', ta.trend.kst_sig, C,
         dict(roc1=3, roc2=5, roc3=5, roc4=20, window1=3, window2=3, window3=5, window4=5, nsig=3)),
        ('trend_stc', ta.trend.stc, C,
         dict(window_slow=10, window_fast=20, cycle=5, smooth1=2, smooth2=2)),
        ('trend_adx', ta.trend.adx, HLC, dict(window=3)),
        ('trend_adx_pos', ta.trend.adx_pos, HLC, dict(window=3)),
        ('trend_adx_neg', ta.trend.adx_neg, HLC, dict(window=3)),
        ('trend_cci', ta.trend.cci, HLC, dict(window=2, constant=0.015)),
        ('trend_aroon_up', ta.trend.aroon_up, C, dict(window=2)),
        ('trend_aroon_down', ta.trend.aroon_down, C, dict(window=2)),
        # momentum
        ('momentum_rsi', ta.momentum.rsi, C, dict(window=2)),
        ('momentum_stochrsi', ta.momentum.stochrsi, C, dict(window=20, smooth1=3, smooth2=3)),
        # Column name says "tsi" but the value is the rate of change (see docstring).
        ('momentum_tsi', ta.momentum.roc, C, dict(window=8)),
        ('momentum_pvo', ta.momentum.pvo, V, pvo),
        # Column name says "pvo_hist" but the value is the daily log return (see docstring).
        ('momentum_pvo_hist', ta.others.daily_log_return, C, {}),
    ]

    for column, func, inputs, params in indicator_specs:
        features[column] = indicator(func, inputs, **params)

    # filling data for nan and inf
    if istrain:
        # String comparison on Date; rows up to 2017-02-10 are discarded.
        features = features[features.Date > '2017-02-10']
        features = features.dropna(how='any',axis=0) # about 2% is nan
    else:
        # Same ungrouped forward-fill caveat as above applies here.
        features = features.ffill()

    features = features.replace([np.inf, -np.inf], 0)

    return features


In [None]:
# financials data cleaning
def prep_financials(financials):
    """
    Clean the raw financial statements and convert cumulative period figures
    into per-period figures.

    Steps:
    - Drop rows without DateCode, rows whose TypeOfDocument contains
      'ForecastRevision' or 'NumericalCorrection', and duplicated DateCodes
      (first occurrence kept).
    - Convert every column that is entirely numeric to a numeric dtype,
      replace the placeholder '－' by NaN and coerce the key numeric columns.
    - Parse Date to datetime.
    - Add TypeMark (0 for TypeOfCurrentPeriod == 'FY', else 1) and, within
      each SecuritiesCode, subtract from NetSales / OperatingProfit /
      OrdinaryProfit / Profit / EarningsPerShare the previous row's value
      whenever that previous row is not an 'FY' row. The first row of each
      security therefore becomes NaN for those columns.
    - Remaining NaNs are not dropped.

    Args:
        financials (pd.DataFrame): raw statements; it is not modified.

    Returns:
        pd.DataFrame: cleaned statements with the extra TypeMark column.
    """
    def to_numeric_if_possible(column):
        # Same effect as pd.to_numeric(errors='ignore') without relying on
        # the deprecated option: leave the column alone if any value fails.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    # Non in-place: the caller keeps reusing its raw frame between calls.
    financials = financials.dropna(subset=['DateCode'])
    # na=False keeps rows whose TypeOfDocument is missing instead of failing
    # on the boolean negation of a mask that contains NaN.
    is_revision = financials['TypeOfDocument'].str.contains('ForecastRevision', na=False)
    is_correction = financials['TypeOfDocument'].str.contains('NumericalCorrection', na=False)
    financials = financials[~(is_revision | is_correction)]
    financials = financials.drop_duplicates(subset=['DateCode'],keep='first')  # delete duplicates
    financials = financials.apply(to_numeric_if_possible)
    financials = financials.replace('－',np.nan)

    # Clean some number-type columns.
    lis = ['NetSales','OrdinaryProfit','OperatingProfit','EarningsPerShare','TotalAssets','Profit','NumberOfIssuedAndOutstandingSharesAtTheEndOfFiscalYearIncludingTreasuryStock','Equity']
    financials[lis] = financials[lis].apply(pd.to_numeric,errors='coerce')

    financials['Date'] = pd.to_datetime(financials['Date'])

    # 0 marks a full-year statement, 1 any other period.
    financials['TypeMark'] = (financials['TypeOfCurrentPeriod'] != 'FY').astype(int)

    items_to_adjust = ['NetSales','OperatingProfit','OrdinaryProfit','Profit','EarningsPerShare']
    # Value to subtract: the previous row of the same security, zeroed out
    # when that previous row is an 'FY' statement.
    carried = financials[items_to_adjust].mul(financials['TypeMark'], axis=0)
    previous = carried.groupby(financials['SecuritiesCode']).shift()
    financials[items_to_adjust] = financials[items_to_adjust] - previous
    return financials



In [None]:
def generate_fin_features(stock_prices, financials):
    """
    Combine prices with cleaned financial statements into valuation,
    profitability and growth ratios.

    Args:
        stock_prices (pd.DataFrame): needs RowId, Date, SecuritiesCode and
            Close; RowId is renamed to DateCode and used as the join key.
        financials (pd.DataFrame): cleaned statements with DateCode,
            SecuritiesCode and the numeric statement columns used below.
            The frame is not modified.

    Returns:
        pd.DataFrame: one row per price row (sorted by DateCode) with
        DateCode, Date, SecuritiesCode and the ratio columns. Values are
        forward-filled within each security, +/-inf is replaced by NaN and
        remaining NaNs are not dropped.

    NOTE(review): some ratio names do not match their formulas as written
    here - 'PS' divides by OperatingProfit per share, 'PB' by TotalAssets per
    share and 'GPM' uses OrdinaryProfit. Formulas are kept unchanged; confirm
    whether that is intended.
    """
    shares_col = 'NumberOfIssuedAndOutstandingSharesAtTheEndOfFiscalYearIncludingTreasuryStock'

    # financial indicators
    stock_prices = stock_prices.rename(columns = {'RowId':'DateCode'})
    # Growth of NetSales against the previous statement of the same security;
    # stored on a new frame so the caller's `financials` stays untouched.
    previous_sales = financials.groupby('SecuritiesCode')['NetSales'].shift(1)
    financials = financials.assign(
        netsales_growth_rate=(financials['NetSales'] - previous_sales) / previous_sales
    )
    pricesfin = pd.merge(stock_prices[['DateCode','Close']],financials,on='DateCode').sort_values(['DateCode'])
    shares = pricesfin[shares_col]
    features = pricesfin[['DateCode']].copy(deep=True)
    #PE
    features['PE'] = pricesfin['Close']/pricesfin['EarningsPerShare']
    #PEcut
    features['PEcut'] = pricesfin['Close']/(pricesfin['EarningsPerShare']-pricesfin['OrdinaryProfit']/shares)
    #PEG
    features['PEG'] = features['PE']/pricesfin['netsales_growth_rate']
    #PS
    features['PS'] = pricesfin['Close']/(pricesfin['OperatingProfit']/shares)
    #PB
    features['PB'] = pricesfin['Close']/(pricesfin['TotalAssets']/shares)
    # ROE, ROA, GPM, NPM, Em
    features["ROE"] = pricesfin.Profit/pricesfin.Equity
    features["ROA"] = pricesfin.Profit/pricesfin.TotalAssets
    features["GPM"] = pricesfin.OrdinaryProfit/pricesfin.NetSales
    features["NPM"] = pricesfin.Profit/pricesfin.NetSales
    features["EM"] = pricesfin.TotalAssets/pricesfin.Equity

    # RG, NPG, ROEG, ROAG: change against the previous statement of the same
    # security. `current` is built on the same row index as `features` and
    # `pricesfin`, so the ratios land on the rows they were computed from.
    # (Re-merging here would create a fresh RangeIndex that no longer matches
    # the sorted index of `features` and scatter the values across rows.)
    current = pd.DataFrame({
        'NetSales': pricesfin['NetSales'],
        'Profit': pricesfin['Profit'],
        'ROE': features['ROE'],
        'ROA': features['ROA'],
    })
    previous = current.groupby(pricesfin['SecuritiesCode']).shift()
    features['RG'] = current['NetSales']/previous['NetSales'] - 1
    features['NPG'] = current['Profit']/previous['Profit'] - 1
    features['ROEG'] = current['ROE']/previous['ROE'] - 1
    features['ROAG'] = current['ROA']/previous['ROA'] - 1

    features = pd.merge(stock_prices[['DateCode','Date','SecuritiesCode']], features, on='DateCode', how='left').sort_values(['DateCode'])
    # Carry the latest known statement forward within each security.
    fill_cols = [col for col in features.columns if col != 'SecuritiesCode']
    features[fill_cols] = features.groupby('SecuritiesCode')[fill_cols].ffill()
    features = features.replace([np.inf,-np.inf],np.nan)
    return features



# Load model

In [None]:
# Load the pre-trained XGBoost booster from the attached models dataset.
# An empty Booster is created first and then populated from the saved file.
# NOTE(review): the file carries a .txt extension; confirm it is in a format
# Booster.load_model accepts for the installed xgboost version.
from xgboost import Booster
model = Booster()
model.load_model("../input/d/junxum/models/xgbreg.txt")


# Load Time Series API

In [None]:
# Load the competition's Time Series API module.
import jpx_tokyo_market_prediction
# Create the Time Series API environment. This function can be called only
# once in a session, so re-running this cell requires a kernel restart.
env = jpx_tokyo_market_prediction.make_env()
# Iterator that yields the test data one day at a time; each item is
# unpacked by the prediction loop into the day's tables and the
# sample_prediction frame.
iter_test = env.iter_test()

In [None]:
def minmaxscale(df):
    """
    Min-max scale the feature columns of one security using the stored
    per-security bounds in the module-level `feature_min` / `feature_max`.

    The security is identified by the first unique SecuritiesCode in `df`.
    Every column from the third one onwards is scaled positionally against
    the stored bounds; the frame is modified in place and returned. If the
    code has no stored bounds the frame is returned unchanged.
    """
    security = df.SecuritiesCode.unique()[0]
    if security not in feature_min.index:
        return df

    # Rows of the stored tables that belong to this security, as 2-D arrays
    # that broadcast against the feature columns.
    lower = feature_min[feature_min.index == security].values
    upper = feature_max[feature_max.index == security].values
    span = upper - lower
    df.iloc[:, 2:] = (df.iloc[:, 2:] - lower) / span
    return df
# Number of days processed so far.
counter = 0
# fetch data day by day
for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    current_date = prices["Date"].iloc[0]
    sample_prediction_date = sample_prediction["Date"].iloc[0]
    print(f"current_date: {current_date}, sample_prediction_date: {sample_prediction_date}")
    # filter data to reduce calculation cost: keep a rolling window of
    # 50 business days of market data and 250 business days of financials
    threshold = (pd.Timestamp(current_date) - pd.offsets.BDay(50)).strftime("%Y-%m-%d")
    threshold_fin = (pd.Timestamp(current_date) - pd.offsets.BDay(250)).strftime("%Y-%m-%d")
    print(f"threshold: {threshold}")

    stock_prices_raw = stock_prices_raw.loc[(stock_prices_raw["Date"] < current_date) & (stock_prices_raw["Date"] >= threshold)]
    financials_raw = financials_raw.loc[(financials_raw["Date"] < current_date) & (financials_raw["Date"] >= threshold_fin)]
    # The three tables below are not used for features; trimming them only
    # shrinks what is held in memory.
    options_raw = options_raw.loc[(options_raw["Date"] < current_date) & (options_raw["Date"] >= threshold)]
    secondary_stock_prices_raw = secondary_stock_prices_raw.loc[(secondary_stock_prices_raw["Date"] < current_date) & (secondary_stock_prices_raw["Date"] >= threshold)]
    trades_raw = trades_raw.loc[(trades_raw["Date"] < current_date) & (trades_raw["Date"] >= threshold)]

    # to generate AdjustedClose, append today's rows to the rolling history
    stock_prices_raw = pd.concat([stock_prices_raw, prices])
    financials_raw = pd.concat([financials_raw, financials])

    # generate AdjustedClose and fin data
    stock_prices = prep_prices(stock_prices_raw)
    financials = prep_financials(financials_raw)

    # generate feature
    features_ta = generate_ta_features(stock_prices,False)
    features_fin = generate_fin_features(stock_prices,financials)
    feature = pd.merge(features_ta,features_fin,on=['Date','SecuritiesCode'],how='left')
    # filter feature for this iteration
    feature = feature.loc[feature.Date == current_date]
    feature = feature.drop('DateCode',axis=1)

    # scale each security with its stored min / max
    feature = feature.groupby('SecuritiesCode',as_index=False).apply(minmaxscale)

    # prediction
    X_test = xgb.DMatrix(feature.drop(['Date','SecuritiesCode'],axis=1))
    feature.loc[:, "predict"] = model.predict(X_test)

    # set rank by predict: highest prediction gets Rank 0
    feature = feature.sort_values("predict", ascending=False).drop_duplicates(subset=['SecuritiesCode'])
    feature.loc[:, "Rank"] = np.arange(len(feature))
    feature_map = feature.set_index('SecuritiesCode')['Rank'].to_dict()
    sample_prediction['Rank'] = sample_prediction['SecuritiesCode'].map(feature_map)

    # check Rank: every security ranked, ranks span 0 .. n-1.
    # (The previous extra check against a hard-coded 1999 duplicated the
    # last assertion and failed for any universe size other than 2000.)
    assert sample_prediction["Rank"].notna().all(), "some securities received no Rank"
    assert sample_prediction["Rank"].min() == 0
    assert sample_prediction["Rank"].max() == len(sample_prediction["Rank"]) - 1

    # register your predictions
    env.predict(sample_prediction)
    counter += 1

In [None]:
# Preview the rolling price history left after the last iteration.
stock_prices_raw.head()

In [None]:
# Preview the ranked feature frame of the last processed day.
feature.head()