In [None]:
def adjust_price(price):
    """
    Generate split-adjusted close prices for every security.

    Args:
        price (pd.DataFrame)  : stock prices with at least the columns
            Date, SecuritiesCode, Close and AdjustmentFactor
    Returns:
        price DataFrame (pd.DataFrame): a copy of the input (the caller's frame
            is not modified), sorted by SecuritiesCode and Date, indexed by
            Date, with the generated CumulativeAdjustmentFactor and
            AdjustedClose columns appended
    """
    def round_half_up(value):
        """Round to one decimal place using ROUND_HALF_UP."""
        return float(
            Decimal(str(value)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        )

    # work on a copy so the caller's DataFrame is left untouched
    price = price.copy()
    # plain column assignment (not `.loc[:, "Date"] = ...`) so the column
    # actually takes the datetime64 dtype instead of being filled in place
    price["Date"] = pd.to_datetime(price["Date"], format="%Y-%m-%d")
    # a fresh unique index lets the per-group results below align back by label
    price = price.sort_values(["SecuritiesCode", "Date"]).reset_index(drop=True)

    # accumulate the adjustment factors from the newest row back to the oldest
    # row of each security
    newest_first = price.iloc[::-1]
    price["CumulativeAdjustmentFactor"] = (
        newest_first.groupby("SecuritiesCode")["AdjustmentFactor"].cumprod()
    )
    # generate AdjustedClose
    price["AdjustedClose"] = (
        price["CumulativeAdjustmentFactor"] * price["Close"]
    ).map(round_half_up)
    # to fill AdjustedClose, replace 0 into np.nan
    price.loc[price["AdjustedClose"] == 0, "AdjustedClose"] = np.nan
    # forward fill within each security only; a leading gap stays NaN rather
    # than borrowing a price from the previous security
    price["AdjustedClose"] = price.groupby("SecuritiesCode")["AdjustedClose"].ffill()

    return price.set_index("Date")

# Information:

# Approach: an LSTM model trained on the AdjustedClose price of each security.

# Running time: 2.5 hours

# Sharpe Ratio = 3.55

In [None]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import os
from decimal import ROUND_HALF_UP, Decimal
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")
from keras.models import Sequential
from keras.layers import Dense, LSTM

In [None]:
# Root of the competition dataset; both price files live underneath it.
JPX_DATA_DIR = "/kaggle/input/jpx-tokyo-stock-exchange-prediction"

# Training history and the supplemental period, each with AdjustedClose added
# and indexed by Date (see adjust_price).
df = adjust_price(pd.read_csv(f"{JPX_DATA_DIR}/train_files/stock_prices.csv"))
df_supp = adjust_price(pd.read_csv(f"{JPX_DATA_DIR}/supplemental_files/stock_prices.csv"))

# Distinct security codes present in the training data.
Sclist = df.SecuritiesCode.unique()

In [None]:
def _make_windows(scaled_series, window):
    """Slice an (n, 1) array into LSTM inputs of length `window` and next-step targets.

    Returns:
        tuple: inputs of shape (n - window, window, 1) and targets of shape
            (n - window,); both are empty when n <= window.
    """
    n_samples = len(scaled_series) - window
    if n_samples <= 0:
        return np.empty((0, window, 1)), np.empty((0,))
    inputs = np.stack(
        [scaled_series[start:start + window, 0] for start in range(n_samples)]
    )
    targets = scaled_series[window:, 0]
    return inputs.reshape(n_samples, window, 1), targets


def pred(df, df_supp, SecCode, prediction_days=1, epochs=1, batch_size=1):
    """Train an LSTM on one security's AdjustedClose and predict the supplemental period.

    Args:
        df (pd.DataFrame): training prices containing the SecuritiesCode and
            AdjustedClose columns (as produced by adjust_price)
        df_supp (pd.DataFrame): supplemental prices with the same columns;
            used as the evaluation period
        SecCode: security code to model
        prediction_days (int): number of past values fed to the model for
            each prediction
        epochs (int): training epochs passed to model.fit
        batch_size (int): batch size passed to model.fit
    Returns:
        tuple: (root-mean-squared error between the actual and predicted
            AdjustedClose over the supplemental rows,
            predicted prices as an array of shape (len(supplemental rows), 1))
    Raises:
        ValueError: if prediction_days < 1, if the security has no more than
            prediction_days training rows, or no supplemental rows
    """
    # scikit-learn is not imported at file level, so it stays a local import
    from sklearn.metrics import mean_squared_error
    from sklearn.preprocessing import MinMaxScaler

    if prediction_days < 1:
        raise ValueError("prediction_days must be at least 1")

    col = "AdjustedClose"

    # Select the security first and only then fill gaps: filling the full
    # frame would copy values across security boundaries and would redo the
    # work on every call.
    train = df.loc[df["SecuritiesCode"] == SecCode].ffill().bfill()
    test = df_supp.loc[df_supp["SecuritiesCode"] == SecCode].ffill().bfill()
    if len(train) <= prediction_days:
        raise ValueError(
            f"SecuritiesCode {SecCode}: need more than {prediction_days} "
            f"training rows, got {len(train)}"
        )
    if test.empty:
        raise ValueError(f"SecuritiesCode {SecCode}: no supplemental rows")

    # The scaler is fitted on the training prices only and reused for the test inputs.
    sc = MinMaxScaler(feature_range=(0, 1))
    scaled_train = sc.fit_transform(train[col].values.reshape(-1, 1))
    x_train, y_train = _make_windows(scaled_train, prediction_days)

    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer="adam", loss="mean_squared_error")
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)

    actual_prices = test[col].values
    # Prepend the last `prediction_days` training prices so that the first
    # supplemental row also has a full input window.
    model_inputs = np.concatenate(
        [train[col].values[-prediction_days:], actual_prices]
    ).reshape(-1, 1)
    model_inputs = sc.transform(model_inputs)
    x_test, _ = _make_windows(model_inputs, prediction_days)

    predicted_prices = sc.inverse_transform(model.predict(x_test))
    rmse = float(np.sqrt(mean_squared_error(actual_prices, predicted_prices.ravel())))
    return rmse, predicted_prices

In [None]:
Sclist = df.SecuritiesCode.unique()

# Upper bound on how many securities are modelled; slicing (instead of
# indexing up to a fixed count) also works when fewer codes are available.
N_SECURITIES = 1200

percentage_mean_error2 = []  # one error value per modelled security
predicted_by_code = {}
for sec_code in Sclist[:N_SECURITIES]:
    sec_error, sec_predictions = pred(df, df_supp, sec_code)
    percentage_mean_error2.append(sec_error)
    predicted_by_code[f"{sec_code}"] = pd.Series(sec_predictions.ravel())

# Build the frame once (one column per security, rows by position); series of
# different lengths are padded with NaN instead of being silently truncated.
predicted_prices2 = pd.DataFrame(predicted_by_code)



In [None]:
# Rate of change between the predicted price one row ahead and two rows ahead,
# computed column-wise for every SecuritiesCode. The last two rows have no
# look-ahead values and therefore come out as NaN.
price_next = predicted_prices2.shift(-1)
Cal_Target = (predicted_prices2.shift(-2) - price_next) / price_next

In [None]:
# Number of securities taken from each end of the per-row ranking.
PORTFOLIO_SIZE = 200

# Sort the values inside every row of Cal_Target: descending for the top
# picks, ascending for the bottom picks. np.sort places NaN last in both cases.
target_values = Cal_Target.values
first200 = pd.DataFrame(-np.sort(-target_values)[:, :PORTFOLIO_SIZE])
last200 = pd.DataFrame(np.sort(target_values)[:, :PORTFOLIO_SIZE])

# Linearly decreasing weights: 2 for the most extreme pick down to 1.
weights = np.linspace(start=2, stop=1, num=PORTFOLIO_SIZE)

# min_count=1 keeps rows without any valid value as NaN; a plain sum would
# return 0.0 for them and they would then count as zero-return rows.
Sup = (first200 * weights).sum(axis=1, min_count=1) / np.mean(weights)
Sdown = (last200 * weights).sum(axis=1, min_count=1) / np.mean(weights)

In [None]:
# Per-row spread: weighted return of the top picks minus that of the bottom picks.
daily_spread_returns = Sup.sub(Sdown)
daily_spread_returns

In [None]:
# Sharpe ratio of the spread returns: mean divided by standard deviation.
# The computed value is kept and displayed rather than replaced by a literal;
# 3.55 is the figure the author recorded for their own run.
sharpe_ratio = daily_spread_returns.mean() / daily_spread_returns.std()
sharpe_ratio

In [None]:
# Set up the competition's submission API.
# NOTE(review): presumably this module is only available inside the Kaggle
# competition environment — confirm before running elsewhere.
import jpx_tokyo_market_prediction
# Environment object; predictions are handed back through env.predict(...).
env = jpx_tokyo_market_prediction.make_env()
# Iterator consumed by the submission loop; each step yields a tuple of six dataframes.
iter_test = env.iter_test()

In [None]:
counter = 0
# Each step of the API yields six dataframes, always in this order:
# prices, options, financials, trades, secondary_prices, sample_prediction.
for batch in iter_test:
    prices, options, financials, trades, secondary_prices, sample_prediction = batch
    # Preview every dataframe once, on the first batch only.
    if counter == 0:
        for frame in batch:
            print(frame.head())
    # NOTE(review): Rank is just the row position of the sample submission;
    # the LSTM predictions computed earlier are not used here — confirm intent.
    sample_prediction["Rank"] = np.arange(sample_prediction.shape[0])
    env.predict(sample_prediction)
    counter += 1