In [5]:
import os
import sys
sys.path.append("/project/stockAI/github/main/stockAI")
import stockAI as sai
import pandas as pd

This file provides an example of using the stockAI package to experiment with the same preprocessed data to see which models perform best and how each trader yields.

--------

# 1. Load Data

⭐ Skip the process of generating data and proceed with the pre-processed and saved data file in the `00.demo.ipynb` file.

In [6]:
df_time_series = pd.read_parquet("../../../../_common/time_series_0212.parquet")
df_time_series_scaled = pd.read_parquet("../../../../_common/time_series_scaled_0212.parquet")

df_time_series = df_time_series[df_time_series["Code"] != "33626K"]
df_time_series_scaled = df_time_series_scaled[df_time_series_scaled["Code"] != "33626K"]

df_time_series['Code'] = df_time_series['Code'].astype(str).str.zfill(6)
df_time_series_scaled['Code'] = df_time_series_scaled['Code'].astype(str).str.zfill(6)

In [7]:
data = df_time_series # Data Before Scaling
data_scaled = df_time_series_scaled # Data After Scaling

# train, test dataset split
train_data = data[(data['Date'] >= '2017-01-01') & (data['Date'] <= '2020-12-31')]
test_data = data[(data['Date'] >= '2021-01-01') & (data['Date'] <= '2021-12-31')]

# train, test dataset split (scaled) 
train_data_scaled = data_scaled[(data_scaled['Date'] >= '2017-01-01') & (data_scaled['Date'] <= '2020-12-31')]
test_data_scaled = data_scaled[(data_scaled['Date'] >= '2021-01-01') & (data_scaled['Date'] <= '2021-12-31')]

print(train_data.shape, test_data.shape)
print(train_data_scaled.shape, test_data_scaled.shape)

(836912, 543) (221193, 543)
(836912, 543) (221193, 543)


We set the learning dataset from 2017 to 2020 and the test dataset from 2021.

In [21]:
drop_cols = ['D-9_trading_value', 'D-9_MFI', 'D-9_OBV', 'D-9_FI', 'D-9_EOM_EMV', 'D-9_NVI', 'D-8_trading_value', 'D-8_MFI', 'D-8_OBV', 'D-8_FI', 'D-8_EOM_EMV', 'D-8_NVI', 'D-7_trading_value', 'D-7_MFI', 'D-7_OBV', 'D-7_FI', 'D-7_EOM_EMV', 'D-7_NVI', 'D-6_trading_value', 'D-6_MFI', 'D-6_OBV', 'D-6_FI', 'D-6_EOM_EMV', 'D-6_NVI', 'D-5_trading_value', 'D-5_MFI', 'D-5_OBV', 'D-5_FI', 'D-5_EOM_EMV', 'D-5_NVI', 'D-4_trading_value', 'D-4_MFI', 'D-4_OBV', 'D-4_FI', 'D-4_EOM_EMV', 'D-4_NVI', 'D-3_trading_value', 'D-3_MFI', 'D-3_OBV', 'D-3_FI', 'D-3_EOM_EMV', 'D-3_NVI', 'D-2_trading_value', 'D-2_MFI', 'D-2_OBV', 'D-2_FI', 'D-2_EOM_EMV', 'D-2_NVI', 'D-1_trading_value', 'D-1_MFI', 'D-1_OBV', 'D-1_FI', 'D-1_EOM_EMV', 'D-1_NVI', 'D0_trading_value', 'D0_MFI', 'D0_OBV', 'D0_FI', 'D0_EOM_EMV', 'D0_NVI']
df_time_series = df_time_series.drop(columns=drop_cols)
df_time_series_scaled = df_time_series_scaled.drop(columns=drop_cols)

In [29]:
drop_df_time_series_scaled

Unnamed: 0_level_0,Code,Date,D-9_Open,D-9_High,D-9_Low,D-9_Close,D-9_Volume,D-9_Change,D-9_MA5,D-9_MA20,...,D0_TSI,D0_UO,D0_SR,D0_WR,D0_AO,D0_KAMA,D0_ROC,D0_PPO,D0_PVO,next_change
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,000020,2001-07-10,1.013,1.031,0.978,2250,344510,0.000,1.023,0.998,...,15.296,44.290,36.170,-63.830,132.794,0.644,-5.957,3.026,27.552,-0.118
1,000020,2001-07-11,1.004,1.009,0.960,2210,204060,-0.018,1.008,1.009,...,8.221,35.134,1.460,-98.540,76.906,0.689,-14.474,1.724,22.436,0.005
2,000020,2001-07-12,1.000,1.014,0.955,2230,365960,0.009,1.015,1.035,...,3.478,34.961,3.285,-96.715,1.106,0.782,-12.889,0.707,15.854,-0.061
3,000020,2001-07-13,1.009,1.040,1.000,2310,380280,0.036,1.009,1.028,...,-1.719,32.069,2.108,-97.892,-75.082,0.778,-18.222,-0.559,9.551,-0.038
4,000020,2001-07-16,1.000,1.004,0.974,2250,190530,-0.026,0.974,0.993,...,-6.472,30.974,4.762,-95.238,-175.894,0.830,-19.910,-1.835,4.312,-0.040
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3888285,009900,2022-12-21,0.991,1.000,0.982,16500,230048,-0.015,1.018,1.046,...,-17.251,36.890,1.695,-98.305,-1161.324,1.261,-10.920,-2.691,-28.694,0.026
3888286,009900,2022-12-22,1.015,1.015,0.991,16600,216588,0.006,1.019,1.056,...,-17.534,43.377,17.647,-82.353,-1302.206,1.276,-5.357,-2.709,-28.248,-0.047
3888287,009900,2022-12-23,0.997,1.003,0.991,16450,165663,-0.009,1.001,1.043,...,-19.615,29.963,0.000,-100.000,-1426.765,1.243,-9.552,-3.063,-25.648,-0.007
3888288,009900,2022-12-26,1.006,1.009,0.964,16100,412725,-0.021,1.002,1.046,...,-21.508,40.159,21.818,-78.182,-1606.765,1.303,-8.788,-3.362,-23.388,0.000


# 2. Trader Definition

  Next, define a trader. In this file, we will use the LightGBM, XGBoost, RandomForest, and LSTM models defined in the `01.trader_definition.ipynb` file.

In [22]:
lst_trader = [] 

### 1) LightGBM 

In [23]:
from lightgbm import LGBMClassifier

# conditional_buyer: Object that determines acquisition based on data filtering conditions 
b1_lg = sai.ConditionalBuyer()

def sampling1(df): # Create a conditional function
    condition1 = (-0.3 <= df.D0_Change) & (df.D0_Change <= 0.3) # Remove exceptions that exceed upper and lower limits
    condition2 = df.D0_trading_value >= 1000000000 # condition 1: Transaction amount of more than 1 billion won 
    condition3 = (-0.05 >= df.D0_Change) | (0.05 <= df.D0_Change) # condition 2: Today's stock price change rate is more than 5%
    condition = condition1 & condition2 & condition3
    return condition

b1_lg.condition = sampling1  # Define the condition function directly (sampling1) and store it in the condition property 


# machinelearning_buyer: Object that determines acquisition by machine learning model
b2_lg = sai.MachinelearningBuyer()

# Save user-defined models to algorithm properties
scale_pos_weight = round(72/28 , 2)
params = {  'random_state' : 42,
            'scale_pos_weight' : scale_pos_weight,
            'learning_rate' : 0.1, 
            'num_iterations' : 1000,
            'max_depth' : 4,
            'n_jobs' : 30,
            'boost_from_average' : False,
            'objective' : 'binary' }

b2_lg.algorithm =  LGBMClassifier( **params )


# SubSeller: Object that determines selling all of the following days
sell_all = sai.SubSeller() 


# Trader Object   
t1 = sai.Trader()
t1.name = 'saiLightGBM' # Trader's name
t1.label = 'class&0.02' # Set the Trader dependent variable (do not set if it is regression analysis) 
t1.buyer = sai.Buyer([b1_lg, b2_lg]) # [ conditional buyer, machinelearning buyer ] 
t1.seller = sai.Seller(sell_all)

lst_trader.append(t1)

### 2) XGBoost 

In [24]:
from xgboost import XGBClassifier

b1_xgb = sai.ConditionalBuyer() 

def sampling2(df): 
    condition1 = (-0.3 <= df.D0_Change) & (df.D0_Change <= 0.3) 
    condition2 = df.D0_trading_value >= 1000000000 
    condition3 = (-0.05 >= df.D0_Change) | (0.05 <= df.D0_Change) 
    condition = condition1 & condition2 & condition3
    return condition

b1_xgb.condition = sampling2


b2_xgb = sai.MachinelearningBuyer()  

scale_pos_weight = round(72/28 , 2)
b2_xgb.algorithm = XGBClassifier(random_state = 42,
                   n_jobs=30,
                   scale_pos_weight=scale_pos_weight,
                   learning_rate=0.1,
                   max_depth=4,
                   n_estimators=1000,
                   )  

sell_all = sai.SubSeller()


t2 = sai.Trader()
t2.name = 'saiXGboost' 
t2.label = 'class&0.02' 
t2.buyer = sai.Buyer([b1_xgb, b2_xgb])
t2.seller = sai.Seller(sell_all) 

lst_trader.append(t2) 

### 3) RandomForest 

In [25]:
from sklearn.ensemble import RandomForestClassifier

b1_rf = sai.ConditionalBuyer()

def sampling3(df):  
    condition1 = (-0.3 <= df.D0_Change) & (df.D0_Change <= 0.3) 
    condition2 = df.D0_trading_value >= 1000000000 
    condition3 = (-0.05 >= df.D0_Change) | (0.05 <= df.D0_Change) 
    condition = condition1 & condition2 & condition3
    return condition

b1_rf.condition = sampling3


b2_rf = sai.MachinelearningBuyer()  

b2_rf.algorithm = RandomForestClassifier() 


sell_all = sai.SubSeller() 


t3 = sai.Trader()
t3.name = 'saiDecisionTree'  
t3.label = 'class&0.02' 
t3.buyer = sai.Buyer([b1_rf, b2_rf]) 
t3.seller = sai.Seller(sell_all)

lst_trader.append(t3) 

### 4) LSTM 

In [26]:
from tensorflow import keras

b1_ls = sai.ConditionalBuyer()

def sampling4(df): 
    condition1 = (-0.3 <= df.D0_Change) & (df.D0_Change <= 0.3) 
    condition2 = df.D0_trading_value >= 1000000000 
    condition3 = (-0.05 >= df.D0_Change) | (0.05 <= df.D0_Change) 
    condition = condition1 & condition2 & condition3
    return condition

b1_ls.condition = sampling4


b2_ls = sai.MachinelearningBuyer()

# ⭐ User-defined functions (users who want deep learning modeling)
def transform(data): # A function that converts into a two-dimensional structure / data: list (lst_time_series)
    data_2d = []
    n_col = int(len(data[0]) / 10) 
    for row in data:      
        data_2d.append([])
        for i in range(0, len(row), n_col):
            data_2d[-1].append(row[i:i+n_col])
    
    return np.array(data_2d)
    

# Directly define a two-dimensional structure transformation function (transform) and store it in the data_transform property
b2_ls.data_transform = transform 

model = keras.models.Sequential()
model.add(keras.layers.InputLayer(input_shape=(10, 48)))
model.add(keras.layers.LSTM(128, activation='selu', return_sequences=True))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(64, activation='selu', return_sequences=True))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(32, activation='selu', return_sequences=False))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(1, activation='sigmoid'))
    
model.compile(optimizer=keras.optimizers.Adam(
    learning_rate=keras.optimizers.schedules.ExponentialDecay(0.05,decay_steps=100000,decay_rate=0.96)), 
    loss="binary_crossentropy",
    metrics=['accuracy'])

b2_ls.algorithm =  model


sell_all = sai.SubSeller() 


t4 = sai.Trader()
t4.name = 'saiLSTM' 
t4.label = 'class&0.02' 
t4.buyer = sai.Buyer([b1_ls, b2_ls]) 
t4.seller = sai.Seller(sell_all)

lst_trader.append(t4)

# 3. Trader(Model) Fitting & Evauation

### 1) Save Dataset to Traders 

In [27]:
sai.save_dataset(lst_trader, train_data, test_data, train_data_scaled, test_data_scaled)

== saiLightGBM ==
== train_code_date: (836912, 2),  test_code_date: (221193, 2) ==
== trainX: (836912, 540),  testX: (221193, 540) ==
== trainX_scaled: (836912, 540),  testX_scaled: (221193, 540) ==
== trainY: (836912,),  testY: (221193,) ==
== trainY_classification: (836912,),  testY_classification: (221193,) ==

== saiXGboost ==
== train_code_date: (836912, 2),  test_code_date: (221193, 2) ==
== trainX: (836912, 540),  testX: (221193, 540) ==
== trainX_scaled: (836912, 540),  testX_scaled: (221193, 540) ==
== trainY: (836912,),  testY: (221193,) ==
== trainY_classification: (836912,),  testY_classification: (221193,) ==

== saiDecisionTree ==
== train_code_date: (836912, 2),  test_code_date: (221193, 2) ==
== trainX: (836912, 540),  testX: (221193, 540) ==
== trainX_scaled: (836912, 540),  testX_scaled: (221193, 540) ==
== trainY: (836912,),  testY: (221193,) ==
== trainY_classification: (836912,),  testY_classification: (221193,) ==

== saiLSTM ==
== train_code_date: (836912, 2),  t

### 2) Model fitting

In [28]:
sai.trader_train(lst_trader) 

== saiLightGBM Model Fitting Completed ==


XGBoostError: [15:14:14] ../src/data/data.cc:1111: Check failed: valid: Input data contains `inf` or `nan`
Stack trace:
  [bt] (0) /home/sookim06/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x16b9c9) [0x7f95bcd599c9]
  [bt] (1) /home/sookim06/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x18fe0d) [0x7f95bcd7de0d]
  [bt] (2) /home/sookim06/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x1c0eea) [0x7f95bcdaeeea]
  [bt] (3) /home/sookim06/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x180575) [0x7f95bcd6e575]
  [bt] (4) /home/sookim06/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(XGDMatrixCreateFromDense+0x453) [0x7f95bccb84f3]
  [bt] (5) /package/anaconda3.9/lib/python3.9/lib-dynload/../../libffi.so.7(+0x69dd) [0x7f9612fb09dd]
  [bt] (6) /package/anaconda3.9/lib/python3.9/lib-dynload/../../libffi.so.7(+0x6067) [0x7f9612fb0067]
  [bt] (7) /package/anaconda3.9/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x140f6) [0x7f9612fca0f6]
  [bt] (8) /package/anaconda3.9/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x1073e) [0x7f9612fc673e]



### 3) Model evaluation and threshold settings
#### Model evaluationModel evaluation

In [None]:
sai.get_eval_by_threshold(lst_trader)

#### threshold settings

In [None]:
sai.set_threshold(lst_trader, lst_threshold=[0.8], histogram=True)

# 4. Back-Testing

### 1) Making a sales log

In [None]:
df_signal_all = sai.decision(lst_trader, dtype='test')
df_signal_all

### 2) Simulation: Calculate the yield


In [None]:
df_history_all = sai.simulation(df_signal_all, init_budget=10000000, init_stock={})
df_history_all

### 3) Leader Board

In [None]:
sai.leaderboard(df_history_all)

### 4) Visualize Results

In [None]:
sai.yield_plot(df_history_all)