In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import polars as pl
import lightgbm as lgb
import joblib
from lightgbm import LGBMRegressor

# Polars display settings for this notebook session.
# NOTE(review): -1 is presumably the "no limit" value for printed rows/columns —
# confirm against the polars Config documentation.
pl.Config.set_tbl_rows(-1)
pl.Config.set_tbl_cols(-1)

In [2]:
# Columns that identify a row (date, intraday time step, instrument).
multi_index = ['date_id', 'time_id', 'symbol_id']
# Base feature names: feature_00 ... feature_78.
feature_col = ['feature_' + str(i).zfill(2) for i in range(79)]

# Single-element lists so they can be concatenated with the other column lists.
weight = ['weight']
target = ['responder_6']
# NOTE(review): start_date and interest_col are not referenced by the cells
# visible in this notebook — confirm whether a date filter / column selection
# was meant to be applied when loading the data.
start_date = 1100
interest_col = [*multi_index, *feature_col, *weight, *target]

In [None]:
# Lazily scan the training parquet, order rows by date, then symbol, then
# time step, and only then materialise the result into an in-memory frame.
df = pl.scan_parquet('data/train_with_lag.parquet').sort(
    by=['date_id', 'symbol_id', 'time_id']
).collect()
# Report (rows, columns) of the loaded frame.
print(df.shape)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [5]:
# Design matrix: every column except the row identifiers, the sample weight and
# the target (so it is not restricted to the names listed in feature_col).
X = df.drop([*multi_index, *weight, *target]).to_pandas()
# Target kept as a single-column pandas DataFrame.
y = df.select(target).to_pandas()
# Per-row weights flattened to a 1-D NumPy array.
sample_weight = df.select(weight).to_pandas().values.flatten()

In [6]:
# Hyper-parameters for the LightGBM regressor; kept in a dict so the same
# settings can be stored next to the fitted model later on.
parm = dict(
    n_estimators=1000,
    max_depth=6,
    num_leaves=64,
    learning_rate=0.05,
    colsample_bytree=0.5,
    subsample=0.5,
    subsample_freq=100,
    importance_type='gain',
    boosting_type='dart',
    random_state=0,
    n_jobs=-1,
)
# Unfitted estimator configured from the settings above.
model = LGBMRegressor(**parm)

In [None]:
# Fit on the full frame, weighting each row by its sample weight.
model.fit(X=X, y=y, sample_weight=sample_weight)

In [None]:
# Bar chart of the 30 highest-ranked features, using 'gain' as the importance
# measure; the title is supplied directly to the plotting helper.
lgb.plot_importance(
    model,
    importance_type='gain',
    max_num_features=30,
    title="Feature Importance",
)
plt.show()

In [None]:
# Bundle everything needed to reuse the model: the fitted estimator, the
# training column order, and the hyper-parameters it was built with.
data = {
    'model': model,
    'feature': X.columns.tolist(),
    # Key spelling is kept as-is because anything loading this file looks it up
    # by this exact name.
    'parmaters': parm,
}

# NOTE(review): the file name says "d7_n2000" while parm uses max_depth=6 and
# n_estimators=1000 — confirm which one is intended before relying on the name.
model_path = './model/lgbm_lag05_d7_n2000.pkl'
# Create the output directory first so a missing ./model folder does not throw
# away a finished training run with FileNotFoundError.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(data, model_path)