# Other Solutions From Kaggle

| Date | User | Change Type | Remarks |  
| ---- | ---- | ----------- | ------- |
| 10/07/2025   | Martin | Create  | Notebook to explore other's solutions on Kaggle | 

# Content

* [Extending with Lots of Lag Features](#extending-with-lots-of-lag-features)

# Extending with Lots of Lag Features

_Notebook adapted from: https://www.kaggle.com/code/ruanpengju/drw-crypto-all-u-need-is-give-up_

Exploring the addition of many lag variables and exponential decay to see interactions with the label. I want to learn more about what each function is doing.

In [1]:
%load_ext watermark

In [1]:
import polars as pl
import numpy as np
import warnings

warnings.filterwarnings("ignore")

In [11]:
train = pl.read_parquet("data/train.parquet")
test = pl.read_parquet("data/test.parquet")
# submission = pl.read_csv("data/sample_submission.csv")

feat_impt = pl.read_csv("data/clean/perm_impt_45.csv")

In [12]:
train.shape, test.shape
# train.shape
# test.shape

((525887, 897), (538150, 896))

Unsure how these features were selected

_NOTE: Might try using my own permutation importance based on what I found with 45 features_

In [13]:
# Define global variables

# # Baseline
# FEATURES = [
#   "X863", "X856", "X344", "X598", "X862", "X385", "X852", "X603", "X860", "X674",
#   "X415", "X345", "X137", "X855", "X174", "X302", "X178", "X532", "X168", "X612",
#   "bid_qty", "ask_qty", "buy_qty", "sell_qty", "volume", "X888", "X421", "X333",
#   "X21", "X20", "X28", "X29", "X19", "X27", "X22", "X39",
# ]

# Feature Importance
FEATURES = list(feat_impt.columns)[:-2] + ["bid_qty", "ask_qty", "buy_qty", "sell_qty", "volume"]

RANDOM_STATE = 123
LAGS = [1, 2, 3, 4, 20, 
  60, # 1 hour
  180, # 3 hours
  240, # 4 hours
  60*24, # 1 day
  60*24*7, # 1 week
  60*24*14, # 2 weeks
  60*24*21 # 3 weeks
]

len(FEATURES)

58

In [5]:
def create_time_decay_weights(n: int, decay: float=0.9) -> np.ndarray:
  """Creates a set of exponentially decaying weights that is normalised

  Args:
      n (int): number of weights to create
      decay (float, optional): exponential decay factor. Defaults to 0.9.

  Returns:
      np.ndarray: numpy array of weights
  """
  positions = np.arange(n)
  normalized = positions / (n - 1)
  weights = decay ** (1.0 - normalized)

  return weights * n / weights.sum()

In [25]:
def add_features(df: pl.DataFrame) -> pl.DataFrame:
  # Create interaction features
  df = df.with_columns(
    bid_ask_interaction = pl.col('bid_qty') * pl.col('ask_qty'),
    bid_buy_interaction = pl.col('bid_qty') * pl.col('buy_qty'),
    bid_sell_interaction = pl.col('bid_qty') * pl.col('sell_qty'),
    ask_buy_interaction = pl.col('ask_qty') * pl.col('buy_qty'),
    ask_sell_interaction = pl.col('ask_qty') * pl.col('sell_qty'),
  )

  # Additional financial metrics
  df = df.with_columns(
    volume_weighted_sell = pl.col('sell_qty') * pl.col('volume'),
    buy_sell_ratio = pl.col('buy_qty') / (pl.col('sell_qty') + 1e-6),
    selling_pressure = pl.col('sell_qty') / (pl.col('volume') + 1e-6),
    log_volume = np.log1p(pl.col('volume')),
    effective_spread_proxy = np.abs(pl.col('buy_qty') - pl.col('sell_qty')) / (pl.col('volume') + 1e-6),
    bid_ask_imbalance = (pl.col('bid_qty') - pl.col('ask_qty')) / (pl.col('bid_qty') + pl.col('ask_qty') + 1e-6),
    order_flow_imbalance = (pl.col('buy_qty') - pl.col('sell_qty')) / (pl.col('buy_qty') + pl.col('sell_qty') + 1e-6),
    liquidity_ratio = (pl.col('bid_qty') + pl.col('ask_qty')) / (pl.col('volume') + 1e-6)
  )

  new_features = [
    "bid_ask_interaction",
    "bid_buy_interaction",
    "bid_sell_interaction",
    "ask_buy_interaction",
    "ask_sell_interaction",
    "volume_weighted_sell",
    "buy_sell_ratio",
    "selling_pressure",
    "log_volume",
    "effective_spread_proxy",
    "bid_ask_imbalance",
    "order_flow_imbalance",
    "liquidity_ratio"
  ]

  # Generate volality features - Moving Average and Standard Deviation
  print("Generating volatility and trend features")
  features_for_stats = ['bid_qty', 'ask_qty', 'buy_qty', 'sell_qty', 'volume', 'bid_ask_imbalance', 'order_flow_imbalance']
  windows = [10, 60]

  for col in features_for_stats:
    if col in df.columns:
      for w in windows:
        df = df.with_columns(
          pl.col(col).rolling_mean(window_size=w, min_samples=1).alias(f"{col}_ma_{w}"),
          pl.col(col).rolling_std(window_size=w, min_samples=1).alias(f"{col}_std_{w}"),
        )
  
  # Generate lead and lag features - For all original and newly created features
  # print("Generate lead/ lag features")
  # features_for_lags = FEATURES + new_features
  # for col in features_for_lags:
  #   if col in df.columns:
  #     for lag in LAGS:
  #       df = df.with_columns(
  #         pl.col(col).shift(lag).alias(f"{col}_lag_{lag}"),
  #         pl.col(col).shift(-lag).alias(f"{col}_lead_{lag}"),
  #       )
  
  # Handle infinite and missing values
  df = df.with_columns(
    pl.when(pl.col(pl.Float64).is_infinite())
    .then(None)
    .otherwise(pl.col(pl.Float64))
    .name.keep()
  )
  df = df.with_columns(pl.all().fill_null(strategy='forward'))
  df = df.with_columns(pl.all().fill_null(strategy='backward'))
  df = df.with_columns(pl.all().fill_null(0))

  return df

In [8]:
# Function to shuffle the data according to random_state 700
def sort_test_df_by_time(df):
  assert len(df.shape) == 2
  assert df.shape[0] == 538150

  n = df.shape[0]
  t = pd.Series(np.arange(n))
  t = t.sample(n=n, random_state=700)

  t = pd.Series(np.arange(n), index=t.to_numpy()).sort_index()
  return df.iloc[t.to_numpy()]

In [26]:
# Apply transformations
df_train = train.select(FEATURES)
df_train = add_features(df_train)
df_train = df_train.with_columns(
  label=train['label']
)
df_train.shape

df_test = test.select(FEATURES)
df_test = add_features(df_test)
df_test.shape

# # Uncomment: To save
# df_train.write_parquet("data/clean/others.parquet")
# df_test.write_parquet("data/clean/test_others.parquet")

Generating volatility and trend features
Generating volatility and trend features


(538150, 77)

In [27]:
df_train.head()

X863,X856,X344,X598,X862,X385,X852,X603,X860,X674,X415,X345,X137,X855,X174,X302,X178,X532,X168,X612,bid_qty,ask_qty,buy_qty,sell_qty,volume,X888,X421,X333,X21,X20,X28,X29,X19,X27,X22,X39,bid_ask_interaction,…,volume_weighted_sell,buy_sell_ratio,selling_pressure,log_volume,effective_spread_proxy,bid_ask_imbalance,order_flow_imbalance,liquidity_ratio,bid_qty_ma_10,bid_qty_std_10,bid_qty_ma_60,bid_qty_std_60,ask_qty_ma_10,ask_qty_std_10,ask_qty_ma_60,ask_qty_std_60,buy_qty_ma_10,buy_qty_std_10,buy_qty_ma_60,buy_qty_std_60,sell_qty_ma_10,sell_qty_std_10,sell_qty_ma_60,sell_qty_std_60,volume_ma_10,volume_std_10,volume_ma_60,volume_std_60,bid_ask_imbalance_ma_10,bid_ask_imbalance_std_10,bid_ask_imbalance_ma_60,bid_ask_imbalance_std_60,order_flow_imbalance_ma_10,order_flow_imbalance_std_10,order_flow_imbalance_ma_60,order_flow_imbalance_std_60,label
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
0.21857,-0.216525,-0.362607,0.075641,-1.027483,-0.589209,0.878094,0.839766,0.541286,0.165674,-1.035101,0.260984,0.656033,0.418618,-0.167949,-0.454341,-0.033564,0.594644,-0.258087,0.817685,15.283,8.425,176.405,44.984,221.389,0.210153,-0.62566,0.362469,0.816321,0.58567,1.183991,1.474789,0.883229,0.953631,0.529973,0.966243,128.759275,…,9958.962776,3.921505,0.20319,5.404428,0.59362,0.289269,0.59362,0.107088,15.283,16.480538,15.283,16.480538,8.425,4.305573,8.425,4.305573,176.405,247.092101,176.405,247.092101,44.984,195.844537,44.984,195.844537,221.389,442.936637,221.389,442.936637,0.289269,0.421841,0.289269,0.421841,0.59362,0.249693,0.59362,0.249693,0.562539
0.088014,-0.180112,-0.376922,0.067653,-1.024055,-0.588391,0.891413,0.839766,0.450331,0.165214,-1.029366,0.260621,0.655122,0.424977,-0.167483,-0.450767,-0.033518,0.578271,-0.256658,0.817685,38.59,2.336,525.846,321.95,847.796,0.209573,-0.623925,0.36046,0.674792,0.442391,1.015943,1.312735,0.591695,0.776628,0.460741,0.953242,90.14624,…,272947.9222,1.633316,0.379749,6.743819,0.240501,0.885843,0.240501,0.048273,26.9365,16.480538,26.9365,16.480538,5.3805,4.305573,5.3805,4.305573,351.1255,247.092101,351.1255,247.092101,183.467,195.844537,183.467,195.844537,534.5925,442.936637,534.5925,442.936637,0.587556,0.421841,0.587556,0.421841,0.417061,0.249693,0.417061,0.249693,0.533686
-0.147363,-0.265966,-0.368205,0.067288,-1.024056,-0.587575,0.859856,0.833221,0.420681,0.164756,-1.023663,0.26026,0.654213,0.409942,-0.167019,-0.45531,-0.033471,0.577898,-0.255236,0.809375,0.442,60.25,159.227,136.369,295.596,0.208993,-0.622194,0.358463,0.610116,0.376524,0.917205,1.219124,0.457268,0.670816,0.429751,0.952246,26.6305,…,40310.130924,1.167619,0.461336,5.692371,0.077329,-0.985435,0.077329,0.205321,18.105,19.229931,18.105,19.229931,23.670333,31.82488,23.670333,31.82488,287.159333,206.887082,287.159333,206.887082,167.767667,141.127419,167.767667,141.127419,454.927,342.251674,454.927,342.251674,0.063226,0.955898,0.063226,0.955898,0.303817,0.263905,0.303817,0.263905,0.546505
-0.09459,-0.322244,-0.356326,0.069881,-1.024058,-0.647419,0.839141,0.826309,0.386584,0.164299,-1.21445,0.259899,0.653305,0.400075,-0.18045,-0.44363,-0.033425,0.577524,-0.281572,0.807937,4.865,21.016,335.742,124.963,460.705,0.208416,-0.718833,0.356477,0.618529,0.383696,1.044941,1.353001,0.468778,0.789646,0.435326,1.003724,102.24284,…,57571.078915,2.686731,0.271243,6.134926,0.457514,-0.624049,0.457514,0.056177,14.795,17.039696,14.795,17.039696,23.00675,26.018776,23.00675,26.018776,299.305,170.660224,299.305,170.660224,157.0665,117.200792,157.0665,117.200792,456.3715,279.462255,456.3715,279.462255,-0.108593,0.852788,-0.108593,0.852788,0.342241,0.228771,0.342241,0.228771,0.357703
0.162221,-0.369625,-0.347715,0.072288,-1.02406,-0.646521,0.82168,0.800184,0.389969,0.163843,-1.207722,0.259538,0.652398,0.391759,-0.179949,-0.438645,-0.033378,0.577151,-0.280012,0.795718,27.158,3.451,98.411,44.407,142.818,0.207839,-0.716839,0.354503,0.623192,0.3871,1.047708,1.36188,0.472732,0.78375,0.439034,0.993904,93.722258,…,6342.118926,2.216115,0.310934,4.968549,0.378132,0.774511,0.378132,0.214322,17.2676,15.758559,17.2676,15.758559,19.0956,24.170602,19.0956,24.170602,259.1262,172.960584,259.1262,172.960584,134.5346,113.315717,134.5346,113.315717,393.6608,279.709712,393.6608,279.709712,0.068028,0.837502,0.068028,0.837502,0.349419,0.198771,0.349419,0.198771,0.362452


In [28]:
df_test.head()

X863,X856,X344,X598,X862,X385,X852,X603,X860,X674,X415,X345,X137,X855,X174,X302,X178,X532,X168,X612,bid_qty,ask_qty,buy_qty,sell_qty,volume,X888,X421,X333,X21,X20,X28,X29,X19,X27,X22,X39,bid_ask_interaction,…,ask_sell_interaction,volume_weighted_sell,buy_sell_ratio,selling_pressure,log_volume,effective_spread_proxy,bid_ask_imbalance,order_flow_imbalance,liquidity_ratio,bid_qty_ma_10,bid_qty_std_10,bid_qty_ma_60,bid_qty_std_60,ask_qty_ma_10,ask_qty_std_10,ask_qty_ma_60,ask_qty_std_60,buy_qty_ma_10,buy_qty_std_10,buy_qty_ma_60,buy_qty_std_60,sell_qty_ma_10,sell_qty_std_10,sell_qty_ma_60,sell_qty_std_60,volume_ma_10,volume_std_10,volume_ma_60,volume_std_60,bid_ask_imbalance_ma_10,bid_ask_imbalance_std_10,bid_ask_imbalance_ma_60,bid_ask_imbalance_std_60,order_flow_imbalance_ma_10,order_flow_imbalance_std_10,order_flow_imbalance_ma_60,order_flow_imbalance_std_60
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
-0.270279,0.888982,0.183128,1.002431,0.242743,-0.28028,1.145126,1.244533,0.168711,1.030041,-0.707083,-0.008769,-0.078846,0.602466,0.020813,-0.606213,0.26272,0.010969,0.034217,0.874687,0.114,12.121,10.587,10.971,21.558,1.567979,-0.744742,-4.2232e-08,-0.041204,-0.079453,-0.243297,-0.186772,-0.227923,-0.276421,0.048349,0.475384,1.381794,…,132.979491,236.512818,0.964999,0.508906,3.11609,0.017812,-0.981365,-0.017812,0.567539,0.114,1.634831,0.114,1.634831,12.121,6.476391,12.121,6.476391,10.587,88.850795,10.587,88.850795,10.971,0.942573,10.971,0.942573,21.558,89.793369,21.558,89.793369,-0.981365,0.623587,-0.981365,0.623587,-0.017812,0.602563,-0.017812,0.602563
-0.356306,0.174188,1.252857,0.395232,0.955962,0.164827,0.831423,-0.247774,-0.502716,0.80456,-0.427253,-0.101519,-0.359345,1.187569,0.388479,0.340249,0.870314,-0.26096,0.223421,-0.43828,2.426,2.962,136.241,12.304,148.545,0.050925,-0.2114,0.002842,-0.227728,-0.047501,0.128473,0.023283,0.139376,0.179227,-0.145745,0.140607,7.185812,…,36.444448,1827.69768,11.072902,0.08283,5.007597,0.83434,-0.09948,0.83434,0.036272,1.27,1.634831,1.27,1.634831,7.5415,6.476391,7.5415,6.476391,73.414,88.850795,73.414,88.850795,11.6375,0.942573,11.6375,0.942573,85.0515,89.793369,85.0515,89.793369,-0.540423,0.623587,-0.540423,0.623587,0.408264,0.602563,0.408264,0.602563
0.268433,-0.26551,0.084397,0.092053,-0.488119,-0.27619,0.983454,0.625488,0.273964,0.795365,-0.193592,0.593242,0.055477,0.589635,0.043473,-0.43891,-0.099887,-0.121893,0.10822,0.085317,1.085,2.343,23.39,57.171,80.561,0.072535,-0.170362,0.208982,0.060063,0.10225,0.044225,0.003683,0.197985,0.046509,-0.139541,0.179177,2.542155,…,133.951653,4605.752931,0.409123,0.709661,4.401351,0.419322,-0.366978,-0.419322,0.042552,1.208333,1.160924,1.208333,1.160924,5.808667,5.475395,5.808667,5.475395,56.739333,69.147418,56.739333,69.147418,26.815333,26.297226,26.815333,26.297226,83.554667,63.546409,83.554667,63.546409,-0.482608,0.45217,-0.482608,0.45217,0.132402,0.640188,0.132402,0.640188
-0.364048,1.247777,0.488409,0.320689,0.145365,-0.155654,1.79358,0.781413,-0.312154,0.82964,0.318894,0.001211,-0.021331,0.945045,2.992349,-0.622701,0.550969,-0.175071,3.234477,0.081534,14.793,1.117,116.518,13.082,129.6,1.381752,0.257872,7.2901e-09,-0.021846,-0.043811,0.156303,0.179812,-0.101949,0.151133,0.030501,0.133269,16.523781,…,14.612594,1695.4272,8.906741,0.100941,4.872139,0.798117,0.859585,0.798117,0.122762,4.6045,6.858155,4.6045,6.858155,4.63575,5.04872,4.63575,5.04872,71.684,63.882308,71.684,63.882308,23.382,22.542859,23.382,22.542859,95.066,56.763902,95.066,56.763902,-0.147059,0.765947,-0.147059,0.765947,0.298831,0.619694,0.298831,0.619694
0.184046,0.286078,0.038548,0.220992,-0.38339,0.086839,0.673395,1.182823,0.065162,1.010712,-0.323887,0.002133,-0.483244,0.528421,1.913289,-0.103435,-0.147323,-0.177636,2.550381,0.511809,0.033,14.178,43.8,49.836,93.636,1.050577,-0.237891,1.0734e-09,-0.064359,0.023939,-0.308402,-0.341364,0.220629,-0.259154,-0.030559,0.406146,0.467874,…,706.574808,4666.443696,0.878883,0.532231,4.550038,0.064462,-0.995356,-0.064462,0.151769,3.6902,6.281356,3.6902,6.281356,6.5442,6.109671,6.5442,6.109671,66.1072,56.711687,66.1072,56.711687,28.6728,22.827576,28.6728,22.827576,94.78,49.163141,94.78,49.163141,-0.316719,0.764152,-0.316719,0.764152,0.226172,0.560725,0.226172,0.560725


---

# Model Testing

In [26]:
import optuna
import lightgbm as lgb
from scipy.stats import pearsonr

In [30]:
# Create weights
weights = create_time_decay_weights(len(df_train))

In [31]:
y = df_train['label'].to_numpy()
X = df_train.drop('label').to_numpy()

# num_test = int(X.shape[0] * 0.2)
# train_X, train_y = X[:-num_test], y[:-num_test]
# valid_X, valid_y = X[-num_test:], y[-num_test:]

# train_weights = weights[:-num_test]

Using Optuna to find the best hyperparameters

In [32]:
X.shape, y.shape

((525887, 77), (525887,))

In [29]:
X = df_train_add.to_numpy()
y = labels.to_numpy()

In [30]:
# Define Optuna hyperparameter search
def objective(trial):
  num_test = int(X.shape[0] * 0.2)
  train_X, train_y = X[:-num_test], y[:-num_test]
  valid_X, valid_y = X[-num_test:], y[-num_test:]

  dtrain = lgb.Dataset(train_X, label=train_y)

  params = {
    "objective": "regression",
    "metric": trial.suggest_categorical("metric", ['rmse', 'l1', 'l2']),
    "verbosity": -1,
    "boosting_type": "gbdt",
    "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
    "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
    "num_leaves": trial.suggest_int("num_leaves", 2, 256),
    "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
    "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
    "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
    "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
  }

  gbm = lgb.train(params, dtrain)
  preds = gbm.predict(valid_X)
  correlation = pearsonr(preds, valid_y).statistic
  return correlation

In [31]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2025-07-11 18:55:55,783] A new study created in memory with name: no-name-76c6b632-7f89-42f3-b77a-41c818fb9fe9
[I 2025-07-11 18:55:57,712] Trial 0 finished with value: 0.025552969417324708 and parameters: {'metric': 'l1', 'lambda_l1': 3.2469963065435246e-07, 'lambda_l2': 1.31301932373552e-07, 'num_leaves': 96, 'feature_fraction': 0.7092315778966206, 'bagging_fraction': 0.6116219737428109, 'bagging_freq': 5, 'min_child_samples': 27}. Best is trial 0 with value: 0.025552969417324708.
[I 2025-07-11 18:56:00,176] Trial 1 finished with value: 0.01868931957686936 and parameters: {'metric': 'l2', 'lambda_l1': 0.8905710683121216, 'lambda_l2': 1.7885688834874514, 'num_leaves': 223, 'feature_fraction': 0.42333073556158085, 'bagging_fraction': 0.9527305293421438, 'bagging_freq': 6, 'min_child_samples': 85}. Best is trial 0 with value: 0.025552969417324708.
[I 2025-07-11 18:56:02,687] Trial 2 finished with value: 0.0058303368238378185 and parameters: {'metric': 'rmse', 'lambda_l1': 7.0001802481

Number of finished trials: 30
Best trial:
  Value: 0.050368266287051565
  Params: 
    metric: l2
    lambda_l1: 0.0004025744541416658
    lambda_l2: 2.658383514719199e-06
    num_leaves: 23
    feature_fraction: 0.8718197529063584
    bagging_fraction: 0.45952494695797863
    bagging_freq: 1
    min_child_samples: 99


Train the full model

In [35]:
dtrain = lgb.Dataset(X, label=y, weight=weights)

params = {
  "objective": "regression",
  "metric": "l2",
  "verbosity": -1,
  "boosting_type": "gbdt",
  "lambda_l1": 1.9944400955988304e-07,
  "lambda_l2": 0.010561219417605745,
  "num_leaves": 62,
  "feature_fraction": 0.6126623123764985,
  "bagging_fraction": 0.8564319694337191,
  "bagging_freq": 3,
  "min_child_samples": 80,
}

gbm = lgb.train(params, dtrain)
# preds = gbm.predict(valid_X)
# correlation = pearsonr(preds, valid_y).statistic

In [17]:
preds = gbm.predict(valid_X)
correlation = pearsonr(preds, valid_y).statistic
correlation

0.9551323843131287

In [22]:
import pickle
with open('models/other_lgbm.pkl', 'wb') as f:
  pickle.dump(gbm, f)

standard = 0.8401975707471385

Make prediction on test set

In [11]:
import pickle
with open('models/other_lgbm.pkl', 'rb') as f:
  loaded_model = pickle.load(f)

In [36]:
preds = gbm.predict(df_test.to_numpy())

In [37]:
preds

array([ 0.31340408,  0.57086042, -0.04478809, ..., -0.09120982,
        0.0136488 ,  0.53911284])

In [38]:
scores = pl.DataFrame({
  'ID': range(1, df_test.shape[0]+1),
  'prediction': preds
})
scores.write_csv("results/lgbm_rolling.csv")

In [2]:
%watermark

Last updated: 2025-07-10T16:47:55.154475+08:00

Python implementation: CPython
Python version       : 3.11.9
IPython version      : 8.31.0

Compiler    : MSC v.1938 64 bit (AMD64)
OS          : Windows
Release     : 10
Machine     : AMD64
Processor   : Intel64 Family 6 Model 183 Stepping 1, GenuineIntel
CPU cores   : 20
Architecture: 64bit

