---

**In this notebook, we will use Ensembling Gradient Boosting trees for a multi-target regression problem, leveraging lagged targets to predict 424 outputs. To speed up inference, we adopt the long-format multi-output prediction method, which is much faster than the standard multi-output approach.**

**You will also find several useful techniques in this notebook, including:**

* How to create lagged targets and use them in the prediction step
* How to run Ensembling Gradient Boosting trees with lags targets
* How to build an optimized prediction function for API inference



---

In [1]:
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import warnings
warnings.filterwarnings("ignore")

# Data loading
p = '/kaggle/input/mitsui-commodity-prediction-challenge/'
train = pd.read_csv(p+'train.csv')
trainl = pd.read_csv(p+'train_labels.csv')
traint = pd.read_csv(p+'target_pairs.csv')

def _handle_missing_values(data):
    data.interpolate(method='polynomial', order=3, inplace=True)
    data.clip(lower=-10, upper=10, inplace=True)
    return data

train = _handle_missing_values(train)
trainl = _handle_missing_values(trainl)

target_lag_1 = traint.loc[traint["lag"]==1,"target"].values
target_lag_2 = traint.loc[traint["lag"]==2,"target"].values
target_lag_3 = traint.loc[traint["lag"]==3,"target"].values
target_lag_4 = traint.loc[traint["lag"]==4,"target"].values

Features = [i for i in trainl.columns]

def create_lagged_labels(df):
    dt = pd.DataFrame()
    dt["date_id"] = df["date_id"]
    for f in Features[1:]:
        if f in target_lag_1:
            lag = 1
        elif f in target_lag_2:
            lag = 2
        elif f in target_lag_3:
            lag = 3
        elif f in target_lag_4:
            lag = 4    
        dt[f] = df[f].shift(lag).fillna(0)
    return df, dt

_, train_lagged = create_lagged_labels(trainl)

# Create training data
import gc
training_df = []
target_cols = [f"target_{i}" for i in range(424)]
for j, target_col in enumerate(target_cols):
    temp_train_df = pd.DataFrame()
    temp_train_df[Features] = train_lagged[Features]                     
    temp_train_df['target_id'] = j
    y = trainl[target_col].values
    temp_train_df['target'] = y
    mask = ~(np.isnan(y) | np.isinf(y) | (np.abs(y) > 1e10))
    training_df.append(temp_train_df[mask].copy())
    del temp_train_df, y
    gc.collect()

training_df = pd.concat(training_df).reset_index(drop=True)
Features2 = Features + ["target_id"]
X_train = training_df[Features2]
y_train = training_df["target"]

In [2]:
# Deeper Gradient Boosting models for regression
xgb_model = XGBRegressor(
    n_estimators=2000,
    max_depth=6,
    learning_rate=0.01,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=1,
    reg_lambda=1,
    random_state=42,
    tree_method="hist",
    device="cuda"
)

lgbm_model = LGBMRegressor(
    n_estimators=2000,
    max_depth=6,
    learning_rate=0.01,
    num_leaves=256,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=1,
    reg_lambda=1,
    random_state=42,
    device="gpu",
    verbose=-1
)

catboost_model = CatBoostRegressor(
    iterations=2000,
    depth=6,
    learning_rate=0.01,
    l2_leaf_reg=3,
    random_seed=42,
    loss_function='RMSE',
    task_type="GPU",
    verbose=False
)

# Append models to a list for later training / ensembling
models = [xgb_model, lgbm_model, catboost_model]
Models = []

# Train all models on the entire dataset (no target-specific training)
print("Training models on entire dataset...")
for model in models:
    model.fit(X_train, y_train)
    Models.append(model)

print(f"Models list created with {len(Models)} models.")

Training models on entire dataset...




Models list created with 3 models.


In [None]:
def ensemble_predict(models, X):
    """
    Predict using a list of trained models and return the averaged prediction.
    
    Parameters:
        models : list of trained models
        X      : numpy array or DataFrame of features
        
    Returns:
        ensemble_pred : averaged prediction across models
    """
    preds = [model.predict(X) for model in models]
    ensemble_pred = np.mean(preds, axis=0)
    return ensemble_pred

# Test the predictions
X_data = X_train.copy()
X_data["preds"] = ensemble_predict(Models, X_train)

# Convert to wide format (90 rows × 424 columns)
df_preds = X_data.copy()
df_preds['row'] = df_preds.groupby('target_id').cumcount()

# Pivot the table to wide format
df_wide = df_preds.pivot(index='row', columns='target_id', values='preds')
df_wide = df_wide.sort_index(axis=1)
df_wide.index = [i for i in df_wide.index]

# Rename columns
df_wide.columns = [f'target_{i}' for i in df_wide.columns]
print(f"Wide format shape: {df_wide.shape}")

In [4]:
def rank_correlation_sharpe_ratio(merged_df: pd.DataFrame) -> float:
    prediction_cols = [col for col in merged_df.columns if col.startswith('prediction_')]
    target_cols = [col for col in merged_df.columns if col.startswith('target_')]
    
    def _compute_rank_correlation(row):
        non_null_targets = [col for col in target_cols if not pd.isnull(row[col])]
        matching_predictions = [col for col in prediction_cols if col.replace('prediction', 'target') in non_null_targets]
        
        if not non_null_targets:
            raise ValueError('No non-null target values found')
        if row[non_null_targets].std(ddof=0) == 0 or row[matching_predictions].std(ddof=0) == 0:
            raise ZeroDivisionError('Denominator is zero, unable to compute rank correlation.')
        
        return np.corrcoef(
            row[matching_predictions].rank(method='average'), 
            row[non_null_targets].rank(method='average')
        )[0, 1]
    
    daily_rank_corrs = merged_df.apply(_compute_rank_correlation, axis=1)
    std_dev = daily_rank_corrs.std(ddof=0)
    
    if std_dev == 0:
        raise ZeroDivisionError('Denominator is zero, unable to compute Sharpe ratio.')
    
    sharpe_ratio = daily_rank_corrs.mean() / std_dev
    return float(sharpe_ratio)

def score(solution: pd.DataFrame, submission: pd.DataFrame) -> float:
    assert all(solution.columns == submission.columns)
    submission = submission.rename(columns={col: col.replace('target_', 'prediction_') for col in submission.columns})
    solution = solution.replace(0, None)
    return rank_correlation_sharpe_ratio(pd.concat([solution, submission], axis='columns'))

# Test scoring
score_value = score(trainl[Features[1:]], df_wide[Features[1:]])
print(f"SCORE: {score_value:.6f}")

SCORE: 1.316328


In [6]:
import polars as pl

def predict(
    test: pl.DataFrame,
    lag1: pl.DataFrame, 
    lag2: pl.DataFrame,
    lag3: pl.DataFrame,
    lag4: pl.DataFrame,
) -> pl.DataFrame:
    """
    Predicts target values using lag features.
    This is your working version from the notebook.
    """
    # Convert to pandas
    test_pd = test.to_pandas()
    lag1_pd = lag1.to_pandas()
    lag2_pd = lag2.to_pandas()
    lag3_pd = lag3.to_pandas()
    lag4_pd = lag4.to_pandas()
    
    # Combine lag features
    X_pred = pd.concat([
        test_pd[["date_id"]],
        lag1_pd[target_lag_1],
        lag2_pd[target_lag_2],
        lag3_pd[target_lag_3],
        lag4_pd[target_lag_4],
    ], axis=1)
    
    # If no rows, return all zeros
    if len(X_pred) == 0:
        return pl.DataFrame(0, schema=[(f"target_{i}", pl.Float64) for i in range(424)])
    
    # Fill nulls with 0
    X_pred = X_pred.fillna(0)
    
    # Prepare features for prediction
    n_targets = 424
    n_rows = X_pred.shape[0]
    
    # Create features for all targets
    features_array = np.tile(X_pred[Features[1:]].values, (n_targets, 1))
    target_ids = np.repeat(np.arange(n_targets), n_rows)
    
    # Create prediction DataFrame
    X_pred2 = pd.DataFrame({
        "date_id": np.tile(X_pred["date_id"].values, n_targets),
        **{feat: features_array[:, i] for i, feat in enumerate(Features[1:])},
        "target_id": target_ids,
        "row": np.tile(np.arange(n_rows), n_targets)
    })
    
    # Make predictions
    preds = ensemble_predict(Models, X_pred2[Features2])
    X_pred2 = X_pred2.assign(preds=preds)
    
    # Pivot to wide format
    df_wide = (
        X_pred2.groupby(["target_id", "row"])
        .agg({"preds": "first"})
        .reset_index()
        .pivot(index="row", columns="target_id", values="preds")
        .sort_index()
    )
    
    # Ensure correct column order
    df_wide = df_wide.reindex(columns=range(424), fill_value=0)
    df_wide.columns = [f"target_{i}" for i in range(424)]
    
    # Return last row as predictions
    result_df = df_wide.tail(1)
    return pl.from_pandas(result_df.reset_index(drop=True))

# Test the prediction function
def test_prediction():
    """Test the prediction function"""
    sample_test = pl.from_pandas(trainl[Features].iloc[:5])
    sample_lag1 = pl.from_pandas(trainl[target_lag_1].iloc[:5])
    sample_lag2 = pl.from_pandas(trainl[target_lag_2].iloc[:5])
    sample_lag3 = pl.from_pandas(trainl[target_lag_3].iloc[:5])
    sample_lag4 = pl.from_pandas(trainl[target_lag_4].iloc[:5])
    
    result = predict(sample_test, sample_lag1, sample_lag2, sample_lag3, sample_lag4)
    print(f"Prediction result shape: {result.shape}")
    return result

test_result = test_prediction()

Prediction result shape: (1, 424)


In [7]:
import joblib

def save_models():
    """Save the trained models"""
    joblib.dump(Models, '/kaggle/working/models_list.joblib')
    print(f"Saved {len(Models)} models")

def load_models():
    """Load the trained models"""
    Models = joblib.load('/kaggle/working/models_list.joblib')
    print(f"Loaded {len(Models)} models")
    return Models

# Save models
save_models()

Saved 3 models


In [8]:
# submission through the API
import os
import kaggle_evaluation.mitsui_inference_server

inference_server = kaggle_evaluation.mitsui_inference_server.MitsuiInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    print('there')
    inference_server.serve()
else:
    print('here')
    inference_server.run_local_gateway(('/kaggle/input/mitsui-commodity-prediction-challenge/',))

here


In [9]:
display(pl.read_parquet('/kaggle/working/submission.parquet'))

date_id,target_0,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19,target_20,target_21,target_22,target_23,target_24,target_25,target_26,target_27,target_28,target_29,target_30,target_31,target_32,target_33,target_34,target_35,…,target_387,target_388,target_389,target_390,target_391,target_392,target_393,target_394,target_395,target_396,target_397,target_398,target_399,target_400,target_401,target_402,target_403,target_404,target_405,target_406,target_407,target_408,target_409,target_410,target_411,target_412,target_413,target_414,target_415,target_416,target_417,target_418,target_419,target_420,target_421,target_422,target_423
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1827,-0.000471,-0.000476,-0.000476,-0.000477,-0.000477,-0.000479,-0.000479,-0.000473,-0.000473,-0.000357,-0.00035,-0.000324,-0.000324,-0.000324,-0.000324,-0.000381,-0.000381,-0.000431,-0.000431,-0.000433,-0.000434,-0.000451,-0.00044,-0.000454,-0.000454,-0.000468,-0.000452,-0.000449,-0.000462,-0.000605,-0.000673,-0.000673,-0.000673,-0.000632,-0.000632,-0.000593,…,-0.000826,-0.000861,-0.000541,-0.000497,-0.00093,-0.00095,0.000239,-0.001212,-0.001614,-0.001569,-0.001341,-0.001352,-0.001373,-0.001356,-0.001386,-0.001434,-0.001619,0.001855,-0.000247,-0.000393,-0.000393,-0.00024,-0.000254,-0.000237,-0.000798,-0.000003,-0.000872,0.000785,0.000325,0.000213,0.000455,0.000573,0.000564,0.000852,0.001328,0.001897,0.001562
1828,-0.000471,-0.000476,-0.000476,-0.000477,-0.000477,-0.000479,-0.000479,-0.000473,-0.000473,-0.000357,-0.00035,-0.000324,-0.000324,-0.000324,-0.000324,-0.000381,-0.000381,-0.000431,-0.000431,-0.000433,-0.000434,-0.000451,-0.00044,-0.000454,-0.000454,-0.000468,-0.000452,-0.000449,-0.000462,-0.000605,-0.000673,-0.000673,-0.000673,-0.000632,-0.000632,-0.000593,…,-0.000826,-0.000861,-0.000541,-0.000497,-0.00093,-0.00095,0.000239,-0.001212,-0.001614,-0.001569,-0.001341,-0.001352,-0.001373,-0.001356,-0.001386,-0.001434,-0.001619,0.001855,-0.000247,-0.000393,-0.000393,-0.00024,-0.000254,-0.000237,-0.000798,-0.000003,-0.000872,0.000785,0.000325,0.000213,0.000455,0.000573,0.000564,0.000852,0.001328,0.001897,0.001562
1829,-0.00053,-0.000535,-0.000535,-0.000536,-0.000536,-0.000538,-0.000538,-0.000532,-0.000532,-0.000413,-0.000406,-0.00038,-0.00038,-0.00038,-0.00038,-0.000431,-0.000431,-0.000478,-0.000478,-0.000481,-0.000482,-0.000504,-0.000492,-0.000506,-0.000506,-0.00052,-0.000504,-0.000495,-0.000508,-0.000652,-0.000722,-0.000722,-0.000722,-0.000674,-0.000674,-0.000636,…,-0.000896,-0.000931,-0.000611,-0.000566,-0.000999,-0.00102,0.000169,-0.001282,-0.001684,-0.00164,-0.001408,-0.001419,-0.00144,-0.001423,-0.001437,-0.001477,-0.001661,0.001818,-0.000289,-0.000426,-0.000426,-0.00029,-0.000303,-0.000286,-0.000836,-0.000041,-0.00091,0.000716,0.000256,0.00012,0.000363,0.000477,0.000467,0.000756,0.001119,0.001407,0.001222
1830,-0.000476,-0.000481,-0.000481,-0.000482,-0.000482,-0.00049,-0.00049,-0.000484,-0.000484,-0.000376,-0.000368,-0.000373,-0.000373,-0.000373,-0.000373,-0.000393,-0.000393,-0.000691,-0.000691,-0.000558,-0.000559,-0.000553,-0.000548,-0.000488,-0.000439,-0.000489,-0.00047,-0.000462,-0.000476,-0.000731,-0.000773,-0.000773,-0.000773,-0.000501,-0.000501,-0.00049,…,-0.000531,-0.000602,-0.000282,-0.00023,-0.000613,-0.000638,0.00054,-0.000877,-0.001273,-0.001211,-0.000948,-0.000948,-0.000962,-0.000945,-0.00092,-0.000917,-0.001482,0.002278,0.000142,0.00001,0.00001,0.00018,0.000165,0.000159,-0.000305,0.000484,-0.000325,0.000511,0.000438,0.000173,0.000391,0.000529,0.00053,0.000642,0.001024,0.001662,0.001468
1831,0.000032,0.000027,0.000027,0.000027,0.000027,0.000018,0.000018,0.000019,0.000019,0.000064,0.000059,0.000069,0.000069,0.000069,0.000069,0.00007,0.00007,0.000026,0.000026,0.000085,0.000084,0.00005,0.000061,0.000046,0.000098,0.000047,0.000066,0.000108,0.000095,-0.000191,-0.000258,-0.000247,-0.000241,-0.000025,-0.000025,0.000003,…,-0.00036,-0.000448,-0.000128,-0.000113,-0.000418,-0.000434,-0.001345,-0.00096,-0.001062,-0.000995,-0.000718,-0.000724,-0.000725,-0.000708,-0.000692,-0.00058,-0.001297,0.002094,0.000151,0.000149,0.000149,0.000501,0.000465,0.000278,0.000145,0.000446,0.000117,0.000341,0.000233,-0.000014,0.000236,0.000173,0.000174,0.000455,0.000466,0.001116,0.000277
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1956,-0.000294,-0.000294,-0.000294,-0.000294,-0.000294,-0.000301,-0.000301,-0.000299,-0.000299,-0.000293,-0.000286,-0.00025,-0.00025,-0.00025,-0.00025,-0.000346,-0.000346,-0.000371,-0.000371,-0.000373,-0.000374,-0.000368,-0.000362,-0.000383,-0.000383,-0.000423,-0.000427,-0.000482,-0.000495,-0.000671,-0.00073,-0.00073,-0.000731,-0.000731,-0.000731,-0.000695,…,-0.000058,-0.000098,-0.000078,-0.000538,-0.000545,-0.000577,-0.00029,-0.000239,-0.000466,-0.000496,-0.000346,-0.00034,-0.000317,-0.000317,-0.000327,-0.000115,0.000059,0.001538,0.000389,0.000249,0.000238,0.000394,0.000362,0.000107,-0.000617,0.00054,-0.000243,0.000151,0.000038,-0.000464,-0.000497,-0.000496,-0.000595,-0.000568,-0.000151,-0.000669,-0.006195
1957,-0.000138,-0.000143,-0.000143,-0.000143,-0.000143,-0.000145,-0.000145,-0.00014,-0.00014,-0.000008,-5.2223e-7,0.000036,0.000036,0.000036,0.000036,-0.000015,-0.000015,-0.000043,-0.000043,-0.000046,-0.000047,-0.000105,-0.000087,-0.000102,-0.000102,-0.000136,-0.000138,-0.00014,-0.000153,-0.000548,-0.000634,-0.000634,-0.000634,-0.000357,-0.000357,-0.000389,…,0.000103,0.000054,0.000166,0.000183,-0.00035,-0.000443,-0.000049,-0.0001,-0.000481,-0.000677,-0.000346,-0.000361,-0.000428,-0.000364,-0.000354,-0.000475,-0.000457,0.001138,0.000292,0.000139,0.000139,0.000328,0.000314,0.000274,-0.000631,0.00083,-0.00051,0.001091,0.000608,0.000499,0.000595,0.00031,0.000277,0.000343,0.000769,0.000844,-0.003759
1958,0.000171,0.000166,0.000166,0.000157,0.000157,0.000166,0.000166,0.000172,0.000172,0.000295,0.000303,0.000363,0.000363,0.00038,0.00038,0.000338,0.000338,0.000381,0.000381,0.000384,0.000383,0.000307,0.000287,0.000257,0.000257,0.000233,0.000232,0.000294,0.000281,-0.000029,-0.000103,-0.000099,-0.000099,-0.000146,-0.000146,-0.000098,…,0.000052,0.00006,0.00011,-0.000159,-0.000491,-0.00049,-0.000267,-0.000199,-0.000563,-0.000748,-0.000454,-0.000465,-0.000638,-0.000615,-0.000607,-0.000693,-0.000879,0.000373,-0.000305,-0.000347,-0.000347,-0.000159,-0.00015,-0.000131,-0.000357,0.000362,-0.000843,0.000959,0.000428,0.000452,0.000816,0.000442,0.000418,0.000808,0.001136,0.00125,-0.001071
1959,0.000655,0.000655,0.000655,0.000655,0.000655,0.000643,0.000643,0.000643,0.000643,0.000602,0.000602,0.000648,0.000648,0.000668,0.000668,0.000643,0.000643,0.001973,0.001973,0.001973,0.00197,0.001945,0.002137,0.002121,0.002131,0.001396,0.00135,0.001428,0.001417,0.000233,0.00012,0.000129,0.000129,0.000254,0.000254,0.000315,…,-0.000426,-0.000384,-0.000361,-0.000703,-0.000875,-0.001016,-0.001035,-0.000924,-0.001117,-0.001269,-0.000938,-0.000979,-0.001073,-0.00105,-0.00103,-0.000991,-0.001083,0.000236,-0.000638,-0.000641,-0.000652,-0.000487,-0.000533,-0.000498,-0.000765,-0.000386,-0.000717,-0.000287,-0.000399,-0.000646,-0.0002,-0.000708,-0.000712,-0.000567,-0.000639,-0.000874,-0.001153
