In [1]:
!git clone https://github.com/sn09/ranking.git

Cloning into 'ranking'...
remote: Enumerating objects: 342, done.[K
remote: Counting objects: 100% (342/342), done.[K
remote: Compressing objects: 100% (198/198), done.[K
remote: Total 342 (delta 166), reused 286 (delta 119), pack-reused 0 (from 0)[K
Receiving objects: 100% (342/342), 169.45 KiB | 3.20 MiB/s, done.
Resolving deltas: 100% (166/166), done.


In [None]:
import sys

import lightgbm as lgb
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier, LGBMRanker
from sklearn.metrics import log_loss, roc_auc_score
from torch import nn

sys.path.append("./ranking/models/")

from rankfx.dcnv2.model import DCNv2
from rankfx.finalnet.model import FinalNet

In [23]:
base_path = "/kaggle/input/frappe-x1"

df_train = pd.read_csv(f"{base_path}/train.csv")
df_test = pd.read_csv(f"{base_path}/test.csv")
df_val = pd.read_csv(f"{base_path}/valid.csv")

df_train.head()

Unnamed: 0,label,user,item,daytime,weekday,isweekend,homework,cost,weather,country,city
0,0,451,4149,5041,5046,5053,5055,5058,5060,5069,5149
1,0,91,3503,5041,5047,5053,5056,5058,5065,5095,5149
2,1,168,983,5040,5050,5054,5055,5058,5060,5069,5207
3,0,620,1743,5045,5051,5054,5055,5058,5061,5073,5149
4,0,46,2692,5040,5049,5054,5055,5058,5060,5086,5211


In [24]:
feature_columns = df_train.columns.difference(["label"])
for df in [df_train, df_val, df_test]:
    df[feature_columns] = df[feature_columns].astype("category")

# LightGBM

## LGBMClassifier

In [26]:
booster_clf = LGBMClassifier(
    objective="binary",
    max_depth=5,
    learning_rate=1e-1,
    n_estimators=1000,
)

In [27]:
feature_names = df_train.columns.difference(["label"]).tolist()

booster_clf = booster_clf.fit(
    X=df_train.drop(columns="label"),
    y=df_train["label"],
    eval_set=(df_val.drop(columns="label"), df_val["label"]),
    feature_name=df_train.drop(columns="label").columns.tolist(),
    categorical_feature=feature_names,
    callbacks=[lgb.early_stopping(stopping_rounds=10), lgb.log_evaluation()],
)

[LightGBM] [Info] Number of positive: 67604, number of negative: 134423
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4989
[LightGBM] [Info] Number of data points in the train set: 202027, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.334629 -> initscore=-0.687324
[LightGBM] [Info] Start training from score -0.687324
[1]	valid_0's binary_logloss: 0.583418
Training until validation scores don't improve for 10 rounds
[2]	valid_0's binary_logloss: 0.543931
[3]	valid_0's binary_logloss: 0.512499
[4]	valid_0's binary_logloss: 0.486551
[5]	valid_0's binary_logloss: 0.465163
[6]	valid_0's binary_logloss: 0.447226
[7]	valid_0's binary_logloss: 0.431919
[8]	valid_0's binary_logloss: 0.418697
[9]	valid_0's binary_logloss: 0.407334
[10]	valid_0's binary_logloss: 0.39735
[11]	valid_0's binary_logloss: 0.38861
[12]	valid_0's

In [28]:
booster_clf_preds = booster_clf.predict(
    df_test.drop(columns="label"),
    raw_score=True,
)

In [29]:
roc_auc = roc_auc_score(df_test["label"], booster_clf_preds)
logloss = log_loss(df_test["label"], booster_clf_preds)

print("LightGBM Classifier metrics")
print(f"ROC AUC: {roc_auc}, logloss: {logloss}")

LightGBM Classifier metrics
ROC AUC: 0.9840659433569412, logloss: 1.265528146918096


## LGBMRanker

In [31]:
# dont want to recompile lightgbm, so must ensure group has less than 10000 queries

df_train_lgb = (
    df_train
    .groupby("user", as_index=False)
    .sample(n=10_000, replace=True)
    .drop_duplicates()
    .sort_values(by="user")
)
group = df_train_lgb["user"].value_counts(sort=False).values

df_val_lgb = (
    df_val
    .groupby("user", as_index=False)
    .sample(n=10_000, replace=True)
    .drop_duplicates()
    .sort_values(by="user")
)
group_val = df_val_lgb["user"].value_counts(sort=False).values

  .groupby("user", as_index=False)
  .groupby("user", as_index=False)


In [32]:
booster_rnk = LGBMRanker(
    objective="lambdarank",
    max_depth=5,
    learning_rate=1e-1,
    n_estimators=1000,
)

In [33]:
feature_names = df_train_lgb.columns.difference(["label"]).tolist()

booster_rnk = booster_rnk.fit(
    X=df_train_lgb.drop(columns="label"),
    y=df_train_lgb["label"],
    group=group,
    eval_set=[(df_val_lgb.drop(columns="label"), df_val_lgb["label"])],
    eval_group=[group_val],
    feature_name=feature_names,
    categorical_feature=feature_names,
    callbacks=[lgb.early_stopping(stopping_rounds=10), lgb.log_evaluation()],
    eval_metric="auc",
)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009847 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4989
[LightGBM] [Info] Number of data points in the train set: 201908, number of used features: 10
[1]	valid_0's auc: 0.835433	valid_0's ndcg@1: 0.924208	valid_0's ndcg@2: 0.927151	valid_0's ndcg@3: 0.925552	valid_0's ndcg@4: 0.92443	valid_0's ndcg@5: 0.924703
Training until validation scores don't improve for 10 rounds
[2]	valid_0's auc: 0.842651	valid_0's ndcg@1: 0.947964	valid_0's ndcg@2: 0.943952	valid_0's ndcg@3: 0.94096	valid_0's ndcg@4: 0.940157	valid_0's ndcg@5: 0.940012
[3]	valid_0's auc: 0.847559	valid_0's ndcg@1: 0.954751	valid_0's ndcg@2: 0.949474	valid_0's ndcg@3: 0.94512	valid_0's ndcg@4: 0.945366	valid_0's ndcg@5: 0.944235
[4]	valid_0's auc: 0.850928	valid_0's ndcg@1: 0.958145	valid_0's ndcg@2: 0.952268	valid_0's ndcg@3:

In [34]:
booster_rnk_preds = booster_rnk.predict(
    df_test.drop(columns="label"),
    raw_score=True,
)

In [35]:
roc_auc = roc_auc_score(df_test["label"], booster_rnk_preds)
logloss = log_loss(df_test["label"], booster_rnk_preds)

print("LightGBM Ranker metrics")
print(f"ROC AUC: {roc_auc}, logloss: {logloss}")

LightGBM Ranker metrics
ROC AUC: 0.8495760984054672, logloss: 5.0028704232239285


# DCNv2

In [40]:
dcnv2_model = DCNv2(
    model_structure="stacked_parallel",
    use_low_rank_mixture=True,
    cross_low_rank_dim=32,
    num_cross_layers=5,
    num_cross_experts=4,
    parallel_hidden_dims=[256, 512, 1024],
    parallel_dropout=0.2,
    parallel_use_batch_norm=True,
    parallel_activation=nn.ReLU,
    stacked_hidden_dims=[256, 512, 1024],
    stacked_dropout=0.2,
    stacked_use_batch_norm=True,
    stacked_activation=nn.ReLU,
    output_dim=1,
    proj_output_embeddings=True,
)

In [41]:
train_metrics_dcnv2, val_metrics_dcnv2 = dcnv2_model.fit(
    features=df_train.drop(columns="label"),
    target=df_train["label"],
    val_features=df_val.drop(columns="label"),
    val_target=df_val["label"],
    optimizer_cls="torch.optim.Adam",
    optimizer_params=dict(lr=1e-2),
    scheduler_cls="torch.optim.lr_scheduler.ReduceLROnPlateau",
    scheduler_params=dict(mode="max", factor=0.1, patience=2, min_lr=1e-6),
    grad_clip_threshold=10.,
    num_epochs=15,
    seed=42,
    artifacts_path="./dcnv2_artifacts",
    device="cuda:0",
    batch_size=4096,
    num_workers=2,
    eval_metric_name="AUC",
    eval_mode="max",
    default_embedding_size=20,
    oov_masking_proba=0.05,
    l2_net_reg=0,
    l2_embedding_reg=0,
)

[2025-05-03 23:26:29,173]{model.py:660} - INFO - Used features config: FeaturesConfig(features=[Feature(name='user', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=955, embedding_padding_idx=None), Feature(name='item', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=4083, embedding_padding_idx=None), Feature(name='daytime', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=8, embedding_padding_idx=None), Feature(name='weekday', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=8, embedding_padding_idx=None), Feature(name='isweekend', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=3, embeddi

Train epoch #0:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:26:56,824]{model.py:567} - INFO - Finished Train Epoch #0, average metrics - [loss: 0.68184]


Validation epoch #0:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:26:59,501]{model.py:567} - INFO - Finished Validation Epoch #0, average metrics - [AUC: 0.85563, log_loss: 0.40141]
[2025-05-03 23:27:09,904]{model.py:747} - INFO - Best model with AUC = 0.8556335798603146 was saved to dcnv2_artifacts/best_model.pt


Train epoch #1:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:27:38,554]{model.py:567} - INFO - Finished Train Epoch #1, average metrics - [loss: 0.36361]


Validation epoch #1:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:27:51,090]{model.py:567} - INFO - Finished Validation Epoch #1, average metrics - [AUC: 0.92327, log_loss: 0.30344]
[2025-05-03 23:28:01,491]{model.py:747} - INFO - Best model with AUC = 0.9232720844770345 was saved to dcnv2_artifacts/best_model.pt


Train epoch #2:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:28:29,381]{model.py:567} - INFO - Finished Train Epoch #2, average metrics - [loss: 0.31124]


Validation epoch #2:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:28:42,021]{model.py:567} - INFO - Finished Validation Epoch #2, average metrics - [AUC: 0.93531, log_loss: 0.28816]
[2025-05-03 23:28:52,425]{model.py:747} - INFO - Best model with AUC = 0.9353061360651886 was saved to dcnv2_artifacts/best_model.pt


Train epoch #3:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:29:20,973]{model.py:567} - INFO - Finished Train Epoch #3, average metrics - [loss: 0.29293]


Validation epoch #3:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:29:33,693]{model.py:567} - INFO - Finished Validation Epoch #3, average metrics - [AUC: 0.94101, log_loss: 0.27587]
[2025-05-03 23:29:44,088]{model.py:747} - INFO - Best model with AUC = 0.9410140084684558 was saved to dcnv2_artifacts/best_model.pt


Train epoch #4:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:30:11,893]{model.py:567} - INFO - Finished Train Epoch #4, average metrics - [loss: 0.28079]


Validation epoch #4:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:30:24,553]{model.py:567} - INFO - Finished Validation Epoch #4, average metrics - [AUC: 0.94513, log_loss: 0.26575]
[2025-05-03 23:30:34,956]{model.py:747} - INFO - Best model with AUC = 0.9451311886700559 was saved to dcnv2_artifacts/best_model.pt


Train epoch #5:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:31:02,066]{model.py:567} - INFO - Finished Train Epoch #5, average metrics - [loss: 0.27091]


Validation epoch #5:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:31:14,590]{model.py:567} - INFO - Finished Validation Epoch #5, average metrics - [AUC: 0.94759, log_loss: 0.25907]
[2025-05-03 23:31:24,991]{model.py:747} - INFO - Best model with AUC = 0.9475922260021565 was saved to dcnv2_artifacts/best_model.pt


Train epoch #6:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:31:53,323]{model.py:567} - INFO - Finished Train Epoch #6, average metrics - [loss: 0.26035]


Validation epoch #6:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:32:06,023]{model.py:567} - INFO - Finished Validation Epoch #6, average metrics - [AUC: 0.94967, log_loss: 0.25604]
[2025-05-03 23:32:16,412]{model.py:747} - INFO - Best model with AUC = 0.9496696896326913 was saved to dcnv2_artifacts/best_model.pt


Train epoch #7:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:32:43,964]{model.py:567} - INFO - Finished Train Epoch #7, average metrics - [loss: 0.25134]


Validation epoch #7:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:32:56,548]{model.py:567} - INFO - Finished Validation Epoch #7, average metrics - [AUC: 0.95203, log_loss: 0.24997]
[2025-05-03 23:33:06,950]{model.py:747} - INFO - Best model with AUC = 0.9520292078600154 was saved to dcnv2_artifacts/best_model.pt


Train epoch #8:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:33:34,820]{model.py:567} - INFO - Finished Train Epoch #8, average metrics - [loss: 0.24269]


Validation epoch #8:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:33:47,442]{model.py:567} - INFO - Finished Validation Epoch #8, average metrics - [AUC: 0.95196, log_loss: 0.24950]


Train epoch #9:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:34:26,162]{model.py:567} - INFO - Finished Train Epoch #9, average metrics - [loss: 0.23307]


Validation epoch #9:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:34:38,791]{model.py:567} - INFO - Finished Validation Epoch #9, average metrics - [AUC: 0.95573, log_loss: 0.23934]
[2025-05-03 23:34:49,209]{model.py:747} - INFO - Best model with AUC = 0.9557284710367377 was saved to dcnv2_artifacts/best_model.pt


Train epoch #10:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:35:16,991]{model.py:567} - INFO - Finished Train Epoch #10, average metrics - [loss: 0.22392]


Validation epoch #10:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:35:29,721]{model.py:567} - INFO - Finished Validation Epoch #10, average metrics - [AUC: 0.95822, log_loss: 0.23255]
[2025-05-03 23:35:40,106]{model.py:747} - INFO - Best model with AUC = 0.9582157149864974 was saved to dcnv2_artifacts/best_model.pt


Train epoch #11:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:36:08,077]{model.py:567} - INFO - Finished Train Epoch #11, average metrics - [loss: 0.21456]


Validation epoch #11:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:36:20,654]{model.py:567} - INFO - Finished Validation Epoch #11, average metrics - [AUC: 0.95918, log_loss: 0.23027]
[2025-05-03 23:36:31,095]{model.py:747} - INFO - Best model with AUC = 0.959180522346654 was saved to dcnv2_artifacts/best_model.pt


Train epoch #12:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:36:59,407]{model.py:567} - INFO - Finished Train Epoch #12, average metrics - [loss: 0.20913]


Validation epoch #12:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:37:12,026]{model.py:567} - INFO - Finished Validation Epoch #12, average metrics - [AUC: 0.95480, log_loss: 0.25946]


Train epoch #13:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:37:50,778]{model.py:567} - INFO - Finished Train Epoch #13, average metrics - [loss: 0.20814]


Validation epoch #13:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:38:03,382]{model.py:567} - INFO - Finished Validation Epoch #13, average metrics - [AUC: 0.95860, log_loss: 0.23898]


Train epoch #14:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:38:41,400]{model.py:567} - INFO - Finished Train Epoch #14, average metrics - [loss: 0.20168]


Validation epoch #14:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:38:54,118]{model.py:567} - INFO - Finished Validation Epoch #14, average metrics - [AUC: 0.96037, log_loss: 0.23079]
[2025-05-03 23:39:04,524]{model.py:747} - INFO - Best model with AUC = 0.9603660385816352 was saved to dcnv2_artifacts/best_model.pt
[2025-05-03 23:39:04,526]{model.py:763} - INFO - Loading best model from dcnv2_artifacts/best_model.pt


In [42]:
test_metrics_dcnv2 = dcnv2_model.test(
    features=df_test.drop(columns="label"),
    target=df_test["label"],
    device="cuda:0",
    batch_size=4096,
    num_workers=2,
)
test_metrics_dcnv2

[2025-05-03 23:39:04,895]{model.py:789} - INFO - Building test dataloader
[2025-05-03 23:39:04,897]{model.py:297} - INFO - Encoding feature user
[2025-05-03 23:39:04,901]{model.py:297} - INFO - Encoding feature item
[2025-05-03 23:39:04,906]{model.py:297} - INFO - Encoding feature daytime
[2025-05-03 23:39:04,908]{model.py:297} - INFO - Encoding feature weekday
[2025-05-03 23:39:04,911]{model.py:297} - INFO - Encoding feature isweekend
[2025-05-03 23:39:04,913]{model.py:297} - INFO - Encoding feature homework
[2025-05-03 23:39:04,916]{model.py:297} - INFO - Encoding feature cost
[2025-05-03 23:39:04,918]{model.py:297} - INFO - Encoding feature weather
[2025-05-03 23:39:04,920]{model.py:297} - INFO - Encoding feature country
[2025-05-03 23:39:04,923]{model.py:297} - INFO - Encoding feature city


Test epoch #-1:   0%|          | 0/8 [00:00<?, ?it/s]

[2025-05-03 23:39:06,407]{model.py:567} - INFO - Finished Test Epoch #-1, average metrics - [AUC: 0.96226, log_loss: 0.22253]


{'AUC': 0.9622580272467824, 'log_loss': 0.22252739437375815}

# FinalNet

In [43]:
finalnet_model = FinalNet(
    block_type="2B",
    use_field_gate=True,
    use_batch_norm=True,
    add_bias=True,
    block1_hidden_dims=[256, 512, 1024],
    block1_hidden_activations=nn.ReLU,
    block1_dropout_rates=0.2,
    block2_hidden_dims=[256, 512, 1024],
    block2_hidden_activations=nn.ReLU,
    block2_dropout_rates=0.2,
    residual_type="concat",
    proj_output_embeddings=True,
)

In [44]:
train_metrics_final, val_metrics_final = finalnet_model.fit(
    features=df_train.drop(columns="label"),
    target=df_train["label"],
    val_features=df_val.drop(columns="label"),
    val_target=df_val["label"],
    optimizer_cls="torch.optim.Adam",
    optimizer_params=dict(lr=1e-2),
    scheduler_cls="torch.optim.lr_scheduler.ReduceLROnPlateau",
    scheduler_params=dict(mode="max", factor=0.1, patience=2, min_lr=1e-6),
    grad_clip_threshold=10.,
    num_epochs=15,
    seed=42,
    artifacts_path="./finalnet_artifacts",
    device="cuda:0",
    batch_size=4096,
    num_workers=2,
    eval_metric_name="AUC",
    eval_mode="max",
    embedded_features=["user_id", "item_id", "tag_id"],
    oov_masking_proba=0.05,
    default_embedding_size=20, # should be equal for all features if using field gate
    l2_net_reg=0.,
    l2_embedding_reg=0,
)

[2025-05-03 23:39:16,851]{model.py:660} - INFO - Used features config: FeaturesConfig(features=[Feature(name='user', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=955, embedding_padding_idx=None), Feature(name='item', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=4083, embedding_padding_idx=None), Feature(name='daytime', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=8, embedding_padding_idx=None), Feature(name='weekday', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=8, embedding_padding_idx=None), Feature(name='isweekend', feature_type=<FeatureType.CATEGORICAL: 'categorical'>, feature_size=1, needs_embed=True, embedding_size=20, embedding_vocab_size=3, embeddi

Train epoch #0:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:39:44,040]{model.py:567} - INFO - Finished Train Epoch #0, average metrics - [loss: 0.54483]


Validation epoch #0:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:39:56,453]{model.py:567} - INFO - Finished Validation Epoch #0, average metrics - [AUC: 0.88964, log_loss: 0.37530]
[2025-05-03 23:40:06,860]{model.py:747} - INFO - Best model with AUC = 0.8896420227921912 was saved to finalnet_artifacts/best_model.pt


Train epoch #1:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:40:34,071]{model.py:567} - INFO - Finished Train Epoch #1, average metrics - [loss: 0.32816]


Validation epoch #1:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:40:46,553]{model.py:567} - INFO - Finished Validation Epoch #1, average metrics - [AUC: 0.93395, log_loss: 0.28450]
[2025-05-03 23:40:56,952]{model.py:747} - INFO - Best model with AUC = 0.9339485091764155 was saved to finalnet_artifacts/best_model.pt


Train epoch #2:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:41:25,353]{model.py:567} - INFO - Finished Train Epoch #2, average metrics - [loss: 0.28574]


Validation epoch #2:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:41:37,863]{model.py:567} - INFO - Finished Validation Epoch #2, average metrics - [AUC: 0.94627, log_loss: 0.26039]
[2025-05-03 23:41:48,254]{model.py:747} - INFO - Best model with AUC = 0.946272337309106 was saved to finalnet_artifacts/best_model.pt


Train epoch #3:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:42:16,424]{model.py:567} - INFO - Finished Train Epoch #3, average metrics - [loss: 0.25802]


Validation epoch #3:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:42:28,997]{model.py:567} - INFO - Finished Validation Epoch #3, average metrics - [AUC: 0.95505, log_loss: 0.23759]
[2025-05-03 23:42:39,396]{model.py:747} - INFO - Best model with AUC = 0.9550451845512352 was saved to finalnet_artifacts/best_model.pt


Train epoch #4:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:43:05,854]{model.py:567} - INFO - Finished Train Epoch #4, average metrics - [loss: 0.23172]


Validation epoch #4:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:43:18,451]{model.py:567} - INFO - Finished Validation Epoch #4, average metrics - [AUC: 0.96068, log_loss: 0.22334]
[2025-05-03 23:43:28,840]{model.py:747} - INFO - Best model with AUC = 0.9606803226937818 was saved to finalnet_artifacts/best_model.pt


Train epoch #5:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:43:56,935]{model.py:567} - INFO - Finished Train Epoch #5, average metrics - [loss: 0.20534]


Validation epoch #5:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:44:09,500]{model.py:567} - INFO - Finished Validation Epoch #5, average metrics - [AUC: 0.96621, log_loss: 0.21066]
[2025-05-03 23:44:19,911]{model.py:747} - INFO - Best model with AUC = 0.966213229610507 was saved to finalnet_artifacts/best_model.pt


Train epoch #6:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:44:46,281]{model.py:567} - INFO - Finished Train Epoch #6, average metrics - [loss: 0.18287]


Validation epoch #6:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:44:58,802]{model.py:567} - INFO - Finished Validation Epoch #6, average metrics - [AUC: 0.96915, log_loss: 0.20561]
[2025-05-03 23:45:09,184]{model.py:747} - INFO - Best model with AUC = 0.9691492270251004 was saved to finalnet_artifacts/best_model.pt


Train epoch #7:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:45:36,636]{model.py:567} - INFO - Finished Train Epoch #7, average metrics - [loss: 0.16323]


Validation epoch #7:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:45:49,256]{model.py:567} - INFO - Finished Validation Epoch #7, average metrics - [AUC: 0.97109, log_loss: 0.20013]
[2025-05-03 23:45:59,660]{model.py:747} - INFO - Best model with AUC = 0.9710921853232892 was saved to finalnet_artifacts/best_model.pt


Train epoch #8:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:46:26,921]{model.py:567} - INFO - Finished Train Epoch #8, average metrics - [loss: 0.14548]


Validation epoch #8:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:46:39,438]{model.py:567} - INFO - Finished Validation Epoch #8, average metrics - [AUC: 0.97244, log_loss: 0.20860]
[2025-05-03 23:46:49,836]{model.py:747} - INFO - Best model with AUC = 0.9724386645018092 was saved to finalnet_artifacts/best_model.pt


Train epoch #9:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:47:17,796]{model.py:567} - INFO - Finished Train Epoch #9, average metrics - [loss: 0.12974]


Validation epoch #9:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:47:30,885]{model.py:567} - INFO - Finished Validation Epoch #9, average metrics - [AUC: 0.97377, log_loss: 0.20811]
[2025-05-03 23:47:41,285]{model.py:747} - INFO - Best model with AUC = 0.9737687322267072 was saved to finalnet_artifacts/best_model.pt


Train epoch #10:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:48:08,847]{model.py:567} - INFO - Finished Train Epoch #10, average metrics - [loss: 0.11996]


Validation epoch #10:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:48:21,326]{model.py:567} - INFO - Finished Validation Epoch #10, average metrics - [AUC: 0.97503, log_loss: 0.20831]
[2025-05-03 23:48:31,710]{model.py:747} - INFO - Best model with AUC = 0.9750308021872567 was saved to finalnet_artifacts/best_model.pt


Train epoch #11:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:48:59,053]{model.py:567} - INFO - Finished Train Epoch #11, average metrics - [loss: 0.10892]


Validation epoch #11:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:49:11,564]{model.py:567} - INFO - Finished Validation Epoch #11, average metrics - [AUC: 0.97523, log_loss: 0.21765]
[2025-05-03 23:49:21,961]{model.py:747} - INFO - Best model with AUC = 0.9752312228252276 was saved to finalnet_artifacts/best_model.pt


Train epoch #12:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:49:49,316]{model.py:567} - INFO - Finished Train Epoch #12, average metrics - [loss: 0.10138]


Validation epoch #12:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:50:01,873]{model.py:567} - INFO - Finished Validation Epoch #12, average metrics - [AUC: 0.97575, log_loss: 0.22144]
[2025-05-03 23:50:12,279]{model.py:747} - INFO - Best model with AUC = 0.9757495307606006 was saved to finalnet_artifacts/best_model.pt


Train epoch #13:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:50:39,360]{model.py:567} - INFO - Finished Train Epoch #13, average metrics - [loss: 0.09603]


Validation epoch #13:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:50:51,918]{model.py:567} - INFO - Finished Validation Epoch #13, average metrics - [AUC: 0.97569, log_loss: 0.22362]


Train epoch #14:   0%|          | 0/50 [00:00<?, ?it/s]

[2025-05-03 23:51:29,422]{model.py:567} - INFO - Finished Train Epoch #14, average metrics - [loss: 0.08862]


Validation epoch #14:   0%|          | 0/15 [00:00<?, ?it/s]

[2025-05-03 23:51:41,933]{model.py:567} - INFO - Finished Validation Epoch #14, average metrics - [AUC: 0.97659, log_loss: 0.22249]
[2025-05-03 23:51:52,328]{model.py:747} - INFO - Best model with AUC = 0.9765901647765726 was saved to finalnet_artifacts/best_model.pt
[2025-05-03 23:51:52,329]{model.py:763} - INFO - Loading best model from finalnet_artifacts/best_model.pt


In [45]:
test_metrics_final = finalnet_model.test(
    features=df_test.drop(columns="label"),
    target=df_test["label"],
    device="cuda:0",
    batch_size=4096,
    num_workers=2,
)
test_metrics_final

[2025-05-03 23:51:52,427]{model.py:789} - INFO - Building test dataloader
[2025-05-03 23:51:52,429]{model.py:297} - INFO - Encoding feature user
[2025-05-03 23:51:52,434]{model.py:297} - INFO - Encoding feature item
[2025-05-03 23:51:52,440]{model.py:297} - INFO - Encoding feature daytime
[2025-05-03 23:51:52,443]{model.py:297} - INFO - Encoding feature weekday
[2025-05-03 23:51:52,446]{model.py:297} - INFO - Encoding feature isweekend
[2025-05-03 23:51:52,449]{model.py:297} - INFO - Encoding feature homework
[2025-05-03 23:51:52,452]{model.py:297} - INFO - Encoding feature cost
[2025-05-03 23:51:52,455]{model.py:297} - INFO - Encoding feature weather
[2025-05-03 23:51:52,457]{model.py:297} - INFO - Encoding feature country
[2025-05-03 23:51:52,460]{model.py:297} - INFO - Encoding feature city


Test epoch #-1:   0%|          | 0/8 [00:00<?, ?it/s]

[2025-05-03 23:51:53,916]{model.py:567} - INFO - Finished Test Epoch #-1, average metrics - [AUC: 0.97602, log_loss: 0.23068]


{'AUC': 0.976019737143411, 'log_loss': 0.2306835336606623}