In [1]:
from pathlib import Path
import joblib

import pandas as pd

In [2]:
def load_data(
    filename: str,
    categorical_columns: tuple = (
        "Genre",
        "Episode_Sentiment",
        "Publication_Day",
        "Publication_Time",
        "Episode_Title",
        "Podcast_Name",
    ),
) -> pd.DataFrame:
    """Read ``./data/<filename>`` and integer-encode its categorical columns.

    Each column named in ``categorical_columns`` is replaced by its pandas
    category codes. Missing values in those columns become ``-1``.

    NOTE(review): the codes are derived from the values present in *this* file
    only. They line up with the encoding used at training time only if the
    training data was encoded the same way over the same set of values --
    confirm against the training notebook.

    Args:
        filename: Name of the CSV file inside the ``./data`` directory.
        categorical_columns: Columns to convert to integer category codes.

    Returns:
        The loaded frame with the listed columns replaced by integer codes;
        all other columns are left as read by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If ``./data/<filename>`` does not exist.
        KeyError: If any of ``categorical_columns`` is absent from the file.
    """
    csv_path = Path(f"./data/{filename}")
    # Explicit check instead of a bare `assert`: asserts carry no message here
    # and are stripped entirely when Python runs with -O.
    if not csv_path.exists():
        raise FileNotFoundError(f"Data file not found: {csv_path.resolve()}")

    df = pd.read_csv(filepath_or_buffer=csv_path)

    absent = [col for col in categorical_columns if col not in df.columns]
    if absent:
        raise KeyError(f"Columns missing from {csv_path}: {absent}")

    # Overwrite each column in place so the column order of the frame is kept.
    for col in categorical_columns:
        df[col] = df[col].astype("category").cat.codes

    return df


# Load the test split with its categorical columns integer-encoded.
test_df = load_data(filename="test.csv")

In [3]:
# Path of the pickled model bundle expected under ./models.
modeldir = Path("./models")
modelpath = modeldir / "xgb_tuned_scnd_model_bundle.pkl"

# Fail fast with a descriptive error; a bare `assert` gives no message and is
# stripped when Python runs with -O.
if not modelpath.exists():
    raise FileNotFoundError(f"Model bundle not found: {modelpath.resolve()}")

In [4]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load bundles that come from a trusted source.
bundle = joblib.load(modelpath)

In [5]:
# `.get` returns None for a missing key, which would otherwise only surface
# later as an AttributeError on `model`; check here so the failure names the
# real cause.
model = bundle.get("model")
if model is None:
    raise KeyError("'model' entry is missing from the loaded bundle")

In [6]:
# Show the loaded estimator's parameters as this cell's output.
model.get_params()

{'objective': 'reg:squarederror',
 'base_score': None,
 'booster': None,
 'callbacks': None,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'colsample_bytree': 0.8,
 'device': None,
 'early_stopping_rounds': None,
 'enable_categorical': False,
 'eval_metric': None,
 'feature_types': None,
 'feature_weights': None,
 'gamma': None,
 'grow_policy': None,
 'importance_type': None,
 'interaction_constraints': None,
 'learning_rate': 0.2,
 'max_bin': None,
 'max_cat_threshold': None,
 'max_cat_to_onehot': None,
 'max_delta_step': None,
 'max_depth': 8,
 'max_leaves': None,
 'min_child_weight': None,
 'missing': nan,
 'monotone_constraints': None,
 'multi_strategy': None,
 'n_estimators': 180,
 'n_jobs': None,
 'num_parallel_tree': None,
 'random_state': 42,
 'reg_alpha': None,
 'reg_lambda': None,
 'sampling_method': None,
 'scale_pos_weight': None,
 'subsample': 0.6,
 'tree_method': 'hist',
 'validate_parameters': None,
 'verbosity': 0}

In [7]:
# Predict from the feature columns only. The target column is dropped as well
# (ignored when absent) so that re-running this cell after predictions have
# been written does not feed them back into the model as an extra feature.
test_df["Listening_Time_minutes"] = model.predict(
    test_df.drop(columns=["id"]).drop(
        columns=["Listening_Time_minutes"], errors="ignore"
    )
)

In [8]:
# Write the submission file: one row per test id with its predicted target.
submission_dir = Path("./submissions")
# Create the output directory on a fresh checkout; to_csv does not create it.
submission_dir.mkdir(parents=True, exist_ok=True)
# Explicit column selection raises KeyError if a column is missing, whereas
# DataFrame.filter(items=...) would silently write an incomplete file.
test_df[["id", "Listening_Time_minutes"]].to_csv(
    submission_dir / "xgboost_scnd.csv", index=False
)