In [7]:
from pathlib import Path
import joblib

import pandas as pd

In [8]:
def load_data(filename: str) -> pd.DataFrame:
    """Read ``./data/<filename>`` as CSV and integer-encode its text columns.

    The columns ``Genre``, ``Episode_Sentiment``, ``Publication_Day``,
    ``Publication_Time``, ``Episode_Title`` and ``Podcast_Name`` are each
    converted to pandas ``category`` dtype and replaced by their category
    codes. All other columns are returned as read.

    Note:
        The codes are derived from the values present in *this* file only
        (pandas orders the inferred categories and numbers them from 0), and
        missing values are encoded as ``-1``. The same label can therefore
        map to a different code in a file with a different set of values.
        NOTE(review): confirm the model was fitted on features encoded the
        same way before trusting predictions made from this frame.

    Args:
        filename: Name of the CSV file, resolved relative to ``./data``.

    Returns:
        The loaded frame with the six columns above replaced by integer codes.

    Raises:
        FileNotFoundError: If ``./data/<filename>`` does not exist.
        KeyError: If one of the expected columns is absent from the file.
    """
    categorical_columns = (
        "Genre",
        "Episode_Sentiment",
        "Publication_Day",
        "Publication_Time",
        "Episode_Title",
        "Podcast_Name",
    )

    p = Path(f"./data/{filename}")
    # Explicit check instead of `assert`: assertions are removed under
    # `python -O` and carry no message about which path was missing.
    if not p.exists():
        raise FileNotFoundError(f"Data file not found: {p}")

    df = pd.read_csv(filepath_or_buffer=p)

    for column in categorical_columns:
        df[column] = df[column].astype("category").cat.codes

    return df


# Load ./data/test.csv; load_data replaces its text columns with integer codes.
test_df = load_data("test.csv")

In [9]:
# Filesystem location of the pickled model bundle.
# NOTE: at this point `model` is a Path to the bundle file, not an estimator;
# the name is rebound to the estimator once the bundle has been loaded.
modeldir = Path("./models")
model = modeldir.joinpath("rf_tuned_model_bundle.pkl")

In [10]:
# `model` is still the bundle's file path here (assigned in the previous cell).
# joblib.load is pickle-based and can execute arbitrary code while loading,
# so only load bundle files that come from a trusted source.
bundle = joblib.load(model)

In [11]:
# Pull the fitted estimator out of the loaded bundle (rebinding `model`, which
# previously held the bundle's path). Fail here with a clear message if the
# entry is absent, rather than carrying `None` forward and hitting an
# unrelated AttributeError on the first method call.
model = bundle.get("model")
if model is None:
    raise KeyError("Loaded bundle has no 'model' entry")

In [16]:
# Display the loaded estimator's parameters as the cell output (sanity check
# that the expected configuration was loaded).
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 'log2',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 2,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 175,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}

In [13]:
# Feature matrix = every column except the row identifier. The prediction
# column is also removed when it already exists, so re-running this cell does
# not feed the previous run's predictions back into the model as a feature.
test_features = test_df.drop(columns=["id"]).drop(
    columns=["Listening_Time_minutes"], errors="ignore"
)
test_df["Listening_Time_minutes"] = model.predict(test_features)

In [14]:
submission_dir = Path("./submissions")
# to_csv does not create missing parent directories, so make sure the output
# folder exists (no-op when it is already there).
submission_dir.mkdir(parents=True, exist_ok=True)

# Select the two submission columns explicitly: unlike filter(items=...),
# this raises a KeyError if the prediction column is missing, instead of
# silently writing an incomplete file.
test_df[["id", "Listening_Time_minutes"]].to_csv(
    submission_dir / "tuned_rf.csv",
    index=False,
)