In [1]:
from pathlib import Path
import joblib

import pandas as pd

In [2]:
def load_data(filename: str, categories: "dict[str, list] | None" = None) -> pd.DataFrame:
    """Read ``./data/<filename>`` as CSV and integer-encode its categorical columns.

    The columns ``Genre``, ``Episode_Sentiment``, ``Publication_Day``,
    ``Publication_Time``, ``Episode_Title`` and ``Podcast_Name`` are replaced
    by their pandas categorical codes. Missing values encode as ``-1``.

    Args:
        filename: File name inside the ``./data`` directory (resolved against
            the current working directory).
        categories: Optional mapping ``column name -> ordered list of category
            values``. For every column listed, the code of a value is its
            position in that list, and values not in the list encode as
            ``-1``. Columns not listed (or all columns when ``None``) derive
            their categories from the values present in this file.

    Returns:
        The loaded frame with the six categorical columns replaced by integer
        codes; all other columns are left as read.

    Raises:
        AssertionError: If the file does not exist.
        KeyError: If any of the expected categorical columns is absent.

    NOTE(review): without ``categories`` the codes depend only on the values
    found in this one file, so two files with different value sets get
    different codes for the same label. If the consuming model was fitted on
    codes derived from another file, pass that file's vocabulary here — confirm
    against the training code.
    """
    p = Path(f"./data/{filename}")
    assert p.exists(), f"data file not found: {p}"

    df = pd.read_csv(filepath_or_buffer=p)

    categorical_columns = (
        "Genre",
        "Episode_Sentiment",
        "Publication_Day",
        "Publication_Time",
        "Episode_Title",
        "Podcast_Name",
    )

    # Report every absent column at once instead of failing on the first one.
    missing = [col for col in categorical_columns if col not in df.columns]
    if missing:
        raise KeyError(f"{p} is missing expected columns: {missing}")

    known = categories or {}
    for col in categorical_columns:
        values = df[col]
        if col in known:
            vocabulary = list(known[col])
            # Mask values outside the supplied vocabulary so they are treated
            # like missing values and encode as -1.
            values = values.where(values.isin(vocabulary)).astype(
                pd.CategoricalDtype(categories=vocabulary)
            )
        else:
            values = values.astype("category")
        df[col] = values.cat.codes

    return df


# Load the test split from ./data with its categorical columns integer-encoded.
test_df = load_data(filename="test.csv")

In [9]:
# Location of the serialized model artifact, relative to the working directory.
modeldir = Path("./models")
modelpath = modeldir.joinpath("rf_ga_tuned_fst_model_with_pipeline.pkl")

# Stop here if the artifact is missing rather than failing later at load time.
assert modelpath.exists()

In [4]:
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts from a trusted source.
bundle = joblib.load(filename=modelpath)

In [5]:
# Pull the estimator out of the loaded bundle. `.get` returns None for a missing
# key, so check explicitly and fail here with a clear message instead of later
# with an AttributeError on None.
model = bundle.get("model")
if model is None:
    raise KeyError("loaded bundle has no 'model' entry")

In [6]:
# Display the loaded estimator's configuration, including nested step parameters.
model.get_params(deep=True)

{'memory': None,
 'steps': [('imputer',
   OptionalTransformer(transformer=SimpleImputer(strategy='median'))),
  ('scaler', OptionalTransformer()),
  ('regressor',
   RandomForestRegressor(bootstrap=False, max_depth=32, max_features=None,
                         min_samples_leaf=5, min_samples_split=8, n_estimators=187,
                         random_state=42))],
 'transform_input': None,
 'verbose': False,
 'imputer': OptionalTransformer(transformer=SimpleImputer(strategy='median')),
 'scaler': OptionalTransformer(),
 'regressor': RandomForestRegressor(bootstrap=False, max_depth=32, max_features=None,
                       min_samples_leaf=5, min_samples_split=8, n_estimators=187,
                       random_state=42),
 'imputer__transformer__add_indicator': False,
 'imputer__transformer__copy': True,
 'imputer__transformer__fill_value': None,
 'imputer__transformer__keep_empty_features': False,
 'imputer__transformer__missing_values': nan,
 'imputer__transformer__strategy': 'median',
 ...}

In [7]:
# Build the feature matrix from everything except the row identifier. The
# prediction column is also removed when it already exists, so re-running this
# cell does not feed earlier predictions back into the model as an extra feature.
features = test_df.drop(columns=["id"]).drop(
    columns=["Listening_Time_minutes"], errors="ignore"
)
test_df["Listening_Time_minutes"] = model.predict(features)

In [None]:
submission_dir = Path("./submissions")
# to_csv does not create parent directories, so make sure the target exists.
submission_dir.mkdir(parents=True, exist_ok=True)

# Select the columns strictly: unlike filter(items=...), plain indexing raises a
# KeyError if a column is missing instead of silently writing an incomplete file.
test_df[["id", "Listening_Time_minutes"]].to_csv(
    submission_dir / "rf_ga_tuned_pp.csv", index=False
)