In [1]:
from pathlib import Path
import joblib

import pandas as pd

In [2]:
def load_data(filename: str) -> pd.DataFrame:
    """Read a CSV from ``./data`` and integer-encode its categorical columns.

    Parameters
    ----------
    filename : str
        Name of a CSV file inside the ``./data`` directory (resolved against
        the current working directory).

    Returns
    -------
    pd.DataFrame
        The frame read by ``pd.read_csv`` with ``Genre``, ``Episode_Sentiment``,
        ``Publication_Day``, ``Publication_Time``, ``Episode_Title`` and
        ``Podcast_Name`` replaced by their pandas category codes. Missing
        values in those columns are encoded as ``-1``.

    Raises
    ------
    AssertionError
        If ``./data/<filename>`` does not exist.
    KeyError
        If one of the categorical columns is absent from the file.

    Notes
    -----
    NOTE(review): the codes are derived only from the values present in the
    file being loaded, so the same label can map to a different integer in
    another file. Confirm this matches the encoding used when the model was
    trained.
    """
    categorical_columns = (
        "Genre",
        "Episode_Sentiment",
        "Publication_Day",
        "Publication_Time",
        "Episode_Title",
        "Podcast_Name",
    )

    p = Path(f"./data/{filename}")
    assert p.exists(), f"data file not found: {p}"

    df = pd.read_csv(filepath_or_buffer=p)

    # Label indexing instead of attribute access: it cannot collide with
    # DataFrame attributes/methods and reports a missing column as KeyError.
    for column in categorical_columns:
        df[column] = df[column].astype("category").cat.codes

    return df


# Load the test split from ./data with its categorical columns integer-encoded.
test_df = load_data(filename="test.csv")

In [3]:
# Location of the serialized model bundle; stop here if it has not been produced yet.
modeldir = Path("./models")
modelpath = modeldir.joinpath("rf_ga_tuned_fst_model_with_pipeline.pkl")

assert modelpath.exists()

In [4]:
# Deserialize the saved bundle from disk.
# SECURITY: joblib.load is pickle-based and can execute arbitrary code while
# loading; only load files that come from a trusted source.
# NOTE(review): the pipeline shown by get_params() contains OptionalTransformer,
# presumably a project-defined class; its defining module must be importable
# in this kernel for the load to succeed — confirm.
bundle = joblib.load(modelpath)

In [5]:
# Pull the fitted estimator out of the loaded bundle.
model = bundle.get("model")
# .get() yields None when the key is absent (assuming a dict-like bundle);
# fail here with a clear message instead of an AttributeError on first use.
if model is None:
    raise KeyError('loaded bundle has no "model" entry')

In [6]:
# Display the loaded pipeline's steps and hyperparameters (inspection only; nothing is modified).
model.get_params()

{'memory': None,
 'steps': [('imputer', OptionalTransformer()),
  ('scaler', OptionalTransformer(transformer=MinMaxScaler())),
  ('regressor',
   RandomForestRegressor(max_depth=22, max_features=None, min_samples_split=5,
                         min_weight_fraction_leaf=np.float64(0.012656909394764736),
                         n_estimators=213, random_state=42))],
 'transform_input': None,
 'verbose': False,
 'imputer': OptionalTransformer(),
 'scaler': OptionalTransformer(transformer=MinMaxScaler()),
 'regressor': RandomForestRegressor(max_depth=22, max_features=None, min_samples_split=5,
                       min_weight_fraction_leaf=np.float64(0.012656909394764736),
                       n_estimators=213, random_state=42),
 'imputer__transformer': None,
 'scaler__transformer__clip': False,
 'scaler__transformer__copy': True,
 'scaler__transformer__feature_range': (0, 1),
 'scaler__transformer': MinMaxScaler(),
 'regressor__bootstrap': True,
 'regressor__ccp_alpha': 0.0,
 'regres

In [7]:
# Build the feature matrix: every column except the row identifier. A
# "Listening_Time_minutes" column left over from a previous run of this cell
# is dropped as well, so re-running the cell does not pass earlier
# predictions to the model as an extra feature.
test_features = test_df.drop(columns=["id"]).drop(
    columns=["Listening_Time_minutes"], errors="ignore"
)
test_df["Listening_Time_minutes"] = model.predict(test_features)

In [8]:
# Write the two-column submission file (row id + predicted listening time).
submission_dir = Path("./submissions")
# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing.
submission_dir.mkdir(parents=True, exist_ok=True)

# Select by label so a missing prediction column raises KeyError;
# DataFrame.filter(items=...) would silently skip it and write an id-only file.
test_df[["id", "Listening_Time_minutes"]].to_csv(
    submission_dir / "rf_ga_tuned_pp_fst.csv", index=False
)