# Installations

In [None]:
%%writefile requirements.txt
mlflow
optuna

In [None]:
# Install the packages listed in requirements.txt
# (-q: quiet output, -U: upgrade to the newest available versions).
!pip install -qU -r requirements.txt

# Imports

In [None]:
# Standard library imports
import os
import sys
from pathlib import Path

# Third-party imports
import joblib
import yaml
from sklearn.pipeline import Pipeline

# Constants

In [None]:
# Location of the project's YAML configuration file
# (the components follow the Colab Google Drive mount layout).
config_yaml_path = os.path.join(
    '/content',
    'drive',
    'MyDrive',
    'Colab',
    'Machine Learning',
    'Regression',
    'Radancy',
    'config.yaml'
)

# Load config.
# The encoding is passed explicitly so that parsing does not depend on the
# locale's default text encoding; safe_load only builds plain Python
# objects from the YAML document.
with open(config_yaml_path, "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

# Cross-platform path assembly: the project directory is stored in the
# config as a list of path components and joined with pathlib here.
PATH = Path().joinpath(*config["PATH_PARTS"])

# Other parameters read straight from the config
# (a missing key raises KeyError).
RANDOM_STATE = config["RANDOM_STATE"]
COMPRESS = config["COMPRESS"]
N_JOBS = config["N_JOBS"]
TARGET = config["TARGET"]

# Add Project Directory to sys.path

In [None]:
# Add the project directory to sys.path so we can import local modules.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
if str(PATH) not in sys.path:
    sys.path.append(str(PATH))

In [None]:
from ml_utils import *

# MLflow Tracking URI

In [None]:
# Point MLflow at the 'ml_experiments' directory under the project path.
# Path.as_uri() produces a well-formed, percent-encoded file URI
# (file:///...) on every platform; it only accepts absolute paths, hence
# the resolve() call first.
mlflow.set_tracking_uri((PATH / 'ml_experiments').resolve().as_uri())

# Load Data

In [None]:
# Read the serialized hold-out test set from the project directory.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
test_set_file = PATH / 'df_test.pkl'
df_test = joblib.load(test_set_file)

# Pipeline

In [None]:
from data_loader import DataLoader
from temporal_feature_engineer_transformer import (
    TemporalFeatureEngineerTransformer
)
from drop_columns_transformer import DropColumnsTransformer
from log_transformer import LogTransformer
from boolean_to_string_transformer import BooleanToStringTransformer
from target_encoder_transformer import TargetEncoderTransformer
from standard_scaler_transformer import StandardScalerTransformer
from interaction_transformer import InteractionTransformer
from column_selector import ColumnSelector

In [None]:
# Retrieve each preprocessing step through load_model, one call per
# (experiment name, model name) pair, in the order listed below.
# load_model is not defined in this notebook (presumably it comes from the
# `from ml_utils import *` cell -- confirm there).
(
    data_loader,
    temporal_feature_engineer_transformer,
    drop_columns_transformer,
    log_transformer,
    boolean_to_string_transformer,
    target_encoder_transformer,
    standard_scaler_transformer,
    interaction_transformer,
) = [
    load_model(experiment_name=experiment, model_name=artifact)
    for experiment, artifact in (
        ("Data_Loader", "data_loader"),
        (
            "Temporal_Feature_Engineer_Transformer",
            "temporal_feature_engineer_transformer",
        ),
        ("Drop_Columns", "drop_columns_transformer"),
        ("Log_Transformer", "log_transformer"),
        ("Boolean_To_String", "boolean_to_string_transformer"),
        ("Target_Encoder_Transformer", "target_encoder_transformer"),
        ("Standard_Scaler_Transformer", "standard_scaler_transformer"),
        ("Interaction_Transformer", "interaction_transformer"),
    )
]

# The estimator is the only artifact requested with an explicit selection
# criterion (presumably "run with the lowest RMSE" -- verify in ml_utils).
model = load_model(
    experiment_name="LinearSVR",
    model_name="LinearSVR",
    criteria='min_rmse',
)

In [None]:
# Columns kept by the selector step, which sits directly before the model
# in the inference pipeline.
selected_feature_columns = [
    "category_id_encoded_scaled",
    "day_of_week_encoded_scaled",
    "market_id_encoded_scaled",
    "market_popularity_transformed_scaled",
    "publisher_avg_clicks_transformed_scaled",
    "publisher_encoded_scaled",
]
column_selector = ColumnSelector(columns=selected_feature_columns)

In [None]:
# List of (step name, artifact filename) string pairs, in pipeline order.
# Every preprocessing step uses "<step name>.pkl" as its filename.
estimators = [
    (step_name, f"{step_name}.pkl")
    for step_name in (
        "data_loader",
        "temporal_feature_engineer_transformer",
        "drop_columns_transformer",
        "log_transformer",
        "boolean_to_string_transformer",
        "target_encoder_transformer",
        "standard_scaler_transformer",
        "interaction_transformer",
        "column_selector",
    )
]
# The model entry is the one exception: it uses a .joblib extension.
estimators.append(("model", "model.joblib"))

In [None]:
# Chain the already-loaded transformers, the column selector and the model
# into a single scikit-learn Pipeline. Steps run in the order listed.
pipeline_steps = [
    ("data_loader", data_loader),
    ("temporal_feature_engineer", temporal_feature_engineer_transformer),
    ("drop_columns", drop_columns_transformer),
    ("log_transform", log_transformer),
    ("boolean_to_string", boolean_to_string_transformer),
    ("target_encoder", target_encoder_transformer),
    ("standard_scaler", standard_scaler_transformer),
    ("interaction", interaction_transformer),
    ("column_selector", column_selector),
    ("model", model),
]
inference_pipeline = Pipeline(steps=pipeline_steps)

In [None]:
# Persist the assembled pipeline under the name "inference_pipeline.pkl".
# save_object is not defined in this notebook (presumably provided by
# ml_utils; where the file ends up depends on that helper -- confirm there).
save_object(inference_pipeline, "inference_pipeline.pkl")

In [None]:
# Run the hold-out test frame through every pipeline step and collect the
# final model's predictions.
y_pred = inference_pipeline.predict(df_test)

In [None]:
# Correct the misspelled "converions" column name; the rename mutates
# df_test in place.
df_test.rename(columns={"converions": "conversions"}, inplace=True)

# Ground truth is cost divided by conversions. Zero conversion counts are
# swapped for a small positive value so the division never hits zero.
zero_replacement = 0.1
nonzero_conversions = df_test['conversions'].replace(0, zero_replacement)
y_test = df_test['cost'] / nonzero_conversions

In [None]:
# Score the predictions against the hold-out targets.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Report the three metrics, one per line, rounded to four decimals.
print(
    f"Hold-out Test RMSE: {rmse:.4f}",
    f"Hold-out Test R²: {r2:.4f}",
    f"Hold-out Test MAE: {mae:.4f}",
    sep="\n",
)