In [None]:
import pandas as pd

In [None]:
# Load the dataset; the relative path is resolved against the kernel's working directory.
df = pd.read_csv(filepath_or_buffer="dynamic_pricing.csv")
# Preview the first five rows to sanity-check the parsed columns.
df.head(n=5)

## Categorical encoding

### Label encoding

Convert ordered (ordinal) categories into a single numerical column, using an explicitly defined mapping that preserves the category order.

In [None]:
# Ordinal codes are derived from tuple position: the first label maps to 0,
# the next to 1, and so on, so the tuple order *is* the category order.
customer_loyalty_mapping = {
    tier: rank for rank, tier in enumerate(("Regular", "Silver", "Gold"))
}
vehicle_type_mapping = {
    vehicle: rank for rank, vehicle in enumerate(("Economy", "Premium"))
}

In [None]:
def label_encode(df: pd.DataFrame, column: str, mapping: dict) -> pd.DataFrame:
    """Replace a categorical column with a mapped ``<column>_Encoded`` column.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: Frame containing ``column``.
        column: Name of the categorical column to encode.
        mapping: Category value -> numeric code.

    Returns:
        A new DataFrame without ``column`` and with ``<column>_Encoded``
        holding the mapped codes.

    Raises:
        KeyError: If ``column`` is not present in ``df``.

    Note:
        Values missing from ``mapping`` become NaN (``Series.map`` semantics),
        which also turns the encoded column into a float column.
    """
    encoded = df[column].map(mapping)
    # drop + assign both return new frames, so the caller's DataFrame is never
    # modified (the previous item-assignment added the column to the input).
    return df.drop(columns=[column]).assign(**{f"{column}_Encoded": encoded})

In [None]:
# Apply both ordinal encodings in one chain; each step swaps the raw column
# for its "<column>_Encoded" counterpart.
# NOTE: not idempotent - the raw columns are dropped, so re-running this cell
# without reloading `df` raises KeyError.
df = (
    df
    .pipe(label_encode, "Customer_Loyalty_Status", customer_loyalty_mapping)
    .pipe(label_encode, "Vehicle_Type", vehicle_type_mapping)
)
df.head()

### One-hot encoding

Convert each category value into its own binary (0/1) indicator column.

In [None]:
def category_to_columns(df: pd.DataFrame, column: str) -> pd.DataFrame:
    """One-hot encode ``column`` and return the frame with it replaced.

    Args:
        df: Frame containing ``column``.
        column: Name of the categorical column to expand.

    Returns:
        A new DataFrame in which ``column`` is removed and one integer 0/1
        indicator column per category value, named ``<column>_<value>``,
        is appended on the right.
    """
    indicators = pd.get_dummies(df[column], prefix=column, dtype=int)
    remaining = df.drop(columns=[column])
    # Column-wise concat keeps the original columns first, indicators last.
    return pd.concat([remaining, indicators], axis="columns")

In [None]:
# Expand both nominal columns into indicator columns in a single chain.
# NOTE: not idempotent - the source columns are dropped, so re-running this
# cell without rebuilding `df` raises KeyError.
df = (
    df
    .pipe(category_to_columns, "Location_Category")
    .pipe(category_to_columns, "Time_of_Booking")
)
df.head()

In [None]:
# Pearson correlation of every column with the ride cost, rounded for display.
df.corr(method="pearson").loc[:, "Historical_Cost_of_Ride"].round(3)

In [None]:
# Correlations with ride cost, restricted to rides priced near a chosen percentile.
PERCENTILE = 0.8         # quantile of the cost distribution the band is centred on
PERCENTILE_WINDOW = 0.1  # total band width as a fraction of the MAXIMUM cost (price units, not a quantile range)
TARGET_COLUMN = "Historical_Cost_of_Ride"

# Half-width of the band, in the same units as the cost column.
price_lambda = df[TARGET_COLUMN].max() * PERCENTILE_WINDOW / 2
price_percentile = df[TARGET_COLUMN].quantile(PERCENTILE)

# Keep rows strictly inside (percentile - half-width, percentile + half-width).
in_price_band = (
    (df[TARGET_COLUMN] > price_percentile - price_lambda)
    & (df[TARGET_COLUMN] < price_percentile + price_lambda)
)
df_selected = df[in_price_band]

# Drop the target's self-correlation by label rather than by position: after
# rounding, another feature can tie at 1.0 (or everything can be NaN on a tiny
# subset), in which case "skip the first row" would discard the wrong entry.
(
    df_selected.corr(method="pearson")[TARGET_COLUMN]
    .drop(TARGET_COLUMN)
    .round(3)
    .sort_values(ascending=False)
)