In [None]:
import pandas as pd
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

## Input data

In [None]:
# Load the raw dataset from a CSV file (path is relative to the working directory)
# and preview its first rows.
df = pd.read_csv("dynamic_pricing.csv")
df.head()

## Categorical encoding

### Label encoding
Convert ordered categories into a numerical column based on a defined mapping.

In [None]:
# Integer codes for the ordered categories: each dict maps a category label to its
# numeric code, starting at 0. Labels not listed here are turned into NaN by
# `Series.map` when these dicts are applied in `label_encode`.
customer_loyalty_mapping = {"Regular": 0, "Silver": 1, "Gold": 2}
vehicle_type_mapping = {"Economy": 0, "Premium": 1}

In [None]:
def label_encode(df: pd.DataFrame, column: str, mapping: dict) -> pd.DataFrame:
    """Replace an ordered categorical column with a numerically encoded one.

    Args:
        df: Input frame. It is left unmodified; a new frame is returned.
        column: Name of the categorical column to encode.
        mapping: Dictionary from category label to numeric code.

    Returns:
        A new DataFrame in which ``column`` has been removed and a column named
        ``<column>_Encoded`` holds the mapped codes. Values of ``column`` that
        are not keys of ``mapping`` become NaN.

    Raises:
        KeyError: If ``column`` is not present in ``df``.
    """
    encoded = df[column].map(mapping)
    # Build the result on the frame returned by `drop` instead of assigning into
    # `df`, so the caller's DataFrame does not silently gain the encoded column.
    return df.drop(columns=column).assign(**{column + "_Encoded": encoded})

In [None]:
# Swap each ordered categorical column for its "<column>_Encoded" numeric version,
# then preview the result.
df = label_encode(df, "Customer_Loyalty_Status", customer_loyalty_mapping)
df = label_encode(df, "Vehicle_Type", vehicle_type_mapping)
df.head()

### One-hot encoding
Convert categories into binary columns.

In [None]:
def category_to_columns(df: pd.DataFrame, column: str) -> pd.DataFrame:
    """One-hot encode a categorical column.

    Args:
        df: Input frame; it is not modified.
        column: Name of the categorical column to expand.

    Returns:
        A new DataFrame made of every column of ``df`` except ``column``,
        followed by one 0/1 integer indicator column per category, named
        ``<column>_<category>``.
    """
    remaining = df.drop(columns=[column])
    indicators = pd.get_dummies(df[column], prefix=column, dtype=int)
    return pd.concat([remaining, indicators], axis=1)

In [None]:
# Replace each unordered categorical column with one indicator column per category
# (named "<column>_<category>"), then preview the result.
df = category_to_columns(df, "Location_Category")
df = category_to_columns(df, "Time_of_Booking")
df.head()

## Combine variables
Create a new variable by calculating the difference between the number of drivers and the number of riders.

In [None]:
# Collapse the two count columns into a single feature: drivers minus riders
# (positive when there are more drivers than riders). The source columns are
# dropped once the difference has been computed.
df = df.assign(
    Drivers_Riders_Difference=lambda frame: frame["Number_of_Drivers"] - frame["Number_of_Riders"]
).drop(columns=["Number_of_Drivers", "Number_of_Riders"])
df.head()

## Correlation analysis

Pearson correlations for the whole dataset:

In [None]:
# Pearson correlation of every column with the target, rounded to three decimals.
# The "Historical_Cost_of_Ride" entry itself is the target's self-correlation.
df.corr(method="pearson").round(3)["Historical_Cost_of_Ride"]

Pearson correlations for rides whose cost lies in a window around a specific cost-of-ride percentile:

In [None]:
# Quantile of the ride cost around which rides are selected, and the width of the
# selection band expressed as a fraction of the maximum ride cost.
PERCENTILE = 0.8
PERCENTILE_WINDOW = 0.1

# Half-width of the band in price units: PERCENTILE_WINDOW is applied to the
# maximum ride cost, not to the percentile rank.
price_lambda = df["Historical_Cost_of_Ride"].max() * PERCENTILE_WINDOW / 2
# Ride cost at the requested percentile; this is the centre of the band.
price_percentile = df["Historical_Cost_of_Ride"].quantile(PERCENTILE)

# Keep rides whose cost lies strictly inside (centre - half-width, centre + half-width).
df_selected = df[
    (df["Historical_Cost_of_Ride"] < price_percentile + price_lambda)
    & (df["Historical_Cost_of_Ride"] > price_percentile - price_lambda)
]

# Correlation of each feature with the target inside the band, strongest positive first.
# The target's self-correlation is removed by label rather than by position, so the
# right row is dropped even if another feature ties with it after rounding or if
# every correlation is NaN.
(
    df_selected.corr(method="pearson")
    .round(3)
    .sort_values(by="Historical_Cost_of_Ride", ascending=False)["Historical_Cost_of_Ride"]
    .drop(labels="Historical_Cost_of_Ride")
)

## Prepare data for model training

In [None]:
# Shuffle all rows once with a fixed seed (so the split is reproducible) and
# renumber the index from 0.
df_shuffled = df.sample(frac=1, random_state=1234).reset_index(drop=True)

# Fraction of rows assigned to each split; the test split takes whatever remains.
train_ratio, validation_ratio = 0.7, 0.15
test_ratio = 1 - (train_ratio + validation_ratio)

# Row positions at which the training and validation slices end.
train_end = int(train_ratio * len(df_shuffled))
validation_end = int((train_ratio + validation_ratio) * len(df_shuffled))

# Three contiguous, non-overlapping slices of the shuffled frame.
train_df, validation_df, test_df = (
    df_shuffled.iloc[:train_end],
    df_shuffled.iloc[train_end:validation_end],
    df_shuffled.iloc[validation_end:],
)

# Separate features (X) and target (y) for each split.
X_train, y_train = (
    train_df.drop(columns=["Historical_Cost_of_Ride"]),
    train_df["Historical_Cost_of_Ride"],
)
X_validation, y_validation = (
    validation_df.drop(columns=["Historical_Cost_of_Ride"]),
    validation_df["Historical_Cost_of_Ride"],
)
X_test, y_test = (
    test_df.drop(columns=["Historical_Cost_of_Ride"]),
    test_df["Historical_Cost_of_Ride"],
)

## Define model structure

In [None]:
# Fully connected regression network: three ReLU hidden layers (64 -> 32 -> 16 units)
# followed by a single linear output unit.
# The input width is declared with an explicit Input layer instead of the `input_dim`
# argument on the first Dense layer, which Keras 3 warns against; Input works on
# both older and newer Keras versions.
model = Sequential(
    [
        Input(shape=(X_train.shape[1],)),  # one input per feature column
        Dense(64, activation="relu"),
        Dense(32, activation="relu"),
        Dense(16, activation="relu"),
        Dense(1),  # Output layer with one neuron since we're predicting a single value
    ]
)

# Compile the model: Adam optimizer, mean squared error as the training loss,
# and mean absolute error reported as an additional metric.
model.compile(optimizer="adam", loss="mean_squared_error", metrics=["mae"])

## Train the model

In [None]:
# Train for 100 epochs in mini-batches of 32 rows; the validation split is passed
# so its loss and metrics are computed alongside the training ones.
model.fit(X_train, y_train, validation_data=(X_validation, y_validation), epochs=100, batch_size=32)

## Evaluate results

In [None]:
# Score the trained model on the held-out test split. `evaluate` returns the loss
# (mean squared error) followed by the compiled metric (mean absolute error).
loss, mae = model.evaluate(X_test, y_test)