In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")



## DATA LOADING, CLEANING & INTEGRATION

In [4]:
# Load the three raw inputs.
# Orders.csv and LeadtimeService.csv are semicolon-separated; Orders.csv also
# uses a decimal comma.
orders = pd.read_csv("Orders.csv", encoding="latin-1", sep=";", decimal=",")
leadtime = pd.read_csv("LeadtimeService.csv", encoding="latin-1", sep=";")

# airports.csv is comma-separated, so "," cannot double as the decimal mark
# (it can stop dotted numbers from being parsed as floats).
# keep_default_na=False keeps the literal text "NA" (e.g. the ISO country code
# for Namibia) instead of converting it to NaN; only empty fields are missing.
airports = pd.read_csv(
    "airports.csv",
    encoding="latin-1",
    sep=",",
    keep_default_na=False,
    na_values=[""],
)


FileNotFoundError: [Errno 2] No such file or directory: 'Orders.csv'

In [None]:
# Preview the raw orders table loaded from Orders.csv.
orders

In [None]:
# Select relevant columns for analysis from orders dataset

cols_keep = [
    "Type",
    "Direction",
    "DSV-CW Ref.",
    "Goods Description",
    "Pcs",
    "Gwgt",
    "Cwgt",
    "NOTIFICATION date & Time",
    "ACTUAL Delivery & Time",
    "Service Level",
    "DGR",
    "Real Origin Airport",
    "Real Destination Airport",
    "ZONE",
    "AIRPORT ORIGIN-DESTINATION"
]

# .copy() makes df_analysis an independent frame: later cells add and overwrite
# columns on it, which must not write into (or warn about) a slice of `orders`.
df_analysis = orders[cols_keep].copy()

In [None]:
# Count orders per raw "Service Level" value (before removing error rows).
df_analysis["Service Level"].value_counts()

In [None]:
# Remove the rows whose Service Level holds the spreadsheet error marker.
error_label = "ERROR en Service Level (columna AK)"
has_valid_service_level = df_analysis["Service Level"].ne(error_label)

df_analysis = df_analysis[has_valid_service_level]

In [None]:
# Re-count "Service Level" values after the error rows were removed.
df_analysis["Service Level"].value_counts()

In [None]:
# Service Level mapping used in the analysis
service_level_mapping = {
    "ROV": "Normal",
    "CRV": "Urgent",
    "AOV": "Critical"
}

# Strip surrounding whitespace before mapping so that a padded code such as
# "ROV " still resolves; codes outside the mapping become NaN.
# assign() returns a new frame, so the column is not written into the filtered
# slice produced by the previous cell.
df_analysis = df_analysis.assign(**{
    "Service Type": (
        df_analysis["Service Level"]
        .str.strip()
        .map(service_level_mapping)
    )
})

# Show the distinct code -> label pairs to verify the mapping.
df_analysis[["Service Level", "Service Type"]].drop_duplicates()

In [None]:
# Count orders per ZONE value.
df_analysis["ZONE"].value_counts()

In [None]:
# Preview the lead-time table loaded from LeadtimeService.csv.
leadtime

In [None]:
# Drop spaces in column names
df_analysis.columns = df_analysis.columns.str.strip()
leadtime.columns = leadtime.columns.str.strip()

# Clean the string key columns so both sides of the join match exactly
df_analysis["ZONE"] = df_analysis["ZONE"].astype(str).str.strip()
df_analysis["Service Level"] = df_analysis["Service Level"].astype(str).str.strip()

leadtime["Zone"] = leadtime["Zone"].astype(str).str.strip()
leadtime["SLA"] = leadtime["SLA"].astype(str).str.strip()

# Keep one row per (Zone, SLA). A repeated key in the lookup would otherwise
# duplicate every matching order in the left merge. If duplicates disagree on
# Leadtime, the first occurrence wins.
sla_lookup = (
    leadtime[["Zone", "SLA", "Leadtime"]]
    .drop_duplicates(subset=["Zone", "SLA"])
)

# Merge lead time information into the main dataframe, then drop the
# auxiliary join columns brought in from the lookup.
rows_before_merge = len(df_analysis)

df_analysis = (
    df_analysis
    .merge(
        sla_lookup,
        how="left",
        left_on=["ZONE", "Service Level"],
        right_on=["Zone", "SLA"]
    )
    .drop(columns=["Zone", "SLA"])
)

assert len(df_analysis) == rows_before_merge, "lead-time merge changed the number of orders"


In [None]:
# Inspect the analysis frame after the lead-time merge.
df_analysis

In [None]:
# Preview the airports reference table loaded from airports.csv.
airports

In [None]:
# Drop spaces in column names
airports.columns = airports.columns.str.strip()

# Clean string columns to ensure proper merging
df_analysis["Real Origin Airport"] = df_analysis["Real Origin Airport"].astype(str).str.strip()
df_analysis["Real Destination Airport"] = df_analysis["Real Destination Airport"].astype(str).str.strip()

# Build a clean IATA -> country lookup:
# - rows without an IATA code are removed BEFORE casting to str; otherwise they
#   become the text "nan" and every order with a missing airport (also "nan"
#   after astype(str)) would match all of them, multiplying rows;
# - duplicate IATA codes are collapsed (first occurrence wins) so each order
#   matches at most one airport.
airport_lookup = airports[["iata_code", "iso_country"]].dropna(subset=["iata_code"]).copy()
airport_lookup["iata_code"] = airport_lookup["iata_code"].astype(str).str.strip()
airport_lookup = (
    airport_lookup[airport_lookup["iata_code"] != ""]
    .drop_duplicates(subset="iata_code")
)

rows_before_merge = len(df_analysis)

# Same left merge for ORIGIN and DESTINATION; the lookup columns are renamed
# with a side suffix before merging to avoid name clashes.
for side, airport_col in (
    ("origin", "Real Origin Airport"),
    ("destination", "Real Destination Airport"),
):
    df_analysis = df_analysis.merge(
        airport_lookup.rename(
            columns={
                "iata_code": f"iata_code_{side}",
                "iso_country": f"iso_country_{side}"
            }
        ),
        how="left",
        left_on=airport_col,
        right_on=f"iata_code_{side}"
    )

assert len(df_analysis) == rows_before_merge, "airport merge changed the number of orders"


In [None]:
# Final dataframe for analysis: the raw airport code columns are no longer needed
raw_airport_cols = ["Real Origin Airport", "Real Destination Airport"]
df_analysis = df_analysis.drop(raw_airport_cols, axis=1)
df_analysis

## TARGET ENGINEERING (leadtime_real)

In [None]:
# Parse both timestamp columns as day-first datetimes; unparseable values become NaT.

notification_col = "NOTIFICATION date & Time"
delivery_col = "ACTUAL Delivery & Time"

for timestamp_col in (notification_col, delivery_col):
    df_analysis[timestamp_col] = pd.to_datetime(
        df_analysis[timestamp_col], errors="coerce", dayfirst=True
    )

missing_timestamps = df_analysis[[notification_col, delivery_col]].isna().sum()

print("\nMissing timestamps after datetime conversion:")
display(missing_timestamps)

In [None]:
# Keep only orders where both the notification and the delivery timestamp are present
required_timestamps = [notification_col, delivery_col]
df_analysis = df_analysis.dropna(axis=0, how="any", subset=required_timestamps)

df_analysis

In [None]:
# Real lead time = delivery timestamp minus notification timestamp, in hours

elapsed = df_analysis[delivery_col] - df_analysis[notification_col]
df_analysis["leadtime_real_hours"] = elapsed.dt.total_seconds().div(3600)

df_analysis[["leadtime_real_hours", notification_col, delivery_col]]


In [None]:
# Sanity check: a negative lead time means delivery is recorded before notification
negative_count = df_analysis["leadtime_real_hours"].lt(0).sum()
print("Negatives:", negative_count)


In [None]:
# Inspect orders whose lead time is zero or negative (delivery recorded at or
# before the notification timestamp).

negatives_df = df_analysis[df_analysis["leadtime_real_hours"] <= 0]

# The filter is <= 0, so the label says "non-positive" (zero rows are included).
print(f"Non-positive lead times: {len(negatives_df)}")

display(
    negatives_df[
        [
            "NOTIFICATION date & Time",
            "ACTUAL Delivery & Time",
            "leadtime_real_hours",
            "ZONE",
            "Service Level",
            "Service Type"
        ]
    ].sort_values("leadtime_real_hours")
)

In [None]:
# Flag (1/0) the orders whose lead time is zero or negative

is_invalid_leadtime = df_analysis["leadtime_real_hours"].le(0)
df_analysis["invalid_leadtime_flag"] = is_invalid_leadtime.astype(int)

df_analysis["invalid_leadtime_flag"].value_counts()

In [None]:
# Keep strictly positive lead times only (independent copy of the filtered rows)
has_positive_leadtime = df_analysis["leadtime_real_hours"].gt(0)
df_analysis = df_analysis.loc[has_positive_leadtime].copy()

# Statistics summary
leadtime_summary = df_analysis["leadtime_real_hours"].describe()
print(leadtime_summary)


In [None]:
# Cap the lead time at its 99th percentile to limit the influence of outliers

leadtime_99th_percentile = df_analysis["leadtime_real_hours"].quantile(0.99)

capped_hours = df_analysis["leadtime_real_hours"].clip(upper=leadtime_99th_percentile)
df_analysis["leadtime_ml_hours"] = capped_hours

# Statistics summary
capped_summary = df_analysis["leadtime_ml_hours"].describe()
print(capped_summary)

In [None]:
# leadtime_real_hours: real operational lead time (EDA)
# leadtime_ml_hours: capped target used for ML training with 99th percentile

In [None]:
# on_time = 1 when the real lead time is within the contracted SLA lead time, else 0.
# NOTE(review): rows with a missing "Leadtime" compare as False and end up as 0 - confirm this is intended.
within_sla = df_analysis["leadtime_real_hours"].le(df_analysis["Leadtime"])
df_analysis["on_time"] = within_sla.astype(int)

In [None]:
# Inspect the analysis frame after the lead-time and on_time columns were added.
df_analysis

## Exploratory Data Analysis


In [None]:
# SLA RECOMMENDATION (Business logic)

# Goal: downgrade ONLY as needed based on real leadtime:
# - If real <= SLA(Critical)  -> recommend Critical
# - elif real <= SLA(Urgent)  -> recommend Urgent
# - else                      -> Normal   (fallback if nothing reaches)

# IMPORTANT: This avoids jumping to Normal if Urgent already covers.

In [None]:
# Ranking SLA service levels (1=cheapest, 3=most expensive)
sla_rank = {
    "Normal": 1,
    "Urgent": 2,
    "Critical": 3
}

# Lookup of SLA targets keyed by (ZONE, Service Type): first non-null Leadtime per pair
sla_targets_lookup = (
    df_analysis
    .groupby(["ZONE", "Service Type"])["Leadtime"]
    .first()
    .to_dict()
)


def _zone_sla_target(service_type):
    """Return, for every order, the SLA target of `service_type` in that order's ZONE.

    Zones with no target for the tier get NaN, so a later `real <= target`
    comparison is False: a tier that does not exist in a zone can never be
    the recommended one. (An infinite default would make the tier match every
    order instead.)
    """
    return df_analysis["ZONE"].map(
        lambda zone: sla_targets_lookup.get((zone, service_type), np.nan)
    )


real_hours = df_analysis["leadtime_real_hours"]

# Step 1: Check if contracted SLA was met (on_time)
met_sla = real_hours <= df_analysis["Leadtime"]

# Step 2: Find best fit - check from MOST EXPENSIVE to CHEAPEST (Critical -> Urgent -> Normal)
# np.select takes the first condition that holds, so this picks the most
# expensive tier whose target still covers the real lead time.
choices_best_fit = ["Critical", "Urgent", "Normal"]
conditions_best_fit = [
    real_hours <= _zone_sla_target(tier) for tier in choices_best_fit
]
best_fit = np.select(conditions_best_fit, choices_best_fit, default="Normal")  # Out of SLA -> Normal

# Step 3: Cap best_fit so it NEVER recommends a more expensive tier than contracted
# (.values drops the index so the comparison below is purely positional)
best_fit_rank = pd.Series(best_fit).map(sla_rank).values
contracted_rank = df_analysis["Service Type"].map(sla_rank).values
best_fit_capped = np.where(best_fit_rank > contracted_rank, df_analysis["Service Type"], best_fit)

# Step 4: Final recommendation
# - If SLA met -> keep contracted level (no change needed)
# - If SLA NOT met -> use capped best_fit (never an upgrade)
df_analysis["sla_recommended"] = np.where(met_sla, df_analysis["Service Type"], best_fit_capped)

# Map to ranks for comparison
df_analysis["sla_contracted_rank"] = df_analysis["Service Type"].map(sla_rank)
df_analysis["sla_recommended_rank"] = df_analysis["sla_recommended"].map(sla_rank)

# Calculate levels to downgrade (contracted rank minus recommended rank)
df_analysis["levels_to_downgrade"] = (
    df_analysis["sla_contracted_rank"] - df_analysis["sla_recommended_rank"]
)

# Downgrade is only possible if there are actual savings (levels_to_downgrade > 0)
df_analysis["downgrade_possible"] = (df_analysis["levels_to_downgrade"] > 0)

# Summary statistics
print("="*60)
print("DOWNGRADE ANALYSIS SUMMARY")
print("="*60)
print(f"\nTotal orders analyzed: {len(df_analysis)}")
print(f"Orders that MET SLA (keep contracted): {met_sla.sum()} ({met_sla.mean():.1%})")
print(f"Orders with downgrade opportunity: {df_analysis['downgrade_possible'].sum()}")
print(f"Downgrade rate: {df_analysis['downgrade_possible'].mean():.1%}")

print("\nBreakdown by levels_to_downgrade:")
print(df_analysis['levels_to_downgrade'].value_counts().sort_index())

print("\nDowngrade opportunities by contracted Service Type:")
downgrade_by_service = df_analysis.groupby("Service Type").agg({
    'downgrade_possible': ['sum', 'mean'],
    'levels_to_downgrade': 'mean'
})
print(downgrade_by_service)

print("\nRecommended SLA distribution:")
print(pd.crosstab(df_analysis['Service Type'], df_analysis['sla_recommended'], margins=True))

In [None]:
# Cross-tabulate contracted Service Type (rows) against recommended SLA (columns),
# with row/column totals labelled "All".
contracted_sla = df_analysis["Service Type"]
recommended_sla = df_analysis["sla_recommended"]

sla_distribution = pd.crosstab(
    index=contracted_sla,
    columns=recommended_sla,
    margins=True,
    margins_name="All",
)

sla_distribution


In [None]:
# Print a few example orders for each downgrade outcome
example_cols = [
    "DSV-CW Ref.", "ZONE", "Service Type", "leadtime_real_hours", "Leadtime",
    "sla_recommended", "levels_to_downgrade", "downgrade_possible",
]

print("\n" + "="*60)
print("EXAMPLES:")
print("="*60)

print("\n1. Downgrade possible (levels > 0):")
with_downgrade = df_analysis[df_analysis["downgrade_possible"]]
print(with_downgrade[example_cols].head(3))

print("\n2. No downgrade (levels = 0, already at minimum or met SLA):")
without_downgrade = df_analysis[df_analysis["levels_to_downgrade"] == 0]
print(without_downgrade[example_cols].head(3))

print("\n3. Specific order SBCN0260531:")
specific_order = df_analysis[df_analysis["DSV-CW Ref."] == "SBCN0260531"][example_cols]
if not specific_order.empty:
    print(specific_order)
else:
    print("Order not found in filtered dataset")

In [None]:
# Inspect the analysis frame including the SLA recommendation columns.
df_analysis

## Machine Learning

In [None]:
# Display the analysis frame that the modelling section starts from.
df_analysis

In [None]:
# 0) Work on an independent copy of df_analysis
df_ml = df_analysis.copy()

# 1) Derive calendar features from the notification timestamp
notified_at = pd.to_datetime(df_ml["NOTIFICATION date & Time"], errors="coerce")
df_ml["NOTIFICATION date & Time"] = notified_at

df_ml = df_ml.assign(
    day_of_week=notified_at.dt.dayofweek,
    month=notified_at.dt.month,
    hour=notified_at.dt.hour,
)

# 2) Columns removed before modelling, grouped by reason
post_delivery_cols = [
    "ACTUAL Delivery & Time",
    "leadtime_real_hours",
    "leadtime_ml_hours",
    "invalid_leadtime_flag",
    "on_time",
]
sla_logic_cols = [
    "Service Level",
    "Service Type",
    "Leadtime",
    "sla_contracted_rank",
    "sla_recommended_rank",
    "sla_required",
    "levels_to_downgrade",
    "downgrade_possible",
]
identifier_cols = [
    "Type",
    "DSV-CW Ref.",
    "Goods Description",
]
raw_datetime_cols = [
    "NOTIFICATION date & Time",
]

cols_to_drop = post_delivery_cols + sla_logic_cols + identifier_cols + raw_datetime_cols

# errors="ignore" skips any listed column that is not present in df_ml
df_ml = df_ml.drop(columns=cols_to_drop, errors="ignore")

# 3) Final check: remaining columns in alphabetical order
df_ml.columns.sort_values()


In [None]:
# Inspect the modelling frame after the column drop.
df_ml

In [None]:
# Column dtypes of the modelling frame.
df_ml.dtypes

## Principal Component Analysis (PCA)

PCA will be used as an exploratory technique to analyze feature relevance and redundancy.
It helps to identify which variables explain most of the variance and which ones are highly correlated or low-impact.
The results will guide our feature selection while keeping the final models interpretable.

In [None]:
# Select numerical features for PCA.
# include="number" selects every numeric width. A fixed ["int64", "float64"]
# list silently skips int32 columns, which is what the .dt accessors
# (dayofweek / month / hour) return in pandas >= 2.0.

num_features = df_ml.select_dtypes(include="number").columns.tolist()
X_num = df_ml[num_features]

# PCA pipeline with scaling (PCA is scale-sensitive, so standardise first)

pca_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("pca", PCA())
])

X_pca = pca_pipeline.fit_transform(X_num)

# Explained variance ratio of the PCA components

pca = pca_pipeline.named_steps["pca"]

explained_var = pd.DataFrame({
    "PC": [f"PC{i+1}" for i in range(len(pca.explained_variance_ratio_))],
    "Explained Variance Ratio": pca.explained_variance_ratio_,
    "Cumulative Variance": np.cumsum(pca.explained_variance_ratio_)
})

explained_var

In [None]:
# PCA loadings to understand feature contributions to each principal component.
# Rows are labelled from X_num (the frame the PCA was fitted on) rather than
# `num_features`, because that name is reassigned later for the model features
# and would break this cell when re-run. The number of PC columns comes from
# the fitted components, not from the feature count.

loadings = pd.DataFrame(
    pca.components_.T,
    index=X_num.columns,
    columns=[f"PC{i+1}" for i in range(pca.components_.shape[0])]
)

loadings


In [None]:
# Overall importance per original feature: absolute loadings weighted by each
# component's explained variance ratio, summed across components.

abs_loadings = loadings.abs()
weighted_loadings = abs_loadings.multiply(pca.explained_variance_ratio_, axis="columns")

importance = weighted_loadings.sum(axis="columns").sort_values(ascending=False)

importance


- PC1 captures shipment physical characteristics, mainly driven by weight-related features
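- PC2 was interpreted as delivery time behavior, combining target and real lead time. Note that the lead-time columns (`Leadtime`, `leadtime_real_hours`, `leadtime_ml_hours`) are dropped from `df_ml` before the PCA is fitted, so this interpretation should be re-checked against the current loadings.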
- Higher-order components explain residual variance and were not considered for interpretation.

In [None]:
# Numerical features for the ML model
num_features = ["Gwgt", "Pcs"]

# Categorical features for the ML model ("DGR" = dangerous goods)
cat_features = ["ZONE", "DGR", "Direction", "AIRPORT ORIGIN-DESTINATION"]

# Temporal features for the ML model
# (order time of day could impact leadtime and SLA performance)
time_features = ["day_of_week", "month", "hour"]


In [None]:
# Feature matrix X and target y for the ML model

features = [*num_features, *cat_features, *time_features]

X = df_ml.loc[:, features]
y = df_ml.loc[:, "sla_recommended"]  # recommended SLA is the classification target



In [None]:
# Stratified 80/20 train-test split with a fixed seed

split_options = {
    "test_size": 0.2,
    "random_state": 42,
    "stratify": y,
}

X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
#check target distribution in train and test sets to ensure stratification worked

def pct(s):
    """Return the share of each distinct value in `s` as a percentage, rounded to 2 decimals."""
    shares = pd.Series(s).value_counts(normalize=True)
    return shares.mul(100).round(2)

# Class shares per split (should be nearly identical if stratification worked)
train_share = pct(y_train)
test_share = pct(y_test)

print("\nTarget distribution (%)")
print("TRAIN:\n", train_share)
print("\nTEST:\n", test_share)

# Absolute class counts per split
train_counts = pd.Series(y_train).value_counts()
test_counts = pd.Series(y_test).value_counts()

print("\nCounts")
print("TRAIN:\n", train_counts)
print("TEST:\n", test_counts)

# Shapes of the resulting splits
print("\nSplit sizes")
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_test :", X_test.shape, "y_test :", y_test.shape)


In [None]:
# Group rare routes into "OTHER". Route frequencies come from TRAIN only, so
# the test set does not influence which routes count as common.

route_col = "AIRPORT ORIGIN-DESTINATION"

route_counts = X_train[route_col].value_counts()
common_routes = route_counts[route_counts >= 20].index


def _with_grouped_route(frame):
    """Return a copy of `frame` with `route_grouped` added and the raw route column removed."""
    is_common = frame[route_col].isin(common_routes)
    grouped = frame[route_col].where(is_common, "OTHER")
    return frame.assign(route_grouped=grouped).drop(columns=[route_col])


X_train = _with_grouped_route(X_train)
X_test = _with_grouped_route(X_test)

# Final categorical features: the grouped route replaces the raw route column
cat_features = ["ZONE", "DGR", "Direction", "route_grouped"]

In [None]:
# Preprocessing: scale numeric + temporal columns, one-hot encode categoricals.
# Categories unseen during fit are ignored at transform time; any column not
# listed here is dropped.

scaled_columns = num_features + time_features
one_hot = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), scaled_columns),
        ("cat", one_hot, cat_features),
    ],
    remainder="drop",
)


## MODEL 0 - BASELINE (Dummy)

A Dummy Classifier is used as a baseline model to establish a minimum performance benchmark.

This model does not learn from the data and always predicts the most frequent recommended SLA class in the training set, regardless of shipment characteristics. Any machine learning model must outperform this baseline to be considered meaningful and add predictive value.


In [None]:
# Baseline: ignores the features and always predicts the most frequent training class
baseline_steps = [
    ("prep", preprocessor),
    ("model", DummyClassifier(strategy="most_frequent", random_state=42)),
]
dummy_clf = Pipeline(steps=baseline_steps)
dummy_clf.fit(X_train, y_train)

y_pred_dummy = dummy_clf.predict(X_test)

print("=== DummyClassifier (most_frequent) ===")
print(classification_report(y_test, y_pred_dummy))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_dummy))


## MODEL 1 - LOGISTIC REGRESSION (Control)

In the Logistic Regression model, the relationship between *shipment features* and the recommended SLA class is modelled through a linear combination of the inputs.

It serves as a control model to verify whether simple linear patterns can explain the recommended SLA. Its performance provides a reference point before moving to more complex, non-linear models.

*In this project, shipment features refer to the operational, physical, geographical and temporal variables available at booking time: zone, direction, dangerous-goods flag, grouped origin-destination route, gross weight, number of pieces and notification time characteristics (day of week, month and hour).*

In [None]:
# Linear control model; class_weight="balanced" helps if classes are imbalanced.
logreg_model = LogisticRegression(
    max_iter=2000,
    class_weight="balanced",
    n_jobs=None,  # keep default for compatibility
)

logreg_clf = Pipeline(steps=[("prep", preprocessor), ("model", logreg_model)])
logreg_clf.fit(X_train, y_train)

y_pred_logreg = logreg_clf.predict(X_test)

print("\n=== LogisticRegression ===")
print(classification_report(y_test, y_pred_logreg))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_logreg))


## MODEL 2 - RANDOM FOREST CLASSIFIER (Core Model)

The Random Forest Classifier combines multiple decision trees to capture non-linear relationships and feature interactions.

It is well suited for logistics data, where the appropriate SLA depends on complex interactions between zones, routes and weights. This model is expected to significantly outperform linear approaches in predictive accuracy.

In [None]:
# Random forest with balanced class weights and a fixed seed
rf_params = {
    "n_estimators": 300,
    "max_depth": None,
    "min_samples_leaf": 5,
    "random_state": 42,
    "n_jobs": -1,
    "class_weight": "balanced",
}

rf_clf = Pipeline(steps=[
    ("prep", preprocessor),
    ("model", RandomForestClassifier(**rf_params)),
])
rf_clf.fit(X_train, y_train)

y_pred_rf = rf_clf.predict(X_test)

print("\n=== RandomForestClassifier ===")
print(classification_report(y_test, y_pred_rf))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_rf))


## MODEL 3 - GRADIENT BOOSTING 

Gradient Boosting builds trees sequentially, each one correcting the errors of the previous model.

It focuses on difficult cases and often achieves high predictive performance on structured tabular data.
However, it requires careful tuning to avoid overfitting.

In [None]:

# Gradient boosting with library defaults and a fixed seed
gb_model = GradientBoostingClassifier(random_state=42)

gb_clf = Pipeline(steps=[("prep", preprocessor), ("model", gb_model)])
gb_clf.fit(X_train, y_train)

y_pred_gb = gb_clf.predict(X_test)

print("\n=== GradientBoostingClassifier ===")
print(classification_report(y_test, y_pred_gb))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_gb))


In [None]:
# Compare all fitted models on the test set: accuracy, macro recall and per-class recall
models = {
    "Dummy": dummy_clf,
    "Logistic Regression": logreg_clf,
    "Random Forest": rf_clf,
    "Gradient Boosting": gb_clf
}

class_order = ["Normal", "Urgent", "Critical"]

results = []
for name, model in models.items():
    y_pred = model.predict(X_test)

    # One call returns the recall of every class, in class_order
    per_class_recall = recall_score(y_test, y_pred, labels=class_order, average=None)

    row = {
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Macro Recall": recall_score(y_test, y_pred, average="macro"),
    }
    for class_label, class_recall in zip(class_order, per_class_recall):
        row[f"Recall {class_label}"] = class_recall

    results.append(row)

results_df = pd.DataFrame(results).set_index("Model").round(3)
results_df


Several models were tested. Gradient Boosting showed the best trade-off between overall performance and recall for minority classes, while remaining stable and interpretable. Therefore, it was selected as the final model.

In [None]:
# Confusion matrix for the final model (Gradient Boosting)

y_pred_final = y_pred_gb  # Gradient Boosting is the final chosen model

labels = ["Normal", "Urgent", "Critical"]

cm = confusion_matrix(y_test, y_pred_final, labels=labels)

fig, ax = plt.subplots(figsize=(6, 5))
heatmap = ax.imshow(cm)
ax.set_title("Confusion Matrix – Final Model")
ax.set_xlabel("Predicted SLA")
ax.set_ylabel("Actual SLA")

tick_positions = list(range(len(labels)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(labels)
ax.set_yticks(tick_positions)
ax.set_yticklabels(labels)

# Annotate each cell with its count (x = predicted column, y = actual row)
for (row_idx, col_idx), count in np.ndenumerate(cm):
    ax.text(col_idx, row_idx, count, ha="center", va="center")

fig.colorbar(heatmap, ax=ax)
fig.tight_layout()
plt.show()


The confusion matrix shows strong performance for Normal shipments, which represent the majority of the data.
Urgent and especially Critical shipments remain challenging due to strong class imbalance, with the model tending to under-predict higher SLA levels.
This behavior reflects a conservative recommendation strategy, prioritizing cost efficiency over minority class recall.

In [None]:
# SLA distribution in actual vs predicted, to check whether the model captures
# the overall class distribution

sla_levels = ["Normal", "Urgent", "Critical"]

# reindex() fixes the row order and creates a row for every SLA level, even
# one that appears in neither y_test nor the predictions (a plain .loc[...]
# lookup raises KeyError for such a level). Missing shares are plotted as 0.
df_dist = (
    pd.DataFrame({
        "Actual": pd.Series(y_test).value_counts(normalize=True),
        "Predicted": pd.Series(y_pred_final).value_counts(normalize=True)
    })
    .reindex(sla_levels)
    .fillna(0)
)

df_dist.plot(kind="bar", figsize=(7, 5))
plt.title("SLA Distribution – Actual vs Predicted")
plt.ylabel("Proportion")
plt.xlabel("SLA Level")
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()
