In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

  from pandas.core import (


In [2]:
df = pd.read_csv("FleetIQ_dataset.csv")
print("Loaded dataset shape:", df.shape)
df.head()

Loaded dataset shape: (41705, 26)


Unnamed: 0,Vehicle_Model,Mileage,Maintenance_History,Reported_Issues,Vehicle_Age,Fuel_Type,Transmission_Type,Engine_Size,Odometer_Reading,Last_Service_Date,...,Tire_Condition,Brake_Condition,Battery_Status,Need_Maintenance,Vehicle_Tag,Vehicle_Model.1,Vehicle_Service_Tyle,Type_Maintenance,Type_Maint_Observation,Health_Score
0,Truck,58765,Good,0,4,Electric,Automatic,2000,28524,23/11/2023,...,New,New,Weak,1,V00001,2013 Ford F-150,Construction Support,Engine,Schedule immediate engine inspection and maint...,0.4074
1,Van,60353,Average,1,7,Electric,Automatic,2500,133630,21/09/2023,...,New,New,Weak,1,V00002,2023 Toyota Sienna,School Shuttle,Brakes,Brake performance reduced; replace pads and in...,0.7077
2,Bus,68072,Poor,0,2,Electric,Automatic,1500,34022,27/06/2023,...,New,Good,Weak,1,V00003,2023 Blue Bird Vision,Public Transit,Exhaust System,Exhaust system showing wear; schedule replacem...,0.6883
3,Bus,60849,Average,4,5,Petrol,Automatic,2500,81636,24/08/2023,...,New,Worn Out,New,1,V00004,2016 Blue Bird Vision,Shuttle Service,HVAC System,HVAC system requires diagnostic and refrigeran...,0.9762
4,Bus,45742,Poor,5,1,Petrol,Manual,2000,97162,25/05/2023,...,Good,Good,Weak,1,V00005,2012 Freightliner C2,Public Transit,Transmission,Transmission needs diagnostic testing and serv...,0.9915


In [3]:
#FIX DATE COLUMNS (DD/MM/YYYY)

date_cols = ["Last_Service_Date", "Warranty_Expiry_Date"]

for col in date_cols:
    if col in df.columns:
        df[col] = pd.to_datetime(df[col], dayfirst=True, errors="coerce")

# Convert to numeric days
TODAY = pd.Timestamp("2024-10-15")

if "Last_Service_Date" in df.columns:
    df["Days_since_last_service"] = (TODAY - df["Last_Service_Date"]).dt.days

if "Warranty_Expiry_Date" in df.columns:
    df["Days_until_warranty_expiry"] = (df["Warranty_Expiry_Date"] - TODAY).dt.days

df = df.drop(columns=date_cols, errors="ignore")  # remove original date columns

In [4]:
#Creaing Health_Status from Health_Score
def classify(score):
    if score <= 40:
        return "critical"
    elif score <= 70:
        return "warning"
    else:
        return "healthy"

df["Health_Status"] = df["Health_Score"].apply(classify)

In [5]:
text_cols = [
    "Vehicle_Model", "Fuel_Type", "Transmission_Type",
    "Maintenance_History", "Tire_Condition", "Brake_Condition",
    "Battery_Status", "Owner_Type"
]

for col in text_cols:
    if col in df.columns:
        df[col] = (
            df[col]
            .astype(str)
            .str.strip()
            .str.title()
            .replace({"Nan": None})
        )

In [6]:
#MAP/ENCODE CATEGORICAL FIELDS

# Maintenance History
if "Maintenance_History" in df.columns:
    df["Maintenance_History"] = df["Maintenance_History"].map(
        {"Good": 1, "Average": 0, "Poor": -1}
    ).astype("Int64")

# Transmission
if "Transmission_Type" in df.columns:
    df["Transmission_Type"] = df["Transmission_Type"].map(
        {"Automatic": 0, "Manual": 1}
    ).astype("Int64")

# Tire / Brake
condition_map = {"New": 2, "Good": 1, "Weak": -1, "Worn Out": -2}
for col in ["Tire_Condition", "Brake_Condition"]:
    if col in df.columns:
        df[col] = df[col].map(condition_map).astype("Int64")

# Battery
if "Battery_Status" in df.columns:
    df["Battery_Status"] = df["Battery_Status"].map(
        {"New": 1, "Good": 0, "Weak": -1}
    ).astype("Int64")

# Owner_Type label encoding
if "Owner_Type" in df.columns:
    le_owner = LabelEncoder()
    df["Owner_Type"] = le_owner.fit_transform(df["Owner_Type"].astype(str))


# One hot encode small categories
one_hot_cols = ["Vehicle_Model", "Fuel_Type"]
df = pd.get_dummies(df, columns=[c for c in one_hot_cols if c in df.columns])

In [7]:
DROP_COLS = [
    "Health_Score",
    "Vehicle_Model.1",
    "Vehicle_Tag",
    "Vehicle_Service_Tyle",
    "Type_Maintenance",
    "Type_Maint_Observation",
    "Need_Maintenance",
    "Reported_Issues"
]

df = df.drop(columns=[c for c in DROP_COLS if c in df.columns], errors="ignore")

In [8]:
print("Remaining object columns:", df.select_dtypes(include="object").columns)

Remaining object columns: Index(['Health_Status'], dtype='object')


In [9]:
target = "Health_Status"
y = df[target]
X = df.drop(columns=[target], errors="ignore")

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=314, stratify=y
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=314, stratify=y_temp
)

In [10]:
numeric_cols = X_train.select_dtypes(include=[np.number]).columns

scaler = StandardScaler()

X_train_scaled = X_train.copy()
X_val_scaled = X_val.copy()
X_test_scaled = X_test.copy()

X_train_scaled[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_val_scaled[numeric_cols] = scaler.transform(X_val[numeric_cols])
X_test_scaled[numeric_cols] = scaler.transform(X_test[numeric_cols])

In [11]:
model = RandomForestClassifier(
    n_estimators=350,
    max_depth=18,
    random_state=42,
    class_weight="balanced"
)

model.fit(X_train_scaled, y_train)

print("Validation performance:")
preds = model.predict(X_val_scaled)
print(classification_report(y_val, preds))

Validation performance:
              precision    recall  f1-score   support

    critical       1.00      1.00      1.00      4170

    accuracy                           1.00      4170
   macro avg       1.00      1.00      1.00      4170
weighted avg       1.00      1.00      1.00      4170



In [12]:
joblib.dump(model, "fleet_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Model and scaler saved.")

Model and scaler saved.


In [13]:
from sklearn.pipeline import Pipeline
import joblib

pipeline = Pipeline([
    ('scaler', joblib.load('scaler.pkl')),
    ('model', joblib.load('fleet_model.pkl'))
])

joblib.dump(pipeline, 'fleet_pipeline.pkl')

['fleet_pipeline.pkl']