In [1]:
# Mount Google Drive into the runtime's filesystem; the CSV files read and written by the
# cells below all live under /content/drive/MyDrive.
# The google.colab package is provided by the Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [3]:
# Single place that names the dataset folder on the mounted Drive; change it here to
# point the notebook at another copy of the data.
DATA_DIR = '/content/drive/MyDrive/Fleet Insight Dashboard/Cleaned Data'

# Driving records (source of the features and the maintenance label) and the vehicle table
# that later receives one prediction per row.
df_driving_data = pd.read_csv(f'{DATA_DIR}/driving_data_final.csv')
df_vehicle_data = pd.read_csv(f'{DATA_DIR}/vehicle_data_final.csv')

In [10]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


def get_sensor_data(df_driving_data, tire_threshold=310, engine_threshold=60,
                    brake_threshold=1200, min_triggered_sensors=2):
    """Derive binary sensor flags and the ``maintenance_need`` label.

    Three 0/1 columns are computed by thresholding driving metrics:

    * ``tire_sensor``   -- ``maximum_rolling_power_density_demand >= tire_threshold``
    * ``engine_sensor`` -- ``maximum_kinetic_power_density_demand >= engine_threshold``
    * ``break_sensor``  -- ``max_deceleration_event_duration >= brake_threshold``

    ``maintenance_need`` is 1 when at least ``min_triggered_sensors`` of those
    flags are set, otherwise 0.

    Parameters
    ----------
    df_driving_data : pandas.DataFrame
        Must contain the three metric columns listed above.
    tire_threshold, engine_threshold, brake_threshold : number, optional
        Inclusive cut-offs for the respective metric (defaults 310, 60, 1200).
    min_triggered_sensors : int, optional
        Number of set flags required to mark a row as needing maintenance
        (default 2).

    Returns
    -------
    pandas.DataFrame
        A copy of the input with the four columns added (or overwritten if
        they already exist). The caller's frame is left untouched, so
        re-running the cell always starts from the same raw data.

    Raises
    ------
    KeyError
        If one of the metric columns is missing.
    """
    flagged = df_driving_data.copy()

    # Output column -> (metric column, inclusive threshold). The output column
    # name 'break_sensor' is kept exactly as spelled because it is part of the
    # produced schema.
    sensor_rules = {
        'tire_sensor': ('maximum_rolling_power_density_demand', tire_threshold),
        'engine_sensor': ('maximum_kinetic_power_density_demand', engine_threshold),
        'break_sensor': ('max_deceleration_event_duration', brake_threshold),
    }

    for sensor_col, (metric_col, threshold) in sensor_rules.items():
        # NaN metrics compare as False and therefore yield 0.
        flagged[sensor_col] = (flagged[metric_col] >= threshold).astype('int64')

    triggered = flagged[list(sensor_rules)].sum(axis=1)
    flagged['maintenance_need'] = (triggered >= min_triggered_sensors).astype('int64')

    return flagged


def data_preparation(df_driving_data):
    """Split labelled driving records into a training set and rows to score.

    The records are ordered newest-first by ``day_id``; for every vehicle
    (``vid``) the first row in that order -- its most recent record -- is held
    out as the row to predict on, and all remaining rows become training data.
    ``vid`` and ``pid`` are one-hot encoded, and the ``maintenance_need`` and
    (if present) ``month`` columns are removed from the feature matrices.

    Rows are selected by position, so the split is correct even when the
    frame's index contains repeated labels. Rows whose ``vid`` is missing have
    no vehicle to predict for and stay in the training set.

    NOTE(review): when the input comes from ``get_sensor_data`` the three
    ``*_sensor`` columns stay in the features although ``maintenance_need`` is
    computed directly from them -- confirm this is intended.

    Parameters
    ----------
    df_driving_data : pandas.DataFrame
        Must contain ``day_id``, ``vid``, ``pid`` and ``maintenance_need``.

    Returns
    -------
    x : pandas.DataFrame
        Training features.
    y : pandas.Series
        Training labels (``maintenance_need``), aligned with ``x``.
    x_pred : pandas.DataFrame
        One row per vehicle with the same columns as ``x``, ordered by first
        appearance of the vehicle in the newest-first ordering.
    """
    ordered = df_driving_data.sort_values('day_id', ascending=False)

    # In newest-first order the first occurrence of a vid is that vehicle's
    # latest record. Plain numpy masks keep the selection positional.
    has_vehicle = ordered['vid'].notna().to_numpy()
    first_seen = ~ordered.duplicated(subset='vid', keep='first').to_numpy()
    is_latest = has_vehicle & first_seen

    encoded = pd.get_dummies(
        ordered.assign(
            vid=pd.Categorical(ordered['vid']),
            pid=pd.Categorical(ordered['pid']),
        ),
        columns=['vid', 'pid'],
    )

    y = encoded['maintenance_need'][~is_latest]

    features = encoded.drop(columns=['maintenance_need'])
    if 'month' in features.columns:
        features = features.drop(columns=['month'])

    # Both matrices are slices of the same frame, so their columns match.
    x = features[~is_latest]
    x_pred = features[is_latest]

    return x, y, x_pred


def _weeks_until_maintenance(probabilities):
    """Map maintenance probabilities to a coarse weeks-until-maintenance value."""
    probabilities = np.asarray(probabilities)
    # np.select takes the first matching condition, so the descending cut-offs
    # form the bands [0.50, inf) -> 1, [0.40, 0.50) -> 2, [0.35, 0.40) -> 3,
    # [0.30, 0.35) -> 4; everything else (including NaN) gets 30.
    return np.select(
        [
            probabilities >= 0.50,
            probabilities >= 0.40,
            probabilities >= 0.35,
            probabilities >= 0.30,
        ],
        [1, 2, 3, 4],
        default=30,
    ).astype('int64')


def predict_maintenance(x, y, dmatrix, df_vehicle_data, x_pred):
    """Cross-validate and fit an XGBoost classifier, then score every vehicle.

    Parameters
    ----------
    x, y
        Training features and binary labels passed to ``XGBClassifier.fit``.
    dmatrix : xgboost.DMatrix
        The training data in DMatrix form; used only by ``xgb.cv``.
    df_vehicle_data : pandas.DataFrame
        Vehicle table that receives the predictions. Predictions are attached
        by position: row ``i`` of ``x_pred`` is taken to describe row ``i`` of
        this frame.
        NOTE(review): nothing here checks that the two are in the same vehicle
        order -- confirm against how ``x_pred`` is built.
    x_pred : pandas.DataFrame
        One feature row per vehicle to score.

    Returns
    -------
    cv_results : pandas.DataFrame
        Output of ``xgb.cv`` (10 folds, up to 50 rounds, early stopping 10).
    df_vehicle_data : pandas.DataFrame
        A copy of the input with ``predicted_maintenance_probability`` and
        ``predicted_weeks_until_maintenance`` added; the caller's frame is not
        modified.
    xg_class : xgboost.XGBClassifier
        The fitted classifier.

    Raises
    ------
    ValueError
        If ``x_pred`` and ``df_vehicle_data`` have different numbers of rows.
    """
    # Fail before the expensive training instead of on the final assignment.
    if len(x_pred) != len(df_vehicle_data):
        raise ValueError(
            "x_pred has %d rows but df_vehicle_data has %d; one prediction row "
            "per vehicle is required" % (len(x_pred), len(df_vehicle_data))
        )

    params = {
        "objective": "binary:logistic",
        'colsample_bytree': 0.3,
        'learning_rate': 0.1,
        'max_depth': 5,
        'alpha': 10
    }

    # NOTE(review): "rmse" is an unusual evaluation metric for a
    # binary:logistic objective. It is left unchanged because it determines
    # the column names of cv_results, which the notebook exports to CSV.
    cv_results = xgb.cv(
        dtrain=dmatrix, params=params, nfold=10,
        num_boost_round=50, early_stopping_rounds=10,
        metrics="rmse", as_pandas=True, seed=123
    )

    # NOTE(review): the final model uses a fixed 10 trees; the number of rounds
    # found by the cross-validation above is not fed back into it.
    xg_class = xgb.XGBClassifier(**params, n_estimators=10)
    xg_class.fit(x, y)

    # Column 1 of predict_proba is the probability of the positive class.
    maintenance_probability = xg_class.predict_proba(x_pred)[:, 1]

    scored_vehicles = df_vehicle_data.copy()
    scored_vehicles['predicted_maintenance_probability'] = maintenance_probability
    scored_vehicles['predicted_weeks_until_maintenance'] = _weeks_until_maintenance(
        maintenance_probability
    )

    return cv_results, scored_vehicles, xg_class


In [11]:
# 1. Label the driving records: threshold-based sensor flags plus the maintenance_need target.
df_driving_data_with_sensors = get_sensor_data(df_driving_data=df_driving_data)

# 2. Separate training features/labels from the rows that will be scored.
x, y, x_pred = data_preparation(df_driving_data=df_driving_data_with_sensors)

# 3. Wrap the training set in a DMatrix, the container consumed by xgb.cv.
dmatrix = xgb.DMatrix(x, label=y)

# 4. Cross-validate, fit the classifier and attach predictions to the vehicle table.
cv_results, df_vehicle_data_updated, xgboost_model = predict_maintenance(
    x=x,
    y=y,
    dmatrix=dmatrix,
    df_vehicle_data=df_vehicle_data,
    x_pred=x_pred,
)


In [12]:
# Folder on the mounted Drive that receives the exported files; change it here to
# redirect both outputs at once.
OUTPUT_DIR = "/content/drive/MyDrive/Fleet Insight Dashboard/Cleaned Data"

# index=False keeps the DataFrame index out of the written CSV files.
df_vehicle_data_updated.to_csv(f"{OUTPUT_DIR}/vehicle_data_with_predictions.csv", index=False)
cv_results.to_csv(f"{OUTPUT_DIR}/xgboost_cv_results.csv", index=False)