In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, confusion_matrix, mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import warnings
import pickle

# Suppress every warning for the rest of the session.
# NOTE: this is a blanket filter, so deprecation notices and other warnings emitted
# by the libraries imported above are hidden as well.
warnings.filterwarnings("ignore")


In [2]:
# Function to convert minutes to human-readable format
def convert_minutes(total_minutes):
    """
    Convert a duration in minutes into a coarse, human-readable string.

    The value is truncated to whole minutes first, so float inputs (for example
    a regression prediction such as 480.0) render as "8 hour(s)" rather than
    "8.0 hour(s)". Integer inputs are unaffected.

    Args:
        total_minutes (int | float): Total minutes to be converted.

    Returns:
        str: One of
            - "<m> minute(s)"               when below 60 minutes,
            - "<h> hour(s)"                 when below 1440 minutes (24 hours),
            - "<d> day(s) and <h> hour(s)"  otherwise.
            Leftover minutes are dropped in the hour and day forms.

    Raises:
        ValueError: If total_minutes is NaN (int() cannot convert it).
        OverflowError: If total_minutes is infinite.
    """
    # Truncate once up front; every branch below then works on a plain int.
    total_minutes = int(total_minutes)

    if total_minutes < 60:
        # Case 1: less than 1 hour, show only minutes
        return f"{total_minutes} minute(s)"

    if total_minutes < 1440:  # 1440 minutes = 24 hours
        # Case 2: at least 1 hour but less than 24 hours, show only whole hours
        return f"{total_minutes // 60} hour(s)"

    # Case 3: 24 hours or more, show whole days plus the whole hours left over
    days, remaining_minutes = divmod(total_minutes, 1440)
    return f"{days} day(s) and {remaining_minutes // 60} hour(s)"

In [3]:
# Generate synthetic data
# Seeds NumPy's global RNG; every np.random draw below depends on it.
np.random.seed(42)

samples_per_class = 10000
num_classes = 5
total_samples = samples_per_class * num_classes

feature_columns = ["Voltage", "Current", "Temperature", "Vibration", "Ambient_Temperature", "Humidity"]
# Weight of the shared noise term for each feature, in feature_columns order.
noise_scale = np.array([1.5, 0.3, 1.5, 0.05, 0.75, 1.5])

feature_blocks = []
time_blocks = []

for class_label in range(num_classes):
    # The order of these draws fixes the random stream, so keep it stable:
    # six feature draws, then the noise, then the time-to-failure draw.
    voltage = np.random.uniform(200 + class_label * 15, 220 + class_label * 15, samples_per_class)
    current = np.random.uniform(10 + class_label * 4, 20 + class_label * 6, samples_per_class)
    temperature = np.random.uniform(20 + class_label * 6, 40 + class_label * 20, samples_per_class)
    vibration = np.random.uniform(0 + class_label * 0.4, 1 + class_label * 0.6, samples_per_class)
    ambient_temp = np.random.uniform(10 + class_label * 3, 25 + class_label * 8, samples_per_class)
    humidity = np.random.uniform(20 + class_label * 4, 40 + class_label * 4, samples_per_class)

    # One noise value per sample, shared by all six features (scaled per feature).
    noise_factor = np.random.normal(0, 0.5, size=samples_per_class)
    class_features = np.column_stack([voltage, current, temperature, vibration, ambient_temp, humidity])
    class_features += noise_factor[:, None] * noise_scale

    # Class 0 gets a long time-to-failure; for the other classes the range shrinks
    # as the class label grows.
    if class_label == 0:
        time_to_fail = np.random.uniform(8000, 10000, samples_per_class)
    else:
        time_to_fail = np.random.uniform(1000 / class_label, 3000 / class_label, samples_per_class)

    feature_blocks.append(class_features)
    time_blocks.append(time_to_fail)

# Assemble whole arrays at once instead of appending 50,000 rows one by one.
features = np.vstack(feature_blocks)
labels = np.repeat(np.arange(num_classes, dtype=np.int64), samples_per_class)
time_to_failure = np.concatenate(time_blocks)

data = pd.DataFrame(features, columns=feature_columns)
data["Failure_Class"] = labels
data["Time_to_Failure"] = time_to_failure
assert len(data) == total_samples, "row count should equal samples_per_class * num_classes"

# Shuffle rows. No random_state is passed, so the shuffle draws from the NumPy
# global RNG seeded above.
data = data.sample(frac=1).reset_index(drop=True)
data.to_csv("ev_predictive_maintenance_data.csv", index=False)
print("Synthetic high-accuracy data saved as 'ev_predictive_maintenance_data.csv'.")

Synthetic high-accuracy data saved as 'ev_predictive_maintenance_data.csv'.


In [11]:
# Data Preprocessing
data = pd.read_csv("ev_predictive_maintenance_data.csv")

# Assign the filled column back rather than calling fillna(..., inplace=True) on the
# data["Time_to_Failure"] selection: that chained in-place form is deprecated in
# pandas 2.x and does not update the parent frame under Copy-on-Write.
data["Time_to_Failure"] = data["Time_to_Failure"].fillna(9999)

X = data.drop(["Failure_Class", "Time_to_Failure"], axis=1)
y_class = data["Failure_Class"]
y_time = data["Time_to_Failure"]

# Split first, then fit the scaler on the training rows only, so the test set's
# min/max never influence the transformation applied during training.
X_train_raw, X_test_raw, y_class_train, y_class_test, y_time_train, y_time_test = train_test_split(
    X, y_class, y_time, test_size=0.2, random_state=42
)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full matrix scaled with the train-fitted scaler; kept so the name X_scaled stays available.
X_scaled = scaler.transform(X)

# Persist the fitted scaler so new readings can be transformed the same way later.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

print("Data preprocessing complete.")


Data preprocessing complete.


In [6]:
# Random Forest Models
# One forest per target, both trained on the same scaled training features:
# a classifier for the failure class and a regressor for the time to failure.
clf_model = RandomForestClassifier(n_estimators=200, random_state=42)
reg_model = RandomForestRegressor(n_estimators=200, random_state=42)

clf_model.fit(X_train, y_class_train)
reg_model.fit(X_train, y_time_train)

# Persist both fitted models next to the notebook.
for model_path, fitted_model in (("clf_model.pkl", clf_model), ("reg_model.pkl", reg_model)):
    with open(model_path, "wb") as f:
        pickle.dump(fitted_model, f)

print("Random Forest models trained successfully.")



Random Forest models trained successfully.


In [7]:
# Classification Evaluation
# Score the classifier on the held-out split and show per-class metrics
# followed by the raw confusion matrix.
y_class_pred = clf_model.predict(X_test)

# Display names for the five class labels, passed to classification_report as target_names.
failure_labels = [
    "Normal Operation",
    "Thermal Failure",
    "Electrical Failure",
    "Mechanical Failure",
    "Environmental Failure",
]
report_text = classification_report(y_class_test, y_class_pred, target_names=failure_labels)
confusion = confusion_matrix(y_class_test, y_class_pred)

print("Classification Report:\n")
print(report_text)
print("Confusion Matrix:\n")
print(confusion)



Classification Report:

                       precision    recall  f1-score   support

     Normal Operation       0.99      0.99      0.99      2032
      Thermal Failure       0.97      0.98      0.97      1998
   Electrical Failure       0.95      0.96      0.96      2001
   Mechanical Failure       0.93      0.94      0.93      1992
Environmental Failure       0.98      0.94      0.96      1977

             accuracy                           0.96     10000
            macro avg       0.96      0.96      0.96     10000
         weighted avg       0.96      0.96      0.96     10000

Confusion Matrix:

[[2019   13    0    0    0]
 [  30 1956   12    0    0]
 [   0   50 1928   23    0]
 [   0    0   93 1865   34]
 [   0    0    0  115 1862]]


In [8]:
# Regression Evaluation
# Compare predicted and actual time-to-failure on the held-out split.
y_time_pred = reg_model.predict(X_test)

mae_value = mean_absolute_error(y_time_test, y_time_pred)
mse_value = mean_squared_error(y_time_test, y_time_pred)

print(f"Mean Absolute Error (Time-to-Failure): {mae_value:.2f} minutes")
print(f"Mean Squared Error (Time-to-Failure): {mse_value:.2f} minutes^2")



Mean Absolute Error (Time-to-Failure): 357.44 minutes
Mean Squared Error (Time-to-Failure): 352029.63 minutes^2


In [9]:
# Simulated Real-Time Prediction
# One reading, in training column order:
# Voltage, Current, Temperature, Vibration, Ambient_Temperature, Humidity.
# NOTE(review): several of these values (e.g. voltage 360, current 85, humidity 80)
# lie above the ranges used to generate the training data, so both models are
# extrapolating here — confirm this is the intended demo input.
real_time_data = np.array([[360, 85, 90, 3.5, 45, 80]])
real_time_data_scaled = scaler.transform(real_time_data)

predicted_class = clf_model.predict(real_time_data_scaled)[0]
predicted_time = reg_model.predict(real_time_data_scaled)[0]

# The regressor returns a float; truncate to whole minutes so the message reads
# "8 hour(s)" rather than "8.0 hour(s)". Truncating never overstates the time left.
readable_time = convert_minutes(int(predicted_time))

class_names = ["Normal Operation", "Thermal Failure", "Electrical Failure", "Mechanical Failure", "Environmental Failure"]

# Component named in the alert for each failure class.
fault_devices = {
    "Thermal Failure": "Battery Pack",
    "Electrical Failure": "Battery Management System (BMS)",
    "Mechanical Failure": "Electric Motor Bearings",
    "Environmental Failure": "Connectors and Cables",
}

if predicted_class == 0:
    print(f"Predicted: {class_names[predicted_class]} (No failure detected).")
else:
    # These names are plain module-level variables in a notebook cell, so no
    # `global` declaration is needed.
    d = class_names[predicted_class]
    a = readable_time
    fault_device = fault_devices[d]
    st1 = (
        f"🚨 Attention! We've detected a potential {fault_device} failure. "
        f"Estimated time to failure: {a}. "
        "Please check the system promptly to avoid disruptions. 😊"
    )
    print(st1)

🚨 Attention! We've detected a potential   Connectors and Cables failure.Estimated time to failure:  8.0 hour(s). Please check the system promptly to avoid disruptions. 😊
