### required imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import classification_report, accuracy_score



### importing the clean pre processed data (preparing it a little bit)

In [2]:
data=pd.read_csv('./CleanData.csv')
data = data.drop(columns=['Unnamed: 0','Year1','Year2','Year3','day','date enregistrement','Ordre de travail','Description'])
data['Date Fin'] = pd.to_datetime(data['Date Fin']).apply(lambda x: x.toordinal())
data['Date début'] = pd.to_datetime(data['Date début']).apply(lambda x: x.toordinal())
data['date_enreg'] = pd.to_datetime(data['date_enreg']).apply(lambda x: x.toordinal())
data.sample()

Unnamed: 0,Equipment,Nom Equipement,Noeud Parent,Nom_parent,Classification_Equipement,Date début,Date Fin,Type OT,day_diff,date_enreg
5033,ULPRPH0107,MARIANI,ULPRPH01,UHT A,Mécanique,737610,737610,correctif planifié,16,737594


In [3]:
X= data.drop(columns=['Classification_Equipement'])
Y= data['Classification_Equipement']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)   

### creating and training the model

In [None]:
# Copy to avoid changing original
X_train_enc = X_train.copy()
X_test_enc = X_test.copy()

# Identify categorical columns
cat_cols = X_train_enc.select_dtypes(include=['object']).columns

# Use OrdinalEncoder to safely handle unseen categories
encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)

# Fit on training set
X_train_enc[cat_cols] = encoder.fit_transform(X_train_enc[cat_cols])

# Transform test set using the same encoder
X_test_enc[cat_cols] = encoder.transform(X_test_enc[cat_cols])

# Initialize and train the Random Forest model
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_enc, Y_train)

# Predict
y_pred = rf_classifier.predict(X_test_enc)

# Evaluate
print("Accuracy:", accuracy_score(Y_test, y_pred))
print("Classification Report:\n", classification_report(Y_test, y_pred))

Accuracy: 0.9938697318007663
Classification Report:
               precision    recall  f1-score   support

  Electrique       1.00      0.92      0.96        48
 Energetique       0.50      0.25      0.33         4
   Mécanique       0.99      1.00      1.00      1253

    accuracy                           0.99      1305
   macro avg       0.83      0.72      0.76      1305
weighted avg       0.99      0.99      0.99      1305



In [6]:
import joblib
# Save the model
joblib.dump(rf_classifier, 'FailureClassification.joblib')

['FailureClassification.joblib']