In [68]:
import pandas as pd

# Load the data
PATH = '../data/production/FD001.csv'

# Loading the dataset
data = pd.read_csv(PATH)

In [69]:
# Display the first 5 rows
data.head()

Unnamed: 0,unit_number,time_in_cycles,RUL,sensor_measurement_2_rm,sensor_measurement_3_rm,sensor_measurement_4_rm,sensor_measurement_7_rm,sensor_measurement_8_rm,sensor_measurement_9_rm,sensor_measurement_11_rm,sensor_measurement_12_rm,sensor_measurement_13_rm,sensor_measurement_14_rm,sensor_measurement_15_rm,sensor_measurement_17_rm,sensor_measurement_20_rm,sensor_measurement_21_rm,maintanance_urgency
0,1,10,182,642.201,1587.712,1400.84,554.096,2388.049,9049.56,47.272,522.146,2388.044,8132.228,8.40839,391.8,38.984,23.3896,long
1,1,11,181,642.247,1586.917,1400.844,554.114,2388.048,9049.902,47.24,522.12,2388.043,8132.424,8.40984,391.8,38.972,23.39557,long
2,1,12,180,642.238,1586.076,1400.545,554.191,2388.053,9050.432,47.209,522.072,2388.038,8132.7,8.40604,391.7,38.978,23.38981,long
3,1,13,179,642.31,1585.496,1400.208,554.109,2388.057,9049.82,47.22,522.015,2388.043,8132.187,8.40578,392.0,38.976,23.38296,long
4,1,14,178,642.31,1586.512,1399.937,554.112,2388.055,9049.609,47.251,521.896,2388.035,8132.247,8.4086,392.1,39.006,23.38383,long


In [70]:
# Display the last 5 rows
data.tail()

Unnamed: 0,unit_number,time_in_cycles,RUL,sensor_measurement_2_rm,sensor_measurement_3_rm,sensor_measurement_4_rm,sensor_measurement_7_rm,sensor_measurement_8_rm,sensor_measurement_9_rm,sensor_measurement_11_rm,sensor_measurement_12_rm,sensor_measurement_13_rm,sensor_measurement_14_rm,sensor_measurement_15_rm,sensor_measurement_17_rm,sensor_measurement_20_rm,sensor_measurement_21_rm,maintanance_urgency
19726,100,196,4,643.456,1599.625,1426.079,551.477,2388.224,9066.903,48.116,519.84,2388.237,8141.17,8.51031,396.0,38.453,23.09254,urgent
19727,100,197,3,643.447,1600.379,1427.288,551.41,2388.225,9066.82,48.116,519.793,2388.237,8140.771,8.51083,395.9,38.416,23.0854,urgent
19728,100,198,2,643.47,1600.848,1427.449,551.427,2388.223,9066.642,48.096,519.796,2388.24,8140.882,8.51915,396.2,38.424,23.07321,urgent
19729,100,199,1,643.424,1601.389,1427.787,551.334,2388.23,9067.045,48.136,519.813,2388.239,8140.833,8.52434,396.0,38.388,23.0737,urgent
19730,100,200,0,643.497,1601.982,1428.397,551.307,2388.235,9066.719,48.157,519.791,2388.239,8140.338,8.52308,396.1,38.383,23.07289,urgent


In [71]:
# Display the shape of the dataset
data.shape

(19731, 18)

In [72]:
# Splitting the data into features and target
X = data.drop(columns=['RUL', 'unit_number', 'time_in_cycles', 'maintanance_urgency'])
y = data['maintanance_urgency']

# Display the shape of the features and target
print(f"Shape of the features: {X.shape}")
print(f"Shape of the target: {y.shape}")

Shape of the features: (19731, 14)
Shape of the target: (19731,)


In [73]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Shape of the training set: {X_train.shape}")

print(f"Shape of the test set: {X_test.shape}")


Shape of the training set: (15784, 14)
Shape of the test set: (3947, 14)


In [74]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [75]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Fit a KNN model
knn = KNeighborsClassifier()

# Fit the model
knn.fit(X_train_scaled, y_train)

# Predict the categories
y_pred = knn.predict(X_test_scaled)

# Classification report
print("KNN Classifier")
print(classification_report(y_test, y_pred))

KNN Classifier
              precision    recall  f1-score   support

        long       0.96      0.97      0.96      1427
      medium       0.95      0.96      0.95      1522
      urgent       0.98      0.97      0.98       998

    accuracy                           0.96      3947
   macro avg       0.97      0.96      0.96      3947
weighted avg       0.96      0.96      0.96      3947



In [76]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

rf = RandomForestClassifier(random_state=42)

rf.fit(X_train_scaled, y_train)

y_pred = rf.predict(X_test_scaled)

print('Random Forest Classifier')
print(classification_report(y_test, y_pred))

Random Forest Classifier
              precision    recall  f1-score   support

        long       0.92      0.95      0.93      1427
      medium       0.89      0.90      0.90      1522
      urgent       0.97      0.90      0.93       998

    accuracy                           0.92      3947
   macro avg       0.92      0.92      0.92      3947
weighted avg       0.92      0.92      0.92      3947



In [77]:
# Export the model
import joblib

# Save the model
joblib.dump(rf, '../models/classifiers/rf_model.pkl')
joblib.dump(knn, '../models/classifiers/knn_model.pkl')

# Save the scaler
joblib.dump(scaler, '../models/scalers/scaler.pkl')


['../models/scalers/scaler.pkl']

In [78]:
# Load the model
model = joblib.load('../models/classifiers/rf_model.pkl')

# Load the scaler
scaler = joblib.load('../models/scalers/scaler.pkl')

# Predict the categories
y_pred = model.predict(X_test_scaled)


In [79]:
y_pred

array(['long', 'long', 'long', ..., 'medium', 'long', 'urgent'],
      dtype=object)