# 1. Data Preprocessing and Feature Engineering

In [1]:
from os import path
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the IoT sensor data.
# NOTE: dataset_path is a placeholder. When no file exists at that location the
# cell falls back to synthetic readings so the notebook still runs end to end.
dataset_path = 'path//to//csv_file[containing:]'
if path.exists(dataset_path):
    data = pd.read_csv(dataset_path)
else:
    nsample = 1000
    min_temp,max_temp = -100,100
    min_vib,max_vib = -1000,1000
    min_press,max_press = -500,500
    min_fail,max_fail = 0,10
    # A locally seeded generator makes "Restart & Run All" reproduce the same
    # synthetic data without touching NumPy's global random state.
    rng = np.random.default_rng(42)
    # Uniform draws on [min, max) for each synthetic sensor channel.
    temperature = rng.uniform(min_temp, max_temp, nsample)
    vibration = rng.uniform(min_vib, max_vib, nsample)
    pressure = rng.uniform(min_press, max_press, nsample)
    # Integer labels in [min_fail, max_fail). Sized by nsample (previously a
    # hard-coded 1000) so every column keeps the same length if nsample changes.
    machine_failure = rng.integers(min_fail, max_fail, nsample)
    data_dict = {'temperature':temperature,'vibration':vibration,'pressure':pressure,
                 'machine_failure':machine_failure}
    data = pd.DataFrame(data_dict)

# Data preprocessing
# Feature engineering: derive row-to-row features from the raw sensor readings
data['temp_diff'] = data['temperature'].diff()                          # change from the previous row
data['vibration_change'] = data['vibration'].pct_change()               # relative change from the previous row
data['pressure_roll_mean'] = data['pressure'].rolling(window=5).mean()  # mean over the last 5 rows
# pct_change() yields +/-inf when the previous reading is exactly 0. dropna()
# does not remove inf and StandardScaler rejects it, so map inf to NaN first,
# then drop every incomplete row (including the leading rows that the
# diff/rolling features leave as NaN).
data = data.replace([np.inf, -np.inf], np.nan).dropna()
# Separate the label column from the predictor columns
target_column = 'machine_failure'
X = data.drop(columns=[target_column])
Y = data[target_column]

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training rows only and
# the same fitted transform is then applied to both partitions
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. Model Development and Training

In [2]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib
# Initialize models.
# XGBoost's 'logloss' is the binary log-loss metric; for a target with more
# than two classes the matching metric is 'mlogloss'. The metric is only
# evaluated when an eval_set is supplied to fit(), so choosing it from the
# label cardinality keeps training identical today and avoids a failure if
# evaluation sets are added later.
n_classes = Y_train.nunique()
xgb_eval_metric = 'mlogloss' if n_classes > 2 else 'logloss'
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
xgb_model = XGBClassifier(n_estimators=100, random_state=42, eval_metric=xgb_eval_metric)

# Train both models on the scaled training features
rf_model.fit(X_train_scaled, Y_train)
xgb_model.fit(X_train_scaled, Y_train)

# Persist the fitted models together with the scaler, so the same feature
# scaling can be re-applied wherever the models are reloaded
joblib.dump(rf_model, "rf_model.joblib")
joblib.dump(xgb_model, "xgb_model.joblib")
joblib.dump(scaler, "scaler.joblib")
# Score the held-out test set with each fitted model
rf_predictions = rf_model.predict(X_test_scaled)
xgb_predictions = xgb_model.predict(X_test_scaled)

# Random Forest: per-class report followed by overall accuracy
print("Random Forest Model Performance:")
rf_report = classification_report(Y_test, rf_predictions)
print(rf_report)
rf_accuracy = accuracy_score(Y_test, rf_predictions)
# The trailing "\n" leaves a blank line before the next model's section
print(f"Accuracy: {rf_accuracy:.2f}\n")

# XGBoost: same layout, without the trailing blank line
print("XGBoost Model Performance:")
xgb_report = classification_report(Y_test, xgb_predictions)
print(xgb_report)
xgb_accuracy = accuracy_score(Y_test, xgb_predictions)
print(f"Accuracy: {xgb_accuracy:.2f}")

Random Forest Model Performance:
              precision    recall  f1-score   support

           0       0.17      0.21      0.19        39
           1       0.04      0.07      0.05        29
           2       0.36      0.17      0.23        30
           3       0.13      0.23      0.16        22
           4       0.08      0.09      0.08        23
           5       0.18      0.19      0.19        31
           6       0.20      0.17      0.18        35
           7       0.18      0.14      0.16        36
           8       0.15      0.13      0.14        30
           9       0.00      0.00      0.00        24

    accuracy                           0.14       299
   macro avg       0.15      0.14      0.14       299
weighted avg       0.16      0.14      0.14       299

Accuracy: 0.14

XGBoost Model Performance:
              precision    recall  f1-score   support

           0       0.20      0.18      0.19        39
           1       0.05      0.07      0.05        29
  

# 3. Loading the Saved Models and Scaler

In [3]:
# Restore the persisted artifacts in the order: random forest, XGBoost, scaler.
# NOTE(review): joblib.load unpickles its input, so only load files from a
# trusted source.
loaded_rf_model, loaded_xg_model, loaded_scaler = (
    joblib.load(artifact_file)
    for artifact_file in ("rf_model.joblib", "xgb_model.joblib", "scaler.joblib")
)