# Predictive Maintenance for Industrial Machines

## Data Exploration

In [None]:
!gdown --id 1k-ctbRBuMPPuGdghEmKPb55ROWIJ3oCX

import pandas as pd

# Read the downloaded file into a DataFrame
df = pd.read_csv('Dataset.csv')  # Replace with the actual downloaded file name

# Show the first few rows of the DataFrame
df.head()


Downloading...
From: https://drive.google.com/uc?id=1k-ctbRBuMPPuGdghEmKPb55ROWIJ3oCX
To: /content/Dataset.csv
100% 518k/518k [00:00<00:00, 85.8MB/s]


Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,0,0,0,0,0


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Numeric columns that get rescaled
numerical_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]

# Learn each column's min/max, then overwrite the columns in `df` with the
# rescaled values (MinMaxScaler's default target range is [0, 1]).
scaler = MinMaxScaler()
scaler.fit(df[numerical_cols])
df[numerical_cols] = scaler.transform(df[numerical_cols])


## Data Preprocessing

In [None]:
from sklearn.preprocessing import MinMaxScaler

# The numerical columns are normally already min-max scaled by the time this
# cell runs. Fitting a fresh scaler on data that is already in [0, 1] leaves
# `df` effectively unchanged but replaces `scaler` with one that learned
# min=0 / max=1, discarding the raw-unit ranges needed to transform new
# readings or to invert the scaling. Only fit when the columns are not scaled
# yet; this also makes the cell safe to re-run.
_SCALE_TOL = 1e-9  # slack for floating-point rounding at the interval ends
_col_min = df[numerical_cols].min().min()
_col_max = df[numerical_cols].max().max()
already_scaled = (_col_min >= -_SCALE_TOL) and (_col_max <= 1 + _SCALE_TOL)

if not already_scaled:
    # Initialize the MinMaxScaler, then fit and transform the selected columns
    scaler = MinMaxScaler()
    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

## Feature Engineering

In [None]:
lag_features = ['Air temperature [K]', 'Torque [Nm]']

# Add one- and two-row shifted copies of every listed column,
# named '<column>_lag1' and '<column>_lag2'.
for base_col in lag_features:
    base_series = df[base_col]
    for n_steps in (1, 2):
        df[f'{base_col}_lag{n_steps}'] = base_series.shift(n_steps)

# Shifting leaves the leading rows without a lagged value; drop every row
# that contains any missing value (in any column), modifying `df` in place.
df.dropna(inplace=True)

## Model Development

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Columns excluded from the model inputs: identifiers, the 'Type' column, the
# target itself and the TWF/HDF/PWF/OSF/RNF columns.
non_feature_cols = ['UDI', 'Product ID', 'Type', 'Machine failure', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF']

# Target and feature matrix
y = df['Machine failure']
X = df.drop(columns=non_feature_cols)

# Order-preserving hold-out: the first 70% of rows train the model,
# the remaining 30% are kept for validation.
train_size = int(0.7 * len(df))
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_val, y_val = X.iloc[train_size:], y.iloc[train_size:]

# Baseline Random Forest with a fixed seed for repeatable results
rf_model = RandomForestClassifier(random_state=42)

# Train on the first 70% of rows
rf_model.fit(X_train, y_train)


## Model Evaluation

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict the held-out rows with the baseline model
y_val_pred_rf = rf_model.predict(X_val)

# Score the predictions with each metric, in the order
# accuracy, precision, recall, F1.
accuracy_rf, precision_rf, recall_rf, f1_rf = (
    metric(y_val, y_val_pred_rf)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Display the four scores as the cell output
accuracy_rf, precision_rf, recall_rf, f1_rf


(0.9876666666666667,
 0.8529411764705882,
 0.47540983606557374,
 0.6105263157894736)

## Hyperparameter Tuning

In [None]:
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
import numpy as np

# Define the hyperparameter grid for Random Forest
param_dist = {
    'n_estimators': np.arange(50, 151, 50),   # 50, 100, 150
    'max_depth': np.arange(10, 31, 10),       # 10, 20, 30
    'min_samples_split': [2, 5, 10]
}

# The notebook treats the rows as ordered in time (lag features, order-based
# hold-out). A plain integer `cv` resolves to an unshuffled StratifiedKFold for
# classifiers, which fits on later rows and scores on earlier ones.
# TimeSeriesSplit keeps every validation fold strictly after the rows it was
# trained on, so model selection follows the same rule as the final hold-out.
time_ordered_cv = TimeSeriesSplit(n_splits=3)

# Initialize RandomizedSearchCV: 10 sampled candidates, ranked by F1
random_search = RandomizedSearchCV(estimator=rf_model, param_distributions=param_dist,
                                   n_iter=10, cv=time_ordered_cv, n_jobs=-1, verbose=2,
                                   scoring='f1', random_state=42)

# Fit the random search model to the training data
random_search.fit(X_train, y_train)


Fitting 3 folds for each of 10 candidates, totalling 30 fits


## Optimized Model

In [None]:
# Winning hyperparameter combination found by the randomized search
best_params = random_search.best_params_

# Build the tuned Random Forest from exactly the three searched settings,
# keeping the same seed as the baseline model.
tuned_keys = ('n_estimators', 'max_depth', 'min_samples_split')
optimized_rf_model = RandomForestClassifier(
    random_state=42,
    **{key: best_params[key] for key in tuned_keys},
)

# Train the tuned model on the training rows
optimized_rf_model.fit(X_train, y_train)


## Evaluate Optimized Model


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict the held-out rows with the tuned model
y_val_pred_optimized = optimized_rf_model.predict(X_val)

# Score the predictions with each metric, in the order
# accuracy, precision, recall, F1.
accuracy_optimized, precision_optimized, recall_optimized, f1_optimized = [
    score_fn(y_val, y_val_pred_optimized)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
]

# Display the four scores as the cell output
accuracy_optimized, precision_optimized, recall_optimized, f1_optimized


(0.9896666666666667,
 0.9411764705882353,
 0.5245901639344263,
 0.6736842105263159)

## Interpret the Results

In [None]:
# Importance scores of the tuned model, one per input column
feature_importances = optimized_rf_model.feature_importances_

# Pair every feature name with its score, keeping the column order of X
feature_map = {
    column_name: importance
    for column_name, importance in zip(X.columns, feature_importances)
}

# Display the mapping as the cell output
feature_map


{'Air temperature [K]': 0.05260780819529809,
 'Process temperature [K]': 0.12528465439511144,
 'Rotational speed [rpm]': 0.22220859583676655,
 'Torque [Nm]': 0.29294513658069554,
 'Tool wear [min]': 0.11446554805192567,
 'Air temperature [K]_lag1': 0.06069476557098729,
 'Air temperature [K]_lag2': 0.054300723263871156,
 'Torque [Nm]_lag1': 0.04288009425672479,
 'Torque [Nm]_lag2': 0.034612673848619525}