In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, ClassificationPreset

In [2]:
# train.csv is loaded as the reference dataset for the Evidently reports below.
TRAIN_CSV_PATH = "../artifacts/train.csv"
reference_data = pd.read_csv(TRAIN_CSV_PATH)

# Preview the first rows as a quick sanity check of the loaded columns.
reference_data.head()


Unnamed: 0,Type,Machine failure,Rotational speed [rpm],Torque [Nm],Tool wear [min],Air temperature [c],Process temperature [c],type_of_failure
0,0.0,1,0.118955,0.742842,0.762951,0.659044,0.639374,1
1,0.0,0,0.32596,0.383242,0.525692,0.554348,0.703704,5
2,2.0,0,0.103609,0.699176,0.774704,0.728261,0.604938,5
3,0.0,0,0.159445,0.643389,0.524459,0.45749,0.444252,3
4,1.0,0,0.088475,0.802198,0.561265,0.543478,0.469136,5


In [3]:
# test.csv is loaded as the "current" dataset that the reports compare against
# the reference data.
TEST_CSV_PATH = "../artifacts/test.csv"
current_data = pd.read_csv(TEST_CSV_PATH)

# Preview the first rows as a quick sanity check of the loaded columns.
current_data.head()

Unnamed: 0,Type,Machine failure,Rotational speed [rpm],Torque [Nm],Tool wear [min],Air temperature [c],Process temperature [c],type_of_failure
0,0.0,0,0.17458,0.640162,0.367495,0.458352,0.550464,3
1,0.0,0,0.186263,0.531593,0.577075,0.195652,0.333333,5
2,0.0,0,0.118743,0.623626,0.770751,0.532609,0.728395,5
3,0.0,1,0.109028,0.647565,0.74657,0.861843,0.721412,0
4,0.0,1,0.123728,0.863708,0.04103,0.275379,0.44048,2


In [4]:
# Columns removed from the feature matrices. "Machine failure" is the target
# the model predicts; "type_of_failure" is excluded as well.
# NOTE(review): presumably type_of_failure would leak the outcome into the
# features — confirm against the data preparation step.
label_columns = ["Machine failure", "type_of_failure"]

# Feature matrices (everything except the label columns).
reference_X = reference_data.drop(columns=label_columns)
current_X = current_data.drop(columns=label_columns)

# Target vectors.
reference_Y = reference_data["Machine failure"]
current_Y = current_data["Machine failure"]



# Model Performance

In [5]:
# Hyperparameters for the random forest.
# NOTE(review): presumably the result of an earlier tuning run — confirm the source.
best_params = {
    'max_depth': 15,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'n_estimators': 50
}

# Every key in best_params is a RandomForestClassifier keyword argument, so the
# dict is unpacked directly instead of being looked up key by key.
# random_state is fixed so that refitting gives the same forest.
model = RandomForestClassifier(**best_params, random_state=42)

# The model is fit on the reference split only.
model.fit(reference_X, reference_Y)

# Predict labels for both splits; the reference predictions are made on the
# same rows the model was fit on.
reference_predictions = model.predict(reference_X)
current_predictions = model.predict(current_X)

In [6]:
# Combine features, target, and predictions for both datasets
reference_data_combined = reference_X.copy()
reference_data_combined['target'] = reference_Y
reference_data_combined['prediction'] = reference_predictions

current_data_combined = current_X.copy()
current_data_combined['target'] = current_Y
current_data_combined['prediction'] = current_predictions


In [7]:
#Define column mapping
column_mapping = ColumnMapping()
column_mapping.target = 'target'
column_mapping.prediction = 'prediction'
column_mapping.numerical_features = [
    "Type",
    "Rotational speed [rpm]",
    "Torque [Nm]",
    "Tool wear [min]",
    "Air temperature [c]",
    "Process temperature [c]"
]

In [8]:
# Classification report: runs Evidently's ClassificationPreset on the
# reference and current frames using the column mapping defined above.
classification_report_path = "../templates/machine_failure_classification_report_evidently.html"

Modelreport = Report(metrics=[ClassificationPreset()])
Modelreport.run(
    reference_data=reference_data_combined,
    current_data=current_data_combined,
    column_mapping=column_mapping,
)

# Write the report out as an HTML file.
Modelreport.save_html(classification_report_path)

# Data Drift

In [9]:
# Data drift report: runs Evidently's DataDriftPreset to compare the current
# frame against the reference frame using the column mapping defined above.
data_drift_report_path = "../templates/data_drift_report_evidently.html"

report = Report(metrics=[DataDriftPreset()])
report.run(
    reference_data=reference_data_combined,
    current_data=current_data_combined,
    column_mapping=column_mapping,
)

# Write the report out as an HTML file.
report.save_html(data_drift_report_path)