In [None]:
# from google.colab import files
import os
import numpy as np
import pandas as pd
import seaborn as sns
import joblib
import matplotlib
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Create the ./models/ directory next to the notebook; exist_ok=True makes the
# cell safe to re-run when the directory is already there.
os.makedirs('./models/', exist_ok=True)

In [None]:
# Lift pandas' truncation limits so rendered frames show all columns and rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [None]:
# uploaded = files.upload()
df = pd.read_csv("predictive_maintenance.csv")
df = df.drop(["UDI", "Product ID"], axis=1)
df = df.replace({'L': 1, 'M': 2, 'H': 3})

print(df.sample(6))

In [None]:
# Dimensions of the frame: (rows, columns) followed by the total cell count.
display(df.shape, df.size)

In [None]:
# Print the per-column summary (dtype and non-null count) of the loaded frame.
df.info()

In [None]:
# Descriptive statistics of the frame's columns, shown as the cell output.
df.describe()

In [None]:
# Number of rows for each distinct value of the "Type" column.
df['Type'].value_counts()

In [None]:
# Count of distinct (non-null) values in every column.
df.nunique()

In [None]:
# Number of rows for each distinct value of the "Failure Type" column
# (the class distribution of the modelling target).
df['Failure Type'].value_counts()

In [None]:
# Human-readable names for the failure classes, keyed by integer codes 1..6.
failure_names = (
    "No Failure",
    "Heat Dissipation Failure",
    "Power Failure",
    "Overstrain Failure",
    "Tool Wear Failure",
    "Random Failures",
)
mapping = dict(enumerate(failure_names, start=1))

# NOTE(review): this only changes rows whose "Failure Type" is one of the
# integer codes above; if the CSV already stores the text labels it is a
# no-op — confirm against the raw data.
failure_type_labelled = df["Failure Type"].replace(mapping)
df["Failure Type"] = failure_type_labelled

In [None]:
# Pairwise scatter/distribution grid of the frame, coloured by the "Target" column.
sns.pairplot(data=df, hue="Target")

In [None]:
# Pie chart of how the rows split across the values of the "Target" column.
colors = ['#E1728F', '#409E7D']
target_counts = df['Target'].value_counts()

# NOTE(review): the labels are positional and value_counts() orders by
# frequency, so this presumes "not failure" is the majority class — confirm.
pie_options = dict(
    explode=[0.1, 0.2],
    labels=['Not failure', 'Failure'],
    autopct='%1.1f%%',
    wedgeprops={'edgecolor': 'black'},
    shadow=True,
    startangle=25,
    colors=colors,
)
plt.pie(target_counts, **pie_options)

plt.title('Failure vs not failure')
plt.tight_layout()
plt.show()

In [None]:
# Correlation heatmap over every column except the "Failure Type" label.
numeric_df = df.drop(columns=['Failure Type'])
correlations = numeric_df.corr()

plt.figure(figsize=(8, 8))
sns.heatmap(data=correlations, annot=True)
plt.show()

In [None]:
# Target: the "Failure Type" column, kept as a one-column DataFrame.
y = df[["Failure Type"]]

# Features: everything except the two outcome columns.
X = df.drop(["Target", "Failure Type"], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=69,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# scikit-learn expects a 1-D target; flatten the one-column y frames once so
# fit/score do not have to coerce a column vector.
y_train_lr = np.ravel(y_train)
y_test_lr = np.ravel(y_test)

# Fit the classifier on the training split and predict the held-out rows.
logistic_regression_model = LogisticRegression()
logistic_regression_model.fit(X_train, y_train_lr)
y_pred_lr = logistic_regression_model.predict(X_test)

# Accuracy in percent, rounded to two decimals (accuracy_score takes y_true first).
training_accuracy = round(logistic_regression_model.score(X_train, y_train_lr) * 100, 2)
test_accuracy = round(accuracy_score(y_test_lr, y_pred_lr) * 100, 2)

# Persist the fitted model. The target directory is created here because
# opening the file for writing fails when the directory does not exist.
# NOTE(review): the setup cell creates './models/' (plural) while this path
# uses './model/' — confirm which location downstream consumers expect.
lr_model_path = './model/LR.joblib'
os.makedirs(os.path.dirname(lr_model_path), exist_ok=True)
joblib.dump(logistic_regression_model, lr_model_path)

print("Training Accuracy: {}%".format(training_accuracy))
print("Test Accuracy: {}%".format(test_accuracy))

In [None]:
# Use the fitted logistic-regression coefficients as a rough per-feature score.
# NOTE(review): only row 0 of coef_ is used; with a multiclass target that row
# presumably covers just the first class in classes_ — confirm that this is
# the intended view rather than an aggregate over all classes.
importance = logistic_regression_model.coef_[0]
imp_scores = pd.Series(importance, index=X_train.columns).sort_values(ascending=False)

# A bare expression is rendered only when it is the last statement of a cell,
# so the ranking is displayed explicitly before the figure is drawn.
display(imp_scores)

# Horizontal bar chart of the scores, drawn on the axes created here.
f, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=imp_scores, y=imp_scores.index, ax=ax)
ax.set_title("Visualize feature scores of the features")
ax.set_yticklabels(imp_scores.index)
ax.set_xlabel("Feature importance score")
ax.set_ylabel("Features")
plt.show()

In [None]:
# The classifier fitted in the training cell above is reused as-is, so the
# interactive prediction comes from the same model whose accuracy was reported
# and which was written to disk (refitting here only repeated that work).

def predict_logistic_regression(X):
    """Predict the failure label for a single observation.

    Parameters
    ----------
    X : sequence of numbers
        Feature values of one sample; reshaped to a single row before
        being passed to the model.

    Returns
    -------
    numpy.ndarray
        One-element array holding the predicted label.
    """
    X = np.array(X).reshape(1, -1)
    return logistic_regression_model.predict(X)

def test_logistic_regression():
    """Prompt for one sample's feature values and print the predicted label.

    Every answer is parsed with ``float``; a non-numeric answer raises
    ``ValueError``.
    """
    # NOTE(review): the values are passed positionally, so this order is
    # assumed to match the training column order — confirm against X_train.columns.
    type_of_material = float(input("Enter the type of material used: "))
    air_temperature = float(input("Enter air temperature in Kelvin: "))
    process_temperature = float(input("Enter process temperature in Kelvin: "))
    rotational_speed = float(input("Enter rotational speed: "))
    torque = float(input("Enter torque: "))
    tool_wear = float(input("Enter tool wear: "))

    result = predict_logistic_regression([type_of_material, air_temperature, process_temperature, rotational_speed, torque, tool_wear])

    print("Predicted machine failure label: ", result)

test_logistic_regression()

In [None]:
from sklearn.ensemble import RandomForestClassifier

# scikit-learn expects a 1-D target; flatten the one-column y frames once so
# fit/score do not have to coerce a column vector.
y_train_rf = np.ravel(y_train)
y_test_rf = np.ravel(y_test)

# Seed the forest (same seed as the train/test split) so the reported
# accuracies and the saved model are reproducible across runs.
random_forest_model = RandomForestClassifier(random_state=69)
random_forest_model.fit(X_train, y_train_rf)
y_pred_rf = random_forest_model.predict(X_test)

# Accuracy in percent, rounded to two decimals (accuracy_score takes y_true first).
training_accuracy_rf = round(random_forest_model.score(X_train, y_train_rf) * 100, 2)
test_accuracy_rf = round(accuracy_score(y_test_rf, y_pred_rf) * 100, 2)

# Persist the fitted model. The target directory is created here because
# opening the file for writing fails when the directory does not exist.
# NOTE(review): the setup cell creates './models/' (plural) while this path
# uses './model/' — confirm which location downstream consumers expect.
rf_model_path = './model/RF.joblib'
os.makedirs(os.path.dirname(rf_model_path), exist_ok=True)
joblib.dump(random_forest_model, rf_model_path)

print("Training Accuracy (Random Forest): {}%".format(training_accuracy_rf))
print("Test Accuracy (Random Forest): {}%".format(test_accuracy_rf))

In [None]:
# The forest fitted in the training cell above is reused as-is. Refitting a new
# forest here would produce a different model from the one that was evaluated
# and written to disk, so interactive predictions would not match the report.

def predict_random_forest(X):
    """Predict the failure label for a single observation.

    Parameters
    ----------
    X : sequence of numbers
        Feature values of one sample; reshaped to a single row before
        being passed to the model.

    Returns
    -------
    numpy.ndarray
        One-element array holding the predicted label.
    """
    X = np.array(X).reshape(1, -1)
    return random_forest_model.predict(X)

def test_random_forest():
    """Prompt for one sample's feature values and print the predicted label.

    Every answer is parsed with ``float``; a non-numeric answer raises
    ``ValueError``.
    """
    # NOTE(review): the values are passed positionally, so this order is
    # assumed to match the training column order — confirm against X_train.columns.
    type_of_material = float(input("Enter the type of material used: "))
    air_temperature = float(input("Enter air temperature in Kelvin: "))
    process_temperature = float(input("Enter process temperature in Kelvin: "))
    rotational_speed = float(input("Enter rotational speed: "))
    torque = float(input("Enter torque: "))
    tool_wear = float(input("Enter tool wear: "))

    result = predict_random_forest([type_of_material, air_temperature, process_temperature, rotational_speed, torque, tool_wear])

    print("Predicted machine failure label: ", result)

test_random_forest()

In [None]:
# Rank the training columns by the forest's feature_importances_, highest first.
feature_scores = (
    pd.Series(
        data=random_forest_model.feature_importances_,
        index=X_train.columns,
    )
    .sort_values(ascending=False)
)

feature_scores

In [None]:
# Horizontal bar chart of the random-forest feature scores.
f, ax = plt.subplots(figsize=(16, 10))
ax = sns.barplot(y=feature_scores.index, x=feature_scores)

# Tick labels first, then the descriptive text around the plot.
ax.set_yticklabels(feature_scores.index)
ax.set_ylabel("Features")
ax.set_xlabel("Feature importance score")
ax.set_title("Visualize feature scores of the features")

plt.show()