In [None]:
import numpy as np 
import pandas as pd 
# Read the CSV (relative path, resolved against the current working directory)
# into a DataFrame, then preview its first five rows as the cell output.
dataset = pd.read_csv(filepath_or_buffer="predictive_maintenance.csv")
dataset.head(n=5)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Descriptive statistics of the dataset, as produced by DataFrame.describe()
stats_summary = dataset.describe()

# Number of missing entries per column
# (author's note from the original run: no null values obtained)
null_values = dataset.isna().sum()

# Visualising the data
def visualize_data(dataset):
    """Draw distribution plots for the dataset and show them.

    Two figures are produced:

    1. A 2x3 grid with one histogram (plus KDE curve) for each of the five
       columns listed in ``histogram_panels``; the sixth grid slot is removed.
    2. A single count plot of the ``'Failure Type'`` column.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the five columns named in ``histogram_panels`` and a
        ``'Failure Type'`` column.

    Returns
    -------
    None
        The figures are rendered via ``plt.show()``; nothing is returned.

    Raises
    ------
    KeyError
        If one of the histogram columns is missing from ``dataset``.
    """
    # (column name, panel title) for every histogram, in display order.
    histogram_panels = [
        ('Air temperature [K]', 'Air Temperature Distribution'),
        ('Process temperature [K]', 'Process Temperature Distribution'),
        ('Rotational speed [rpm]', 'Rotational Speed Distribution'),
        ('Torque [Nm]', 'Torque Distribution'),
        ('Tool wear [min]', 'Tool Wear Distribution'),
    ]

    # Figure 1: histogram grid.
    hist_fig, hist_axes = plt.subplots(2, 3, figsize=(12, 9))
    flat_axes = hist_axes.ravel()
    for ax, (column, title) in zip(flat_axes, histogram_panels):
        sns.histplot(dataset[column], kde=True, ax=ax)
        ax.set_title(title)

    # The grid has six slots but only five panels: drop the unused axes so no
    # empty frame is drawn.
    for unused_ax in flat_axes[len(histogram_panels):]:
        hist_fig.delaxes(unused_ax)

    # Lay out each figure explicitly. A single pyplot-level tight_layout() call
    # only affects the *current* figure, which would leave this grid untouched
    # once the second figure has been created.
    hist_fig.tight_layout()

    # Figure 2: count of rows per failure type.
    count_fig, count_ax = plt.subplots(figsize=(10, 8))
    sns.countplot(x='Failure Type', data=dataset, ax=count_ax)
    count_ax.set_title('Failure Type Distribution')
    count_fig.tight_layout()

    plt.show()

# Executing the functions
# visualize_data() only draws figures and returns None, so it is called on its
# own instead of being packed into the displayed tuple (which would show a
# stray `None`).
visualize_data(dataset)

# Per-column null counts.
print(null_values)

# Last expression of the cell: the notebook renders this DataFrame directly
# rather than as text nested inside a tuple.
stats_summary




In [None]:
# Index of the loaded frame. The value recorded when this cell was authored was
# RangeIndex(start=0, stop=60000, step=1).
# Printed explicitly: a bare expression in the middle of a cell shows nothing.
print(dataset.index)

# Number of rows for each value of the Target column.
data_valuecounts = dataset['Target'].value_counts()

for target_class in (0, 1):
    # .get(..., 0): a class with no rows is absent from value_counts(), and
    # plain indexing would raise KeyError instead of reporting 0 %.
    class_share = (data_valuecounts.get(target_class, 0) / dataset.shape[0]) * 100
    print(f"Percentage of Target data points equal to {target_class} are: ", class_share, "%")

# Printed explicitly for the same reason as the index above.
print(data_valuecounts)

from sklearn.model_selection import train_test_split

# Target vector, and the feature matrix made of every column that is not
# explicitly dropped below.
y = dataset['Target']
x = dataset.drop(columns=['UDI', 'Product ID', 'Type', 'Target', 'Failure Type'])

# 80/20 split, stratified on the target, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

# Verify the split: class proportions of the full data and of both partitions.
for heading, labels in (
    ("Original target distribution:", y),
    ("\nTraining target distribution:", y_train),
    ("\nTest target distribution:", y_test),
):
    print(heading)
    print(labels.value_counts(normalize=True))

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Normalizing the features using MinMaxScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows do not influence the scaling parameters.
scaler = MinMaxScaler().fit(x_train)

# scaler.transform() returns a plain array, so the frames are rebuilt here.
# Passing index= keeps the original row labels; without it the rows would be
# renumbered 0..n-1 and no longer line up with y_train / y_test, which still
# carry the labels assigned before the split.
x_train = pd.DataFrame(scaler.transform(x_train), columns=x_train.columns, index=x_train.index)
x_test = pd.DataFrame(scaler.transform(x_test), columns=x_test.columns, index=x_test.index)

# Display the first few rows of the normalized training data
x_train.head()

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

def _fit_and_report(model, model_label):
    """Fit ``model`` on the training split and print its test/train accuracy.

    Uses the notebook-level ``x_train``, ``y_train``, ``x_test`` and ``y_test``.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
        Fitted in place.
    model_label : str
        Human-readable name used as the prefix of the two printed lines.

    Returns
    -------
    tuple
        ``(test_pred, train_pred, test_accuracy, train_accuracy)``.
    """
    model.fit(x_train, y_train)
    test_pred = model.predict(x_test)
    train_pred = model.predict(x_train)
    test_accuracy = accuracy_score(y_test, test_pred)    # accuracy on the test set
    train_accuracy = accuracy_score(y_train, train_pred)  # accuracy on the train set
    print(f"{model_label} Accuracy for Test: {test_accuracy*100:.2f}%")
    print(f"{model_label} Accuracy for Train: {train_accuracy*100:.2f}%")
    return test_pred, train_pred, test_accuracy, train_accuracy


# NOTE(review): only accuracy is reported here. If the Target classes are
# heavily skewed, accuracy alone can look high for a model that rarely predicts
# the minority class; consider also inspecting precision/recall.

# Decision Tree
# random_state is fixed, like for the split and the random forest, so that
# re-running the notebook reproduces the same tree and the same scores.
decision_tree_model = DecisionTreeClassifier(random_state=0)
(
    decision_tree_pred,
    decision_tree_pred1,
    decision_tree_accuracy,
    decision_tree_accuracy1,
) = _fit_and_report(decision_tree_model, "Decision Tree")

# Random Forest
random_forest_model = RandomForestClassifier(random_state=0)
(
    random_forest_pred,
    random_forest_pred1,
    random_forest_accuracy,
    random_forest_accuracy1,
) = _fit_and_report(random_forest_model, "Random Forest")

# K-Nearest Neighbors
knn_model = KNeighborsClassifier()
(
    knn_pred,
    knn_pred1,
    knn_accuracy,
    knn_accuracy1,
) = _fit_and_report(knn_model, "K-Nearest Neighbors")