# 01 Data Overview EDA

In [None]:
# Import libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's "whitegrid" style to every figure drawn after this point.
sns.set(style="whitegrid")

# Read the raw CSV into `df`, the DataFrame used by all plots below.
# The relative path is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="../data/ai4i2020.csv")


In [None]:
# Plot 1: Machine Failure Distribution
# Count of rows for each value of the 'Machine failure' column.
# NOTE(review): recent seaborn releases (0.13+) warn when `palette` is
# passed without `hue` — confirm against the pinned seaborn version.
plt.figure(figsize=(6, 4))
failure_ax = sns.countplot(data=df, x='Machine failure', palette='Set2')
failure_ax.set(
    title="Machine Failure Distribution",
    xlabel="Failure (0 = No, 1 = Yes)",
    ylabel="Count",
)
plt.show()


In [None]:
# Plot 2: Breakdown of individual failure types
# Sum each failure-flag column and show the totals, largest first.
# NOTE(review): recent seaborn releases (0.13+) warn when `palette` is
# passed without `hue` — confirm against the pinned seaborn version.
failure_cols = ['TWF', 'HDF', 'PWF', 'OSF', 'RNF']
failure_sums = (
    df[failure_cols]
    .sum()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=failure_sums.index,
    y=failure_sums.values,
    palette='Set3',
    ax=ax,
)
ax.set_title("Breakdown of Failure Types")
ax.set_xlabel("Failure Type")
ax.set_ylabel("Failure Occurrences")
plt.show()


In [None]:
# Plot 3: Torque Distribution by Failure
# One box per 'Machine failure' value, comparing the 'Torque [Nm]' spread.
# NOTE(review): recent seaborn releases (0.13+) warn when `palette` is
# passed without `hue` — confirm against the pinned seaborn version.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(
    data=df,
    x='Machine failure',
    y='Torque [Nm]',
    palette='Set1',
    ax=ax,
)
ax.set_title("Torque Distribution by Failure Status")
ax.set_xlabel("Failure")
ax.set_ylabel("Torque [Nm]")
plt.show()


In [None]:
# Plot 4: Correlation Heatmap
# Pairwise correlations between the numeric columns, annotated to two decimals.
# 'UDI' and 'Product ID' are dropped as in the original cell. The remaining
# frame is then restricted to numeric/bool columns before calling corr():
# since pandas 2.0, DataFrame.corr() no longer discards non-numeric columns
# silently and raises on string data (e.g. the 'Type' column, which is used
# as a category elsewhere in this notebook). On older pandas this selects the
# same columns corr() would have kept, so the heatmap is unchanged there.
plt.figure(figsize=(12, 8))
numeric_df = (
    df.drop(columns=['UDI', 'Product ID'])
    .select_dtypes(include=['number', 'bool'])
)
corr = numeric_df.corr()
sns.heatmap(corr, annot=True, fmt=".2f", cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()


In [None]:
# Plot 5: RPM vs Torque Scatter Plot colored by Failure
# Each row is one point; colour encodes the 'Machine failure' value and
# alpha=0.7 keeps overlapping points distinguishable.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='Rotational speed [rpm]',
    y='Torque [Nm]',
    hue='Machine failure',
    palette='coolwarm',
    alpha=0.7,
    ax=ax,
)
ax.set(
    title="RPM vs Torque Colored by Machine Failure",
    xlabel="RPM",
    ylabel="Torque [Nm]",
)
# Re-create the legend so it carries a short title instead of the column name.
ax.legend(title="Failure")
plt.show()


In [None]:
# Plot 6: Failure Rate by Machine Type
# Mean of 'Machine failure' within each 'Type' group; with a 0/1 flag this
# mean is the share of failing rows per type (presumably the case here —
# the Plot 1 axis label describes the column as 0 = No, 1 = Yes).
# NOTE(review): recent seaborn releases (0.13+) warn when `palette` is
# passed without `hue` — confirm against the pinned seaborn version.
type_failure = (
    df.groupby('Type')['Machine failure']
    .mean()
    .reset_index()
)

fig, ax = plt.subplots(figsize=(7, 5))
sns.barplot(
    data=type_failure,
    x='Type',
    y='Machine failure',
    palette='Set2',
    ax=ax,
)
ax.set_title("Failure Rate by Machine Type")
ax.set_xlabel("Machine Type")
ax.set_ylabel("Failure Rate")
plt.show()
