In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns

In [None]:
# Read the dataset from the CSV file in the working directory, then preview
# its first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer="System_dataset.csv")
data.head(n=5)

In [None]:
# The "Label" column is the prediction target; every other column is a feature.
y = data.loc[:, "Label"]
X = data.drop(columns=["Label"])

In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled,y,test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=3000, max_depth=10, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Predicted class label for every held-out row.
y_pred = model.predict(X=X_test)

In [None]:
# Share of held-out rows whose predicted label matches the true label,
# printed as a percentage with two decimals.
ac = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {ac*100:.2f}%\n")

In [None]:
# Text summary of per-class precision, recall, F1 and support on the test set.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

In [None]:
# Confusion matrix of the test predictions drawn as an annotated heatmap:
# rows are the actual labels, columns are the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)  # fmt="d": integer counts
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

In [None]:
# Pair each feature name with the importance the fitted forest assigned to it,
# then chart the ten largest values as horizontal bars.
feat_importance = pd.Series(data=model.feature_importances_, index=X.columns)
top_features = feat_importance.nlargest(n=10)

importance_ax = top_features.plot.barh(color='teal')
importance_ax.set_title("Top 10 Important Features")
plt.show()

In [None]:
# Number of rows per class, to see how balanced the labels are.
label_counts = data['Label'].value_counts()
print(label_counts)

In [None]:
# Column-wise means computed separately for the rows labelled 0 and the rows
# labelled 1, so the two groups can be compared side by side.
labels = data['Label']
healthy_means = data.loc[labels == 0].mean()
abnormal_means = data.loc[labels == 1].mean()

print("Healthy (Label 0) Averages:\n", healthy_means)
print("\nAbnormal (Label 1) Averages:\n", abnormal_means)

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# Cross-validate on the RAW features with scaling inside a pipeline. The
# pipeline refits StandardScaler on the training part of every fold, so the
# held-out fold never contributes to the scaling statistics. Scoring a matrix
# that was scaled once on all rows would leak that information into each fold.
# cross_val_score fits clones of the estimator, so the already-trained `model`
# object is left as it is.
cv_pipeline = make_pipeline(StandardScaler(), model)
scores = cross_val_score(cv_pipeline, X, y, cv=5)

# Per-fold scores followed by their mean.
print(scores)
print("Average CV accuracy:", scores.mean())

In [None]:
import joblib

# Persist the fitted classifier and the fitted scaler as two separate files so
# both can be reloaded together when scoring new data.
joblib.dump(value=model, filename="hardware_health_model.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")