In [None]:
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

df = pd.read_csv('DARWIN_cleaned.csv')  # Load data

# Encode labels: convert 'H' (Healthy) and 'P' (Patient) to 0/1.
# Series.map yields NaN for any value missing from the dict, so the result is
# validated before it replaces the column; otherwise a stray label would only
# surface later as an opaque error during splitting or fitting.
encoded_class = df['class'].map({'H': 0, 'P': 1})
if encoded_class.isna().any():
    unexpected = sorted(df.loc[encoded_class.isna(), 'class'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'class' column (expected 'H' or 'P'): {unexpected}"
    )
df['class'] = encoded_class


In [None]:
# Hold out 20% of the rows for testing (split stratified on the class label),
# then fit a 100-tree random forest on the remaining 80%.
# Both the split and the forest use random_state=42 so reruns are repeatable.
y = df['class']
X = df.drop(['class', 'ID'], axis=1)  # features = every column except label and ID

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

In [None]:
# Model evaluation on the held-out split: confusion matrix, per-class report,
# and overall accuracy, printed in that order.
y_pred = rf.predict(X_test)

# Each metric is computed right before it is printed, one heading per metric.
for heading, metric in (
    ("Confusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
    ("\nAccuracy Score:", accuracy_score),
):
    print(heading, metric(y_test, y_pred))

In [None]:
# Confusion-matrix heatmap for the test-set predictions.
# The tick labels below are hard-coded, so the matrix is built with an explicit
# `labels` order: this guarantees a 2x2 matrix whose rows/columns line up with
# the tick labels even if a class happens to be absent from y_test or y_pred.
class_order = [0, 1]
class_names = ['Healthy (0)', 'Patient (1)']
cm = confusion_matrix(y_test, y_pred, labels=class_order)

# Plot heatmap on an explicit Axes (rows = actual class, columns = predicted class)
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax)

ax.set_title('Confusion Matrix - Random Forest')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
fig.tight_layout()
plt.show()

In [None]:
# Feature importance: pair each training column with the forest's importance
# score, keep the ten highest, then print and chart them.
feature_importances = pd.Series(rf.feature_importances_, index=X.columns)
top_features = feature_importances.sort_values(ascending=False).iloc[:10]

print("Top 10 Important Features:", top_features, sep="\n")

# Horizontal bar chart: importance score on x, feature name on y
fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(x=top_features.values, y=top_features.index, ax=ax)
ax.set_title("Top 10 Important Features")
ax.set_xlabel("Importance Score")
ax.set_ylabel("Feature")
fig.tight_layout()
plt.show()