In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Load Dataset
disease_df = pd.read_csv('disease_diagnosis.csv')

In [3]:
# === Preprocessing ===

# Extract systolic and diastolic from Blood Pressure
bp_split = disease_df['Blood_Pressure_mmHg'].str.split('/', expand=True).astype(float)
disease_df['BP_Systolic'] = bp_split[0]
disease_df['BP_Diastolic'] = bp_split[1]

# One-Hot Encode Symptoms
all_symptoms = pd.unique(
    disease_df[['Symptom_1', 'Symptom_2', 'Symptom_3']].values.ravel()
)

for sym in all_symptoms:
    disease_df[f"symptom_{sym}"] = disease_df[['Symptom_1', 'Symptom_2', 'Symptom_3']].isin([sym]).any(axis=1).astype(int)

# Features and Targets
features = ['Heart_Rate_bpm', 'Body_Temperature_C', 
            'Oxygen_Saturation_%', 'BP_Systolic', 'BP_Diastolic'] + \
           [f"symptom_{sym}" for sym in all_symptoms]

X = disease_df[features]
y = disease_df[['Diagnosis']]
z = disease_df[['Severity']]

# Label Encoding
diagnosis_encoder = LabelEncoder()
severity_encoder = LabelEncoder()

y['Diagnosis'] = diagnosis_encoder.fit_transform(y['Diagnosis'])
z['Severity'] = severity_encoder.fit_transform(z['Severity'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y['Diagnosis'] = diagnosis_encoder.fit_transform(y['Diagnosis'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  z['Severity'] = severity_encoder.fit_transform(z['Severity'])


In [4]:
# Train-test split for Diagnosis
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Feature Scaling ---
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === Model for Diagnosis ===
mlp_diag = MLPClassifier(
    hidden_layer_sizes=(200, 100, 50),
    activation='relu',
    solver='adam',
    max_iter=2000,
    early_stopping=True,
    random_state=42
)

mlp_diag.fit(X_train_scaled, y_train["Diagnosis"])
y_pred_diag = mlp_diag.predict(X_test_scaled)

In [5]:
# Train-test split for Severity
X_train, X_test, z_train, z_test = train_test_split(
    X, z, test_size=0.2, random_state=42
)

# --- Feature Scaling ---
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === Model for Severity ===
mlp_sev = MLPClassifier(
    hidden_layer_sizes=(200, 100, 50),
    activation='relu',
    solver='adam',
    max_iter=2000,
    early_stopping=True,
    random_state=42
)

mlp_sev.fit(X_train_scaled, z_train['Severity'])
z_pred_sev = mlp_sev.predict(X_test_scaled)

In [6]:
#Diagnosis Model Evaluation
print("=== Diagnosis Classification Report ===")
print(classification_report(y_test['Diagnosis'], y_pred_diag))
print("Diagnosis Confusion Matrix:")
print(confusion_matrix(y_test['Diagnosis'], y_pred_diag))

#Severity Model Evaluation
print("\n=== Severity Classification Report ===")
print(classification_report(z_test['Severity'], z_pred_sev))
print("Severity Confusion Matrix:")
print(confusion_matrix(z_test['Severity'], z_pred_sev))

=== Diagnosis Classification Report ===
              precision    recall  f1-score   support

           0       0.98      0.94      0.96        64
           1       0.96      0.97      0.97       238
           2       0.95      0.95      0.95        58
           3       1.00      0.73      0.84        11
           4       0.84      0.90      0.87        29

    accuracy                           0.95       400
   macro avg       0.95      0.90      0.92       400
weighted avg       0.95      0.95      0.95       400

Diagnosis Confusion Matrix:
[[ 60   3   1   0   0]
 [  0 232   1   0   5]
 [  0   3  55   0   0]
 [  1   1   1   8   0]
 [  0   3   0   0  26]]

=== Severity Classification Report ===
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       267
           1       0.84      0.91      0.88        58
           2       0.96      0.87      0.91        75

    accuracy                           0.95       400
   macro avg   