In [19]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

In [20]:
# Load Dataset
disease_df = pd.read_csv('disease_diagnosis.csv')

In [21]:

# Preprocessing

# Split Blood Pressure into Systolic and Diastolic
bp_split = disease_df['Blood_Pressure_mmHg'].str.split('/', expand=True).astype(float)
disease_df['BP_Systolic'] = bp_split[0]
disease_df['BP_Diastolic'] = bp_split[1]

# One-Hot Encode Symptoms
all_symptoms = pd.unique(
    disease_df[['Symptom_1', 'Symptom_2', 'Symptom_3']].values.ravel()
)

for sym in all_symptoms:
    disease_df[f"symptom_{sym}"] = disease_df[['Symptom_1', 'Symptom_2', 'Symptom_3']].isin([sym]).any(axis=1).astype(int)

# Feature and Target Selection
features = ['Heart_Rate_bpm', 'Body_Temperature_C',
            'Oxygen_Saturation_%', 'BP_Systolic', 'BP_Diastolic'] + \
           [f"symptom_{sym}" for sym in all_symptoms]

X = disease_df[features]
# Target Columns
y = disease_df[['Diagnosis', 'Severity']].copy()

# Encode Targets
svm_diagnosis_encoder = LabelEncoder()
svm_severity_encoder = LabelEncoder()

y['Diagnosis'] = svm_diagnosis_encoder.fit_transform(y['Diagnosis'])
y['Severity'] = svm_severity_encoder.fit_transform(y['Severity'])

# Scale Features
scaler_svm = StandardScaler()
X_scaled = scaler_svm.fit_transform(X)

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [22]:
# Combine Diagnosis and Severity into a single label
y_train_combined = y_train['Diagnosis'].astype(str) + '_' + y_train['Severity'].astype(str)

# Initialize SMOTE
smote = SMOTE(random_state=42)

# Fit and resample
X_resampled, y_resampled_combined = smote.fit_resample(X_train, y_train_combined)

# Split combined label back to Diagnosis and Severity
y_resampled = y_resampled_combined.str.split('_', expand=True)
y_resampled.columns = ['Diagnosis', 'Severity']
y_resampled = y_resampled.astype(int)

In [23]:
# Initialize SVM Classifier
svm = SVC(
    kernel='linear', 
    C=1.0,
    gamma='scale',
    class_weight='balanced',
    probability=True,
    random_state=42
)

multi_svm = MultiOutputClassifier(svm)

# Train Model
multi_svm.fit(X_resampled, y_resampled)

In [24]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the test set
y_pred = multi_svm.predict(X_test)

# Classification Report for Diagnosis
print("Diagnosis Classification Report:")
print(classification_report(
    y_test['Diagnosis'], 
    y_pred[:, 0], 
    target_names=svm_diagnosis_encoder.classes_
))

# Classification Report for Severity
print("Severity Classification Report:")
print(classification_report(
    y_test['Severity'], 
    y_pred[:, 1], 
    target_names=svm_severity_encoder.classes_
))

# Confusion Matrix for Diagnosis
print("Diagnosis Confusion Matrix:")
print(confusion_matrix(
    y_test['Diagnosis'], 
    y_pred[:, 0]
))

# Confusion Matrix for Severity
print("Severity Confusion Matrix:")
print(confusion_matrix(
    y_test['Severity'], 
    y_pred[:, 1]
))


Diagnosis Classification Report:
              precision    recall  f1-score   support

  Bronchitis       0.86      1.00      0.93        64
  CommonCold       0.98      0.93      0.95       238
         Flu       0.91      0.91      0.91        58
   Pneumonia       0.64      0.64      0.64        11
  ViralFever       0.84      0.93      0.89        29

    accuracy                           0.93       400
   macro avg       0.85      0.88      0.86       400
weighted avg       0.93      0.93      0.93       400

Severity Classification Report:
              precision    recall  f1-score   support

        Mild       0.96      0.93      0.94       267
    Moderate       0.94      0.88      0.91        58
      Severe       0.75      0.87      0.80        75

    accuracy                           0.91       400
   macro avg       0.88      0.89      0.89       400
weighted avg       0.92      0.91      0.91       400

Diagnosis Confusion Matrix:
[[ 64   0   0   0   0]
 [  8 221   3 

In [10]:
import joblib

# Assuming these are your trained scaler and model
joblib.dump(scaler_svm, 'scaler_svm.pkl')
joblib.dump(multi_svm, 'svm_diagnosis_model.pkl')

['svm_diagnosis_model.pkl']

In [18]:
import joblib
import numpy as np
import pandas as pd

# Load saved scaler and model
scaler_svm = joblib.load('scaler_svm.pkl')
multi_svm = joblib.load('svm_diagnosis_model.pkl')

# List of all possible symptoms (adjust this to match your training set)
all_symptoms = [
    'Fatigue',
    'Sore throat',
    'Fever',
    'Cough',
    'Body ache',
    'Shortness of breath',
    'Headache',
    'Runny nose'
]

# Define mappings for diagnosis and severity
diagnosis_labels = {
    0: "Common Cold",
    1: "Bronchitis",
    2: "Viral Fever",
    3: "Flu",
    4: "Pneumonia"
}

severity_labels = {
    0: "Mild",
    1: "Moderate",
    2: "Severe"
}


def create_feature_vector(heart_rate, body_temp, oxy_saturation, bp_systolic, bp_diastolic, symptom1, symptom2, symptom3):
    # Create a binary vector for symptoms
    symptom_vector = [0] * len(all_symptoms)
    for sym in [symptom1, symptom2, symptom3]:
        if sym in all_symptoms:
            index = all_symptoms.index(sym)
            symptom_vector[index] = 1

    # Append other numeric features
    additional_features = [heart_rate, body_temp, oxy_saturation, bp_systolic, bp_diastolic]
    full_features = additional_features + symptom_vector 
    
    # Scale features (assumes the same scaler used during training)
    scaled_features = scaler_svm.transform([full_features])

    return scaled_features

# Example user input

heart_rate = int(input("Enter Heart Rate in mmHg: "))
body_temp = float(input("Enter Body Temperature in Celsius: "))	
oxy_saturation = int(input("Enter Oxygen Saturation Percentage: "))

# Accept blood pressure in "systolic/diastolic" format
bp_input = input("Enter Blood Pressure (Systolic/Diastolic): ")
try:
    bp_systolic, bp_diastolic = map(int, bp_input.strip().split('/'))
except ValueError:
    print("Invalid format! Please enter as Systolic/Diastolic (e.g. 132/91)")
    exit()
symptom1 = input("Enter Symptom A: ")
symptom2 = input("Enter Symptom B: ")
symptom3 = input("Enter Symptom C: ")

# Create feature vector
X_input = create_feature_vector(heart_rate, body_temp, oxy_saturation, bp_systolic, bp_diastolic, symptom1, symptom2, symptom3)

print("Input Vector before prediction: ", X_input)

# Make prediction
prediction = multi_svm.predict(X_input)

# Map numerical prediction to labels
diagnosis = diagnosis_labels.get(prediction[0][0], "Unknown Diagnosis")
severity = severity_labels.get(prediction[0][1], "Unknown Severity")

print(f"\nUser inputs: {symptom1}, {symptom2}, {symptom3}, {heart_rate}, {body_temp}, {oxy_saturation}, {bp_input}\n")

print(f"Predicted Diagnosis: {diagnosis}, {severity}")

Input Vector before prediction:  [[-1.60131214  0.73233658  1.22557274  0.12582099 -0.19307305  1.28277147
  -0.73138185 -0.79204754  1.25988973 -0.76471305 -0.77956208  1.28687315
  -0.77956208]]

User inputs: Fatigue, Cough, Headache, 62, 38.7, 98, 139/86

Predicted Diagnosis: Bronchitis, Mild




In [16]:
diagnosis_classes = ['CommonCold', 'Pneumonia','Flu', 'Bronchitis', 'ViralFever']
severity_classes = ['Mild', 'Moderate', 'Severe']

# Get prediction probabilities
probs = multi_svm.predict_proba(X_input)

# Map to labels
diagnosis_probs = dict(zip(diagnosis_classes, probs[0][0]))
severity_probs = dict(zip(severity_classes, probs[1][0]))

print("Diagnosis Probabilities:")
for label, prob in diagnosis_probs.items():
    print(f"{label}: {prob:.4f}")

print("\nSeverity Probabilities:")
for label, prob in severity_probs.items():
    print(f"{label}: {prob:.4f}")


Diagnosis Probabilities:
CommonCold: 0.9977
Pneumonia: 0.0001
Flu: 0.0000
Bronchitis: 0.0020
ViralFever: 0.0003

Severity Probabilities:
Mild: 0.0000
Moderate: 0.0000
Severe: 1.0000
