In [43]:
# =====================
# 1. Setup & Imports
# =====================
from google.colab import drive
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
import os

In [50]:
# Make Google Drive visible inside the Colab runtime; the dataset is read from it below.
drive.mount('/content/drive')

# =====================
# 2. Load Dataset
# =====================
# Read the disease/symptom table into a DataFrame; the cell's last expression
# displays its (rows, columns) shape.
dataset_path = '/content/drive/My Drive/DiseaseAndSymptoms.csv'
df = pd.read_csv(dataset_path)
df.shape

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


(4920, 18)

In [22]:
# =====================
# 3. Data Cleaning & Preprocessing
# =====================
# Names of the raw symptom columns: every column label starting with 'Symptom_'.
# Iterating a DataFrame yields its column labels.
symptom_cols = [label for label in df if label.startswith('Symptom_')]

# Function to clean symptoms per row (drop non-strings and blanks, strip whitespace, lower-case)
def clean_symptoms(row):
    """Return the normalised symptom names recorded in one dataset row.

    Reads every column listed in the module-level ``symptom_cols`` from
    ``row`` and keeps only string values, stripped of surrounding whitespace
    and lower-cased. Non-string cells (for example the float NaN pandas uses
    for missing values) are skipped, and so are cells that are empty once
    stripped, so a blank entry can never become a symptom named ''.

    Args:
        row: Object indexable by column name (a DataFrame row when called
            through ``df.apply(..., axis=1)``).

    Returns:
        list[str]: Cleaned symptom names in ``symptom_cols`` order;
        duplicates within the row are preserved.
    """
    cleaned = (
        row[col].strip().lower()
        for col in symptom_cols
        if isinstance(row[col], str)
    )
    # Drop values that were only whitespace so '' never becomes a feature.
    return [symptom for symptom in cleaned if symptom]

# Apply the cleaning function: one list of normalised symptom names per row.
df['all_symptoms'] = df.apply(clean_symptoms, axis=1)

# Create a sorted list of all unique symptoms.
# A set comprehension derives the vocabulary in a single expression, without
# mutating a set from inside Series.apply (which would also build and discard
# a Series of None values).
all_symptoms = sorted(
    {symptom for row_symptoms in df['all_symptoms'] for symptom in row_symptoms}
)


In [26]:
# One-hot encode symptoms into binary columns.
# All indicator columns are assembled in one frame and attached with a single
# concat: inserting 100+ columns one at a time fragments the DataFrame and
# makes pandas emit a PerformanceWarning.
symptom_onehot = pd.DataFrame(
    {
        symptom: [int(symptom in row_symptoms) for row_symptoms in df['all_symptoms']]
        for symptom in all_symptoms
    },
    index=df.index,
)
# Drop indicator columns left over from a previous run of this cell so that
# re-running it replaces them instead of duplicating them.
df = pd.concat(
    [df.drop(columns=all_symptoms, errors='ignore'), symptom_onehot],
    axis=1,
)

# Encode disease labels as integer class ids; le.classes_ maps ids back to names.
le = LabelEncoder()
df['disease_encoded'] = le.fit_transform(df['Disease'])



  df['disease_encoded'] = le.fit_transform(df['Disease'])


In [35]:
# Features and target
# Feature matrix: the symptom indicator columns, selected in all_symptoms order.
df_X = df.loc[:, all_symptoms]
# Target vector: the encoded disease label column.
df_y = df.loc[:, 'disease_encoded']

131

In [34]:
# =====================
# 4. Train-Test Split
# =====================

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions alike in both parts; the fixed random_state makes the
# split repeatable.
split_parts = train_test_split(
    df_X,
    df_y,
    test_size=0.2,
    random_state=42,
    stratify=df_y,
)
X_train, X_test, y_train, y_test = split_parts
X_train.shape, X_test.shape

((3936, 131), (984, 131))

In [40]:
# =====================
# 5. Build TensorFlow Model
# =====================

# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so initialisation, dropout masks and batch shuffling are
# repeatable across Restart & Run All (the train/test split is already seeded
# with 42). Note this sets the global seeds for the whole kernel.
tf.keras.utils.set_random_seed(42)

# Feed-forward classifier: one input per symptom feature column, one softmax
# probability per disease class known to the label encoder.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(len(le.classes_), activation='softmax')
])
# Sparse categorical cross-entropy because the targets are integer class ids
# rather than one-hot vectors.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

In [41]:
# =====================
# 6. Train the Model
# =====================

# Fit for 15 epochs in mini-batches of 32, holding out 10% of the training
# data for per-epoch validation metrics. The returned History object is kept
# in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=15,
    validation_split=0.1,
)

Epoch 1/15
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.3284 - loss: 3.1838 - val_accuracy: 0.9949 - val_loss: 0.6283
Epoch 2/15
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9944 - loss: 0.4148 - val_accuracy: 1.0000 - val_loss: 0.0399
Epoch 3/15
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9994 - loss: 0.0573 - val_accuracy: 1.0000 - val_loss: 0.0131
Epoch 4/15
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9997 - loss: 0.0241 - val_accuracy: 1.0000 - val_loss: 0.0081
Epoch 5/15
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0141 - val_accuracy: 1.0000 - val_loss: 0.0039
Epoch 6/15
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0089 - val_accuracy: 1.0000 - val_loss: 0.0027
Epoch 7/15
[1m111/111[0m 

In [42]:

# =====================
# 7. Evaluate the Model
# =====================

# model.evaluate returns the loss followed by the compiled metrics (accuracy
# only here), computed on the held-out test split.
# NOTE(review): the recorded run shows a perfect test score — worth checking
# whether identical rows appear in both the train and test parts before
# trusting it.
eval_results = model.evaluate(x=X_test, y=y_test)
loss, accuracy = eval_results
print(f"Test Accuracy: {accuracy:.4f}")

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 2.2410e-04 
Test Accuracy: 1.0000


In [60]:
# =====================
# 8. Save the Model for Deployment
# =====================

model_dir = 'symptom2risk'
os.makedirs(model_dir, exist_ok=True)
# Build the path with os.path.join, as the label-map export does, instead of
# concatenating separators by hand. The file name is kept unchanged
# ('symptom2riskmodel.keras') so anything that already loads it keeps working.
# NOTE(review): the directory name is repeated inside the file name — confirm
# whether something like 'symptom2risk/model.keras' was intended.
model_path = os.path.join(model_dir, model_dir + 'model.keras')
model.save(model_path)
# Report the actual file written, not just its directory.
print(f"Model saved to {model_path}")

Model saved to symptom2risk/


In [61]:
# =====================
# 9. Export Label Mapping (for inference)
# =====================

# Write the label encoder's classes as a two-column CSV (Class_ID, Disease);
# the Series' default integer index becomes the Class_ID column.
# NOTE(review): the feature column order (all_symptoms) is not exported in the
# cells visible here — inference needs the same order; confirm it is saved
# elsewhere.
label_map_path = os.path.join(model_dir, 'label_map.csv')
label_map = pd.Series(le.classes_)
label_map.to_csv(label_map_path, header=['Disease'], index_label='Class_ID')
print(f"Label map saved to {label_map_path}")

Label map saved to symptom2risk/label_map.csv
