<a href="https://colab.research.google.com/github/Shreya-singh01/HealthGuard/blob/main/Disease_Detection_and_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas numpy scikit-learn imblearn tensorflow
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')

# Load the raw patient records; the path is relative to the notebook's working directory.
data = pd.read_csv('DiseasePredictionDataset_500.csv')

def preprocess_data(df):
    """Turn the raw patient table into a numeric feature frame.

    The function splits ``BloodPressure`` ("systolic/diastolic") into two
    numeric columns, derives ``Pulse_Pressure`` and ``BP_Category``,
    label-encodes ``Gender``, one-hot encodes the comma-separated ``Symptoms``
    column and mean-imputes the continuous vitals.

    Args:
        df: Raw records containing ``BloodPressure``, ``Gender``, ``Symptoms``
            and the vitals listed in ``numeric_columns`` below. The caller's
            frame is left untouched (a copy is processed).

    Returns:
        A tuple ``(features, le_gender, all_symptoms)`` where ``features`` is
        the processed frame (every other input column, e.g. the label, is
        passed through), ``le_gender`` is the fitted ``LabelEncoder`` and
        ``all_symptoms`` is the alphabetically sorted list of symptom names,
        in the same order as the ``Symptom_*`` columns.

    Raises:
        ValueError: if a blood-pressure part is not numeric, or a systolic
            value falls outside the ``(0, 300]`` bin range.
    """
    # Work on a copy so re-running the cell does not re-encode already
    # processed data.
    df = df.copy()

    df[['Systolic_BP', 'Diastolic_BP']] = df['BloodPressure'].str.split('/', expand=True)
    df['Systolic_BP'] = pd.to_numeric(df['Systolic_BP'])
    df['Diastolic_BP'] = pd.to_numeric(df['Diastolic_BP'])
    df['Pulse_Pressure'] = df['Systolic_BP'] - df['Diastolic_BP']

    # Impute BEFORE binning: casting the binned category to int fails on NaN,
    # so a single missing blood pressure used to abort the whole run.
    imputer = SimpleImputer(strategy='mean')
    numeric_columns = ['Age', 'HeartRate', 'RespirationRate', 'Temperature',
                      'Systolic_BP', 'Diastolic_BP', 'Pulse_Pressure']
    df[numeric_columns] = imputer.fit_transform(df[numeric_columns])

    df['BP_Category'] = pd.cut(df['Systolic_BP'],
                              bins=[0, 120, 130, 140, 180, 300],
                              labels=[0, 1, 2, 3, 4]).astype(int)

    le_gender = LabelEncoder()
    df['Gender'] = le_gender.fit_transform(df['Gender'])

    # Tokenise each row once. Flags are set by exact token membership, not by
    # substring search, so a symptom whose name is contained in another
    # symptom's name is no longer flagged by mistake.
    df['Symptoms'] = df['Symptoms'].fillna('None')
    symptom_sets = [
        set() if raw == 'None'
        else {token.strip() for token in raw.split(',') if token.strip()}
        for raw in df['Symptoms']
    ]

    # Sorted so the column order is reproducible across interpreter sessions
    # (set iteration order is not).
    all_symptoms = sorted(set().union(*symptom_sets))

    # Build all indicator columns at once and append them after the existing
    # columns, which is where the per-column assignment used to put them.
    indicators = pd.DataFrame(
        {
            f'Symptom_{symptom}': [int(symptom in row) for row in symptom_sets]
            for symptom in all_symptoms
        },
        index=df.index,
    )
    df = pd.concat([df, indicators], axis=1)

    df = df.drop(['BloodPressure', 'Symptoms'], axis=1)

    return df, le_gender, all_symptoms

# Pipeline variant that resamples inside each CV fold (imblearn is already a
# dependency of this notebook).
from imblearn.pipeline import make_pipeline as make_sampling_pipeline

# --- Features and labels ---------------------------------------------------
processed_df, le_gender, all_symptoms = preprocess_data(data)
le_disease = LabelEncoder()
y = le_disease.fit_transform(processed_df['Disease'])
X = processed_df.drop('Disease', axis=1)
n_classes = len(le_disease.classes_)

print("Class Distribution:")
print(pd.Series(le_disease.inverse_transform(y)).value_counts())

# --- Hold-out split, THEN oversampling -------------------------------------
# The test set is carved out before SMOTE so that no synthetic training sample
# is interpolated from a test row; otherwise the reported test accuracy is
# optimistic.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)
X_test = X_test.copy()  # independent frame so the scaled columns can be assigned

smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X_train_raw, y_train_raw)

# SMOTE returns the original rows followed by the synthetic ones, and Keras'
# validation_split takes the tail of the data, so shuffle once here.
shuffled_order = np.random.RandomState(42).permutation(len(X_balanced))
X_train = X_balanced.iloc[shuffled_order].reset_index(drop=True)
y_train = np.asarray(y_balanced)[shuffled_order]

# --- Scaling (fit on training data only) -----------------------------------
scaler = StandardScaler()
numeric_columns = ['Age', 'HeartRate', 'RespirationRate', 'Temperature',
                  'Systolic_BP', 'Diastolic_BP', 'Pulse_Pressure']
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

# --- Random forest: feature importances ------------------------------------
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
importances = pd.DataFrame({'Feature': X.columns, 'Importance': rf.feature_importances_})
print("\nFeature Importances (Top 10):")
print(importances.sort_values('Importance', ascending=False).head(10))

# --- Neural network ----------------------------------------------------------
# num_classes is passed explicitly so the one-hot width always matches the
# softmax layer, even if the highest label is absent from one of the splits.
y_train_cat = to_categorical(y_train, num_classes=n_classes)
y_test_cat = to_categorical(y_test, num_classes=n_classes)

model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(n_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train_cat, epochs=100, batch_size=32,
                   validation_split=0.2, verbose=1)

test_loss, test_accuracy = model.evaluate(X_test, y_test_cat, verbose=0)
print(f"\nNeural Network Test Accuracy: {test_accuracy:.2f}")

# --- Random forest: cross-validation ---------------------------------------
# SMOTE runs inside each training fold via the pipeline, so the validation
# folds contain real records only.
rf_cv_pipeline = make_sampling_pipeline(SMOTE(random_state=42),
                                        RandomForestClassifier(random_state=42))
rf_cv_scores = cross_val_score(rf_cv_pipeline, X, y, cv=5)
print(f"\nRandom Forest CV Scores: {rf_cv_scores}")
print(f"Random Forest Average CV Score: {rf_cv_scores.mean():.2f} (+/- {rf_cv_scores.std() * 2:.2f})")

# --- Per-class report for the neural network --------------------------------
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_classes,
                            labels=np.arange(n_classes),
                            target_names=le_disease.classes_))
def predict_disease(age, gender, heart_rate, blood_pressure, respiration_rate,
                   temperature, symptoms_list=None):
    """Predict the most likely disease for a single patient.

    Relies on the notebook globals ``X`` (training feature frame, used for the
    column layout), ``le_gender``, ``le_disease``, ``scaler``,
    ``numeric_columns`` and ``model``.

    Args:
        age: Patient age.
        gender: Gender label; must be one the gender encoder was fitted on.
        heart_rate: Heart rate.
        blood_pressure: String of the form ``"systolic/diastolic"`` with
            integer parts, e.g. ``"130/85"``.
        respiration_rate: Respiration rate.
        temperature: Body temperature.
        symptoms_list: Optional iterable of symptom names. Names that do not
            correspond to a ``Symptom_*`` training column are ignored.

    Returns:
        ``(disease_name, confidence)`` where ``confidence`` is the top softmax
        probability expressed as a percentage.

    Raises:
        ValueError: if ``blood_pressure`` is malformed, the systolic value is
            outside ``(0, 300]``, or ``gender`` is unknown to the encoder.
    """
    try:
        systolic, diastolic = map(int, blood_pressure.split('/'))
    except ValueError as exc:
        raise ValueError(
            f"blood_pressure must look like '120/80', got {blood_pressure!r}"
        ) from exc
    # Values outside the bins have no category; fail with a clear message
    # instead of an opaque NaN-to-int conversion error.
    if not 0 < systolic <= 300:
        raise ValueError(f"systolic pressure {systolic} is outside the supported range (0, 300]")

    # One all-zero row laid out exactly like the training features.
    input_data = pd.DataFrame(0.0, index=[0], columns=X.columns)

    input_data['Age'] = age
    input_data['Gender'] = le_gender.transform([gender])[0]
    input_data['HeartRate'] = heart_rate
    input_data['Systolic_BP'] = systolic
    input_data['Diastolic_BP'] = diastolic
    input_data['Pulse_Pressure'] = systolic - diastolic
    input_data['BP_Category'] = pd.cut([systolic],
                                     bins=[0, 120, 130, 140, 180, 300],
                                     labels=[0, 1, 2, 3, 4]).astype(int)[0]
    input_data['RespirationRate'] = respiration_rate
    input_data['Temperature'] = temperature

    for symptom in symptoms_list or []:
        # Training names are whitespace-stripped, so strip here as well.
        symptom_column = f'Symptom_{symptom.strip()}'
        if symptom_column in input_data.columns:
            input_data[symptom_column] = 1

    input_data[numeric_columns] = scaler.transform(input_data[numeric_columns])

    pred_proba = model.predict(input_data, verbose=0)
    prediction = np.argmax(pred_proba, axis=1)
    confidence = float(np.max(pred_proba) * 100)

    return le_disease.inverse_transform(prediction)[0], confidence

# Example patient used to exercise predict_disease end to end.
sample_input = {
    'age': 45,
    'gender': 'Female',
    'heart_rate': 85,
    'blood_pressure': '130/85',
    'respiration_rate': 18,
    'temperature': 98.6,
    'symptoms': ['Headache', 'Fatigue'],
}

# Keyword arguments make the mapping from dict keys to parameters explicit.
disease, confidence = predict_disease(
    age=sample_input['age'],
    gender=sample_input['gender'],
    heart_rate=sample_input['heart_rate'],
    blood_pressure=sample_input['blood_pressure'],
    respiration_rate=sample_input['respiration_rate'],
    temperature=sample_input['temperature'],
    symptoms_list=sample_input['symptoms'],
)

print(f"\nPredicted Disease: {disease}")
print(f"Confidence: {confidence:.2f}%")

Class Distribution:
Typhoid          48
Anemia           44
Asthma           43
Influenza        36
Diabetes         35
COVID-19         34
Heart Disease    34
Bronchitis       33
Malaria          33
Hypertension     31
Tuberculosis     30
Pneumonia        27
Dengue           25
Stroke           24
Migraine         23
Name: count, dtype: int64

Feature Importances (Top 10):
                         Feature  Importance
31          Symptom_Night Sweats    0.052172
26            Symptom_Joint Pain    0.045533
16  Symptom_Sensitivity to Light    0.035521
22             Symptom_Pale Skin    0.033649
38              Symptom_Sweating    0.032684
2                      HeartRate    0.032522
23       Symptom_Chest Tightness    0.031518
17                 Symptom_Fever    0.031318
25            Symptom_Chest Pain    0.030350
24         Symptom_Loss of Smell    0.030349
Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.0462 - loss: 2.7528 - val_ac




Random Forest CV Scores: [0.93055556 0.90972222 0.95833333 0.92361111 0.91666667]
Random Forest Average CV Score: 0.93 (+/- 0.03)
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

Classification Report:
               precision    recall  f1-score   support

       Anemia       0.93      0.87      0.90        15
       Asthma       1.00      0.86      0.92         7
   Bronchitis       0.85      0.85      0.85        13
     COVID-19       0.89      0.80      0.84        10
       Dengue       1.00      1.00      1.00         5
     Diabetes       1.00      1.00      1.00         8
Heart Disease       0.82      1.00      0.90         9
 Hypertension       0.90      1.00      0.95         9
    Influenza       0.71      1.00      0.83         5
      Malaria       1.00      1.00      1.00         8
     Migraine       0.89      0.80      0.84        10
    Pneumonia       0.77      0.83      0.80        12
       Stroke       1.00      1.00      1.00         8
 T

In [None]:
import pickle
from google.colab import files

# Save the trained network and trigger a Colab download of the file.
model.save('disease_prediction_model.h5')
files.download('disease_prediction_model.h5')

# (file name, fitted object) pairs; each is pickled and then downloaded,
# in this order.
pickled_artifacts = (
    ('scaler.pkl', scaler),
    ('le_gender.pkl', le_gender),
    ('le_disease.pkl', le_disease),
    ('all_symptoms.pkl', all_symptoms),
)
for artifact_path, artifact in pickled_artifacts:
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)
    files.download(artifact_path)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files

# Save the current `model` to HDF5 and trigger a Colab download of the file.
# NOTE(review): this repeats the first two statements of the previous export cell.
model.save('disease_prediction_model.h5')
files.download('disease_prediction_model.h5')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!pip install scikeras tensorflow scikit-learn --upgrade
import scikeras
import tensorflow as tf
import sklearn

# Report the versions of the libraries imported above.
print(f"scikeras version: {scikeras.__version__}")
print(f"TensorFlow version: {tf.__version__}")
print(f"Scikit-learn version: {sklearn.__version__}")
!pip install tensorflow scikit-learn imbalanced-learn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')

# Load the data set variant that also carries a 'Recommendations' column
# (dropped again inside preprocess_data below).
data = pd.read_csv('DiseasePredictionDataset_500_with_recommendations.csv')

def preprocess_data(df):
    """Turn the raw patient table into a numeric feature frame.

    The function splits ``BloodPressure`` ("systolic/diastolic") into two
    numeric columns, derives ``Pulse_Pressure`` and ``BP_Category``,
    label-encodes ``Gender``, one-hot encodes the comma-separated ``Symptoms``
    column, drops the free-text ``Recommendations`` column and mean-imputes
    the continuous vitals.

    Args:
        df: Raw records containing ``BloodPressure``, ``Gender``, ``Symptoms``,
            ``Recommendations`` and the vitals listed in ``numeric_columns``
            below. The caller's frame is left untouched (a copy is processed).

    Returns:
        A tuple ``(features, le_gender, all_symptoms)`` where ``features`` is
        the processed frame (every other input column, e.g. the label, is
        passed through), ``le_gender`` is the fitted ``LabelEncoder`` and
        ``all_symptoms`` is the alphabetically sorted list of symptom names,
        in the same order as the ``Symptom_*`` columns.

    Raises:
        ValueError: if a blood-pressure part is not numeric, or a systolic
            value falls outside the ``(0, 300]`` bin range.
    """
    # Work on a copy so re-running the cell does not re-encode already
    # processed data.
    df = df.copy()

    df[['Systolic_BP', 'Diastolic_BP']] = df['BloodPressure'].str.split('/', expand=True)
    df['Systolic_BP'] = pd.to_numeric(df['Systolic_BP'])
    df['Diastolic_BP'] = pd.to_numeric(df['Diastolic_BP'])
    df['Pulse_Pressure'] = df['Systolic_BP'] - df['Diastolic_BP']

    # Impute BEFORE binning: casting the binned category to int fails on NaN,
    # so a single missing blood pressure used to abort the whole run.
    imputer = SimpleImputer(strategy='mean')
    numeric_columns = ['Age', 'HeartRate', 'RespirationRate', 'Temperature',
                      'Systolic_BP', 'Diastolic_BP', 'Pulse_Pressure']
    df[numeric_columns] = imputer.fit_transform(df[numeric_columns])

    df['BP_Category'] = pd.cut(df['Systolic_BP'],
                              bins=[0, 120, 130, 140, 180, 300],
                              labels=[0, 1, 2, 3, 4]).astype(int)

    le_gender = LabelEncoder()
    df['Gender'] = le_gender.fit_transform(df['Gender'])

    # Tokenise each row once. Flags are set by exact token membership, not by
    # substring search, so a symptom whose name is contained in another
    # symptom's name is no longer flagged by mistake.
    df['Symptoms'] = df['Symptoms'].fillna('None')
    symptom_sets = [
        set() if raw == 'None'
        else {token.strip() for token in raw.split(',') if token.strip()}
        for raw in df['Symptoms']
    ]

    # Sorted so the column order is reproducible across interpreter sessions
    # (set iteration order is not); the pickled list then encodes that order
    # for inference code that iterates it.
    all_symptoms = sorted(set().union(*symptom_sets))

    # Build all indicator columns at once and append them after the existing
    # columns, which is where the per-column assignment used to put them.
    indicators = pd.DataFrame(
        {
            f'Symptom_{symptom}': [int(symptom in row) for row in symptom_sets]
            for symptom in all_symptoms
        },
        index=df.index,
    )
    df = pd.concat([df, indicators], axis=1)

    df = df.drop(['BloodPressure', 'Symptoms', 'Recommendations'], axis=1)

    return df, le_gender, all_symptoms

# Build the feature frame and integer-encode the disease label.
processed_df, le_gender, all_symptoms = preprocess_data(data)
le_disease = LabelEncoder()
y = le_disease.fit_transform(processed_df['Disease'])
X = processed_df.drop('Disease', axis=1)

print("Class Distribution:")
print(pd.Series(le_disease.inverse_transform(y)).value_counts())

# Oversample every class with SMOTE.
# NOTE(review): this resamples the full data set; the train/test split and the
# CV folds are taken from the resampled data further down, so held-out rows can
# have synthetic neighbours in the training data. The reported scores are
# likely optimistic - consider splitting before resampling.
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X, y)

# Standardise the continuous vitals in place.
# NOTE(review): the scaler is fitted on all (resampled) rows, including those
# later used for validation/testing.
scaler = StandardScaler()
numeric_columns = ['Age', 'HeartRate', 'RespirationRate', 'Temperature',
                  'Systolic_BP', 'Diastolic_BP', 'Pulse_Pressure']
X_balanced[numeric_columns] = scaler.fit_transform(X_balanced[numeric_columns])

# Globals read by create_model() (n_classes, n_features).
y_balanced_cat = to_categorical(y_balanced)
n_classes = len(le_disease.classes_)
n_features = X_balanced.shape[1]

def create_model():
    """Build and compile a fresh, untrained classifier network.

    Architecture: three Dense -> BatchNormalization -> Dropout stages
    (512/256/128 units with dropout 0.5/0.4/0.3), a 64-unit Dense layer and a
    softmax output. Input width and output size come from the notebook globals
    ``n_features`` and ``n_classes``.

    Returns:
        The compiled ``Sequential`` model (Adam, categorical cross-entropy,
        accuracy metric).
    """
    network = Sequential()

    # (units, dropout rate) for each regularised hidden stage, in order.
    hidden_stages = ((512, 0.5), (256, 0.4), (128, 0.3))
    for stage_index, (units, drop_rate) in enumerate(hidden_stages):
        if stage_index == 0:
            # Only the first layer declares the input width.
            network.add(Dense(units, activation='relu', input_shape=(n_features,)))
        else:
            network.add(Dense(units, activation='relu'))
        network.add(BatchNormalization())
        network.add(Dropout(drop_rate))

    network.add(Dense(64, activation='relu'))
    network.add(Dense(n_classes, activation='softmax'))

    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return network

# Custom cross-validation
def custom_cross_val(X, y, n_folds=5, epochs=150, batch_size=16):
    """Run shuffled K-fold cross-validation with a fresh network per fold.

    Args:
        X: Feature frame (indexed positionally via ``.iloc``).
        y: Integer class labels aligned with ``X``; any array-like.
        n_folds: Number of folds.
        epochs: Training epochs per fold.
        batch_size: Mini-batch size per fold.

    Returns:
        ``numpy.ndarray`` holding the validation accuracy of each fold.
    """
    # Positional indexing below must not depend on a pandas index.
    y = np.asarray(y)

    kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    cv_scores = []

    for train_idx, val_idx in kf.split(X):
        X_train_fold, X_val_fold = X.iloc[train_idx], X.iloc[val_idx]

        # Fix the one-hot width to the model's output size (global n_classes,
        # also used by create_model); otherwise a fold that lacks the highest
        # label would produce a narrower target matrix.
        y_train_fold_cat = to_categorical(y[train_idx], num_classes=n_classes)
        y_val_fold_cat = to_categorical(y[val_idx], num_classes=n_classes)

        model = create_model()
        model.fit(X_train_fold, y_train_fold_cat, epochs=epochs, batch_size=batch_size,
                 verbose=0)

        val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold_cat, verbose=0)
        cv_scores.append(val_accuracy)

    return np.array(cv_scores)

# This cell re-imports everything else it needs but used these two names
# without importing them, so it failed on a fresh runtime.
import pickle
from sklearn.metrics import classification_report

# --- Cross-validated estimate ------------------------------------------------
cv_scores = custom_cross_val(X_balanced, y_balanced)
print(f"Cross-validation scores: {cv_scores}")
print(f"Average CV score: {cv_scores.mean():.2f} (+/- {cv_scores.std() * 2:.2f})")

# --- Final model on a single hold-out split ----------------------------------
# NOTE(review): the split is taken from the SMOTE-resampled, already scaled
# data, so test rows are not independent of the training rows.
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)
# Explicit width keeps the targets aligned with the softmax layer.
y_train_cat = to_categorical(y_train, num_classes=n_classes)
y_test_cat = to_categorical(y_test, num_classes=n_classes)

final_model = create_model()
final_model.fit(X_train, y_train_cat, epochs=150, batch_size=16, verbose=1)

test_loss, test_accuracy = final_model.evaluate(X_test, y_test_cat, verbose=0)
print(f"\nTest Accuracy: {test_accuracy:.2f}")

y_pred = final_model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_classes, target_names=le_disease.classes_))

# --- Persist the model and the fitted preprocessing objects -------------------
final_model.save('improved_disease_prediction_model.h5')
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
with open('le_gender.pkl', 'wb') as f:
    pickle.dump(le_gender, f)
with open('le_disease.pkl', 'wb') as f:
    pickle.dump(le_disease, f)
with open('all_symptoms.pkl', 'wb') as f:
    pickle.dump(all_symptoms, f)

scikeras version: 0.13.0
TensorFlow version: 2.18.0
Scikit-learn version: 1.6.1
Class Distribution:
Typhoid          48
Anemia           44
Asthma           43
Influenza        36
Diabetes         35
COVID-19         34
Heart Disease    34
Bronchitis       33
Malaria          33
Hypertension     31
Tuberculosis     30
Pneumonia        27
Dengue           25
Stroke           24
Migraine         23
Name: count, dtype: int64
Cross-validation scores: [0.90972221 0.88194442 0.91666669 0.9236111  0.9236111 ]
Average CV score: 0.91 (+/- 0.03)
Epoch 1/150
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.0906 - loss: 3.2598
Epoch 2/150
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.2366 - loss: 2.4165
Epoch 3/150
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3998 - loss: 1.9186
Epoch 4/150
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.




Classification Report:
               precision    recall  f1-score   support

       Anemia       0.93      0.87      0.90        15
       Asthma       0.86      0.86      0.86         7
   Bronchitis       0.90      0.69      0.78        13
     COVID-19       1.00      0.80      0.89        10
       Dengue       1.00      1.00      1.00         5
     Diabetes       1.00      1.00      1.00         8
Heart Disease       0.88      0.78      0.82         9
 Hypertension       1.00      1.00      1.00         9
    Influenza       0.71      1.00      0.83         5
      Malaria       1.00      1.00      1.00         8
     Migraine       0.83      1.00      0.91        10
    Pneumonia       0.71      1.00      0.83        12
       Stroke       1.00      1.00      1.00         8
 Tuberculosis       1.00      0.83      0.91        12
      Typhoid       1.00      1.00      1.00        13

     accuracy                           0.91       144
    macro avg       0.92      0.92     

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import pickle

# Restore the trained network and the fitted preprocessing objects from disk.
# NOTE(review): pickle.load executes code from the file - only load artefacts
# produced by this notebook.
model = load_model('improved_disease_prediction_model.h5')
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)
with open('le_gender.pkl', 'rb') as f:
    le_gender = pickle.load(f)
with open('le_disease.pkl', 'rb') as f:
    le_disease = pickle.load(f)
with open('all_symptoms.pkl', 'rb') as f:
    all_symptoms = pickle.load(f)

data = pd.read_csv('DiseasePredictionDataset_500_with_recommendations.csv')

# Data-set recommendation per disease: the first 'Recommendations' value found
# for each disease. Used as a fallback when no custom text exists.
disease_recommendations = data.groupby('Disease')['Recommendations'].first().to_dict()

# Hand-written recommendation per disease; looked up before the data-set text.
custom_recommendations = {
    'Diabetes': "Monitor blood sugar, follow a low-carb diet, exercise daily (e.g., 30 minutes of walking), and increase water intake.",
    'Hypertension': "Reduce salt intake, exercise regularly (e.g., yoga or light cardio), manage stress, and maintain a balanced diet.",
    'Asthma': "Avoid triggers, use prescribed inhalers, practice breathing exercises (e.g., diaphragmatic breathing), and ensure good air quality.",
    'Anemia': "Eat iron-rich foods like spinach and red meat, take iron supplements, and correct sleep cycle for better recovery.",
    'Heart Disease': "Maintain a balanced diet, avoid smoking, engage in light exercise (e.g., walking), and monitor stress levels.",
    'Migraine': "Reduce screen time, stay hydrated, avoid loud noises, and practice relaxation techniques (e.g., meditation).",
    'COVID-19': "Isolate, stay hydrated, consult a doctor if symptoms worsen, and rest adequately.",
    'Influenza': "Consult a healthcare professional for further guidance, rest, increase water intake, and maintain a balanced diet.",
    'Bronchitis': "Consult a healthcare professional for further guidance, rest, stay hydrated, and avoid irritants.",
    'Pneumonia': "Consult a healthcare professional for further guidance, rest, stay hydrated, and maintain a balanced diet.",
    'Tuberculosis': "Consult a healthcare professional for further guidance, rest, and follow prescribed treatment.",
    'Malaria': "Consult a healthcare professional for further guidance, rest, stay hydrated, and maintain a balanced diet.",
    'Dengue': "Consult a healthcare professional for further guidance, rest, stay hydrated, and avoid strenuous activity.",
    'Stroke': "Consult a healthcare professional for further guidance, rest, and follow medical advice for recovery.",
    'Typhoid': "Consult a healthcare professional for further guidance, rest, stay hydrated, and maintain a balanced diet."
}

# Messages returned (one picked at random) when the model's confidence is
# below the threshold in recommend().
healthy_affirmations = [
    "You're doing great! Keep up your healthy lifestyle, stay hydrated, and maintain a regular sleep cycle.",
    "Looking good! Stay active, eat well, and ensure you get enough rest.",
    "Fantastic! Your health is on the right track—continue with good habits like exercise and proper hydration."
]

def preprocess_input(age, gender, heart_rate, blood_pressure, respiration_rate,
                    temperature, symptoms_list=None):
    """Build the single-row feature frame the trained network expects.

    Relies on the notebook globals ``data`` (the raw CSV, used only for its
    column order), ``all_symptoms``, ``le_gender`` and ``scaler``.

    Args:
        age: Patient age.
        gender: Gender label; must be one the gender encoder was fitted on.
        heart_rate: Heart rate.
        blood_pressure: String of the form ``"systolic/diastolic"`` with
            integer parts, e.g. ``"130/85"``.
        respiration_rate: Respiration rate.
        temperature: Body temperature.
        symptoms_list: Optional iterable of symptom names. Names without a
            matching ``Symptom_*`` column are ignored.

    Returns:
        One-row ``DataFrame`` with scaled vitals, laid out in training order.

    Raises:
        ValueError: if ``blood_pressure`` is malformed, the systolic value is
            outside ``(0, 300]``, or ``gender`` is unknown to the encoder.
    """
    try:
        systolic, diastolic = map(int, blood_pressure.split('/'))
    except ValueError as exc:
        raise ValueError(
            f"blood_pressure must look like '120/80', got {blood_pressure!r}"
        ) from exc
    # Values outside the bins have no category; fail with a clear message
    # instead of an opaque NaN-to-int conversion error.
    if not 0 < systolic <= 300:
        raise ValueError(f"systolic pressure {systolic} is outside the supported range (0, 300]")

    # The network consumes features by POSITION, so the layout must mirror the
    # training frame: the raw CSV columns that survive preprocessing, then the
    # engineered blood-pressure columns (training appends them after the raw
    # columns), then the symptom flags. The previous hard-coded list placed the
    # blood-pressure columns before RespirationRate/Temperature.
    removed_in_training = {'BloodPressure', 'Symptoms', 'Recommendations', 'Disease'}
    engineered_columns = ['Systolic_BP', 'Diastolic_BP', 'Pulse_Pressure', 'BP_Category']
    all_columns = [
        column for column in data.columns
        if column not in removed_in_training and column not in engineered_columns
    ]
    all_columns.extend(engineered_columns)
    # NOTE(review): symptom columns follow the iteration order of the loaded
    # `all_symptoms`; this must equal the order used at training time - confirm
    # the artefact stores an ordered sequence rather than a set.
    all_columns.extend(f'Symptom_{symptom}' for symptom in all_symptoms)

    input_data = pd.DataFrame(0.0, index=[0], columns=all_columns)

    input_data['Age'] = age
    input_data['Gender'] = le_gender.transform([gender])[0]
    input_data['HeartRate'] = heart_rate
    input_data['Systolic_BP'] = systolic
    input_data['Diastolic_BP'] = diastolic
    input_data['Pulse_Pressure'] = systolic - diastolic
    input_data['BP_Category'] = pd.cut([systolic],
                                     bins=[0, 120, 130, 140, 180, 300],
                                     labels=[0, 1, 2, 3, 4]).astype(int)[0]
    input_data['RespirationRate'] = respiration_rate
    input_data['Temperature'] = temperature

    for symptom in symptoms_list or []:
        # Training names are whitespace-stripped, so strip here as well.
        symptom_col = f'Symptom_{symptom.strip()}'
        if symptom_col in input_data.columns:
            input_data[symptom_col] = 1

    numeric_columns = ['Age', 'HeartRate', 'RespirationRate', 'Temperature',
                      'Systolic_BP', 'Diastolic_BP', 'Pulse_Pressure']
    input_data[numeric_columns] = scaler.transform(input_data[numeric_columns])

    return input_data

def recommend(age, gender, heart_rate, blood_pressure, respiration_rate,
              temperature, symptoms_list=None, confidence_threshold=0.7):
    """Predict a disease for one patient and return a matching recommendation.

    Args:
        age, gender, heart_rate, blood_pressure, respiration_rate, temperature,
        symptoms_list: Forwarded unchanged to ``preprocess_input``.
        confidence_threshold: Minimum top softmax probability (0-1) required
            to report the predicted disease.

    Returns:
        ``(label, message, confidence_percent)``. When the top probability
        reaches the threshold, ``label`` is the predicted disease and
        ``message`` its recommendation (custom text first, then the data-set
        text, then a generic fallback). Otherwise ``label`` is
        ``"No significant disease detected"`` and ``message`` is a randomly
        chosen affirmation.
    """
    features = preprocess_input(age, gender, heart_rate, blood_pressure,
                                respiration_rate, temperature, symptoms_list)

    class_probabilities = model.predict(features, verbose=0)
    top_class = np.argmax(class_probabilities, axis=1)
    confidence = np.max(class_probabilities)
    predicted_disease = le_disease.inverse_transform(top_class)[0]

    # NOTE(review): a low top probability is reported as "healthy" although the
    # model has no healthy class - confirm this is the intended behaviour.
    if not confidence >= confidence_threshold:
        affirmation = np.random.choice(healthy_affirmations)
        return "No significant disease detected", affirmation, confidence * 100

    dataset_text = disease_recommendations.get(
        predicted_disease,
        "Consult a healthcare professional for further guidance.")
    recommendation = custom_recommendations.get(predicted_disease, dataset_text)
    return predicted_disease, recommendation, confidence * 100

# Example 1: a patient reporting two symptoms.
sample_input = {
    'age': 45,
    'gender': 'Female',
    'heart_rate': 85,
    'blood_pressure': '130/85',
    'respiration_rate': 18,
    'temperature': 98.6,
    'symptoms': ['Headache', 'Fatigue'],
}

# Keyword arguments make the mapping from dict keys to parameters explicit.
disease, recommendation, confidence = recommend(
    age=sample_input['age'],
    gender=sample_input['gender'],
    heart_rate=sample_input['heart_rate'],
    blood_pressure=sample_input['blood_pressure'],
    respiration_rate=sample_input['respiration_rate'],
    temperature=sample_input['temperature'],
    symptoms_list=sample_input['symptoms'],
)

print(f"Predicted Disease: {disease}")
print(f"Recommendation: {recommendation}")
print(f"Confidence: {confidence:.2f}%")

# Example 2: typical vitals and no reported symptoms.
healthy_input = {
    'age': 30,
    'gender': 'Male',
    'heart_rate': 70,
    'blood_pressure': '120/80',
    'respiration_rate': 16,
    'temperature': 98.6,
    'symptoms': [],
}

disease, recommendation, confidence = recommend(
    age=healthy_input['age'],
    gender=healthy_input['gender'],
    heart_rate=healthy_input['heart_rate'],
    blood_pressure=healthy_input['blood_pressure'],
    respiration_rate=healthy_input['respiration_rate'],
    temperature=healthy_input['temperature'],
    symptoms_list=healthy_input['symptoms'],
)

print(f"\nPredicted Disease: {disease}")
print(f"Recommendation: {recommendation}")
print(f"Confidence: {confidence:.2f}%")



Predicted Disease: No significant disease detected
Recommendation: Looking good! Stay active, eat well, and ensure you get enough rest.
Confidence: 44.39%

Predicted Disease: Influenza
Recommendation: Consult a healthcare professional for further guidance, rest, increase water intake, and maintain a balanced diet.
Confidence: 97.31%


In [None]:
import joblib
from google.colab import files
from tensorflow.keras.models import load_model

# This cell unpickles four artefacts but its import block only brings in
# joblib, so import pickle here to keep the cell runnable on its own.
import pickle

# Restore the trained network and the fitted preprocessing objects from disk.
# NOTE(review): pickle.load executes code from the file - only load artefacts
# produced by this notebook.
model = load_model('improved_disease_prediction_model.h5')
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)
with open('le_gender.pkl', 'rb') as f:
    le_gender = pickle.load(f)
with open('le_disease.pkl', 'rb') as f:
    le_disease = pickle.load(f)
with open('all_symptoms.pkl', 'rb') as f:
    all_symptoms = pickle.load(f)

import pandas as pd
data = pd.read_csv('DiseasePredictionDataset_500_with_recommendations.csv')
# Data-set recommendation per disease: the first 'Recommendations' value found
# for each disease.
disease_recommendations = data.groupby('Disease')['Recommendations'].first().to_dict()

# Hand-written recommendation per disease.
custom_recommendations = {
    'Diabetes': "Monitor blood sugar, follow a low-carb diet, exercise daily (e.g., 30 minutes of walking), and increase water intake.",
    'Hypertension': "Reduce salt intake, exercise regularly (e.g., yoga or light cardio), manage stress, and maintain a balanced diet.",
    'Asthma': "Avoid triggers, use prescribed inhalers, practice breathing exercises (e.g., diaphragmatic breathing), and ensure good air quality.",
    'Anemia': "Eat iron-rich foods like spinach and red meat, take iron supplements, and correct sleep cycle for better recovery.",
    'Heart Disease': "Maintain a balanced diet, avoid smoking, engage in light exercise (e.g., walking), and monitor stress levels.",
    'Migraine': "Reduce screen time, stay hydrated, avoid loud noises, and practice relaxation techniques (e.g., meditation).",
    'COVID-19': "Isolate, stay hydrated, consult a doctor if symptoms worsen, and rest adequately.",
    'Influenza': "Consult a healthcare professional for further guidance, rest, increase water intake, and maintain a balanced diet.",
    'Bronchitis': "Consult a healthcare professional for further guidance, rest, stay hydrated, and avoid irritants.",
    'Pneumonia': "Consult a healthcare professional for further guidance, rest, stay hydrated, and maintain a balanced diet.",
    'Tuberculosis': "Consult a healthcare professional for further guidance, rest, and follow prescribed treatment.",
    'Malaria': "Consult a healthcare professional for further guidance, rest, stay hydrated, and maintain a balanced diet.",
    'Dengue': "Consult a healthcare professional for further guidance, rest, stay hydrated, and avoid strenuous activity.",
    'Stroke': "Consult a healthcare professional for further guidance, rest, and follow medical advice for recovery.",
    'Typhoid': "Consult a healthcare professional for further guidance, rest, stay hydrated, and maintain a balanced diet."
}

# Generic messages for the low-confidence case.
healthy_affirmations = [
    "You're doing great! Keep up your healthy lifestyle, stay hydrated, and maintain a regular sleep cycle.",
    "Looking good! Stay active, eat well, and ensure you get enough rest.",
    "Fantastic! Your health is on the right track—continue with good habits like exercise and proper hydration."
]

# Everything needed at inference time, gathered into one dict.
recommendation_model = {
    'model': model,
    'scaler': scaler,
    'le_gender': le_gender,
    'le_disease': le_disease,
    'all_symptoms': all_symptoms,
    'disease_recommendations': disease_recommendations,
    'custom_recommendations': custom_recommendations,
    'healthy_affirmations': healthy_affirmations
}

# Serialise the bundle to a single file and trigger a Colab download.
# NOTE(review): this pickles the Keras model inside the joblib file - confirm
# the consuming environment can load it with its TensorFlow/Keras version.
joblib.dump(recommendation_model, 'recommendation_model.pkl')
files.download('recommendation_model.pkl')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>