<a href="https://colab.research.google.com/github/shruti629/Deep-Health-Analysis-and-Disease-Prediction/blob/main/Medicine_(Vedic_%26_English)_Based_upon_Disease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pickle
import warnings

import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

warnings.filterwarnings(action='ignore')

In [None]:
# Read the disease -> medicine table into `rd` and preview the first rows.
# NOTE(review): the path lives under /content/drive, which presumably requires
# Google Drive to be mounted first; no mount step is visible here -- confirm.
rd = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/diseases_medicine.csv')
rd.head()

Unnamed: 0,Disease,Type,English Medicine,Vedic Medicine,Unnamed: 4
0,Diabetes,Chronic,"Metformin, Insulin","Bitter gourd juice, Fenugreek seeds",
1,Hypertension,Chronic,"ACE inhibitors, Beta-blockers","Sarpagandha, Ashwagandha",
2,Asthma,Chronic,"Inhaled corticosteroids, Beta agonists","Tulsi, Licorice root",
3,Arthritis,Chronic,"NSAIDs, DMARDs","Ginger, Turmeric",
4,Depression,Mental,"SSRIs, SNRIs","Brahmi, Ashwagandha",


In [None]:
# Encode categorical variables: one LabelEncoder per text column, each fitted
# on that column and used to overwrite it with integer codes.
# NOTE: this replaces the text columns of `rd` in place, so re-running the cell
# would re-fit the encoders on the integer codes instead of the original text.
le_disease, le_type = LabelEncoder(), LabelEncoder()
le_english_medicine, le_vedic_medicine = LabelEncoder(), LabelEncoder()

for _column, _encoder in (
    ('Disease', le_disease),
    ('Type', le_type),
    ('English Medicine', le_english_medicine),
    ('Vedic Medicine', le_vedic_medicine),
):
    rd[_column] = _encoder.fit_transform(rd[_column])


In [None]:
# Features are the encoded disease and type; each medicine column is a
# separate classification target.
X = rd.loc[:, ['Disease', 'Type']]
y_english_medicine, y_vedic_medicine = rd['English Medicine'], rd['Vedic Medicine']

In [None]:
# Print how many rows carry each encoded label, English target first,
# then Vedic.
for _target in (y_english_medicine, y_vedic_medicine):
    print(_target.value_counts())

English Medicine
12    3
7     3
37    2
47    1
43    1
     ..
75    1
61    1
80    1
81    1
93    1
Name: count, Length: 94, dtype: int64
Vedic Medicine
33    6
55    4
7     2
43    2
68    2
     ..
80    1
74    1
70    1
49    1
84    1
Name: count, Length: 85, dtype: int64


In [None]:
# Address class imbalance: resample the feature table once per target with a
# seeded RandomOverSampler so that the result is repeatable.
ros = RandomOverSampler(random_state=42)
X_resampled_english, y_resampled_english = ros.fit_resample(X=X, y=y_english_medicine)
X_resampled_vedic, y_resampled_vedic = ros.fit_resample(X=X, y=y_vedic_medicine)

In [None]:
# Hold out 20% of each resampled dataset for evaluation (fixed seed).
# NOTE(review): the inputs were oversampled *before* this split, so copies of
# the same original row can end up in both the training and the test part;
# scores measured on these test sets are likely optimistic.
X_train_eng, X_test_eng, y_train_eng, y_test_eng = train_test_split(
    X_resampled_english,
    y_resampled_english,
    test_size=0.2,
    random_state=42,
)
X_train_vedic, X_test_vedic, y_train_vedic, y_test_vedic = train_test_split(
    X_resampled_vedic,
    y_resampled_vedic,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Candidate classifiers, keyed by the name used in the printed report.
# The random forest is seeded so that repeated runs give the same scores and
# therefore select (and later pickle) the same model.
models = {
    'RandomForest': RandomForestClassifier(random_state=42),
    'LogisticRegression': LogisticRegression(max_iter=1000)
}

# Best fitted model and its macro F1 per target; filled in by the training loop.
best_model_english = None
best_score_english = 0
best_model_vedic = None
best_score_vedic = 0

In [None]:
# Fit every candidate on each target and remember the best-scoring fitted model
# per target (macro F1 on the held-out split).
#
# Each fit uses its own clone of the candidate. Fitting the same estimator
# object on the English target and then on the Vedic target would leave a
# single object trained on Vedic labels only, and both best_model_* names would
# point at it -- the "English" model would then output Vedic label codes.
for model_name, model in models.items():
    # English medicine prediction
    model_eng = clone(model)
    model_eng.fit(X_train_eng, y_train_eng)
    y_pred_eng = model_eng.predict(X_test_eng)
    f1_eng = f1_score(y_test_eng, y_pred_eng, average='macro', zero_division=1)
    print(f'F1-score for {model_name} (English medicine): {f1_eng}')
    if f1_eng > best_score_english:
        best_score_english = f1_eng
        best_model_english = model_eng

    # Vedic medicine prediction
    model_vedic = clone(model)
    model_vedic.fit(X_train_vedic, y_train_vedic)
    y_pred_vedic = model_vedic.predict(X_test_vedic)
    f1_vedic = f1_score(y_test_vedic, y_pred_vedic, average='macro', zero_division=1)
    print(f'F1-score for {model_name} (Vedic medicine): {f1_vedic}')
    if f1_vedic > best_score_vedic:
        best_score_vedic = f1_vedic
        best_model_vedic = model_vedic


F1-score for RandomForest (English medicine): 0.8823529411764706
F1-score for RandomForest (Vedic medicine): 0.9722222222222221
F1-score for LogisticRegression (English medicine): 0.14814814814814814
F1-score for LogisticRegression (Vedic medicine): 0.30414347557204696


In [None]:
# Function to make predictions
def predict_medicine(disease_name, disease_type):
    """Predict the English and Vedic medicine for a disease.

    Parameters
    ----------
    disease_name : str
        Disease label; surrounding whitespace is stripped before lookup.
    disease_type : str
        Disease type label; surrounding whitespace is stripped before lookup.

    Returns
    -------
    tuple
        ``(english_medicine, vedic_medicine)`` decoded with the medicine label
        encoders. If either input is not a class known to its encoder (or
        another ``ValueError`` occurs), both elements are the same error
        message string instead.
    """
    try:
        disease_name = disease_name.strip()
        disease_type = disease_type.strip()

        if disease_name not in le_disease.classes_ or disease_type not in le_type.classes_:
            raise ValueError(f"Invalid input: {disease_name} or {disease_type} is not in the label encoders' classes")

        # Build a one-row frame with the same column names the models were
        # trained on, so the input matches the fitted feature names instead of
        # being an unnamed nested list.
        features = pd.DataFrame(
            {
                'Disease': le_disease.transform([disease_name]),
                'Type': le_type.transform([disease_type]),
            }
        )

        english_medicine_encoded = best_model_english.predict(features)
        vedic_medicine_encoded = best_model_vedic.predict(features)

        # Map the predicted integer codes back to the medicine text.
        english_medicine = le_english_medicine.inverse_transform(english_medicine_encoded)
        vedic_medicine = le_vedic_medicine.inverse_transform(vedic_medicine_encoded)

        return english_medicine[0], vedic_medicine[0]
    except ValueError as e:
        return str(e), str(e)

In [None]:
# Example prediction. The type literal ends with a tab character (written as
# an escape here so it is visible); predict_medicine strips it before lookup,
# but the f-strings below print the unstripped value.
example_disease, example_type = 'Hypertension', 'Chronic\t'
predicted_english_medicine, predicted_vedic_medicine = predict_medicine(
    disease_name=example_disease,
    disease_type=example_type,
)
print(f'Predicted English medicine for {example_disease} ({example_type}): {predicted_english_medicine}')
print(f'Predicted Vedic medicine for {example_disease} ({example_type}): {predicted_vedic_medicine}')

Predicted English medicine for Hypertension (Chronic	): Levodopa, Dopamine agonists
Predicted Vedic medicine for Hypertension (Chronic	): Sarpagandha, Ashwagandha


In [None]:
# Persist the two selected models and the four label encoders, one pickle file
# per object, written to the current working directory.
for _filename, _artifact in (
    ('best_model_english.pkl', best_model_english),
    ('best_model_vedic.pkl', best_model_vedic),
    ('le_disease.pkl', le_disease),
    ('le_type.pkl', le_type),
    ('le_english_medicine.pkl', le_english_medicine),
    ('le_vedic_medicine.pkl', le_vedic_medicine),
):
    with open(_filename, 'wb') as f:
        pickle.dump(_artifact, f)

In [None]:
# Load the best models using pickle
def load_models():
    """Read the pickled models and label encoders from the working directory.

    Returns
    -------
    tuple
        ``(best_model_english, best_model_vedic, le_disease, le_type,
        le_english_medicine, le_vedic_medicine)``, each unpickled from the
        file of the same name with a ``.pkl`` extension.

    Notes
    -----
    Unpickling can execute arbitrary code; only load files you created.
    """
    def _read(path):
        # Open one pickle file in binary mode and return the stored object.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    return (
        _read('best_model_english.pkl'),
        _read('best_model_vedic.pkl'),
        _read('le_disease.pkl'),
        _read('le_type.pkl'),
        _read('le_english_medicine.pkl'),
        _read('le_vedic_medicine.pkl'),
    )

# Rebind the in-memory models and encoders to the objects read back from disk.
(
    best_model_english,
    best_model_vedic,
    le_disease,
    le_type,
    le_english_medicine,
    le_vedic_medicine,
) = load_models()
