In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import pickle
import warnings
warnings.filterwarnings('ignore')

In [36]:
# Load the dataset
df = pd.read_csv("C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\Training.csv")

# Split features and target
X = df.drop("prognosis", axis=1)
y = df["prognosis"]

# Encode the target variable
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=20)

# Check shapes
print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)

Training set shape: (3444, 132) (3444,)
Testing set shape: (1476, 132) (1476,)


In [38]:
# Define models
models = {
    "SVC": SVC(kernel='linear'),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "GradientBoosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "KNeighbors": KNeighborsClassifier(n_neighbors=5),
    "MultinomialNB": MultinomialNB(),
    "DecisionTree": DecisionTreeClassifier(random_state=42),
    "LogisticRegression": LogisticRegression(max_iter=1000, random_state=42),
    "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=42, algorithm='SAMME'),
    "XGBoost": XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False, eval_metric='mlogloss'),
    "LightGBM": LGBMClassifier(n_estimators=100, random_state=42),
    "CatBoost": CatBoostClassifier(n_estimators=100, random_state=42, verbose=0)
}

# Train and evaluate models
for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)
    
    # Predict on test set
    predictions = model.predict(X_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions)
    print(f"{model_name} Accuracy: {accuracy:.4f}")
    
    # Confusion matrix
    cm = confusion_matrix(y_test, predictions)
    print(f"{model_name} Confusion Matrix:\n{cm}\n")
    print("="*50)

SVC Accuracy: 1.0000
SVC Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]

RandomForest Accuracy: 1.0000
RandomForest Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]

GradientBoosting Accuracy: 1.0000
GradientBoosting Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]

KNeighbors Accuracy: 1.0000
KNeighbors Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]

MultinomialNB Accuracy: 1.0000
MultinomialNB Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...

In [39]:
# Train and save SVC model
svc = SVC(kernel='linear')
svc.fit(X_train, y_train)
ypred = svc.predict(X_test)
print("SVC Test Accuracy:", accuracy_score(y_test, ypred))

# Save the model
pickle.dump(svc, open('svc.pkl', 'wb'))

SVC Test Accuracy: 1.0


In [42]:
# Load recommendation datasets
sym_des = pd.read_csv("C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\symtoms_df.csv")
precautions = pd.read_csv("C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\precautions_df.csv")
workout = pd.read_csv("C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\workout_df.csv")
description = pd.read_csv("C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\description.csv")
medications = pd.read_csv("C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\medications.csv")
diets = pd.read_csv("C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\diets.csv")

In [44]:
# Symptoms dictionary
symptoms_dict = {'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of urine': 90, 'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic _patches': 102, 'watering_from_eyes': 103, 'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131}

# Diseases mapping
diseases_list = {15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal  Positional Vertigo', 2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'}

# Helper function to fetch recommendations
def helper(dis):
    desc = description[description['Disease'] == dis]['Description']
    desc = " ".join([w for w in desc]) if not desc.empty else "No description available"

    pre = precautions[precautions['Disease'] == dis][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    pre = [col for col in pre.values.flatten() if pd.notna(col)] if not pre.empty else ["No precautions available"]

    med = medications[medications['Disease'] == dis]['Medication']
    med = [m for m in med.values] if not med.empty else ["No medications available"]

    die = diets[diets['Disease'] == dis]['Diet']
    die = [d for d in die.values] if not die.empty else ["No diet recommendations available"]

    wrkout = workout[workout['disease'] == dis]['workout']
    wrkout = [w for w in wrkout.values] if not wrkout.empty else ["No workout recommendations available"]

    return desc, pre, med, die, wrkout

# Prediction function
def get_predicted_value(patient_symptoms):
    input_vector = np.zeros(len(symptoms_dict))
    for item in patient_symptoms:
        if item in symptoms_dict:
            input_vector[symptoms_dict[item]] = 1
        else:
            print(f"Warning: Symptom '{item}' not recognized.")
    return diseases_list[svc.predict([input_vector])[0]]

In [46]:
# Load the saved model
svc = pickle.load(open('svc.pkl', 'rb'))

# Main function to get user input and provide recommendations
def predict_disease():
    symptoms = input("Enter your symptoms (comma-separated): ")
    user_symptoms = [s.strip() for s in symptoms.split(',')]
    user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
    
    try:
        predicted_disease = get_predicted_value(user_symptoms)
        desc, pre, med, die, wrkout = helper(predicted_disease)

        print("\n================= Predicted Disease ================")
        print(predicted_disease)
        print("================= Description ==================")
        print(desc)
        print("================= Precautions ==================")
        for i, p in enumerate(pre, 1):
            print(f"{i}: {p}")
        print("================= Medications ==================")
        for i, m in enumerate(med, len(pre)+1):
            print(f"{i}: {m}")
        print("================= Diet Recommendations ==========")
        for i, d in enumerate(die, len(pre)+len(med)+1):
            print(f"{i}: {d}")
        print("================= Workout Recommendations =======")
        for i, w in enumerate(wrkout, len(pre)+len(med)+len(die)+1):
            print(f"{i}: {w}")
    except Exception as e:
        print(f"Error: {e}. Please ensure symptoms are valid.")

# Run the prediction
predict_disease()

Enter your symptoms (comma-separated):  itching



Fungal infection
Fungal infection is a common skin condition caused by fungi.
1: bath twice
2: use detol or neem in bathing water
3: keep infected area dry
4: use clean cloths
5: ['Antifungal Cream', 'Fluconazole', 'Terbinafine', 'Clotrimazole', 'Ketoconazole']
6: ['Antifungal Diet', 'Probiotics', 'Garlic', 'Coconut oil', 'Turmeric']
7: Avoid sugary foods
8: Consume probiotics
9: Increase intake of garlic
10: Include yogurt in diet
11: Limit processed foods
12: Stay hydrated
13: Consume green tea
14: Eat foods rich in zinc
15: Include turmeric in diet
16: Eat fruits and vegetables


In [48]:
# Simulated user profile (for demonstration)
user_profile = {
    "user_id": 1,
    "preferences": ["low_sugar_diet", "light_exercise"],
    "location": "urban",
    "time": "morning"
}

# Context-aware recommendation adjustment
def adjust_recommendations(desc, pre, med, die, wrkout, user_profile):
    adjusted_die = die
    adjusted_wrkout = wrkout
    
    # Adjust diet based on user preferences
    if "low_sugar_diet" in user_profile["preferences"]:
        adjusted_die = [d for d in die if "sugar" not in str(d).lower()] + ["Low-sugar diet recommended"]
    
    # Adjust workout based on preferences and location
    if "light_exercise" in user_profile["preferences"] and user_profile["location"] == "urban":
        adjusted_wrkout = [w for w in wrkout if "intense" not in str(w).lower()] + ["Light walking in park"]
    
    return desc, pre, med, adjusted_die, adjusted_wrkout

# Modified prediction function with context-aware recommendations
def predict_disease_with_context():
    symptoms = input("Enter your symptoms (comma-separated): ")
    user_symptoms = [s.strip() for s in symptoms.split(',')]
    user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
    
    try:
        predicted_disease = get_predicted_value(user_symptoms)
        desc, pre, med, die, wrkout = helper(predicted_disease)
        
        # Adjust recommendations based on user profile
        desc, pre, med, adjusted_die, adjusted_wrkout = adjust_recommendations(desc, pre, med, die, wrkout, user_profile)

        print("\n================= Predicted Disease ================")
        print(predicted_disease)
        print("================= Description ==================")
        print(desc)
        print("================= Precautions ==================")
        for i, p in enumerate(pre, 1):
            print(f"{i}: {p}")
        print("================= Medications ==================")
        for i, m in enumerate(med, len(pre)+1):
            print(f"{i}: {m}")
        print("================= Adjusted Diet Recommendations ==========")
        for i, d in enumerate(adjusted_die, len(pre)+len(med)+1):
            print(f"{i}: {d}")
        print("================= Adjusted Workout Recommendations =======")
        for i, w in enumerate(adjusted_wrkout, len(pre)+len(med)+len(adjusted_die)+1):
            print(f"{i}: {w}")
    except Exception as e:
        print(f"Error: {e}. Please ensure symptoms are valid.")

# Run the context-aware prediction
predict_disease_with_context()

Enter your symptoms (comma-separated):  skin rash



Urinary tract infection
Urinary tract infection is an infection in any part of the urinary system.
1: drink plenty of water
2: increase vitamin c intake
3: drink cranberry juice
4: take probiotics
5: ['Antibiotics', 'Urinary analgesics', 'Phenazopyridine', 'Antispasmodics', 'Probiotics']
6: ['UTI Diet', 'Hydration', 'Cranberry juice', 'Probiotics', 'Vitamin C-rich foods']
7: Low-sugar diet recommended
8: Stay hydrated
9: Consume cranberry products
10: Include vitamin C-rich foods
11: Limit caffeine and alcohol
12: Consume probiotics
13: Avoid spicy and acidic foods
14: Consult a healthcare professional
15: Follow medical recommendations
16: Maintain good hygiene
17: Limit sugary foods and beverages
18: Light walking in park


In [52]:
# Simulated user database
users = {
    "user1": {"password": "pree123", "role": "User", "profile": user_profile},
    "admin1": {"password": "admin123", "role": "Admin", "profile": {}},
    "analyst1": {"password": "analyst123", "role": "Analyst", "profile": {}}
}

# Simulated JWT-based authentication
def authenticate_user(username, password):
    if username in users and users[username]["password"] == password:
        return users[username]["role"], users[username]["profile"]
    return None, None

# Role-based access control
def access_recommendation_system():
    username = input("Enter username: ")
    password = input("Enter password: ")
    
    role, profile = authenticate_user(username, password)
    if role:
        print(f"Authenticated as {role}")
        if role in ["User", "Admin"]:
            predict_disease_with_context()
        elif role == "Analyst":
            print("Analyst access: View analytics dashboard (simulated).")
            # Simulate analytics dashboard
            print("Disease prediction accuracy: 100%")
        else:
            print("Unknown role.")
    else:
        print("Authentication failed.")

# Run the system
access_recommendation_system()

Enter username:  user1
Enter password:  pree123


Authenticated as User


Enter your symptoms (comma-separated):  skin ras



Urinary tract infection
Urinary tract infection is an infection in any part of the urinary system.
1: drink plenty of water
2: increase vitamin c intake
3: drink cranberry juice
4: take probiotics
5: ['Antibiotics', 'Urinary analgesics', 'Phenazopyridine', 'Antispasmodics', 'Probiotics']
6: ['UTI Diet', 'Hydration', 'Cranberry juice', 'Probiotics', 'Vitamin C-rich foods']
7: Low-sugar diet recommended
8: Stay hydrated
9: Consume cranberry products
10: Include vitamin C-rich foods
11: Limit caffeine and alcohol
12: Consume probiotics
13: Avoid spicy and acidic foods
14: Consult a healthcare professional
15: Follow medical recommendations
16: Maintain good hygiene
17: Limit sugary foods and beverages
18: Light walking in park


In [None]:
import pandas as pd
import numpy as np
import os
import pickle
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import warnings
warnings.filterwarnings('ignore')

# Define base directory for saving models and data
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
MODEL_DIR = os.path.join(BASE_DIR, 'models')
DATA_DIR = os.path.join(BASE_DIR, 'Datasets')

# Create directories if they don't exist
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(DATA_DIR, exist_ok=True)

# Step 1: Load and preprocess dataset
def load_and_preprocess_data():
    try:
        df = pd.read_csv(os.path.join(DATA_DIR, "C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\Training.csv"))
        X = df.drop('prognosis', axis=1)
        y = df['prognosis']
        
        # Encode target variable
        le = LabelEncoder()
        le.fit(y)
        Y = le.transform(y)
        
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=20)
        print(f"Training set shape: {X_train.shape}, {y_train.shape}")
        print(f"Testing set shape: {X_test.shape}, {y_test.shape}")
        
        return X_train, X_test, y_train, y_test, le
    except FileNotFoundError:
        print(f"Error: 'Training.csv' not found in {DATA_DIR}. Please ensure the file exists.")
        return None, None, None, None, None

# Step 2: Train and evaluate models
def train_and_evaluate_models(X_train, X_test, y_train, y_test):
    models = {
        'SVC': SVC(kernel='linear'),
        'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
        'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
        'KNeighbors': KNeighborsClassifier(n_neighbors=5),
        'MultinomialNB': MultinomialNB(),
        'DecisionTree': DecisionTreeClassifier(random_state=42),
        'LogisticRegression': LogisticRegression(max_iter=1000, random_state=42),
        'AdaBoost': AdaBoostClassifier(n_estimators=100, random_state=42, algorithm='SAMME'),
        'XGBoost': XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False, eval_metric='mlogloss'),
        'LightGBM': LGBMClassifier(n_estimators=100, random_state=42),
        'CatBoost': CatBoostClassifier(n_estimators=100, random_state=42, verbose=0)
    }
    
    for name, model in models.items():
        try:
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)
            accuracy = accuracy_score(y_test, predictions)
            print(f"{name} Accuracy: {accuracy:.4f}")
            cm = confusion_matrix(y_test, predictions)
            print(f"{name} Confusion Matrix:\n{cm}\n{'='*50}")
            
            # Save SVC model
            if name == 'SVC':
                with open(os.path.join(MODEL_DIR, 'svc.pkl'), 'wb') as f:
                    pickle.dump(model, f)
                print(f"SVC model saved at {os.path.join(MODEL_DIR, 'svc.pkl')}")
        except Exception as e:
            print(f"Error training {name}: {e}")
    
    return models.get('SVC')

# Step 3: Load recommendation datasets
def load_recommendation_data():
    try:
        datasets = {
            'symptoms': pd.read_csv(os.path.join(DATA_DIR, "C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\symtoms_df.csv")),
            'precautions': pd.read_csv(os.path.join(DATA_DIR,"C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\precautions_df.csv")),
            'workouts': pd.read_csv(os.path.join(DATA_DIR, "C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\workout_df.csv")),
            'descriptions': pd.read_csv(os.path.join(DATA_DIR, "C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\description.csv")),
            'medications': pd.read_csv(os.path.join(DATA_DIR, "C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\medications.csv")),
            'diets': pd.read_csv(os.path.join(DATA_DIR, "C:\\Users\\preet\\OneDrive\\Semester  4\\INTERN\\datasets\\diets.csv"))
        }
        return datasets
    except FileNotFoundError as e:
        print(f"Error: {e}. Please ensure all recommendation datasets are in {DATA_DIR}.")
        return None

# Step 4: Define mappings
symptoms_dict = {
    'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4,
    'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9,
    'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_urination': 13,
    'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18,
    'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23,
    'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29,
    'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34,
    'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39,
    'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44,
    'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48,
    'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52,
    'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57,
    'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61,
    'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67,
    'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71,
    'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75,
    'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79,
    'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83,
    'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87,
    'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of_urine': 90, 'continuous_feel_of_urine': 91,
    'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95,
    'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99,
    'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic_patches': 102, 'watering_from_eyes': 103,
    'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108,
    'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111,
    'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115,
    'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118,
    'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122,
    'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126,
    'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130,
    'yellow_crust_ooze': 131
}

diseases_list = {
    15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction',
    33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes', 17: 'Gastroenteritis', 6: 'Bronchial Asthma',
    23: 'Hypertension', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)',
    28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A',
    19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis',
    36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)',
    18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism',
    25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal Positional Vertigo',
    2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'
}

# Step 5: Save mappings
def save_mappings():
    with open(os.path.join(MODEL_DIR, 'symptoms_dict.pkl'), 'wb') as f:
        pickle.dump(symptoms_dict, f)
    with open(os.path.join(MODEL_DIR, 'diseases_list.pkl'), 'wb') as f:
        pickle.dump(diseases_list, f)
    with open(os.path.join(MODEL_DIR, 'label_encoder.pkl'), 'wb') as f:
        pickle.dump(le, f)
    print(f"Mappings saved at {MODEL_DIR}")

# Step 6: Helper functions
def helper(dis, datasets):
    desc = datasets['descriptions'][datasets['descriptions']['Disease'] == dis]['Description']
    desc = " ".join(desc) if not desc.empty else "No description available"
    
    pre = datasets['precautions'][datasets['precautions']['Disease'] == dis][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    pre = [col for col in pre.values.flatten() if pd.notna(col)] if not pre.empty else ["No precautions available"]
    
    med = datasets['medications'][datasets['medications']['Disease'] == dis]['Medication']
    med = [m for m in med.values] if not med.empty else ["No medications available"]
    
    die = datasets['diets'][datasets['diets']['Disease'] == dis]['Diet']
    die = [d for d in die.values] if not die.empty else ["No diet recommendations available"]
    
    wrkout = datasets['workouts'][datasets['workouts']['disease'] == dis]['workout']
    wrkout = [w for w in wrkout.values] if not wrkout.empty else ["No workout recommendations available"]
    
    return desc, pre, med, die, wrkout

def get_predicted_value(patient_symptoms, model):
    input_vector = np.zeros(len(symptoms_dict))
    for item in patient_symptoms:
        if item in symptoms_dict:
            input_vector[symptoms_dict[item]] = 1
        else:
            print(f"Warning: Symptom '{item}' not recognized.")
    return diseases_list[model.predict([input_vector])[0]]

# Step 7: Context-aware recommendations
user_profile = {
    'user_id': 1,
    'preferences': ['low_sugar_diet', 'light_exercise'],
    'location': 'urban',
    'time': 'morning'
}

def adjust_recommendations(desc, pre, med, die, wrkout, profile):
    adjusted_die = die
    adjusted_wrkout = wrkout
    if 'low_sugar_diet' in profile['preferences']:
        adjusted_die = [d for d in die if 'sugar' not in str(d).lower()] + ['Low-sugar diet recommended']
    if 'light_exercise' in profile['preferences'] and profile['location'] == 'urban':
        adjusted_wrkout = [w for w in wrkout if 'intense' not in str(w).lower()] + ['Light walking in park']
    return desc, pre, med, adjusted_die, adjusted_wrkout

# Step 8: User management
users = {
    'user1': {'password': 'pass123', 'role': 'User', 'profile': user_profile},
    'admin1': {'password': 'admin123', 'role': 'Admin', 'profile': {}},
    'analyst1': {'password': 'analyst123', 'role': 'Analyst', 'profile': {}}
}

def authenticate_user(username, password):
    if username in users and users[username]['password'] == password:
        return users[username]['role'], users[username]['profile']
    return None, None

# Step 9: Main recommendation system
def access_recommendation_system():
    try:
        # Load model
        with open(os.path.join(MODEL_DIR, 'svc.pkl'), 'rb') as f:
            model = pickle.load(f)
        
        # Load recommendation datasets
        datasets = load_recommendation_data()
        if datasets is None:
            return
        
        # User authentication
        username = input("Enter username: ")
        password = input("Enter password: ")
        role, profile = authenticate_user(username, password)
        
        if not role:
            print("Authentication failed.")
            return
        
        print(f"Authenticated as {role}")
        if role in ['User', 'Admin']:
            symptoms = input("Enter your symptoms (comma-separated): ")
            user_symptoms = [s.strip() for s in symptoms.split(',')]
            user_symptoms = [s.strip("[]' ") for s in user_symptoms]
            
            try:
                predicted_disease = get_predicted_value(user_symptoms, model)
                desc, pre, med, die, wrkout = helper(predicted_disease, datasets)
                desc, pre, med, adjusted_die, adjusted_wrkout = adjust_recommendations(desc, pre, med, die, wrkout, profile)
                
                print("\n================= Predicted Disease ================")
                print(predicted_disease)
                print("================= Description ==================")
                print(desc)
                print("================= Precautions ==================")
                for i, p in enumerate(pre, 1):
                    print(f"{i}: {p}")
                print("================= Medications ==================")
                for i, m in enumerate(med, len(pre)+1):
                    print(f"{i}: {m}")
                print("================= Adjusted Diet Recommendations ==========")
                for i, d in enumerate(adjusted_die, len(pre)+len(med)+1):
                    print(f"{i}: {d}")
                print("================= Adjusted Workout Recommendations =======")
                for i, w in enumerate(adjusted_wrkout, len(pre)+len(med)+len(adjusted_die)+1):
                    print(f"{i}: {w}")
            except Exception as e:
                print(f"Error: {e}. Please ensure symptoms are valid.")
        elif role == 'Analyst':
            print("Analyst access: View analytics dashboard (simulated).")
            print("Disease prediction accuracy: 100%")
        else:
            print("Unknown role.")
    except FileNotFoundError:
        print(f"Error: 'svc.pkl' not found in {MODEL_DIR}. Please train the model first.")
    except Exception as e:
        print(f"Error: {e}")

# Step 10: Main execution
if __name__ == '__main__':
    # Load and preprocess data
    X_train, X_test, y_train, y_test, le = load_and_preprocess_data()
    if X_train is None:
        exit(1)
    
    # Train models
    svc_model = train_and_evaluate_models(X_train, X_test, y_train, y_test)
    
    # Save mappings
    save_mappings()
    
    # Run recommendation system
    access_recommendation_system()

Training set shape: (3444, 132), (3444,)
Testing set shape: (1476, 132), (1476,)
SVC Accuracy: 1.0000
SVC Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]
SVC model saved at C:\Users\preet\models\svc.pkl
RandomForest Accuracy: 1.0000
RandomForest Confusion Matrix:
[[40  0  0 ...  0  0  0]
 [ 0 43  0 ...  0  0  0]
 [ 0  0 28 ...  0  0  0]
 ...
 [ 0  0  0 ... 34  0  0]
 [ 0  0  0 ...  0 41  0]
 [ 0  0  0 ...  0  0 31]]
