In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
import pickle

# Load data from CSV files.
# Forward slashes are used on purpose: sequences such as '\s', '\d', '\p',
# '\m' and '\w' are invalid string escapes (Python warns about them), and a
# backslash is only a path separator on Windows, whereas '/' is accepted on
# every platform.
# NOTE: 'symtoms_df.csv' is spelled as-is to match the file name on disk.
symptoms_df = pd.read_csv('datasets/symtoms_df.csv')
description_df = pd.read_csv('datasets/description.csv')
precautions_df = pd.read_csv('datasets/precautions_df.csv')
medications_df = pd.read_csv('datasets/medications.csv')
workout_df = pd.read_csv('datasets/workout_df.csv')
diets_df = pd.read_csv('datasets/diets.csv')

# Preprocess symptoms data: gather the four symptom columns of every row into
# a single list stored in a new 'Symptoms' column.
symptom_columns = ['Symptom_1', 'Symptom_2', 'Symptom_3', 'Symptom_4']
symptoms_df['Symptoms'] = symptoms_df[symptom_columns].values.tolist()

# Drop missing entries and trim surrounding whitespace from the rest.
# pd.notna is used instead of comparing str(value) with 'nan' so that every
# missing-value marker (NaN as well as None) is skipped before .strip() runs.
symptoms_df['Symptoms'] = symptoms_df['Symptoms'].apply(
    lambda row: [symptom.strip() for symptom in row if pd.notna(symptom)]
)

# Transform the per-row symptom lists into a binary indicator matrix
# (one column per distinct symptom seen during fitting).
mlb = MultiLabelBinarizer()
symptoms_transformed = mlb.fit_transform(symptoms_df['Symptoms'])

# Split data into training (80%) and test (20%) sets; the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    symptoms_transformed,
    symptoms_df['Disease'],
    test_size=0.2,
    random_state=42,
)

# Train a Random Forest model.
# The forest is seeded as well: without random_state the trained model (and
# therefore the saved pickle and the reported accuracy) changes on every run
# even though the train/test split is fixed.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Persist the training artifacts with pickle: first the fitted classifier,
# then the fitted MultiLabelBinarizer, which is needed later to encode
# symptom lists the same way they were encoded for training.
artifacts = (
    ('symptom_disease_model.pkl', model),
    ('symptom_mapping.pkl', mlb),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, 'wb') as out_file:
        pickle.dump(artifact, out_file)

# Optional sanity check: score the classifier on the held-out test split.
test_accuracy = model.score(X_test, y_test)
print("Model accuracy:", test_accuracy)

: 