In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer

# Load dataset
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that file exists. Consider a configurable data directory.
dataset_path = r"C:\Users\kulai\OneDrive\Desktop\BSDoc\sympton_prediction_model-i\dataset.csv"
df = pd.read_csv(dataset_path)


def _clean_symptoms(row):
    """Return the non-missing symptom names of one dataset row, whitespace-trimmed.

    The raw CSV values carry leading spaces (e.g. ' abdominal_pain'). Trimming
    them here makes the learned feature names match what a client would
    naturally send, and merges variants that differ only by padding.
    """
    cleaned = []
    for value in row:
        if pd.isna(value):
            continue
        name = str(value).strip()
        if name:  # skip cells that were whitespace only
            cleaned.append(name)
    return cleaned


# Combine symptoms into a single list per row
symptom_columns = [col for col in df.columns if col.startswith("Symptom_")]
df["Symptoms"] = df[symptom_columns].apply(_clean_symptoms, axis=1)

# Data preprocessing: one binary feature column per distinct symptom name
mlb = MultiLabelBinarizer()
symptom_matrix = mlb.fit_transform(df["Symptoms"])
labels = df["Disease"]

# Train the model
X_train, X_test, y_train, y_test = train_test_split(symptom_matrix, labels, test_size=0.2, random_state=42)
# Seed the forest as well as the split so the saved model is reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split, which was otherwise created but never used.
print(f"Held-out accuracy: {model.score(X_test, y_test):.3f}")

# Save model and symptom list
joblib.dump(model, "disease_prediction_model.pkl")
joblib.dump(mlb.classes_.tolist(), "symptom_list.pkl")

def predict_disease(symptoms, classifier=None, symptom_classes=None):
    """
    Predict disease based on user symptoms.

    Args:
        symptoms: Iterable of symptom names supplied by the caller. ``None`` is
            treated as an empty list. A name is matched exactly first; if that
            fails it is matched ignoring surrounding whitespace and letter
            case, so "itching" matches a fitted class stored as " itching".
            Names that match nothing are ignored.
        classifier: Fitted estimator exposing ``classes_``, ``predict`` and
            ``predict_proba``. Defaults to the module-level ``model``.
        symptom_classes: Ordered symptom names, one per feature column.
            Defaults to the module-level ``mlb.classes_``.

    Returns:
        dict with two keys:
            "predicted_disease": the label returned by ``classifier.predict``.
            "confidence_scores": mapping of every class label to the
                probability returned by ``classifier.predict_proba``.

    Note:
        If no symptom is recognised the feature vector is all zeros and the
        classifier still returns a prediction for that empty vector.
    """
    if classifier is None:
        classifier = model
    if symptom_classes is None:
        symptom_classes = mlb.classes_
    if symptoms is None:
        symptoms = ()

    # Build the lookup tables once instead of rebuilding and scanning the
    # class list for every symptom.
    exact_index = {}
    normalized_index = {}
    for position, name in enumerate(symptom_classes):
        exact_index.setdefault(name, position)  # first occurrence wins, like list.index
        normalized_index.setdefault(str(name).strip().lower(), []).append(position)

    symptom_vector = np.zeros(len(symptom_classes))
    for symptom in symptoms:
        if symptom in exact_index:
            symptom_vector[exact_index[symptom]] = 1
            continue
        # Fallback: tolerate padding/case differences between the request and
        # the fitted class names; set every column sharing the normalized name.
        for position in normalized_index.get(str(symptom).strip().lower(), ()):
            symptom_vector[position] = 1

    features = [symptom_vector]
    prediction = classifier.predict(features)
    probabilities = classifier.predict_proba(features)[0]

    disease_probabilities = dict(zip(classifier.classes_, probabilities))

    return {
        "predicted_disease": prediction[0],
        "confidence_scores": disease_probabilities
    }

# FastAPI application instance; the /predict route is registered on it below.
app = FastAPI()

# Request model: the JSON body accepted by the /predict endpoint.
class SymptomRequest(BaseModel):
    # Symptom names to score; passed unchanged to predict_disease.
    symptoms: list[str]

# POST /predict: forwards the request's symptom list to predict_disease and
# returns its result dict as the response body.
@app.post("/predict")
def predict(request: SymptomRequest):
    return predict_disease(request.symptoms)

In [5]:
import pandas as pd

# Read the symptom dataset from disk
dataset_path = r"C:\Users\kulai\OneDrive\Desktop\BSDoc\sympton_prediction_model-i\dataset.csv"
df = pd.read_csv(dataset_path)

# Symptom columns are the ones whose name starts with "Symptom_"
symptom_columns = [name for name in df.columns if name.startswith("Symptom_")]

# Gather the distinct non-missing values found across all symptom columns
all_symptoms = {
    value
    for column in symptom_columns
    for value in df[column].dropna().unique()
}

# Show the distinct symptoms in sorted order
print("List of Symptoms in the Dataset:")
print(sorted(all_symptoms))


List of Symptoms in the Dataset:
[' abdominal_pain', ' abnormal_menstruation', ' acidity', ' acute_liver_failure', ' altered_sensorium', ' anxiety', ' back_pain', ' belly_pain', ' blackheads', ' bladder_discomfort', ' blister', ' blood_in_sputum', ' bloody_stool', ' blurred_and_distorted_vision', ' breathlessness', ' brittle_nails', ' bruising', ' burning_micturition', ' chest_pain', ' chills', ' cold_hands_and_feets', ' coma', ' congestion', ' constipation', ' continuous_feel_of_urine', ' continuous_sneezing', ' cough', ' cramps', ' dark_urine', ' dehydration', ' depression', ' diarrhoea', ' dischromic _patches', ' distention_of_abdomen', ' dizziness', ' drying_and_tingling_lips', ' enlarged_thyroid', ' excessive_hunger', ' extra_marital_contacts', ' family_history', ' fast_heart_rate', ' fatigue', ' fluid_overload', ' foul_smell_of urine', ' headache', ' high_fever', ' hip_joint_pain', ' history_of_alcohol_consumption', ' increased_appetite', ' indigestion', ' inflammatory_nails', ' 

In [3]:
import pandas as pd

# Load the symptom dataset and preview its first five rows
dataset_path = r"C:\Users\kulai\OneDrive\Desktop\BSDoc\sympton_prediction_model-i\dataset.csv"
df = pd.read_csv(dataset_path)
print(df.head(n=5))

            Disease   Symptom_1              Symptom_2              Symptom_3  \
0  Fungal infection     itching              skin_rash   nodal_skin_eruptions   
1  Fungal infection   skin_rash   nodal_skin_eruptions    dischromic _patches   
2  Fungal infection     itching   nodal_skin_eruptions    dischromic _patches   
3  Fungal infection     itching              skin_rash    dischromic _patches   
4  Fungal infection     itching              skin_rash   nodal_skin_eruptions   

              Symptom_4 Symptom_5 Symptom_6 Symptom_7 Symptom_8 Symptom_9  \
0   dischromic _patches       NaN       NaN       NaN       NaN       NaN   
1                   NaN       NaN       NaN       NaN       NaN       NaN   
2                   NaN       NaN       NaN       NaN       NaN       NaN   
3                   NaN       NaN       NaN       NaN       NaN       NaN   
4                   NaN       NaN       NaN       NaN       NaN       NaN   

  Symptom_10 Symptom_11 Symptom_12 Symptom_13 Symp

In [3]:
import pandas as pd

# Location of the disease description table
dataset_path = r"C:\Users\kulai\OneDrive\Desktop\BSDoc\sympton_prediction_model-i\symptom_Description.csv"

# Read the table into a DataFrame
df = pd.read_csv(dataset_path)

# Show the first 10 rows
print(df.head(n=10))


                                   Disease  \
0                            Drug Reaction   
1                                  Malaria   
2                                  Allergy   
3                           Hypothyroidism   
4                                Psoriasis   
5                                     GERD   
6                      Chronic cholestasis   
7                              hepatitis A   
8                          Osteoarthristis   
9  (vertigo) Paroymsal  Positional Vertigo   

                                         Description  
0  An adverse drug reaction (ADR) is an injury ca...  
1  An infectious disease caused by protozoan para...  
2  An allergy is an immune system response to a f...  
3  Hypothyroidism, also called underactive thyroi...  
4  Psoriasis is a common skin disorder that forms...  
5  Gastroesophageal reflux disease, or GERD, is a...  
6  Chronic cholestatic diseases, whether occurrin...  
7  Hepatitis A is a highly contagious liver infec... 

In [4]:
import pandas as pd

# Load the symptom dataset again and preview its first 10 rows
dataset_path = r"C:\Users\kulai\OneDrive\Desktop\BSDoc\sympton_prediction_model-i\dataset.csv"
df = pd.read_csv(dataset_path)

print(df.head(n=10))

            Disease   Symptom_1              Symptom_2              Symptom_3  \
0  Fungal infection     itching              skin_rash   nodal_skin_eruptions   
1  Fungal infection   skin_rash   nodal_skin_eruptions    dischromic _patches   
2  Fungal infection     itching   nodal_skin_eruptions    dischromic _patches   
3  Fungal infection     itching              skin_rash    dischromic _patches   
4  Fungal infection     itching              skin_rash   nodal_skin_eruptions   
5  Fungal infection   skin_rash   nodal_skin_eruptions    dischromic _patches   
6  Fungal infection     itching   nodal_skin_eruptions    dischromic _patches   
7  Fungal infection     itching              skin_rash    dischromic _patches   
8  Fungal infection     itching              skin_rash   nodal_skin_eruptions   
9  Fungal infection     itching              skin_rash   nodal_skin_eruptions   

              Symptom_4 Symptom_5 Symptom_6 Symptom_7 Symptom_8 Symptom_9  \
0   dischromic _patches       N