# Test with a model

In [50]:
import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [51]:
path = "databases/diseases/dataset.csv"
diseases_df = pd.read_csv(os.path.join("..", path))

# Print the columns of the DataFrame to check for existing column names
print(diseases_df.columns)

# Melt the DataFrame to long format, ensuring the value_name does not conflict with existing columns
diseases_melted = diseases_df.melt(id_vars=['Disease'], value_vars=[f'Symptom_{i}' for i in range(1, 18)], var_name='Symptom_Column', value_name='Symptom_Value')
diseases_melted.dropna(subset=['Symptom_Value'], inplace=True)

# Adding a severity column with the highest severity
diseases_melted['Severity'] = 4  # Assuming the highest severity is 4

# Pivot the table back to wide format with symptoms and severities as features
diseases_pivot = diseases_melted.pivot_table(index='Disease', columns='Symptom_Value', values='Severity', fill_value=0)

# Flatten the column names
diseases_pivot.columns = [f'{symptom}_severity' for symptom in diseases_pivot.columns]
diseases_pivot.reset_index(inplace=True)
diseases_pivot

Index(['Disease', 'Symptom_1', 'Symptom_2', 'Symptom_3', 'Symptom_4',
       'Symptom_5', 'Symptom_6', 'Symptom_7', 'Symptom_8', 'Symptom_9',
       'Symptom_10', 'Symptom_11', 'Symptom_12', 'Symptom_13', 'Symptom_14',
       'Symptom_15', 'Symptom_16', 'Symptom_17'],
      dtype='object')


Unnamed: 0,Disease,abdominal_pain_severity,abnormal_menstruation_severity,acidity_severity,acute_liver_failure_severity,altered_sensorium_severity,anxiety_severity,back_pain_severity,belly_pain_severity,blackheads_severity,...,watering_from_eyes_severity,weakness_in_limbs_severity,weakness_of_one_body_side_severity,weight_gain_severity,weight_loss_severity,yellow_crust_ooze_severity,yellow_urine_severity,yellowing_of_eyes_severity,yellowish_skin_severity,itching_severity
0,(vertigo) Paroymsal Positional Vertigo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,AIDS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Acne,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Alcoholic hepatitis,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
4,Allergy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Arthritis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Bronchial Asthma,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Cervical spondylosis,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Chicken pox,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
9,Chronic cholestasis,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,4.0


In [52]:
X = diseases_pivot.drop(columns='Disease')
y = diseases_pivot['Disease']

In [54]:
# Initialize the Nearest Neighbors model
model = NearestNeighbors(n_neighbors=5, metric='euclidean')
model.fit(X,y)

In [60]:
# Function to predict top 5 closest diseases based on input symptoms and severity
def predict_closest_diseases(input_symptoms_severity):
    # Normalize the input symptoms
    input_data = pd.DataFrame([input_symptoms_severity])
    input_data = input_data.reindex(columns=X.columns, fill_value=0)
    
    # Find the nearest neighbors
    distances, indices = nbrs.kneighbors(input_data)
    
    # Get the top 5 closest diseases
    closest_diseases = y.iloc[indices[0]].values
    closest_distances = distances[0]
    
    return list(zip(closest_diseases, closest_distances))

In [65]:
# Example usage
input_symptoms_severity = {
    'itching_severity': 1,
    'skin_rash_severity': 4,
    #'nodal_skin_eruptions_severity': 1,
    #'continuous_sneezing_severity': 1
    # Add other symptoms with severities if available
}
top_5_diseases = predict_closest_diseases(input_symptoms_severity)
top_5_diseases

[('Fungal infection', 7.54983443527075),
 ('Paralysis (brain hemorrhage)', 8.06225774829855),
 ('Gastroenteritis', 8.06225774829855),
 ('AIDS', 8.06225774829855),
 ('Heart attack', 8.06225774829855)]

# Test with logic

In [226]:
import pandas as pd
import os

# Define the path and load the DataFrame
path = "databases/diseases/dataset.csv"
diseases_df = pd.read_csv(os.path.join("..", path))

# Initialize a dictionary to store diseases and their symptoms
disease_symptom_dict = {}

# Populate the dictionary with diseases and their symptoms
for _, row in diseases_df.iterrows():
    disease = row['Disease']
    symptoms = row[1:].dropna().tolist()  # Get all symptoms for the disease
    if disease not in disease_symptom_dict:
        disease_symptom_dict[disease] = set()
    disease_symptom_dict[disease].update(symptoms)

# Create a set of all unique symptoms
all_symptoms = list(symptom for symptoms in disease_symptom_dict.values() for symptom in symptoms)

# Create a DataFrame with diseases as rows and symptoms as columns (boolean values)
disease_symptom_matrix = pd.DataFrame(index=disease_symptom_dict.keys(), columns=all_symptoms, data=False)

# Populate the DataFrame with boolean values
for disease, symptoms in disease_symptom_dict.items():
    for symptom in symptoms:
        disease_symptom_matrix.at[disease, symptom] = True

disease_symptom_matrix

Unnamed: 0,skin_rash,dischromic _patches,itching,nodal_skin_eruptions,watering_from_eyes,shivering,continuous_sneezing,chills,cough,acidity,...,small_dents_in_nails,inflammatory_nails,joint_pain,silver_like_dusting,skin_peeling,red_sore_around_nose,skin_rash.1,yellow_crust_ooze,high_fever,blister
Fungal infection,True,True,True,True,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
Allergy,False,False,False,False,True,True,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
GERD,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
Chronic cholestasis,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
Drug Reaction,True,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
Peptic ulcer diseae,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AIDS,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
Diabetes,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
Gastroenteritis,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
Bronchial Asthma,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,True,False


In [229]:
# Example input symptoms and their severities
input_symptoms_severity = {
    'itching': 4,
    'skin_rash': 4,
    'nodal_skin_eruptions': 4,
    'continuous_sneezing': 4
}

# Function to score diseases based on input symptoms and severity
def score_diseases(input_symptoms_severity, disease_symptom_matrix):
    # Initialize a dictionary to store scores for each disease
    disease_scores = {disease: 0 for disease in disease_symptom_matrix.index}
    
    # Iterate over each symptom in the input
    for symptom, severity in input_symptoms_severity.items():
        if symptom in disease_symptom_matrix.columns:
            # Sum the severities for each matching disease
            for disease in disease_symptom_matrix.index:
                if disease_symptom_matrix.loc[disease, symptom]:
                    disease_scores[disease] += severity
    
    # Filter out diseases with a score of 0
    filtered_scores = {disease: score for disease, score in disease_scores.items() if score > 0}
    
    return filtered_scores

# Example usage
remaining_diseases = score_diseases(input_symptoms_severity, disease_symptom_matrix)
print("Remaining diseases with scores:", remaining_diseases)


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().