Importing Libraries

In [2]:
# Data handling and manipulation
import pandas as pd
# Libraries for cleaning and processing data
import numpy as np

# Machine Learning libraries
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

# Metrics for model evaluation
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

#For interactive widgets
import ipywidgets as widgets
from IPython.display import display

# For handling Excel file saving
import openpyxl

# # Visualization
# import matplotlib.pyplot as plt
# import seaborn as sns

Data Cleaning (Doses dataset)

In [3]:
# Step 1: Read the doses dataset
doses_data = pd.read_excel('Fruit_Doses_for_Diseases.xlsx')  # Load doses dataset

# Step 2: Inspect the columns and first few rows
print("Doses Dataset Columns:", doses_data.columns)
print("Doses Dataset Preview:")
print(doses_data.head())

# Step 3: Clean the doses dataset
# Drop rows with missing values in the critical column 'Fruit'
doses_data_cleaned = doses_data.dropna(subset=['Fruit'])

# Step 4: Remove duplicate rows
doses_data_cleaned = doses_data_cleaned.drop_duplicates()

# Step 5: Inspect the cleaned data
print("Cleaned Doses Dataset Preview:")
print(doses_data_cleaned.head())

# Step 6: Save the cleaned doses dataset to a new file
doses_data_cleaned.to_excel('Cleaned_Fruit_Doses_for_Diseases.xlsx', index=False)

Doses Dataset Columns: Index(['Disease', 'Fruit', 'Dose (Breakfast)', 'Dose (Lunch)',
       'Dose (Dinner)'],
      dtype='object')
Doses Dataset Preview:
            Disease         Fruit Dose (Breakfast)     Dose (Lunch)  \
0     Heart Disease  Strawberries    1 cups (176g)                0   
1  Digestive Issues  Strawberries   1 medium (87g)  2 slices (151g)   
2            Anemia        Papaya                0  3 slices (164g)   
3            Anemia        Papaya  2 medium (132g)                0   
4            Anemia        Banana  2 medium (188g)                0   

     Dose (Dinner)  
0  2 medium (163g)  
1    2 cups (240g)  
2  1 slices (201g)  
3    2 cups (226g)  
4  2 medium (130g)  
Cleaned Doses Dataset Preview:
            Disease         Fruit Dose (Breakfast)     Dose (Lunch)  \
0     Heart Disease  Strawberries    1 cups (176g)                0   
1  Digestive Issues  Strawberries   1 medium (87g)  2 slices (151g)   
2            Anemia        Papaya              

Merging the Doses Dataset with the Fruit Disease dataset

In [4]:
# Step 7: Load the cleaned doses dataset
doses_data_cleaned = pd.read_excel('Cleaned_Fruit_Doses_for_Diseases.xlsx')

# Step 8: Load the fruit_disease_dataset
fruit_disease_data = pd.read_excel('fruit_disease_dataset.xlsx')

# Step 9: Inspect the columns of both datasets
print("Fruit Disease Dataset Columns:", fruit_disease_data.columns)
print("Cleaned Doses Dataset Columns:", doses_data_cleaned.columns)

# Step 10: Merge the datasets on the 'Fruit' column
merged_data = pd.merge(doses_data_cleaned, fruit_disease_data[['Fruit', 'Nutrients', 'Recommended Intake']], on='Fruit', how='left')

# Step 11: Inspect the merged dataset to ensure 'Nutrients' and 'Recommended Intake' are added correctly
print("Merged Dataset Preview:")
print(merged_data.head())

# Step 12: Save the merged dataset with nutrients and recommended intake
merged_data.to_excel('Merged_Fruit_Doses_with_Nutrients_and_Intake.xlsx', index=False)

Fruit Disease Dataset Columns: Index(['Fruit', 'Nutrients', 'Diseases/Conditions Affected',
       'Potential Benefits', 'Recommended Intake'],
      dtype='object')
Cleaned Doses Dataset Columns: Index(['Disease', 'Fruit', 'Dose (Breakfast)', 'Dose (Lunch)',
       'Dose (Dinner)'],
      dtype='object')
Merged Dataset Preview:
            Disease         Fruit Dose (Breakfast)     Dose (Lunch)  \
0     Heart Disease  Strawberries    1 cups (176g)                0   
1  Digestive Issues  Strawberries   1 medium (87g)  2 slices (151g)   
2            Anemia        Papaya                0  3 slices (164g)   
3            Anemia        Papaya                0  3 slices (164g)   
4            Anemia        Papaya                0  3 slices (164g)   

     Dose (Dinner)          Nutrients   Recommended Intake  
0  2 medium (163g)                NaN                  NaN  
1    2 cups (240g)                NaN                  NaN  
2  1 slices (201g)    Zinc, Vitamin C     1 medium per day 

Encoding the Data for the Decision Tree Model

In [5]:
# Step 12: Load the merged dataset
merged_data = pd.read_excel('Merged_Fruit_Doses_with_Nutrients_and_Intake.xlsx', engine='openpyxl')

# Step 13: Encoding categorical columns using LabelEncoder
label_encoders = {}
# Encoding categorical columns ('Fruit', 'Disease', 'Nutrients', and 'Recommended Intake')
for column in ['Fruit', 'Disease', 'Nutrients', 'Recommended Intake']:
    le = LabelEncoder()
    merged_data[column] = le.fit_transform(merged_data[column].astype(str))
    label_encoders[column] = le  # Store encoder for later use

# Step 14: Split data into features (X) and targets (y)
X = merged_data[['Disease']]  # Disease as input feature
y = merged_data[['Fruit', 'Nutrients', 'Recommended Intake']]  # Targets: Fruit, Nutrients, Recommended Intake

# Step 15: Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 16: Initialize the Decision Tree Classifier and train the model for each target
fruit_model = DecisionTreeClassifier(max_depth=5, random_state=42)
nutrients_model = DecisionTreeClassifier(max_depth=5, random_state=42)
intake_model = DecisionTreeClassifier(max_depth=5, random_state=42)

# Train models for each target variable (Fruit, Nutrients, Recommended Intake)
fruit_model.fit(X_train, y_train['Fruit'])
nutrients_model.fit(X_train, y_train['Nutrients'])
intake_model.fit(X_train, y_train['Recommended Intake'])

# Step 17: Evaluate the models using accuracy score
fruit_accuracy = accuracy_score(y_test['Fruit'], fruit_model.predict(X_test))
nutrients_accuracy = accuracy_score(y_test['Nutrients'], nutrients_model.predict(X_test))
intake_accuracy = accuracy_score(y_test['Recommended Intake'], intake_model.predict(X_test))

print(f"Fruit Model Accuracy: {fruit_accuracy}")
print(f"Nutrients Model Accuracy: {nutrients_accuracy}")
print(f"Intake Model Accuracy: {intake_accuracy}")

Fruit Model Accuracy: 0.13580032653271112
Nutrients Model Accuracy: 0.010708730250556006
Intake Model Accuracy: 0.12377389538097626


Diet Chart

In [None]:
# Load the cleaned fruit doses data
cleaned_fruit_doses_data = pd.read_excel('Cleaned_Fruit_Doses_for_Diseases.xlsx')

# Rename the column 'Diseases/Conditions Affected' to 'Disease'
cleaned_fruit_doses_data.rename(columns={'Diseases': 'Disease'}, inplace=True)

# Step 1: Function to extract quantity from the Dose columns
def extract_quantity_from_dose(dose_column):
    """Extracts the numeric quantity (in grams) from the dose description."""
    # If there is no dose, return 0
    if pd.isna(dose_column) or dose_column == '0':
        return 0
    # Extract the numeric part from strings like '2 cups (176g)'
    try:
        quantity = int(dose_column.split('(')[1].replace('g)', '').strip())
    except Exception as e:
        quantity = 0  # In case of malformed data
    return quantity

# Applying the function to extract the quantity (in grams) for each meal (Breakfast, Lunch, Dinner)
cleaned_fruit_doses_data['Breakfast_Quantity (g)'] = cleaned_fruit_doses_data['Dose (Breakfast)'].apply(extract_quantity_from_dose)
cleaned_fruit_doses_data['Lunch_Quantity (g)'] = cleaned_fruit_doses_data['Dose (Lunch)'].apply(extract_quantity_from_dose)
cleaned_fruit_doses_data['Dinner_Quantity (g)'] = cleaned_fruit_doses_data['Dose (Dinner)'].apply(extract_quantity_from_dose)

# Step 2: Encoding the 'Disease' column using LabelEncoder
label_encoder = LabelEncoder()
cleaned_fruit_doses_data['Disease_encoded'] = label_encoder.fit_transform(cleaned_fruit_doses_data['Disease'])

# Step 3: Train the Decision Tree model for Fruit
X = cleaned_fruit_doses_data[['Disease_encoded']]  # Disease (encoded) as input feature
y = cleaned_fruit_doses_data['Fruit']  # Target: Fruit

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the DecisionTreeClassifier
fruit_model = DecisionTreeClassifier(max_depth=5, random_state=42)

# Train the model
fruit_model.fit(X_train, y_train)

# Evaluate the model's accuracy
fruit_accuracy = accuracy_score(y_test, fruit_model.predict(X_test))
print(f"Fruit Model Accuracy: {fruit_accuracy}")

# Step 4: Function to generate diet chart based on selected disease
def generate_diet_chart(disease):
    # Step 4.1: Encode the selected disease
    disease_encoded = label_encoder.transform([disease])[0]  # Transform disease to encoded value
    
    # Step 4.2: Get the unique fruits for the selected disease
    fruits_for_disease = cleaned_fruit_doses_data[cleaned_fruit_doses_data['Disease'] == disease]['Fruit'].unique()
    
    # Step 4.3: Create a Weekly Diet Chart for the selected disease
    diet_chart = []
    
    # Structure: 7 days x 3 meals (Breakfast, Lunch, Dinner)
    meals = ['Breakfast', 'Lunch', 'Dinner']
    fruit_index = 0
    
    for day in range(7):  # 7 days
        for meal in meals:
            # Get the corresponding fruit for the meal
            selected_fruit = fruits_for_disease[fruit_index % len(fruits_for_disease)]
            
            # Get the quantity (in grams) for each meal
            if meal == 'Breakfast':
                quantity = cleaned_fruit_doses_data[(cleaned_fruit_doses_data['Fruit'] == selected_fruit)]['Breakfast_Quantity (g)'].values[0]
            elif meal == 'Lunch':
                quantity = cleaned_fruit_doses_data[(cleaned_fruit_doses_data['Fruit'] == selected_fruit)]['Lunch_Quantity (g)'].values[0]
            else:
                quantity = cleaned_fruit_doses_data[(cleaned_fruit_doses_data['Fruit'] == selected_fruit)]['Dinner_Quantity (g)'].values[0]
            
            diet_chart.append({
                'Day': day + 1,
                'Meal': meal,
                'Fruit': selected_fruit,
                'Quantity (g)': quantity
            })
            
            # Cycle through the fruits for the next meal
            fruit_index += 1
    
    # Convert to DataFrame
    diet_df = pd.DataFrame(diet_chart)
    
    # Display the diet chart
    print(f"Diet chart for {disease}:")
    display(diet_df)

# Step 5: Create a dropdown for selecting the disease
disease_list = cleaned_fruit_doses_data['Disease'].unique().tolist()

disease_dropdown = widgets.Dropdown(
    options=disease_list,
    description='Select Disease:',
    disabled=False
)

# Attach the function to the dropdown selection
widgets.interactive(generate_diet_chart, disease=disease_dropdown)

Fruit Model Accuracy: 0.062311557788944726


interactive(children=(Dropdown(description='Select Disease:', options=('Heart Disease', 'Digestive Issues', 'A…