In [85]:
import random
import pandas as pd
import numpy as np
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import joblib

In [86]:
# Define the base path for the food categories directory
base_path = './Model 02 Dependencies/'
profile = pd.read_csv(f"{base_path}User Profile.csv")

In [87]:
# Load existing user profiles
user_profiles = profile

foods_path = f'{base_path}Categories/'
# Define the file names with their corresponding categories
file_names = {
    'Cereals': '1. Cereal and Cereal Products.xlsx',
    'Legumes': '2. Legumes.xlsx',
    'Vegetables': '3. Vegetables.xlsx',
    'Fruits': '4. Fruits.xlsx',
    'Nuts and Dry Fruits': '5. Nuts and Dry Fruits.xlsx',
    'Dairy Products': '6. Dairy Products.xlsx',
    'Meat and Meat Products': '7. Meat and Meat Products.xlsx',
    'Fish': '8. Fish.xlsx',
    'Eggs': '9. Eggs.xlsx',
    'Sugars and Sweets': '10. Sugar - Sweets and Beverages.xlsx',
    'Dishes': '11. Dishes.xlsx'
}

# Load all files into a dictionary of DataFrames using the base path and file names
food_data = {category: pd.read_excel(foods_path + filename) for category, filename in file_names.items()}

# Function to assign reaction (0-5)
def get_reaction():
    return random.randint(1, 5)

# Functions to calculate impacts for fasting sugar
def age_impact_fasting(age):
    if age <= 40:
        return 1.0
    elif age <= 60:
        return 1.0 + (age - 40) * 0.01  # Increase by 1% per year above 40
    else:
        return 1.20 + (age - 60) * 0.015  # Further increase by 1.5% per year above 60

def bmi_impact_fasting(bmi):
    if bmi < 18.5:
        return 0.90  # Decrease by 10% for underweight
    elif bmi < 25:
        return 1.0  # No impact for normal weight
    elif bmi < 30:
        return 1.10 + (bmi - 25) * 0.02  # Increase by 2% for each unit of BMI above 25
    else:
        return 1.20 + (bmi - 30) * 0.025  # Further increase by 2.5% for each unit of BMI above 30

def hba1c_impact_fasting(hba1c):
    if hba1c < 5.7:
        return 1.0
    elif hba1c < 6.5:
        return 1.0 + (hba1c - 5.7) * 0.10  # Increase by 10% per unit above 5.7
    else:
        return 1.20 + (hba1c - 6.5) * 0.20  # Further increase by 20% per unit above 6.5

# Functions to calculate impacts for 2-hour post-meal sugar
def age_impact_post_meal(age):
    if age <= 40:
        return 1.0
    elif age <= 60:
        return 1.0 + (age - 40) * 0.15  # Increase by 15% per year above 40
    else:
        return 1.30 + (age - 60) * 0.20  # Further increase by 20% per year above 60

def bmi_impact_post_meal(bmi):
    if bmi < 18.5:
        return 0.90  # Decrease by 10% for underweight
    elif bmi < 25:
        return 1.0  # No impact for normal weight
    elif bmi < 30:
        return 1.15 + (bmi - 25) * 0.03  # Increase by 3% for each unit of BMI above 25
    else:
        return 1.30 + (bmi - 30) * 0.05  # Further increase by 5% for each unit of BMI above 30

def hba1c_impact_post_meal(hba1c):
    if hba1c < 5.7:
        return 1.0
    elif hba1c < 6.5:
        return 1.0 + (hba1c - 5.7) * 0.30  # Increase by 30% per unit above 5.7
    else:
        return 1.30 + (hba1c - 6.5) * 0.50  # Further increase by 50% per unit above 6.5

# Function to calculate gender impact
def gender_impact(gender):
    return 0.95 if gender == 'Female' else 1.0  # Decrease by 5% for females

# Function to calculate fasting blood sugar level
def calculate_fasting_sugar(hba1c, age, bmi, gender):
    baseline_fbs = 90  # Baseline fasting blood sugar level for normal range
    if hba1c >= 5.7:  # Adjust baseline for diabetic range
        baseline_fbs = 100
    total_impact_factor = age_impact_fasting(age) * bmi_impact_fasting(bmi) * hba1c_impact_fasting(hba1c) * gender_impact(gender)
    adjusted_fbs = baseline_fbs * total_impact_factor
    return adjusted_fbs

# Function to calculate 2-hour post-meal blood sugar level
def calculate_post_meal_sugar(fasting_sugar, gl, age, bmi, hba1c, gender):
    base_glycemic_response = 1.7  # Base response for significant impact
    # Calculate the total impact factor from age, BMI, HbA1c, and gender
    total_impact_factor = age_impact_post_meal(age) * bmi_impact_post_meal(bmi) * hba1c_impact_post_meal(hba1c) * gender_impact(gender)

    # Amplify the effect of GL if it's high
    if gl >= 25:
        gl_effect = gl * 1.3  # Increase the effect of high GL
    else:
        gl_effect = gl  # Normal effect for low GL

    # Calculate the adjusted glycemic response
    adjusted_glycemic_response = base_glycemic_response * total_impact_factor * gl_effect

    # Calculate the increase in sugar due to meal
    initial_increase = adjusted_glycemic_response
    increase_after_2_hours = initial_increase * 0.70  # Applying decay factor after 2 hours

    # Final post-meal sugar calculation
    post_meal_sugar = fasting_sugar + increase_after_2_hours
    return post_meal_sugar


# Function to generate a profile
def create_profile(user, food_df):
    profiles = []
    for _, food in food_df.iterrows():
        fasting_sugar = round(calculate_fasting_sugar(user['HbA1c Level'], user['Age'], user['BMI'], user['Gender']))
        reaction = get_reaction()
        post_meal_sugar = round(calculate_post_meal_sugar(fasting_sugar, food['GL'], user['Age'], user['BMI'], user['HbA1c Level'], user['Gender']))
        
        profile = {
            'Age': user['Age'],
            'Gender': user['Gender'],
            'BMI': user['BMI'],
            'HbA1c Level': user['HbA1c Level'],
            'Food': food['Food'],
            'GL': food['GL'],
            'Reaction': reaction,
            'FBS': fasting_sugar,
            'PMS': post_meal_sugar
        }
        profiles.append(profile)
    return profiles

# Function to sample user profiles based on HbA1c distribution
def sample_user_profiles(user_profiles, proportion_above_prediabetes=0.75):
    n_total = len(user_profiles)
    n_above_prediabetes = int(n_total * proportion_above_prediabetes)
    n_normal = n_total - n_above_prediabetes

    above_prediabetes_profiles = user_profiles[user_profiles['HbA1c Level'] >= 5.7]
    normal_profiles = user_profiles[user_profiles['HbA1c Level'] < 5.7]

    sampled_above_prediabetes = above_prediabetes_profiles.sample(n=min(n_above_prediabetes, len(above_prediabetes_profiles)), random_state=42)
    sampled_normal = normal_profiles.sample(n=min(n_normal, len(normal_profiles)), random_state=42)

    sampled_profiles = pd.concat([sampled_above_prediabetes, sampled_normal])
    return sampled_profiles

# Function to generate datasets for each category and compile them into a dictionary of DataFrames
def generate_datasets_by_category(user_profiles):
    category_datasets = {}
    for category, food_df in food_data.items():
        all_data = []
        for _, user in user_profiles.iterrows():
            user_data = create_profile(user, food_df)
            all_data.extend(user_data)
        category_datasets[category] = pd.DataFrame(all_data)
    return category_datasets

# Generate dataset
datasets_by_category = generate_datasets_by_category(user_profiles)

# Display the first few rows of the dataset
display(datasets_by_category['Dishes'].head(50))

Unnamed: 0,Age,Gender,BMI,HbA1c Level,Food,GL,Reaction,FBS,PMS
0,43,Male,24,4.4,Kitchri,31,1,93,163
1,43,Male,24,4.4,Rice Kheer,29,5,93,158
2,43,Male,24,4.4,Feerni,32,4,93,165
3,43,Male,24,4.4,Wheat Dalia,28,3,93,156
4,43,Male,24,4.4,Banana Kheer,21,1,93,129
5,43,Male,24,4.4,Chapati,30,5,93,160
6,43,Male,24,4.4,Daal Masur Curry,12,4,93,114
7,43,Male,24,4.4,Kidney Bean Curry Kalool,10,5,93,110
8,43,Male,24,4.4,Alu Gosht,15,1,93,119
9,43,Male,24,4.4,Kofta,5,2,93,102


In [88]:
# Load the trained pipeline and the target encoder
pipeline = joblib.load(f'{base_path}Model 02 Pipeline.pkl')
target_encoder = joblib.load(f'{base_path}Target Encoder.pkl')

# Function to preprocess and predict for a given DataFrame
def preprocess_and_predict(new_data):
    # Convert 'Food' column to string to prevent FutureWarning
    new_data['Food'] = new_data['Food'].astype(str)
    # Apply the target encoder to the new data
    encoded_food = target_encoder.transform(new_data['Food'])
    new_data['Food'] = encoded_food
    
    # Predict using the pipeline
    predictions = pipeline.predict(new_data)
    return predictions

# Dictionary to store predictions for each category
category_predictions = {}

# Example of using the prediction function for each category
for category, data in datasets_by_category.items():
    category_predictions[category] = preprocess_and_predict(data)
    print(f"Recommended Score for {category}: {category_predictions[category]}")

Recommended Score for Cereals: [4 1 1 8 1 2 6 1 6 9 1 1 4 8 4 1 1 1 6 9 8 5 1]
Recommended Score for Legumes: [6 9 4 0 5 5 3 3 2 7 8 7 8 9 4 7 7 9 3 5]
Recommended Score for Vegetables: [5 3 5 7 3 0 3 9 7 9 5 7 7 3 3 0 3 7 7 0 0]
Recommended Score for Fruits: [9 9 3 7 7 0 5 0 3 0 3 0 0 7 5 5 0 7 0 9 3 9 7 0 0 9 9 0 5 0 9 9 9 5]
Recommended Score for Nuts and Dry Fruits: [3 9 9 5 5 5 7 5 8 0 7 1]
Recommended Score for Dairy Products: [9 5 3 9 7 3 5 5 6 9 0 7]
Recommended Score for Meat and Meat Products: [7 9 5 9 0 0 3 7 0 5 9 0 9 9]
Recommended Score for Fish: [5 0 5 9 0 7 0 5 0]
Recommended Score for Eggs: [7 3 5 5 7 7 7 9]
Recommended Score for Sugars and Sweets: [1 1 1 1 1 1 9 5 5 5 5 5 9 5]
Recommended Score for Dishes: [7 7 6 7 2 7 7 9 0 3 9 9 5 3 9 3 0 7 5 7 1 6]
