In [1]:
import random
import pandas as pd
import numpy as np
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import joblib

## Only Need To Change The <u>"User Profile"</u> To Change The Payload

In [2]:
# Define the base path for the food categories directory
base_path = './Dependencies/'
profile = pd.read_csv(f"{base_path}User Profile.csv")

In [3]:
# Load existing user profiles
user_profiles = profile

foods_path = f'{base_path}Categories/'
# Define the file names with their corresponding categories
file_names = {
    'Cereals': '1. Cereal and Cereal Products.xlsx',
    'Legumes': '2. Legumes.xlsx',
    'Vegetables': '3. Vegetables.xlsx',
    'Fruits': '4. Fruits.xlsx',
    'Nuts and Dry Fruits': '5. Nuts and Dry Fruits.xlsx',
    'Dairy Products': '6. Dairy Products.xlsx',
    'Meat and Meat Products': '7. Meat and Meat Products.xlsx',
    'Fish': '8. Fish.xlsx',
    'Eggs': '9. Eggs.xlsx',
    'Sugars and Sweets': '10. Sugar - Sweets and Beverages.xlsx',
    'Dishes': '11. Dishes.xlsx'
}

# Load all files into a dictionary of DataFrames using the base path and file names
food_data = {category: pd.read_excel(foods_path + filename) for category, filename in file_names.items()}

# Function to assign reaction (0-5)
def get_reaction():
    return random.randint(0, 5)

# Functions to calculate impacts for fasting sugar
def age_impact_fasting(age):
    if age <= 40:
        return 1.0
    elif age <= 60:
        return 1.0 + (age - 40) * 0.01  # Increase by 1% per year above 40
    else:
        return 1.20 + (age - 60) * 0.015  # Further increase by 1.5% per year above 60

def bmi_impact_fasting(bmi):
    if bmi < 18.5:
        return 0.90  # Decrease by 10% for underweight
    elif bmi < 25:
        return 1.0  # No impact for normal weight
    elif bmi < 30:
        return 1.10 + (bmi - 25) * 0.02  # Increase by 2% for each unit of BMI above 25
    else:
        return 1.20 + (bmi - 30) * 0.025  # Further increase by 2.5% for each unit of BMI above 30

def hba1c_impact_fasting(hba1c):
    if hba1c < 5.7:
        return 1.0
    elif hba1c < 6.5:
        return 1.0 + (hba1c - 5.7) * 0.10  # Increase by 10% per unit above 5.7
    else:
        return 1.20 + (hba1c - 6.5) * 0.20  # Further increase by 20% per unit above 6.5

# Functions to calculate impacts for 2-hour post-meal sugar
def age_impact_post_meal(age):
    if age <= 40:
        return 1.0
    elif age <= 60:
        return 1.0 + (age - 40) * 0.15  # Increase by 15% per year above 40
    else:
        return 1.30 + (age - 60) * 0.20  # Further increase by 20% per year above 60

def bmi_impact_post_meal(bmi):
    if bmi < 18.5:
        return 0.90  # Decrease by 10% for underweight
    elif bmi < 25:
        return 1.0  # No impact for normal weight
    elif bmi < 30:
        return 1.15 + (bmi - 25) * 0.03  # Increase by 3% for each unit of BMI above 25
    else:
        return 1.30 + (bmi - 30) * 0.05  # Further increase by 5% for each unit of BMI above 30

def hba1c_impact_post_meal(hba1c):
    if hba1c < 5.7:
        return 1.0
    elif hba1c < 6.5:
        return 1.0 + (hba1c - 5.7) * 0.30  # Increase by 30% per unit above 5.7
    else:
        return 1.30 + (hba1c - 6.5) * 0.50  # Further increase by 50% per unit above 6.5

# Function to calculate gender impact
def gender_impact(gender):
    return 0.95 if gender == 'Female' else 1.0  # Decrease by 5% for females

# Function to calculate fasting blood sugar level
def calculate_fasting_sugar(hba1c, age, bmi, gender):
    baseline_fbs = 90  # Baseline fasting blood sugar level for normal range
    if hba1c >= 5.7:  # Adjust baseline for diabetic range
        baseline_fbs = 100
    total_impact_factor = age_impact_fasting(age) * bmi_impact_fasting(bmi) * hba1c_impact_fasting(hba1c) * gender_impact(gender)
    adjusted_fbs = baseline_fbs * total_impact_factor
    return adjusted_fbs

# Function to calculate 2-hour post-meal blood sugar level
def calculate_post_meal_sugar(fasting_sugar, gl, age, bmi, hba1c, gender, strictness=0.1):
    base_glycemic_response = 1.5  # Increased base response for more significant impact
    total_impact_factor = age_impact_post_meal(age) * bmi_impact_post_meal(bmi) * hba1c_impact_post_meal(hba1c) * gender_impact(gender)
    adjusted_glycemic_response = base_glycemic_response * total_impact_factor
    initial_increase = gl * adjusted_glycemic_response
    increase_after_2_hours = initial_increase * 0.70  # Applying 30% decay factor for 2 hours
    post_meal_sugar = fasting_sugar + increase_after_2_hours

    # Limiting the post-meal sugar value
    max_pms = 350  # Threshold for the post-meal sugar level
    if post_meal_sugar > max_pms:
        excess = post_meal_sugar - max_pms
        limited_excess = max_pms / (1 + np.exp(strictness * (excess - max_pms / 2)))
        post_meal_sugar = max_pms + limited_excess

    return post_meal_sugar

# Function to generate a profile
def create_profile(user, food_df):
    profiles = []
    for _, food in food_df.iterrows():
        fasting_sugar = round(calculate_fasting_sugar(user['HbA1c Level'], user['Age'], user['BMI'], user['Gender']))
        reaction = get_reaction()
        post_meal_sugar = round(calculate_post_meal_sugar(fasting_sugar, food['GL'], user['Age'], user['BMI'], user['HbA1c Level'], user['Gender']))
        
        profile = {
            'Age': user['Age'],
            'Gender': user['Gender'],
            'BMI': user['BMI'],
            'HbA1c Level': user['HbA1c Level'],
            'Food': food['Food'],
            'GL': food['GL'],
            'Reaction': reaction,
            'FBS': fasting_sugar,
            'PMS': post_meal_sugar
        }
        profiles.append(profile)
    return profiles

# Function to sample user profiles based on HbA1c distribution
def sample_user_profiles(user_profiles, proportion_above_prediabetes=0.75):
    n_total = len(user_profiles)
    n_above_prediabetes = int(n_total * proportion_above_prediabetes)
    n_normal = n_total - n_above_prediabetes

    above_prediabetes_profiles = user_profiles[user_profiles['HbA1c Level'] >= 5.7]
    normal_profiles = user_profiles[user_profiles['HbA1c Level'] < 5.7]

    sampled_above_prediabetes = above_prediabetes_profiles.sample(n=min(n_above_prediabetes, len(above_prediabetes_profiles)), random_state=42)
    sampled_normal = normal_profiles.sample(n=min(n_normal, len(normal_profiles)), random_state=42)

    sampled_profiles = pd.concat([sampled_above_prediabetes, sampled_normal])
    return sampled_profiles

# Function to generate datasets for each category and compile them into a dictionary of DataFrames
def generate_datasets_by_category(user_profiles):
    category_datasets = {}
    for category, food_df in food_data.items():
        all_data = []
        for _, user in user_profiles.iterrows():
            user_data = create_profile(user, food_df)
            all_data.extend(user_data)
        category_datasets[category] = pd.DataFrame(all_data)
    return category_datasets

# Generate dataset
datasets_by_category = generate_datasets_by_category(user_profiles)

# Display the first few rows of the dataset
display(datasets_by_category['Dishes'].head(50))

Unnamed: 0,Age,Gender,BMI,HbA1c Level,Food,GL,Reaction,FBS,PMS
0,32,Male,26.0,7.1,Kitchri,31,3,148,209
1,32,Male,26.0,7.1,Rice Kheer,29,0,148,205
2,32,Male,26.0,7.1,Feerni,32,3,148,211
3,32,Male,26.0,7.1,Wheat Dalia,28,5,148,204
4,32,Male,26.0,7.1,Banana Kheer,21,5,148,190
5,32,Male,26.0,7.1,Chapati,30,2,148,207
6,32,Male,26.0,7.1,Daal Masur Curry,12,4,148,172
7,32,Male,26.0,7.1,Kidney Bean Curry Kalool,10,1,148,168
8,32,Male,26.0,7.1,Alu Gosht,15,3,148,178
9,32,Male,26.0,7.1,Kofta,5,4,148,158


In [4]:
import json

# Dictionary to hold all category data
all_categories_data = {}

# Loop through each category and convert DataFrame to JSON
for category, df in datasets_by_category.items():
    # Convert the DataFrame to a list of dicts
    category_data = df.to_dict(orient='records')
    
    # Add this category's data to the all_categories_data dictionary
    all_categories_data[category] = category_data

# Convert the combined dictionary to a JSON string
combined_json = json.dumps(all_categories_data, indent=4)

# Optionally, save this combined JSON data to a file
with open('./Outputs/Payload.json', 'w') as file:
    file.write(combined_json)

# Print a sample to verify (printing the first 500 characters of the JSON string)
print("Combined JSON data:")
print(combined_json)

Combined JSON data:
{
    "Cereals": [
        {
            "Age": 32,
            "Gender": "Male",
            "BMI": 26.0,
            "HbA1c Level": 7.1,
            "Food": "Barley Whole grain flour",
            "GL": 21.78,
            "Reaction": 5,
            "FBS": 148,
            "PMS": 191
        },
        {
            "Age": 32,
            "Gender": "Male",
            "BMI": 26.0,
            "HbA1c Level": 7.1,
            "Food": "Corn Whole grain flour",
            "GL": 49.0,
            "Reaction": 2,
            "FBS": 148,
            "PMS": 245
        },
        {
            "Age": 32,
            "Gender": "Male",
            "BMI": 26.0,
            "HbA1c Level": 7.1,
            "Food": "Corn Flakes",
            "GL": 65.238,
            "Reaction": 4,
            "FBS": 148,
            "PMS": 277
        },
        {
            "Age": 32,
            "Gender": "Male",
            "BMI": 26.0,
            "HbA1c Level": 7.1,
            "Food": "C