# Modeling

### Imports

In [7]:
import pandas as pd

#### Read in Preprocessed Data

In [9]:
# Load the preprocessed restaurant menu items and preview the first rows
menu_df = pd.read_csv(filepath_or_buffer='./preprocessed_data/menu_df.csv')
menu_df.head(n=5)

Unnamed: 0,restaurant_name,food_name,serving_size,serving_unit,calories,carbohydrates,sugars,fats,saturated_fats,cholesterol,sodium,fiber,potassium,proteins,carb_percent,fat_percent,protein_percent,score
0,McDonald's Canada,Egg BLT McMuffin with Shredded Lettuce (McDona...,1.0,Serving,7.99,1.55,0.56,0.14,0.02,0.0,3.76,0.99,116.09,0.58,77.596996,15.769712,29.036295,0.308
1,McDonald's,Cheeseburger,1.0,Serving,535.31,39.24,7.16,28.66,14.0,95.52,1176.09,2.39,443.77,30.27,29.321328,48.185164,22.61867,0.519
2,McDonald's,Hamburger,1.0,Serving,540.14,40.27,7.16,26.56,10.52,122.04,791.0,0.03,569.52,34.28,29.821898,44.255193,25.386011,0.498
3,McDonald's,Honey,1.0,Serving,63.84,17.3,17.25,0.0,0.0,0.0,0.84,0.04,10.92,0.06,108.39599,0.0,0.37594,0.166
4,McDonald's,Hotcakes,1.0,Serving,90.8,11.32,7.16,3.88,0.85,23.6,175.6,0.03,52.8,2.56,49.867841,38.45815,11.277533,0.135


In [64]:
# Load the preprocessed individual-food table, ordered best score first
individual_foods_df_sorted = pd.read_csv(
    './preprocessed_data/individual_foods_df.csv'
).sort_values(by='score', ascending=False)

# TODO: consider moving this de-duplication into the preprocessing step.
# Because rows are already ordered by descending score, keeping the first
# occurrence of each food_name keeps its highest-scoring entry.
individual_foods_df = individual_foods_df_sorted.drop_duplicates(
    subset=['food_name'], keep='first'
)

individual_foods_df.head(n=5)

Unnamed: 0,food_name,category,description,brand,food_category,calories,carbohydrates,fiber,sugars,fats,proteins,score
289,Lentils,Individual Foods,LENTILS,ARUJ,Vegetable and Lentil Mixes,175.0,47.5,22.5,0.0,0.0,22.9,0.952
338,Chia seeds,Individual Foods,CHIA SEEDS,O ORGANICS,Other Grains & Seeds,500.0,43.3,33.3,0.0,30.0,16.7,0.896
517,Rye Bread,Individual Foods,RYE BREAD,RUSSO'S BAKERY,Breads & Buns,267.0,53.3,3.3,0.0,0.0,10.0,0.886
510,Wheat Bread,Individual Foods,WHEAT BREAD,THE NATURAL LA CAMPAGNE BAKERY,Breads & Buns,267.0,50.0,5.0,0.0,3.33,8.33,0.885
154,Quinoa,Individual Foods,QUINOA,EILLIEN'S,Other Grains & Seeds,357.0,64.3,7.1,0.0,7.14,14.3,0.854


In [13]:
# Load the preprocessed patient records and preview the first rows
patient_df = pd.read_csv(filepath_or_buffer='./preprocessed_data/patient_df.csv')
patient_df.head(n=5)

Unnamed: 0,ID,Diabetes_012,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,GenHlth,MentHlth,PhysHlth,Sex,Age,Glucose Value,Time Checked,GlucoseRank,Cluster
0,16515,2.0,0,1.0,1,33.0,0.0,1.0,1.0,1,5.0,30.0,30.0,1,0.833333,87,2024-09-08T19:38:12,Norm,3
1,12855,2.0,0,1.0,0,30.0,1.0,0.0,1.0,0,4.0,30.0,15.0,0,0.416667,129,2024-09-21T14:43:19,Norm,0
2,6691,2.0,0,0.0,0,31.0,0.0,0.0,1.0,1,5.0,4.0,0.0,0,0.5,152,2024-10-13T04:33:24,High,0
3,15303,2.0,0,0.0,1,34.0,1.0,0.0,0.0,0,2.0,0.0,0.0,0,0.583333,99,2024-09-13T00:38:24,Norm,3
4,5714,2.0,0,1.0,1,25.0,1.0,0.0,0.0,1,3.0,0.0,0.0,0,0.5,115,2024-10-16T13:58:32,Norm,3


### Compute Scores for Recommendations

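We already have a general score that captures how suitable a meal or food item is under general diabetes nutrition recommendations. The patient-specific score starts from that general score and adjusts it using personalized factors (the patient's glucose rank and health indicators: high blood pressure, high cholesterol, smoking, stroke, and heart disease or attack) to suggest the best meal options for each person.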

In [36]:
# Patient-Specific Score Function
def patient_suitability_score(patient_data, food_data,
                              glucose_adjustment=0.1, condition_penalty=0.05):
    """Adjust a food's general score for one specific patient.

    Parameters
    ----------
    patient_data : mapping (dict or pandas Series)
        Must provide 'GlucoseRank', 'HighBP', 'HighChol', 'Smoker',
        'Stroke' and 'HeartDiseaseorAttack'. The five health fields are
        summed, so they are expected to be numeric indicators.
    food_data : mapping (dict or pandas Series)
        Must provide the general suitability score under 'score'.
    glucose_adjustment : float, optional
        Amount subtracted when GlucoseRank is 'High' and added when it is
        'Low'. Any other rank leaves the score untouched.
    condition_penalty : float, optional
        Amount subtracted per unit of the summed health indicators.

    Returns
    -------
    number
        The adjusted score, floored at 0. No upper bound is applied.
    """
    score = food_data['score']

    # Glucose adjustment: high glucose lowers suitability, low glucose raises it
    glucose_rank = patient_data['GlucoseRank']
    if glucose_rank == 'High':
        score -= glucose_adjustment
    elif glucose_rank == 'Low':
        score += glucose_adjustment

    # Health penalties: every flagged condition costs the same fixed amount
    condition_fields = ('HighBP', 'HighChol', 'Smoker', 'Stroke', 'HeartDiseaseorAttack')
    condition_total = sum(patient_data[field] for field in condition_fields)
    score -= condition_penalty * condition_total

    # Never report a negative suitability
    return max(0, score)

#### Menu Data Patient Scores

In [68]:
# Collect one record per (patient, menu item) pair; converted to a DataFrame below
menu_recs = []

# Score a sample of patients rather than the full table.
# random_state makes a re-run pick the same patients, and the min() cap keeps
# sample() from raising when fewer than 1000 patients are available.
patient_sample = patient_df.sample(n=min(1000, len(patient_df)), random_state=42)

# Materialise the menu rows once; they do not change between patients, so
# there is no need to rebuild them inside the patient loop.
menu_items = menu_df.to_dict('records')

# Run each patient/menu food pair through the function to calculate a score
for patient in patient_sample.to_dict('records'):
    for food in menu_items:
        menu_recs.append({
            'Patient_ID': patient['ID'],
            'Food_Name': food['food_name'],
            # Menu items carry no food category; the column is kept so this
            # frame has the same columns as the individual-food records.
            'Food_Category': None,
            'Restaurant': food['restaurant_name'],
            'General_Score': food['score'],
            'Patient_Score': patient_suitability_score(patient, food)
        })

# Convert to pandas DataFrame
menu_recs_df = pd.DataFrame(menu_recs)

# Preview the first rows
menu_recs_df.head()

Unnamed: 0,Patient_ID,Food_Name,Food_Category,Restaurant,General_Score,Patient_Score
0,10871,Egg BLT McMuffin with Shredded Lettuce (McDona...,,McDonald's Canada,0.308,0.208
1,10871,Cheeseburger,,McDonald's,0.519,0.419
2,10871,Hamburger,,McDonald's,0.498,0.398
3,10871,Honey,,McDonald's,0.166,0.066
4,10871,Hotcakes,,McDonald's,0.135,0.035


#### Individual Food Patient Scores

In [70]:
# Collect one record per (patient, individual food) pair; converted to a DataFrame below
individual_food_recs = []

# Materialise the food rows once; they do not change between patients, so
# there is no need to rebuild them inside the patient loop.
individual_food_items = individual_foods_df.to_dict('records')

# Run each patient/individual food pair through the function to calculate a score.
# Uses the patient_sample drawn in the menu-scoring cell, so both tables
# cover the same patients.
for patient in patient_sample.to_dict('records'):
    for food in individual_food_items:
        individual_food_recs.append({
            'Patient_ID': patient['ID'],
            'Food_Name': food['food_name'],
            'Food_Category': food['food_category'],
            # Individual foods have no restaurant; the column is kept so this
            # frame has the same columns as the menu records.
            'Restaurant': None,
            'General_Score': food['score'],
            'Patient_Score': patient_suitability_score(patient, food)
        })

# Convert to pandas DataFrame
individual_food_recs_df = pd.DataFrame(individual_food_recs)

# Preview the first rows
individual_food_recs_df.head()

Unnamed: 0,Patient_ID,Food_Name,Food_Category,Restaurant,General_Score,Patient_Score
0,10871,Lentils,Vegetable and Lentil Mixes,,0.952,0.852
1,10871,Chia seeds,Other Grains & Seeds,,0.896,0.796
2,10871,Rye Bread,Breads & Buns,,0.886,0.786
3,10871,Wheat Bread,Breads & Buns,,0.885,0.785
4,10871,Quinoa,Other Grains & Seeds,,0.854,0.754


#### Recommend Top 5 Meals and Top 5 Foods per Patient

In [86]:
# For each patient, keep the 5 menu items with the highest Patient_Score.
# The groups are iterated explicitly instead of using groupby(...).apply():
# apply() operating on the grouping column is deprecated in pandas 2.2, and
# once the grouping column is excluded, reset_index(drop=True) would discard
# Patient_ID. Iterating keeps every column and the same row order
# (patients ascending, scores descending within a patient).
top_menu_recs = pd.concat(
    [
        patient_recs.nlargest(5, 'Patient_Score')
        for _, patient_recs in menu_recs_df.groupby('Patient_ID')
    ],
    ignore_index=True,
)

# Display the result
top_menu_recs.head()

Unnamed: 0,Patient_ID,Food_Name,Food_Category,Restaurant,General_Score,Patient_Score
0,73,"House Cheesesteak, Beef",,Yard House,0.882,0.782
1,73,Chicken Drunken Noodles,,Pick Up Stix,0.852,0.752
2,73,Black Beans,,Chipotle,0.843,0.743
3,73,Black Beans,,Chili's,0.843,0.743
4,73,Black Beans,,Broken Yolk Cafe,0.843,0.743


In [84]:
# For each patient, keep the 5 individual foods with the highest Patient_Score.
# The groups are iterated explicitly instead of using groupby(...).apply():
# apply() operating on the grouping column is deprecated in pandas 2.2, and
# once the grouping column is excluded, reset_index(drop=True) would discard
# Patient_ID. Iterating keeps every column and the same row order
# (patients ascending, scores descending within a patient).
top_foods_recs = pd.concat(
    [
        patient_recs.nlargest(5, 'Patient_Score')
        for _, patient_recs in individual_food_recs_df.groupby('Patient_ID')
    ],
    ignore_index=True,
)

# Display the result
top_foods_recs.head()

Unnamed: 0,Patient_ID,Food_Name,Food_Category,Restaurant,General_Score,Patient_Score
0,73,Lentils,Vegetable and Lentil Mixes,,0.952,0.852
1,73,Chia seeds,Other Grains & Seeds,,0.896,0.796
2,73,Rye Bread,Breads & Buns,,0.886,0.786
3,73,Wheat Bread,Breads & Buns,,0.885,0.785
4,73,Quinoa,Other Grains & Seeds,,0.854,0.754


In [82]:
# Stack the per-patient menu picks on top of the per-patient food picks,
# renumbering the rows from 0 so the combined frame has a clean index
final_recs = pd.concat(
    objs=[top_menu_recs, top_foods_recs],
    ignore_index=True,
)

# Preview the combined recommendations
final_recs.head(n=5)

Unnamed: 0,Patient_ID,Food_Name,Food_Category,Restaurant,General_Score,Patient_Score
0,73,"House Cheesesteak, Beef",,Yard House,0.882,0.782
1,73,Chicken Drunken Noodles,,Pick Up Stix,0.852,0.752
2,73,Black Beans,,Chipotle,0.843,0.743
3,73,Black Beans,,Chili's,0.843,0.743
4,73,Black Beans,,Broken Yolk Cafe,0.843,0.743
