# Food Recommendation System for Diabetic Patients

#### MADS Capstone Project
##### Claire Bentzen, Tara Dehdari, Logan Van Dine

* Project description/intro

### Imports

In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup



### Data Extraction

#### Restaraunt/Nutritoinal Information Data

##### NutritionX API

In [16]:
# API ID and key
app_key = "37d8dfed8a3b918543105661e60d25aa"
app_id = "4449c303"

# Endpoints
restaurant_url = "https://trackapi.nutritionix.com/v2/search/instant"
nutrients_url = "https://trackapi.nutritionix.com/v2/natural/nutrients"

# API Headers
headers = {
    "x-app-id": app_id,
    "x-app-key": app_key,
    "Content-Type": "application/json"
}

# List of restaurants
restaurants = ["McDonald's", "Burger King", "Taco Bell", "Chick-fil-A", "Wendy's"]

# Initialize an empty list to store data
all_menu_items = []

# Loop through each restaurant and search for menu items
for restaurant in restaurants:
    print(f"Searching for menu items from {restaurant}...")

    params = {
        "query": restaurant
    }
    
    response = requests.get(restaurant_url, headers=headers, params=params)
    
    if response.status_code == 200:
        search_data = response.json()
        menu_items = search_data.get('branded', [])

        # Loop through the menu items to gather nutritional information
        for item in menu_items:
            food_name = item.get('food_name', 'N/A')
            menu_item_data = {
                'restaurant_name': item.get('brand_name', 'N/A'),
                'food_name': food_name,
                'serving_size': item.get('serving_qty', 'N/A'),
                'serving_unit': item.get('serving_unit', 'N/A'),
                'calories': item.get('nf_calories', 'N/A'),
                'carbohydrates': item.get('nf_total_carbohydrate', 'N/A'),
                'sugars': item.get('nf_sugars', 'N/A'),
                'fats': item.get('nf_total_fat', 'N/A'),
                'saturated_fats': item.get('nf_saturated_fat', 'N/A'),
                'cholesterol': item.get('nf_cholesterol', 'N/A'),
                'sodium': item.get('nf_sodium', 'N/A'),
                'fiber': item.get('nf_dietary_fiber', 'N/A'),
                'potassium': item.get('nf_potassium', 'N/A'),
                'proteins': item.get('nf_protein', 'N/A')
            }
            
            # Check for missing nutritional info and try to get it from common foods
            if any(v == 'N/A' for v in [menu_item_data['calories'], menu_item_data['carbohydrates'], 
                                         menu_item_data['sugars'], menu_item_data['fats'],
                                         menu_item_data['saturated_fats'], menu_item_data['cholesterol'],
                                         menu_item_data['sodium'], menu_item_data['fiber'], 
                                         menu_item_data['potassium'], menu_item_data['proteins']]):
                
                # Make a search for the common food name
                common_params = {
                    "query": food_name
                }
                common_response = requests.get(restaurant_url, headers=headers, params=common_params)
                
                if common_response.status_code == 200:
                    common_data = common_response.json()
                    common_foods = common_data.get('common', [])
                    
                    if common_foods:
                        # Use the first common food to get the nutritional info
                        common_food_name = common_foods[0].get('food_name')
                        nutrients_data = {
                            "query": common_food_name
                        }
                        
                        # Make the POST request to the /natural/nutrients endpoint
                        nutrients_response = requests.post(nutrients_url, headers=headers, json=nutrients_data)
                        
                        if nutrients_response.status_code == 200:
                            nutrients_info = nutrients_response.json()
                            food_nutrients = nutrients_info.get('foods', [])
                            
                            if food_nutrients:
                                # Update menu item data with nutrients
                                food_nutrient_info = food_nutrients[0]
                                menu_item_data.update({
                                    'calories': food_nutrient_info.get('nf_calories', 'N/A'),
                                    'carbohydrates': food_nutrient_info.get('nf_total_carbohydrate', 'N/A'),
                                    'sugars': food_nutrient_info.get('nf_sugars', 'N/A'),
                                    'fats': food_nutrient_info.get('nf_total_fat', 'N/A'),
                                    'saturated_fats': food_nutrient_info.get('nf_saturated_fat', 'N/A'),
                                    'cholesterol': food_nutrient_info.get('nf_cholesterol', 'N/A'),
                                    'sodium': food_nutrient_info.get('nf_sodium', 'N/A'),
                                    'fiber': food_nutrient_info.get('nf_dietary_fiber', 'N/A'),
                                    'potassium': food_nutrient_info.get('nf_potassium', 'N/A'),
                                    'protein': food_nutrient_info.get('nf_protein', 'N/A')
                                })

            all_menu_items.append(menu_item_data)
    else:
        print(f"Error: Unable to search for {restaurant}. Status code: {response.status_code}")

# Convert the list of all menu items into a DataFrame
menu_df = pd.DataFrame(all_menu_items)

# Display the menu_info dataframe
print(menu_df)

Searching for menu items from McDonald's...
Searching for menu items from Burger King...
Searching for menu items from Taco Bell...
Searching for menu items from Chick-fil-A...
Searching for menu items from Wendy's...
      restaurant_name                                          food_name  \
0   McDonald's Canada  Egg BLT McMuffin with Shredded Lettuce (McDona...   
1          McDonald's                                       Cheeseburger   
2          McDonald's                                          Hamburger   
3          McDonald's                                              Honey   
4          McDonald's                                           Hotcakes   
..                ...                                                ...   
95            Wendy's                                  Pretzel Baconator   
96            Wendy's                                    Sausage Biscuit   
97            Wendy's                                      Sprite, Large   
98            Wendy's 

In [18]:
# Display menu_df head
menu_df.head()

Unnamed: 0,restaurant_name,food_name,serving_size,serving_unit,calories,carbohydrates,sugars,fats,saturated_fats,cholesterol,sodium,fiber,potassium,proteins,protein
0,McDonald's Canada,Egg BLT McMuffin with Shredded Lettuce (McDona...,1,Serving,7.99,1.55,0.56,0.14,0.02,0.0,3.76,0.99,116.09,,0.58
1,McDonald's,Cheeseburger,1,Serving,535.31,39.24,7.16,28.66,14.0,95.52,1176.09,2.39,443.77,,30.27
2,McDonald's,Hamburger,1,Serving,540.14,40.27,,26.56,10.52,122.04,791.0,,569.52,,34.28
3,McDonald's,Honey,1,Serving,63.84,17.3,17.25,0.0,0.0,0.0,0.84,0.04,10.92,,0.06
4,McDonald's,Hotcakes,1,Serving,90.8,11.32,,3.88,0.85,23.6,175.6,,52.8,,2.56


In [19]:
# Save the DataFrame as a CSV file
data_dir = './data/'
menu_df.to_csv(data_dir + 'menu_df.csv', index=False)

#### Nutritional Information Data

##### FoodData API

In [7]:
# FoodData Central API key and base URL
api_key = 'wS2YuAB4DyHslyim5H0B9pwatIUFcx75frCAeZfn'
base_url = 'https://api.nal.usda.gov/fdc/v1'

# Headers for requests
headers = {
    'Content-Type': 'application/json'
}

**Function to Search for Foods**

This function takes a search query and returns a list of foods matching that query

In [8]:
# Function to search for foods
def search_foods(query, page_size=25):
    search_url = f"{base_url}/foods/search"
    params = {
        'api_key': api_key,
        'query': query,
        'pageSize': page_size,
        'dataType': ["Foundation", "SR Legacy", "Branded"]  # Specifying data types
    }
    response = requests.get(search_url, params=params, headers=headers)
    if response.status_code == 200:
        return response.json().get('foods', [])
    else:
        print(f"Error: {response.status_code}")
        return []

**Function to Fetch Food Details**

This function retrieves information about a specific food using its FDC ID

In [9]:
# Function to fetch food details
def get_food_details(fdc_id):
    details_url = f"{base_url}/food/{fdc_id}"
    params = {'api_key': api_key}
    response = requests.get(details_url, params=params, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching details for FDC ID {fdc_id}: {response.status_code}")
        return None

**Search for Multiple Foods and Fetch Details** 

Combining the two functions to search for foods and then retrieve detailed information for each food item found.

In [10]:
# List of diabetic-friendly foods
search_items = [
    # Vegetables
    "Broccoli", "Spinach", "Kale", "Cauliflower", "Zucchini", "Bell peppers",
    "Cucumber", "Asparagus", "Green beans", "Brussels sprouts",
    
    # Fruits
    "Apples", "Strawberries", "Blueberries", "Raspberries", "Oranges", "Peaches",
    "Grapefruit", "Cherries", "Pears", "Plums", "Kiwi",
    
    # Whole Grains
    "Quinoa", "Barley", "Brown rice", "Oats", "Whole wheat pasta", "Buckwheat",
    "Farro", "Bulgur",
    
    # Lean Proteins
    "Chicken breast", "Turkey breast", "Salmon", "Cod", "Tofu", "Tempeh",
    "Eggs", "Greek yogurt", "Cottage cheese",
    
    # Legumes and Beans
    "Lentils", "Chickpeas", "Black beans", "Kidney beans", "Pinto beans", "Edamame",
    
    # Nuts and Seeds
    "Almonds", "Walnuts", "Chia seeds", "Flax seeds", "Pumpkin seeds", "Sunflower seeds",
    
    # Dairy and Dairy Alternatives
    "Unsweetened almond milk", "Unsweetened soy milk", "Greek yogurt", "Cottage cheese",
    "Mozzarella cheese", "Ricotta cheese",
    
    # Healthy Oils
    "Olive oil", "Avocado oil", "Coconut oil",
    
    # Snacks
    "Popcorn", "Hummus", "Hard-boiled eggs", "Mixed nuts",
    
    # Miscellaneous
    "Avocado", "Sweet potatoes", "Dark chocolate", "Vinegar",
    "Basil", "Oregano", "Parsley", "Cinnamon", "Turmeric", "Ginger"
]

In [11]:
# Function to gather data for multiple foods
def gather_food_data(search_items):
    all_food_data = []

    for item in search_items:
        print(f"Searching for {item}...")
        foods = search_foods(item, page_size=5)  # Limiting to top 5 results per item

        for food in foods:
            # Extract relevant fields
            food_info = {
                'food_name': item,
                'category': 'Diabetic-Friendly Foods',
                'description': food.get('description'),
                'brand': food.get('brandName', 'N/A'),
                'food_category': food.get('foodCategory', 'N/A'),
                'calories': next((nutrient['value'] for nutrient in food['foodNutrients'] if nutrient['nutrientName'] == 'Energy'), 'N/A'),
                'carbohydrates': next((nutrient['value'] for nutrient in food['foodNutrients'] if nutrient['nutrientName'] == 'Carbohydrate, by difference'), 'N/A'),
                'fiber': next((nutrient['value'] for nutrient in food['foodNutrients'] if nutrient['nutrientName'] == 'Fiber, total dietary'), 'N/A'),
                'sugars': next((nutrient['value'] for nutrient in food['foodNutrients'] if nutrient['nutrientName'] == 'Sugars, total'), 'N/A'),
                'fats': next((nutrient['value'] for nutrient in food['foodNutrients'] if nutrient['nutrientName'] == 'Total lipid (fat)'), 'N/A'),
                'proteins': next((nutrient['value'] for nutrient in food['foodNutrients'] if nutrient['nutrientName'] == 'Protein'), 'N/A')
            }
            all_food_data.append(food_info)

    # Convert to DataFrame
    diabetic_food_df = pd.DataFrame(all_food_data)
    return diabetic_food_df

# Run the function and display the DataFrame
diabetic_food_df = gather_food_data(search_items)
print(diabetic_food_df.head())


Searching for Broccoli...
Searching for Spinach...
Searching for Kale...
Searching for Cauliflower...
Searching for Zucchini...
Searching for Bell peppers...
Searching for Cucumber...
Searching for Asparagus...
Searching for Green beans...
Searching for Brussels sprouts...
Searching for Apples...
Searching for Strawberries...
Searching for Blueberries...
Searching for Raspberries...
Searching for Oranges...
Searching for Peaches...
Searching for Grapefruit...
Searching for Cherries...
Searching for Pears...
Searching for Plums...
Searching for Kiwi...
Searching for Quinoa...
Searching for Barley...
Searching for Brown rice...
Searching for Oats...
Searching for Whole wheat pasta...
Searching for Buckwheat...
Searching for Farro...
Searching for Bulgur...
Searching for Chicken breast...
Searching for Turkey breast...
Searching for Salmon...
Searching for Cod...
Searching for Tofu...
Searching for Tempeh...
Searching for Eggs...
Searching for Greek yogurt...
Searching for Cottage cheese.

**Export Data to CSV**

Saving to CSV file after gathering the data

In [29]:
# Save the DataFrame as CSV
data_dir = './data/'
diabetic_food_df.to_csv(data_dir + 'diabetic_friendly_foods.csv', index=False)

#### Glycemic Food Index Data 

##### University Health News Webscraping 

In [8]:
# URL of page scraping
url = 'https://universityhealthnews.com/daily/nutrition/glycemic-index-chart/'

# Send GET request to the webpage
response = requests.get(url)

# Check if  request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Find the table containing the glycemic index data
    tables = soup.find_all('table')
 
    if tables:
        table = tables[0]
        
        # Extract rows and columns
        rows = table.find_all('tr')
        data = []
        
        # Loop through rows to extract each cell
        for row in rows:
            cols = row.find_all(['td', 'th'])
            cols = [col.text.strip() for col in cols]
            data.append(cols)
        
        # Create a DataFrame from the extracted data
        gi_df = pd.DataFrame(data[1:], columns=data[0]) 
        print(gi_df)
        
        # Save to CSV
        gi_df.to_csv(data_dir + 'glycemic_index.csv', index=False)
        print("Data saved to glycemic_index.csv")
    else:
        print("No tables found on the page.")
else:
    print("Failed to retrieve the page.")

   LOW GLYCEMIC INDEX (55 or less)    
0                                     
1                           Fruits    
2                    Apples (120g)  40
3               Apple juice (250g)  39
4            Apricots, dried (60g)  32
..                             ...  ..
61           Oatmeal cookies (25g)  54
62                  Snickers (60g)  43
63               Sponge cake (63g)  46
64            Strawberry jam (30g)  51
65                    Sushi (100g)  55

[66 rows x 2 columns]
Data saved to glycemic_index.csv


#### Patient Health Information