# Import CoFID Dataset

In [21]:
import pandas as pd

# import CoFID dataset

cofid_path = 'data/CoFID_Dataset_2021.xlsx'

# proximates sheet (traditional food composition data)
df_proximates = pd.read_excel(cofid_path, sheet_name='1.3 Proximates', usecols='B, H:AU')
use_cols_prox = ['Food Name', #'Protein (g)', 'Fat (g)', 'Carbohydrate (g)', 'Energy (kcal) (kcal)', 
                 'Starch (g)', 'Total sugars (g)', 'AOAC fibre (g)']
df_proximates = df_proximates[use_cols_prox]
df_proximates = df_proximates.rename(columns={'Energy (kcal) (kcal)': 'Energy (kcal)', 
                                             'AOAC fibre (g)': 'Fibre (g)',
                                             'Total sugars (g)': 'Sugars (g)', 
                                             'Food Name': 'food'})

# inorganics sheet (metals and minerals like potassium, calcium, etc.)
df_inorganics = pd.read_excel(cofid_path, sheet_name='1.4 Inorganics', usecols='B, H:S')
df_inorganics = df_inorganics.rename(columns={'Food Name': 'food'})

df_vitamins = pd.read_excel(cofid_path, sheet_name='1.5 Vitamins', usecols='B, K:X')
df_vitamins = df_vitamins.rename(columns={'Food Name': 'food'})

# Merge tables
df_cofid = pd.merge(df_proximates, df_inorganics, on="food", how="outer").fillna(0)
df_cofid = pd.merge(df_cofid, df_vitamins, on="food", how="outer").fillna(0)

print(df_cofid.columns)

#display(df_proximates.head())
#display(df_inorganics.head())


Index(['food', 'Starch (g)', 'Sugars (g)', 'Fibre (g)', 'Sodium (mg)',
       'Potassium (mg)', 'Calcium (mg)', 'Magnesium (mg)', 'Phosphorus (mg)',
       'Iron (mg)', 'Copper (mg)', 'Zinc (mg)', 'Chloride (mg)',
       'Manganese (mg)', 'Selenium (µg)', 'Iodine (µg)', 'Vitamin D (µg)',
       'Vitamin E (mg)', 'Vitamin K1 (µg)', 'Thiamin (mg)', 'Riboflavin (mg)',
       'Niacin (mg)', 'Tryptophan/60 (mg)', 'Niacin equivalent (mg)',
       'Vitamin B6 (mg)', 'Vitamin B12 (µg)', 'Folate (µg)',
       'Pantothenate (mg)', 'Biotin (µg)', 'Vitamin C (mg)'],
      dtype='object')


In [25]:
# search cofid dataset through this link 
#https://quadram.ac.uk/UKfoodcomposition/?s=Chopped+tomatoes&submit=Search

food_grams = {
    'Bananas, flesh only' : 114, 
    'Raspberries, raw' : 100, 
    'Passion fruit, flesh and pips' : 22, 
    'Pumpkin seeds' : 10,
    'Yogurt, low fat, plain' : 70,
    'Chicken, breast, grilled without skin, meat only' : 360, 
    'Ham, gammon joint, boiled' : 57, 
    'Tomatoes, canned, whole contents' : 400, 
    'Baked beans, canned in tomato sauce, reduced sugar, reduced salt' : 400, 
    'Avocado, Hass, flesh only' : 45, 
    'Almonds, whole kernels' : 12, 
    'Tuna, canned in brine, drained' : 50, 
    'Chocolate, plain' : 40,
    'Eggs, chicken, whole, raw' : 65, 
    'Mushrooms, white, raw' : 70
}

In [26]:
num_cols = df_cofid.columns.drop('food')
df_cofid[num_cols] = df_cofid[num_cols]\
    .apply(pd.to_numeric, errors='coerce')

# 2️⃣ Then your loop will work without TypeErrors:
records = []
for food, grams in food_grams.items():
    match = df_cofid[df_cofid['food'].str.contains(food, na=False)]
    if match.empty:
        rec = {'food': food, 'grams': grams}
        for col in num_cols:
            rec[col] = None
    else:
        row = match.iloc[0]
        rec = {'food': food, 'grams': grams}
        for col in num_cols:
            # row[col] is now a float or NaN, so this math works
            rec[col] = row[col] * grams / 100
    records.append(rec)

df_items = pd.DataFrame(records).set_index('food').round(1)
totals   = df_items.sum(numeric_only=True).round(1)

#print("Per-item breakdown:")
#print(df_items, "\n")
print("Daily totals:")
print(totals)

Daily totals:
grams                     1815.0
Starch (g)                  45.4
Sugars (g)                  86.6
Fibre (g)                   31.5
Sodium (mg)               1955.8
Potassium (mg)            5504.5
Calcium (mg)               472.3
Magnesium (mg)             500.4
Phosphorus (mg)           2293.3
Iron (mg)                   15.5
Copper (mg)                  2.5
Zinc (mg)                   11.3
Chloride (mg)             4164.7
Manganese (mg)               3.2
Selenium (µg)              141.9
Iodine (µg)                115.2
Vitamin D (µg)               4.4
Vitamin E (mg)              14.1
Vitamin K1 (µg)             25.3
Thiamin (mg)                 2.6
Riboflavin (mg)              1.8
Niacin (mg)                 77.3
Tryptophan/60 (mg)          37.4
Niacin equivalent (mg)     114.4
Vitamin B6 (mg)              4.5
Vitamin B12 (µg)             3.7
Folate (µg)                351.1
Pantothenate (mg)           12.6
Biotin (µg)                 61.6
Vitamin C (mg)              8

In [27]:
from collections import OrderedDict

rda = {
    #'Water (g)':                   3700,   # Adequate Intake (AI) for men 19–50 y
    #'Protein (g)':                   56,   # RDA for adult men
    #'Fat (g)':                       78,   # FDA Daily Value
    #'Carbohydrate (g)':             130,   # RDA (minimum)
    #'Energy (kcal)':               2500,   # Estimated energy requirement
    'Fibre (g)':                     38,   # Adequate Intake for men 19–50 y
    'Sodium (mg)':                 2300,   # Tolerable Upper Intake Level
    'Potassium (mg)':              3400,   # Adequate Intake for men
    'Calcium (mg)':                1000,   # RDA for adults 19–50 y
    'Magnesium (mg)':               420,   # RDA for adult men
    'Phosphorus (mg)':              700,   # RDA for adults
    'Iron (mg)':                      8,   # RDA for adult men
    'Copper (mg)':                  0.9,   # RDA for adults
    'Zinc (mg)':                    11,   # RDA for adult men
    'Chloride (mg)':               2300,   # Adequate Intake for adults
    'Manganese (mg)':               2.3,   # Adequate Intake for men
    'Selenium (µg)':                 55,   # RDA for adults
    'Iodine (µg)':                  150,   # RDA for adults
    'Vitamin D (µg)':                15,   # RDA (600 IU)
    'Vitamin E (mg)':                15,   # RDA for adults
    'Vitamin K1 (µg)':              120,   # RDA for adult men
    'Thiamin (mg)':                  1.2,  # RDA for adult men
    'Riboflavin (mg)':               1.3,  # RDA for adult men
    'Niacin equivalent (mg)':       16,    # RDA for adult men
    'Vitamin B6 (mg)':               1.3,  # RDA for adult men
    'Vitamin B12 (µg)':              2.4,  # RDA for adult men
    'Folate (µg)':                  400,   # RDA (µg DFE) for adults
    'Pantothenate (mg)':             5,    # Adequate Intake for adults
    'Biotin (µg)':                   30,   # Adequate Intake for adults
    'Vitamin C (mg)':                90,   # RDA for adult men
}

# 2️⃣ Build DataFrame for comparison
# Ensure 'totals' contains the same indices as rda keys
df_compare = pd.DataFrame({
    "Intake": totals,
    "RDA": pd.Series(rda)
}).dropna()

# Ensure the order of columns matches the order in rda
df_compare = pd.DataFrame(OrderedDict([
    ("Intake", totals),
    ("RDA", pd.Series(rda)),
]))
df_compare["% of RDA"] = (df_compare["Intake"] / df_compare["RDA"] * 100).round(1)

# 3️⃣ Print comparison
print("Comparison of Daily Intake to Recommendations:")
print(df_compare)

Comparison of Daily Intake to Recommendations:
                        Intake     RDA  % of RDA
Biotin (µg)               61.6    30.0     205.3
Calcium (mg)             472.3  1000.0      47.2
Chloride (mg)           4164.7  2300.0     181.1
Copper (mg)                2.5     0.9     277.8
Fibre (g)                 31.5    38.0      82.9
Folate (µg)              351.1   400.0      87.8
Iodine (µg)              115.2   150.0      76.8
Iron (mg)                 15.5     8.0     193.8
Magnesium (mg)           500.4   420.0     119.1
Manganese (mg)             3.2     2.3     139.1
Niacin (mg)               77.3     NaN       NaN
Niacin equivalent (mg)   114.4    16.0     715.0
Pantothenate (mg)         12.6     5.0     252.0
Phosphorus (mg)         2293.3   700.0     327.6
Potassium (mg)          5504.5  3400.0     161.9
Riboflavin (mg)            1.8     1.3     138.5
Selenium (µg)            141.9    55.0     258.0
Sodium (mg)             1955.8  2300.0      85.0
Starch (g)            