### Imports

In [41]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from io import StringIO

### Read the CSV containing all meal data

In [42]:
# Load the meal log: one row per food item within a meal (see item_name / meal_id),
# followed by its nutrient columns.
MEALS_CSV = "all_meals_sample.csv"
data = pd.read_csv(MEALS_CSV)
display(data)

Unnamed: 0,date,round,meal_id,item_name,quantity,measure,esha_code,gov_code,cals_kcal,prot_g,...,portion,portion_size_notes,lact_g_73,lact_g_175,day,first_day,last_day,day_of_round,weekend,time_shift
0,12/11/20,1,1,"omelet, western, prepared with added fat",2-Jan,Cup,124464,32130890,193.91,13.01,...,,,,,12/11/20,12/11/20,12/24/20,1,False,False
1,12/11/20,1,1,tortilla,0.9,Ounce-weight,157895,52215000,66.85,1.77,...,,,,,12/11/20,12/11/20,12/24/20,1,False,False
2,12/11/20,1,1,"iced tea, diet, Snapple",16,Fluid ounce,129990,92309020,4.80,0.00,...,,,,,12/11/20,12/11/20,12/24/20,1,False,False
3,12/11/20,1,2,"chewing gum, sugar free",1,Stick,120338,91802000,4.82,0.00,...,,,,,12/11/20,12/11/20,12/24/20,1,False,False
4,12/11/20,1,3,"omelet, western, prepared with added fat",2-Jan,Cup,124464,32130890,193.91,13.01,...,,,,,12/11/20,12/11/20,12/24/20,1,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,2/3/21,2,16,"fried chicken thigh, with skin",5.5,Ounce-weight,156970,24157300,422.55,29.59,...,,,,,2/3/21,1/21/21,2/3/21,14,False,False
118,2/3/21,2,16,"tomatoes, fresh",2,Ounce-weight,119089,74101000,10.21,0.50,...,,,,,2/3/21,1/21/21,2/3/21,14,False,False
119,2/3/21,2,16,mayonnaise,1,Tablespoon,120064,83107000,102.00,0.14,...,,,,,2/3/21,1/21/21,2/3/21,14,False,False
120,2/3/21,2,16,"lettuce, romaine, fresh",4-Mar,Cup,118826,72116000,3.94,0.36,...,,,,,2/3/21,1/21/21,2/3/21,14,False,False


### Download the food-parameter table (PMC3925198, Table 2) and parse it

In [138]:
# Download Table 2 of PMC article PMC3925198 and parse it into a DataFrame.
# A browser-like User-Agent is sent with the request
# (presumably the site rejects the default requests User-Agent -- TODO confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
}

url = 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3925198/table/tab2/?report=objectonly'

# The timeout stops a stalled connection from hanging the notebook forever.
response = requests.get(url, headers=headers, timeout=30)
# Fail here on an HTTP error instead of trying to parse an error page as the table.
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')

# First <table> element of the page; find() returns None when there is none.
table = soup.find('table')
if table is None:
    raise ValueError(f"No <table> element found at {url}")

# original table
display_table = pd.read_html(StringIO(str(table)))[0]

In [139]:
# NOTE: df is an alias of display_table (not a copy), so the in-place edits below
# are visible through both names.
df = display_table

# Guard on the renamed column so that re-running this cell is a no-op. Without it,
# a second run would positionally drop two more columns and then raise IndexError
# while building the rename mapping.
if 'overall inflammatory score' not in df.columns:
    # Remove the 2nd and 3rd columns: weighted number of articles (not important)
    # and the raw inflammatory score.
    df.drop(columns=df.columns[1:3], inplace=True)

    # Give the three remaining value columns short names (positions are post-drop).
    df.rename(
        columns={
            df.columns[1]: 'overall inflammatory score',
            df.columns[2]: 'mean',
            df.columns[3]: "sd",
        },
        inplace=True,
    )

In [140]:
# The scraped table writes numbers with a middle dot ('·') as the decimal separator
# and a typographic minus sign ('−'); map both to their ASCII equivalents.
_ASCII_FIXES = str.maketrans({'·': '.', '−': '-'})


def _to_ascii_number_text(value):
    """Return `value` with '·' -> '.' and '−' -> '-' if it is a str; otherwise return it unchanged."""
    return value.translate(_ASCII_FIXES) if isinstance(value, str) else value


# Assign whole columns back to df. Writing to the row objects yielded by
# df.iterrows() is not guaranteed to reach the frame (they can be copies), and
# indexing each cell as a string fails on non-string values such as NaN.
# Values stay strings here; no numeric conversion is done in this cell.
for col in df.columns:
    df[col] = df[col].map(_to_ascii_number_text)
df


Unnamed: 0,Food parameter,overall inflammatory score,mean,sd
0,Alcohol (g),-0.278,13.98,3.72
1,Vitamin B12 (μg),0.106,5.15,2.7
2,Vitamin B6 (mg),-0.365,1.47,0.74
3,β-Carotene (μg),-0.584,3718.0,1720.0
4,Caffeine (g),-0.11,8.05,6.67
5,Carbohydrate (g),0.097,272.2,40.0
6,Cholesterol (mg),0.11,279.4,51.2
7,Energy (kcal),0.18,2056.0,338.0
8,Eugenol (mg),-0.14,0.01,0.08
9,Total fat (g),0.298,71.4,19.4


In [45]:
# Number of cells outside the first column: (columns after the first) x (rows).
count = len(df.columns[1:]) * len(df)
count

135

#### How to calculate the DII (Dietary Inflammatory Index) score
https://pubs.sciepub.com/jfnr/8/4/2/index.html#:~:text=The%20first%20step%20to%20calculate,to%20a%20centered%20percentile%20score.

In [47]:
# food_params will map each table label to a nutrient column name (filled in by a later cell).
food_params = dict()
# All labels from the 'Food parameter' column, in table order.
l = display_table['Food parameter'].tolist()


In [48]:
# Start with every food parameter from the table present and unmapped (None).
food_params.update(dict.fromkeys(l))

# Table label -> nutrient column name. cals_kcal and prot_g are visible in the
# `data` preview; the other names are presumably columns of `data` too -- TODO confirm.
# Commented-out labels are not assigned in this cell.
food_params.update({
    'Alcohol (g)': 'alc_g',
    'Vitamin B12 (μg)': 'vit_b12_mcg',
    'Vitamin B6 (mg)': 'vit_b6_mg',
    'β-Carotene (μg)': 'beta_caro_mcg',
    'Caffeine (g)': 'caff_mg',  # need to convert units
    'Carbohydrate (g)': 'carb_g',
    'Cholesterol (mg)': 'chol_mg',
    'Energy (kcal)': 'cals_kcal',
    # 'Eugenol (mg)'
    'Total fat (g)': 'fat_g',
    'Fibre (g)': 'tot_fib_g',
    'Folic acid (μg)': 'fol_acid_mcg',
    # 'Garlic (g)'
    # 'Ginger (g)'
    'Fe (mg)': 'iron_mg',
    'Mg (mg)': 'magn_mg',
    'MUFA (g)': 'mono_fat_g',
    # 'Niacin (mg)'
    'n-3 Fatty acids (g)': 'omega3_g',
    'n-6 Fatty acids (g)': 'omega6_g',
    # 'Onion (g)'
    'Protein (g)': 'prot_g',
    'PUFA (g)': 'poly_fat_g',
    # 'Riboflavin (mg)'
    # 'Saffron (g)'
    'Saturated fat (g)': 'sat_fat_g',
    'Se (μg)': 'sel_mcg',
    # 'Thiamin (mg)'
    'Trans fat (g)': 'trans_fat_g',
    # 'Turmeric (mg)'
    'Vitamin A (RE)': 'vit_a_re_mcg',
    'Vitamin C (mg)': 'vit_c_mg',
    'Vitamin D (μg)': 'vit_d_mcg_mcg',  # NOTE(review): doubled "_mcg" suffix -- verify against the CSV header
    'Vitamin E (mg)': 'vit_e_a_toco_mg',
    'Zn (mg)': 'zinc_mg',
    # 'Green/black tea (g)'
    # 'Flavan-3-ol (mg)'
    # 'Flavones (mg)'
    # 'Flavonols (mg)'
    # 'Flavonones (mg)'
    # 'Anthocyanidins (mg)'
    # 'Isoflavones (mg)'
    # 'Pepper (g)'
    # 'Thyme/oregano (mg)'
    # 'Rosemary (mg)'
})


In [49]:
# Reverse lookup: nutrient column name -> food-parameter label.
# Parameters whose value is None (no column assigned) are skipped; inverting them
# would collapse all of them onto a single None key holding only the last label.
new_dict = {
    column: param
    for param, column in food_params.items()
    if column is not None
}
