In [5]:
import pickle
import pandas as pd

def save_dict_list_as(dic, file_name):
    """Serialize ``dic`` with pickle and write it to ``file_name``.

    Args:
        dic: Any picklable object (the notebook passes lists of feature names).
        file_name: Path of the file to create or overwrite.

    Returns:
        Always ``0``.
    """
    with open(file_name, 'wb') as pickle_out:
        pickle.Pickler(pickle_out).dump(dic)
    return 0


def load_dict_list(file_name):
    """Read and return the object pickled in ``file_name``.

    Args:
        file_name: Path of an existing pickle file.

    Returns:
        The unpickled object, whatever type was originally stored.

    Note:
        Unpickling can execute arbitrary code, so only use this on files
        that come from a trusted source.
    """
    with open(file_name, 'rb') as pickle_in:
        return pickle.load(pickle_in)
    

In [6]:
# Load the previously pickled object stored in Food_group/features.pkl.
# NOTE(review): its structure is not visible here — presumably a dict or list
# of feature names; confirm against the cell that produced the file.
features = load_dict_list('Food_group/features.pkl')

### **Prepare nutritional features categories**

In [7]:
# Food types (food-group feature names)
food_types = [
    "dairy_products_meat_fish_eggs_tofu",
    "vegetables_fruits",
    "sweets_salty_snacks_alcohol",
    "non_alcoholic_beverages",
    "grains_potatoes_pulses",
    "oils_fats_nuts",
]

# Macronutrients
macronutrients = [
    "carb_eaten",
    "fat_eaten",
    "protein_eaten",
    "fiber_eaten",
    "alcohol_eaten",
]

# Micronutrients
micronutrients = [
    "beta_carotene_eaten",
    "calcium_eaten",
    "cholesterol_eaten",
    "fatty_acids_monounsaturated_eaten",
    "fatty_acids_polyunsaturated_eaten",
    "fatty_acids_saturated_eaten",
    "folate_eaten",
    "iron_eaten",
    "magnesium_eaten",
    "niacin_eaten",
    "pantothenic_acid_eaten",
    "phosphorus_eaten",
    "potassium_eaten",
    "sodium_eaten",
    "vitamin_b1_eaten",
    "vitamin_b12_eaten",
    "vitamin_b2_eaten",
    "vitamin_b6_eaten",
    "vitamin_c_eaten",
    "vitamin_d_eaten",
    "zinc_eaten",
]

# Map each feature-group name to its list of feature names; the key doubles
# as the stem of the pickle file written below.
li = {}
li["Food_type"] = food_types
li["Macronutrients"] = macronutrients
li["Micronutrients"] = micronutrients

# Persist every group as Food_group/<group name>.pkl.
for name in li:
    val = li[name]
    save_dict_list_as(val, f"Food_group/{name}.pkl")

**Table of the demographic feature distribution of the data**


In [8]:
# Build a table with the count and percentage of subjects in each gender,
# age-group and BMI category, then export it to Excel.

# One row per subject: keep the first record of every `subject_key`.
# The path uses forward slashes: the previous backslash formed the invalid
# escape sequence "\d" and only resolved on Windows.
# Loading and reducing in one chain keeps the cell idempotent on re-run.
data = (
    pd.read_csv("data_set/data_for_analysis.csv")
    .groupby("subject_key")[["gender", "age_group", "bmi_cat"]]
    .head(1)
)

# For each demographic column: absolute counts next to percentages (0-100).
res = {}
for column in data.columns:
    counts = data[column].value_counts()
    percentages = data[column].value_counts(normalize=True) * 100
    res[column] = pd.concat([counts, percentages], axis=1, keys=['Total', 'Percentage'])

# Fix the row order of every sub-table. A category that does not occur in the
# data is reported as 0 / 0.0% instead of NaN.
df_gender = res['gender'].reindex(['female', 'male'], fill_value=0)
df_age_group = res['age_group'].reindex(['35-49', '18-34', '50-64', '65+'], fill_value=0)
df_bmi_cat = res['bmi_cat'].reindex(['Normal', 'Overweight', 'Obese', 'Underweight'], fill_value=0)

# Stack the three sub-tables under a two-level (group, category) index.
combined_df = pd.concat([df_gender, df_age_group, df_bmi_cat], keys=['Gender', 'Age Group', 'BMI Category'])
combined_df.index.names = ['', 'Category']

# Round to one decimal place. Slicing str(x)[:4] truncated instead of rounding
# and produced inconsistent results such as "100.%" or "9.87%".
combined_df["Percentage"] = combined_df["Percentage"].map("{:.1f}%".format)

# NOTE(review): the "table" directory must already exist — confirm.
combined_df.to_excel("table/demographic_distribution_table.xlsx")
