# Filipino Obesity - Data Cleaning

Analyzing the data across several separate dataframes would be a headache, so they will be concatenated into one. Some features will also be added to help investigate the whys of obesity in the Philippines and to understand more about how the obesity rate has increased over the years.

In [1]:
#load up libraries
import pandas as pd
import numpy as np

import re

In [2]:
# Load the 2008 and 2013 national survey tables.

# 2008 survey: one CSV per age group, keyed by the age-group label.
# The files are read in the order listed below.
diet_2008 = {
    age_group: pd.read_csv(csv_path)
    for age_group, csv_path in (
        ("Preschool Children", "Top 30 most commonly consumed/2008/6 months to 5 years old.csv"),
        ("School Age Children", "Top 30 most commonly consumed/2008/6 to 12 years old.csv"),
        ("Adolescents", "Top 30 most commonly consumed/2008/13 to 19 years old.csv"),
        ("Adult", "Top 30 most commonly consumed/2008/20 to 59 years old.csv"),
        ("Elderly", "Top 30 most commonly consumed/2008/60 years old and above.csv"),
    )
}

# 2013 survey: one CSV per age group, keyed by the age-group label.
# Going by the file names, the adolescent and adult brackets here are
# 13-18 and 19-59 (the 2008 files use 13-19 and 20-59).
diet_2013 = {
    age_group: pd.read_csv(csv_path)
    for age_group, csv_path in (
        ("Preschool Children", "Top 30 most commonly consumed/2013/6 months to 5 years old.csv"),
        ("School Age Children", "Top 30 most commonly consumed/2013/6 to 12 years old.csv"),
        ("Adolescents", "Top 30 most commonly consumed/2013/13 to 18 years old.csv"),
        ("Adult", "Top 30 most commonly consumed/2013/19 to 59 years old.csv"),
        ("Elderly", "Top 30 most commonly consumed/2013/60 years old and above.csv"),
    )
}

In [3]:
# Tag every survey table with the age group it covers and the year it was taken.
diet = [diet_2008, diet_2013]
year = [2008, 2013]

# The two lists are paired by position: diet[i] was surveyed in year[i].
for survey_tables, survey_year in zip(diet, year):
    for age_group, table in survey_tables.items():
        # `table` is the same DataFrame object stored in the dict, so the
        # new columns are added in place.
        table["Age Group"] = age_group
        table["Survey Year"] = survey_year

In [4]:
# Concatenate every age-group table from every survey year into one dataframe.
# Tables are collected in survey order (the order of `diet`), then in the
# order the age groups were inserted into each survey dict.
dietary_survey = [
    table
    for national_survey in diet
    for table in national_survey.values()
]

# ignore_index=True gives the combined table a fresh 0..n-1 row index.
# The earlier bare `.reindex()` call took no arguments and therefore changed
# nothing, leaving each source table's own row labels repeated in the result.
commonly_consumed = pd.concat(dietary_survey, axis=0, ignore_index=True)

# Feature Engineering

Now that the data set has been concatenated, with Survey Year and Age Group added so that any subset can easily be pulled out of the data, the next step is feature engineering. After some research, it appears that weight gain comes from several factors other than calories, such as consuming too much fat, sugar, and salt, and these nutrients mainly come from tertiary processed food.

Tertiary processed foods, or convenience foods, are foods that have been heavily processed and have had a lot of sugar, salt, or fat added to improve taste and/or lengthen shelf life.

In addition, the number of fruits and vegetables will be counted, since these types of food contain high amounts of vitamins and minerals.

In [5]:
# Check if the food is highly processed or not.
# Tertiary_Processed maps a food name to "Processed" or "Whole".
# Every listed food starts as "Whole"; the foods named in the update below
# are then relabelled "Processed". Key order follows the first listing.
# NOTE(review): 'Sardines, in tomato sauce, canned' is labelled "Whole" even
# though it is a canned product - confirm this is intended.
Tertiary_Processed = dict.fromkeys(
    (
        "Rice, white, well milled",
        "Okra",
        "Sugar",
        "Cooking oil (coconut)",
        "Breads",
        "Egg, chicken, whole (itlog, manok, buo)",
        "Milk, filled, powder (gatas, filled, pulbos)",
        "Noodles, instant",
        "Cookies",
        "Chocolate drink, powder",
        "Crackers",
        "Noodles, not instant",
        "Coffee, instant",
        "Rice gruel",
        "Squash, fruit (kalabasa, bunga)",
        "Banana, saba",
        "Softdrinks",
        "Scad, round (galunggong)",
        "Cabbage (repolyo)",
        "Milkfish, fresh (bangus)",
        "Ampalaya",
        "Sardines, in tomato sauce, canned",
        "Horseradish leaves (malunggay, dahon)",
        "Sausage, hotdog",
        "Coconut cream (niyog, kakang gata)",
        "String beans (sitaw, bunga, berde)",
        "Cakes",
        "Snack Foods (Chips, Curls)",
        "Pork, boston butt, lean (baboy, paypay, laman)",
        "Tilapia, fresh (tilapia)",
        "Pork, belly, less fat (baboy, liempo, malaman)",
        "Indian Sardine, dried",
        "Milk, for growing up",
        "Eggplant (talong)",
        "Mungbean, seed, green, dried",
        "Sweet potato leaves (kamote, dahon)",
        "Fruit juice drink",
        "Carrot",
        "Coffee, creamer & sugar (3-in-1)",
        "Chicken",
        "Soy sauce",
        "Vinegar",
        "Pasta",
        "Tomato sauce",
        "Potato (patatas)",
        "Swamp cabbage, leaves (kangkong, dahon)",
    ),
    "Whole",
)
Tertiary_Processed.update(
    dict.fromkeys(
        (
            "Sugar",
            "Cooking oil (coconut)",
            "Breads",
            "Milk, filled, powder (gatas, filled, pulbos)",
            "Noodles, instant",
            "Cookies",
            "Chocolate drink, powder",
            "Crackers",
            "Coffee, instant",
            "Softdrinks",
            "Sausage, hotdog",
            "Coconut cream (niyog, kakang gata)",
            "Cakes",
            "Snack Foods (Chips, Curls)",
            "Milk, for growing up",
            "Fruit juice drink",
            "Coffee, creamer & sugar (3-in-1)",
            "Soy sauce",
            "Tomato sauce",
        ),
        "Processed",
    )
)

# Check if the food is high on sugar, salt, or fat (one lookup table each).
# High_Sugar maps a food name to "Yes" or "No".
# Every listed food starts as "No"; the foods named in the update below are
# then relabelled "Yes". Key order follows the first listing.
# NOTE(review): 'Coffee, creamer & sugar (3-in-1)' is labelled "No" although
# its name includes sugar, while 'Coffee, instant' is labelled "Yes" -
# confirm these two are not swapped.
High_Sugar = dict.fromkeys(
    (
        "Rice, white, well milled",
        "Okra",
        "Sugar",
        "Cooking oil (coconut)",
        "Breads",
        "Egg, chicken, whole (itlog, manok, buo)",
        "Milk, filled, powder (gatas, filled, pulbos)",
        "Noodles, instant",
        "Cookies",
        "Chocolate drink, powder",
        "Crackers",
        "Noodles, not instant",
        "Coffee, instant",
        "Rice gruel",
        "Squash, fruit (kalabasa, bunga)",
        "Banana, saba",
        "Softdrinks",
        "Scad, round (galunggong)",
        "Cabbage (repolyo)",
        "Milkfish, fresh (bangus)",
        "Ampalaya",
        "Sardines, in tomato sauce, canned",
        "Horseradish leaves (malunggay, dahon)",
        "Sausage, hotdog",
        "Coconut cream (niyog, kakang gata)",
        "String beans (sitaw, bunga, berde)",
        "Cakes",
        "Snack Foods (Chips, Curls)",
        "Pork, boston butt, lean (baboy, paypay, laman)",
        "Tilapia, fresh (tilapia)",
        "Pork, belly, less fat (baboy, liempo, malaman)",
        "Indian Sardine, dried",
        "Milk, for growing up",
        "Eggplant (talong)",
        "Mungbean, seed, green, dried",
        "Sweet potato leaves (kamote, dahon)",
        "Fruit juice drink",
        "Carrot",
        "Coffee, creamer & sugar (3-in-1)",
        "Chicken",
        "Soy sauce",
        "Vinegar",
        "Pasta",
        "Tomato sauce",
        "Potato (patatas)",
        "Swamp cabbage, leaves (kangkong, dahon)",
    ),
    "No",
)
High_Sugar.update(
    dict.fromkeys(
        (
            "Sugar",
            "Breads",
            "Milk, filled, powder (gatas, filled, pulbos)",
            "Cookies",
            "Chocolate drink, powder",
            "Coffee, instant",
            "Softdrinks",
            "Coconut cream (niyog, kakang gata)",
            "Cakes",
            "Milk, for growing up",
            "Fruit juice drink",
            "Tomato sauce",
        ),
        "Yes",
    )
)
# High_Salt maps a food name to "Yes" or "No".
# Every listed food starts as "No"; the foods named in the update below are
# then relabelled "Yes". Key order follows the first listing.
High_Salt = dict.fromkeys(
    (
        "Rice, white, well milled",
        "Okra",
        "Sugar",
        "Cooking oil (coconut)",
        "Breads",
        "Egg, chicken, whole (itlog, manok, buo)",
        "Milk, filled, powder (gatas, filled, pulbos)",
        "Noodles, instant",
        "Cookies",
        "Chocolate drink, powder",
        "Crackers",
        "Noodles, not instant",
        "Coffee, instant",
        "Rice gruel",
        "Squash, fruit (kalabasa, bunga)",
        "Banana, saba",
        "Softdrinks",
        "Scad, round (galunggong)",
        "Cabbage (repolyo)",
        "Milkfish, fresh (bangus)",
        "Ampalaya",
        "Sardines, in tomato sauce, canned",
        "Horseradish leaves (malunggay, dahon)",
        "Sausage, hotdog",
        "Coconut cream (niyog, kakang gata)",
        "String beans (sitaw, bunga, berde)",
        "Cakes",
        "Snack Foods (Chips, Curls)",
        "Pork, boston butt, lean (baboy, paypay, laman)",
        "Tilapia, fresh (tilapia)",
        "Pork, belly, less fat (baboy, liempo, malaman)",
        "Indian Sardine, dried",
        "Milk, for growing up",
        "Eggplant (talong)",
        "Mungbean, seed, green, dried",
        "Sweet potato leaves (kamote, dahon)",
        "Fruit juice drink",
        "Carrot",
        "Coffee, creamer & sugar (3-in-1)",
        "Chicken",
        "Soy sauce",
        "Vinegar",
        "Pasta",
        "Tomato sauce",
        "Potato (patatas)",
        "Swamp cabbage, leaves (kangkong, dahon)",
    ),
    "No",
)
High_Salt.update(
    dict.fromkeys(
        (
            "Breads",
            "Noodles, instant",
            "Cookies",
            "Crackers",
            "Noodles, not instant",
            "Sardines, in tomato sauce, canned",
            "Sausage, hotdog",
            "Cakes",
            "Snack Foods (Chips, Curls)",
            "Soy sauce",
            "Tomato sauce",
        ),
        "Yes",
    )
)
# High_Fat maps a food name to "Yes" or "No".
# Every listed food starts as "No"; the foods named in the update below are
# then relabelled "Yes". Key order follows the first listing.
High_Fat = dict.fromkeys(
    (
        "Rice, white, well milled",
        "Okra",
        "Sugar",
        "Cooking oil (coconut)",
        "Breads",
        "Egg, chicken, whole (itlog, manok, buo)",
        "Milk, filled, powder (gatas, filled, pulbos)",
        "Noodles, instant",
        "Cookies",
        "Chocolate drink, powder",
        "Crackers",
        "Noodles, not instant",
        "Coffee, instant",
        "Rice gruel",
        "Squash, fruit (kalabasa, bunga)",
        "Banana, saba",
        "Softdrinks",
        "Scad, round (galunggong)",
        "Cabbage (repolyo)",
        "Milkfish, fresh (bangus)",
        "Ampalaya",
        "Sardines, in tomato sauce, canned",
        "Horseradish leaves (malunggay, dahon)",
        "Sausage, hotdog",
        "Coconut cream (niyog, kakang gata)",
        "String beans (sitaw, bunga, berde)",
        "Cakes",
        "Snack Foods (Chips, Curls)",
        "Pork, boston butt, lean (baboy, paypay, laman)",
        "Tilapia, fresh (tilapia)",
        "Pork, belly, less fat (baboy, liempo, malaman)",
        "Indian Sardine, dried",
        "Milk, for growing up",
        "Eggplant (talong)",
        "Mungbean, seed, green, dried",
        "Sweet potato leaves (kamote, dahon)",
        "Fruit juice drink",
        "Carrot",
        "Coffee, creamer & sugar (3-in-1)",
        "Chicken",
        "Soy sauce",
        "Vinegar",
        "Pasta",
        "Tomato sauce",
        "Potato (patatas)",
        "Swamp cabbage, leaves (kangkong, dahon)",
    ),
    "No",
)
High_Fat.update(
    dict.fromkeys(
        (
            "Cooking oil (coconut)",
            "Noodles, instant",
            "Sausage, hotdog",
            "Coconut cream (niyog, kakang gata)",
            "Snack Foods (Chips, Curls)",
            "Pork, boston butt, lean (baboy, paypay, laman)",
            "Pork, belly, less fat (baboy, liempo, malaman)",
        ),
        "Yes",
    )
)

# Check if the food is a vegetable or a fruit.
# Fruit_Vegetable maps a food name to "Yes" or "No".
# Every listed food starts as "No"; the foods named in the update below are
# then relabelled "Yes". Key order follows the first listing.
Fruit_Vegetable = dict.fromkeys(
    (
        "Rice, white, well milled",
        "Okra",
        "Sugar",
        "Cooking oil (coconut)",
        "Breads",
        "Egg, chicken, whole (itlog, manok, buo)",
        "Milk, filled, powder (gatas, filled, pulbos)",
        "Noodles, instant",
        "Cookies",
        "Chocolate drink, powder",
        "Crackers",
        "Noodles, not instant",
        "Coffee, instant",
        "Rice gruel",
        "Squash, fruit (kalabasa, bunga)",
        "Banana, saba",
        "Softdrinks",
        "Scad, round (galunggong)",
        "Cabbage (repolyo)",
        "Milkfish, fresh (bangus)",
        "Ampalaya",
        "Sardines, in tomato sauce, canned",
        "Horseradish leaves (malunggay, dahon)",
        "Sausage, hotdog",
        "Coconut cream (niyog, kakang gata)",
        "String beans (sitaw, bunga, berde)",
        "Cakes",
        "Snack Foods (Chips, Curls)",
        "Pork, boston butt, lean (baboy, paypay, laman)",
        "Tilapia, fresh (tilapia)",
        "Pork, belly, less fat (baboy, liempo, malaman)",
        "Indian Sardine, dried",
        "Milk, for growing up",
        "Eggplant (talong)",
        "Mungbean, seed, green, dried",
        "Sweet potato leaves (kamote, dahon)",
        "Fruit juice drink",
        "Carrot",
        "Coffee, creamer & sugar (3-in-1)",
        "Chicken",
        "Soy sauce",
        "Vinegar",
        "Pasta",
        "Tomato sauce",
        "Potato (patatas)",
        "Swamp cabbage, leaves (kangkong, dahon)",
    ),
    "No",
)
Fruit_Vegetable.update(
    dict.fromkeys(
        (
            "Okra",
            "Squash, fruit (kalabasa, bunga)",
            "Banana, saba",
            "Cabbage (repolyo)",
            "Ampalaya",
            "Horseradish leaves (malunggay, dahon)",
            "String beans (sitaw, bunga, berde)",
            "Eggplant (talong)",
            "Mungbean, seed, green, dried",
            "Sweet potato leaves (kamote, dahon)",
            "Carrot",
            "Potato (patatas)",
            "Swamp cabbage, leaves (kangkong, dahon)",
        ),
        "Yes",
    )
)

In [6]:
# Names of the engineered columns and the lookup table that fills each one;
# the two lists are paired by position.
added_features = [
    "Processed/Whole",
    "High Sugar",
    "High Salt",
    "High Fat",
    "Fruit/Vegetable",
]
engineer_feature = [
    Tertiary_Processed,
    High_Sugar,
    High_Salt,
    High_Fat,
    Fruit_Vegetable,
]

for column_name, lookup in zip(added_features, engineer_feature):
    # Translate each "Food Item" through the lookup dict; a food name that is
    # not a key of the dict comes out as NaN in the new column.
    commonly_consumed[column_name] = commonly_consumed["Food Item"].map(lookup)

In [7]:
# Save the combined, feature-engineered table; index=False keeps the row
# index out of the CSV file.
commonly_consumed.to_csv(path_or_buf="commonly_consumed.csv", index=False)