This is a notebook for exploring the data.

In [2]:
import pandas as pd 
from gfdx.containers import structures
import pprint

printer = pprint.PrettyPrinter(indent=4)

In [65]:
target_cols = [
    "country_code",
    "redcap_event_name",
    "redcap_repeat_instrument",
    "redcap_repeat_instance",
    "standard_nutrient",
    "nutrient_compound",
    "nutrient_level",
]
df = pd.read_csv("../data/gfdx.csv", usecols=target_cols)

new_cols = [
    "country",
    "food",
    "instrument",
    "repeat",
    "nutrient",
    "nutrient_level",
    "compound",
]
df.columns = new_cols

target_foods = {"maize_flour_arm_1", "wheat_flour_arm_1", "salt_arm_1"}
df = df[(df.food.isin(target_foods)) & (df.instrument == "nutrients_compounds")]
# print(df.head())


df["food"] = df.food.apply(lambda x: x.split("_")[0])

In [66]:
df[df.country == 100.0]

Unnamed: 0,country,food,instrument,repeat,nutrient,nutrient_level,compound
37716,100.0,salt,nutrients_compounds,1.0,6.0,25.0,33.0
37717,100.0,salt,nutrients_compounds,2.0,7.0,975.0,50.0
37718,100.0,salt,nutrients_compounds,3.0,7.0,975.0,55.0
37723,100.0,wheat,nutrients_compounds,1.0,1.0,2.0,2.0
37724,100.0,wheat,nutrients_compounds,2.0,1.0,2.0,3.0
37725,100.0,wheat,nutrients_compounds,3.0,2.0,0.001,4.0
37726,100.0,wheat,nutrients_compounds,4.0,2.0,0.001,5.0
37727,100.0,wheat,nutrients_compounds,5.0,5.0,0.1,29.0
37728,100.0,wheat,nutrients_compounds,6.0,7.0,35.25,38.0
37729,100.0,wheat,nutrients_compounds,7.0,7.0,35.25,44.0


In [67]:
def build_data():
    data = []
    for country in df.country.unique():
        country_df = df[df.country == country]
        datum = structures.Country(code=country)
        foods = list(country_df.food.unique())
        for food in foods:
            food_item = structures.Food(name=food)
            food_df = country_df[country_df.food == food]
            nutrients = list(food_df.nutrient.unique())
            if any(nutrients):
                for nutrient in nutrients:
                    nutrient_df = food_df[food_df.nutrient == nutrient]
                    compounds = list(nutrient_df.compound.unique())
                    if any(compounds):
                        nutrient_item = structures.Nutrient(
                            name=nutrient,
                            compounds=[structures.Compound(name=c) for c in compounds],
                        )
                        food_item.nutrients.append(nutrient_item)
                        datum.foods.append(food_item)
        data.append(datum)
    return data


In [68]:
data = build_data()

In [78]:
data[0].foods

[Food(name=&#39;salt&#39;, nutrients=[Nutrient(name=&#39;6.0&#39;, compounds=[Compound(name=&#39;1.0&#39;)], recommendation_status=&#39;None&#39;)])]

In [70]:
import enum

class CompoundEnum(enum.Enum):
    Ferrous_Sulfate = 50
    Ferrous_Fumerate = 55
    NaFeEDTA = 64
    Electrolytic_Iron = 38
    Folic_Acid = 29
    Zinc_Oxide =  104
    Zinc_Sulfate = 105
    Cyanocobalamin = 4
    Vitamin_A_Palmitate = 86

    def pretty(self):
        return self.name.replace('_', ' ')


In [71]:
CompoundEnum(86).pretty()

&#39;Vitamin A Palmitate&#39;

In [72]:
{c.value for c in CompoundEnum}

{4, 29, 38, 50, 55, 64, 86, 104, 105}

In [73]:
def check_nutrient_compounds(nutrient: structures.Nutrient) -> str:
    who_recommendations = [c.value for c in CompoundEnum]
    if all(int(float(n.name)) in who_recommendations for n in nutrient.compounds):
        return 'All recommended'
    elif any(int(float(n.name)) in who_recommendations for n in nutrient.compounds):
        return 'Both'
    else:
        return 'None'


In [75]:
for food in data[1].foods:
    if food.name == 'wheat':
        printer.pprint([check_nutrient_compounds(n) for n in food.nutrients])
        #printer.pprint(all_comps)
        #printer.pprint(food.nutrients)
        print('***********')
    else:
        continue

[&#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;]
***********
[&#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;]
***********
[&#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;]
***********
[&#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;, &#39;All recommended&#39;]
***********


In [76]:
for country in data:
    for food in country.foods:
        for nutrient in food.nutrients:
            nutrient.recommendation_status = check_nutrient_compounds(nutrient)

In [77]:
printer.pprint(next((c for c in data if c.code == 100)).foods)

[   Food(name=&#39;salt&#39;, nutrients=[Nutrient(name=&#39;6.0&#39;, compounds=[Compound(name=&#39;33.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;7.0&#39;, compounds=[Compound(name=&#39;50.0&#39;), Compound(name=&#39;55.0&#39;)], recommendation_status=&#39;All recommended&#39;)]),
    Food(name=&#39;salt&#39;, nutrients=[Nutrient(name=&#39;6.0&#39;, compounds=[Compound(name=&#39;33.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;7.0&#39;, compounds=[Compound(name=&#39;50.0&#39;), Compound(name=&#39;55.0&#39;)], recommendation_status=&#39;All recommended&#39;)]),
    Food(name=&#39;wheat&#39;, nutrients=[Nutrient(name=&#39;1.0&#39;, compounds=[Compound(name=&#39;2.0&#39;), Compound(name=&#39;3.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;2.0&#39;, compounds=[Compound(name=&#39;4.0&#39;), Compound(name=&#39;5.0&#39;)], recommendation_status=&#39;Both&#39;), Nutrient(name=&#39;5.0&#39;, compounds=[Compound(name=&#39;29.0&

In [92]:
country_reference = pd.read_csv('../data/code-name.csv')
country_reference.head()
country_dict = dict(zip(country_reference.country_code, country_reference.country_name))
country_reference.head()

Unnamed: 0,country_code,country_name
0,1.0,Armenia
1,2.0,Afghanistan
2,3.0,Albania
3,4.0,Algeria
4,5.0,American Samoa


In [113]:
merged = country_reference.merge(df, how='inner', left_on=['country_name'], right_on=['COUNTRY'])
merged.head(10)

Unnamed: 0,country_code,country_name,COUNTRY,GDP (BILLIONS),CODE
0,1.0,Armenia,Armenia,10.88,ARM
1,2.0,Afghanistan,Afghanistan,21.71,AFG
2,3.0,Albania,Albania,13.4,ALB
3,4.0,Algeria,Algeria,227.8,DZA
4,5.0,American Samoa,American Samoa,0.75,ASM
5,6.0,Andorra,Andorra,4.8,AND
6,7.0,Angola,Angola,131.4,AGO
7,8.0,Antigua and Barbuda,Antigua and Barbuda,1.24,ATG
8,9.0,Argentina,Argentina,536.2,ARG
9,10.0,Australia,Australia,1483.0,AUS


In [123]:
merged.loc[7]

country_code                        8
country_name      Antigua and Barbuda
COUNTRY           Antigua and Barbuda
GDP (BILLIONS)                   1.24
CODE                              ATG
Name: 7, dtype: object

In [124]:
[d for d in data if d.code == 8]

[Country(code=8, name=&#39;Antigua and Barbuda&#39;, foods=[Food(name=&#39;wheat&#39;, nutrients=[Nutrient(name=&#39;3.0&#39;, compounds=[Compound(name=&#39;1.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;7.0&#39;, compounds=[Compound(name=&#39;1.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;8.0&#39;, compounds=[Compound(name=&#39;71.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;9.0&#39;, compounds=[Compound(name=&#39;72.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;11.0&#39;, compounds=[Compound(name=&#39;80.0&#39;)], recommendation_status=&#39;None&#39;)]), Food(name=&#39;wheat&#39;, nutrients=[Nutrient(name=&#39;3.0&#39;, compounds=[Compound(name=&#39;1.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;7.0&#39;, compounds=[Compound(name=&#39;1.0&#39;)], recommendation_status=&#39;None&#39;), Nutrient(name=&#39;8.0&#39;, compounds=[Compound(name=&#39;71.0&#39;)], recommendation_s

In [126]:
statuses = dict()

def get_wheat_status(row):
    country = next((d for d in data if d.code == row.country_code), 'No ass')
    if country == 'No ass':
        print(country)
        return country
    food = next((f for f in country.foods if f.name == 'wheat'), 'No ass')
    if food == 'No ass':
        print(food)
        return food
    iron = next((n for n in food.nutrients if n.name == '7.0'), 'No ass')
    if iron == 'No ass':
        print(iron)
        return iron    
    print(iron)
    return iron.recommendation_status

merged['recommendation'] = merged.apply(lambda row: get_wheat_status(row), axis=1)
merged.head()

No ass
name=&#39;7.0&#39; compounds=[Compound(name=&#39;64.0&#39;)] recommendation_status=&#39;All recommended&#39;
No ass
No ass
No ass
No ass
No ass
name=&#39;7.0&#39; compounds=[Compound(name=&#39;1.0&#39;)] recommendation_status=&#39;None&#39;
name=&#39;7.0&#39; compounds=[Compound(name=&#39;55.0&#39;)] recommendation_status=&#39;All recommended&#39;
No ass
No ass
name=&#39;7.0&#39; compounds=[Compound(name=&#39;65.0&#39;)] recommendation_status=&#39;None&#39;
name=&#39;7.0&#39; compounds=[Compound(name=&#39;1.0&#39;)] recommendation_status=&#39;None&#39;
name=&#39;7.0&#39; compounds=[Compound(name=&#39;1.0&#39;)] recommendation_status=&#39;None&#39;
No ass
No ass
No ass
name=&#39;7.0&#39; compounds=[Compound(name=&#39;50.0&#39;), Compound(name=&#39;55.0&#39;)] recommendation_status=&#39;All recommended&#39;
No ass
name=&#39;7.0&#39; compounds=[Compound(name=&#39;1.0&#39;)] recommendation_status=&#39;None&#39;
name=&#39;7.0&#39; compounds=[Compound(name=&#39;39.0&#39;), Compound(na

Unnamed: 0,country_code,country_name,COUNTRY,GDP (BILLIONS),CODE,recommendation
0,1.0,Armenia,Armenia,10.88,ARM,No ass
1,2.0,Afghanistan,Afghanistan,21.71,AFG,All recommended
2,3.0,Albania,Albania,13.4,ALB,No ass
3,4.0,Algeria,Algeria,227.8,DZA,No ass
4,5.0,American Samoa,American Samoa,0.75,ASM,No ass


In [140]:
import plotly.graph_objects as go

df2 = px.data.election()
gapminder = px.data.gapminder().query("year==2007")

dff=pd.merge(merged, gapminder, how='left', left_on='country_name', right_on='country')


fig = px.choropleth(dff, locations="iso_alpha",
                    color="recommendation", 
                    hover_name="COUNTRY", # column to add to hover information
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.update_geos(
    visible=False, resolution=50,
    showcountries=True, countrycolor="RebeccaPurple"
)
fig.update_layout(height=300, margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


   country_code    country_name         COUNTRY  GDP (BILLIONS) CODE  \
0           1.0         Armenia         Armenia           10.88  ARM   
1           2.0     Afghanistan     Afghanistan           21.71  AFG   
2           3.0         Albania         Albania           13.40  ALB   
3           4.0         Algeria         Algeria          227.80  DZA   
4           5.0  American Samoa  American Samoa            0.75  ASM   

    recommendation      country continent    year  lifeExp         pop  \
0           No ass          NaN       NaN     NaN      NaN         NaN   
1  All recommended  Afghanistan      Asia  2007.0   43.828  31889923.0   
2           No ass      Albania    Europe  2007.0   76.423   3600523.0   
3           No ass      Algeria    Africa  2007.0   72.301  33333216.0   
4           No ass          NaN       NaN     NaN      NaN         NaN   

     gdpPercap iso_alpha  iso_num  
0          NaN       NaN      NaN  
1   974.580338       AFG      4.0  
2  5937.029526

In [None]:
import plotly.graph_objects as go
import pandas as pd

df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

fig = go.Figure(data=go.Choropleth(
    locations = merged['CODE'],
    z = df['GDP (BILLIONS)'],
    text = df['COUNTRY'],
    colorscale = 'Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_tickprefix = '$',
    colorbar_title = 'GDP<br>Billions US$',
))

fig.update_layout(
    title_text='2014 Global GDP',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
            CIA World Factbook</a>',
        showarrow = False
    )]
)

fig.show()

In [89]:
import plotly.express as px

for d in data:
    d.name = country_dict[d.code]


TypeError: &#39;Country&#39; object is not subscriptable

In [91]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
df.head()

Unnamed: 0,COUNTRY,GDP (BILLIONS),CODE
0,Afghanistan,21.71,AFG
1,Albania,13.4,ALB
2,Algeria,227.8,DZA
3,American Samoa,0.75,ASM
4,Andorra,4.8,AND
