In [125]:
# Standard libraries
import sys
import os
import json
import importlib
import string
import math
import re
import pprint
# Third party libraries
import pandas as pd
import numpy as np
import plotly
import plotly.plotly as py
import plotly.graph_objs as go

# Shared pretty-printer; used further down to dump the list of L1 food groups.
pp = pprint.PrettyPrinter(indent=2, width=100)
# NOTE(review): `__version__` is imported here, but the version print below reads
# `plotly.__version__` instead.
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's offline notebook mode before any figure is drawn.
init_notebook_mode(connected=True)
# Record the library versions this notebook was run with.
print('Plotly ver: ', plotly.__version__)
print('Pandas ver: ', pd.__version__)

Plotly ver:  3.2.1
Pandas ver:  0.23.4


## Initialization - read the first sheet (`L1_Consumers_only_g_day`) of the spreadsheet
Run this cell at startup, before calling any of the functions below.

In [2]:
# Load the L1 "consumers only" sheet; `header=2` takes the column labels from
# the row at 0-based index 2 of the sheet.
df1 = pd.read_excel(
    "./chronicgdayconsumers.xlsx",
    sheet_name='L1_Consumers_only_g_day',
    header=2,
)

In [123]:
# df1.head(20)
# df1.info()

##  `plot_by_L1_and_population()` function

In [4]:
def plot_by_L1_and_population(df, L1_key, population_key, display_mean = False):
    '''
    Draw one box per country for a single L1 food group and population class.

    df            :  L1 dataframe from spreadsheet; must provide the columns
                     'Country', 'Pop Class', 'Foodex L1', 'P5', 'P10',
                     'Median', 'P95' and 'P99'
    L1_key        :  L1 Food group to filter on (matched against 'Foodex L1')
    population_key:  Population Class to filter on (matched against 'Pop Class')
    display_mean  :  Display mean trend - when True, a line through the mean of
                     each country's plotted points is added on top of the boxes

    Returns whatever `py.iplot` (from `plotly.plotly`) returns for the figure.
    If no row matches both keys the figure simply contains no boxes.
    '''
    # Columns read from the sheet, and the sequence of points handed to each box.
    # P10 and P95 are listed twice, exactly as in the original point list.
    # NOTE(review): presumably this pins the box edges to P10/P95 - confirm.
    percentile_cols = ['P5', 'P10', 'Median', 'P95', 'P99']
    box_points      = ['P5', 'P10', 'P10', 'Median', 'P95', 'P95', 'P99']

    population_filter = (df['Pop Class'] == population_key)
    # Filter on the requested food group. This used to be hard-coded to
    # 'Grains and grain-based products', so the title and the data disagreed
    # for every other L1_key.
    L1_filter         = (df['Foodex L1'] == L1_key)
    stats             = df[L1_filter & population_filter]

    # One row per country (first-appearance order): the largest value of each
    # percentile column among that country's matching rows.
    g_stats = stats.groupby('Country', sort = False)[percentile_cols].max()

    # One column per country holding the points of its box.
    results = pd.DataFrame({
        country: [g_stats.loc[country, col] for col in box_points]
        for country in g_stats.index
    })

    data = [go.Box(y=results[col], name=col, showlegend=False) for col in results.columns]

    if display_mean:
        data.append(go.Scatter(x=results.columns, y=results.mean(), mode='lines', name='mean'))

    def axis(title):
        # Both axes share the same title font; build a fresh dict for each.
        return dict(
            title=title,
            titlefont=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )

    # NOTE(review): the axis titles are still the placeholder texts.
    layout = go.Layout(
        title='Consumption of '+L1_key+' for '+population_key,
        xaxis=axis('x Axis'),
        yaxis=axis('y Axis')
    )
    fig = go.Figure(data=data, layout=layout)

    # IPython notebook
    return py.iplot(fig, filename='pandas-box-plot')

    # To get a URL back instead of an inline plot:
    # url = py.plot(data, filename='pandas-box-plot')
    


### Function call for `plot_by_L1_and_population()`

In [5]:
# Selection keys for the plot. `country_key` is set here but is not passed to
# the call below.
L1_key = 'Grains and grain-based products'
population_key = 'Adults'
country_key = 'Austria'

# Per-country box plot for the chosen food group and population class.
plot_by_L1_and_population(
    df=df1,
    L1_key=L1_key,
    population_key=population_key,
    display_mean=False,
)

##  `sum_consumption_by_all_countries()` function

In [117]:
def sum_consumption_by_all_countries(df):
    '''
    Average per-survey total of 'Mean' for every (Country, Pop Class) pair.

    df : dataframe with the columns 'Country', 'Survey', 'Pop Class' and 'Mean'

    'Mean' is first summed within each (Country, Survey, Pop Class) group; the
    per-survey totals are then averaged over the surveys of each
    (Country, Pop Class) pair.

    Returns a Series indexed by (Country, Pop Class), rounded to 0 decimals,
    with groups kept in order of first appearance.
    '''
    # Aggregate the frame that was passed in. The body used to read the
    # notebook-global `df1`, silently ignoring the `df` argument.
    survey_totals = df.groupby(['Country', 'Survey', 'Pop Class'], sort = False)['Mean'].sum()
    g_stats = survey_totals.groupby(level = ['Country', 'Pop Class'], sort = False).mean()
    return g_stats.round(0)

### Function call for `sum_consumption_by_all_countries()`

In [121]:
# Average per-survey consumption totals for every country / population class.
answer = sum_consumption_by_all_countries(df=df1)
print(answer)

Country         Pop Class      
Austria         Adults             3839.0
                Elderly            3058.0
                Very elderly       3545.0
                Other children     2410.0
                Adolescents        2089.0
Belgium         Adolescents        3487.0
                Adults             3614.0
                Elderly            3242.0
                Very elderly       2975.0
                Toddlers           1799.0
                Other children     1867.0
Bulgaria        Infants            1257.0
                Toddlers           1626.0
                Other children     1904.0
Cyprus          Adolescents        1740.0
Czech Republic  Other children     2421.0
                Adolescents        3237.0
                Adults             4246.0
Germany         Infants            1265.0
                Toddlers           1510.0
                Other children     1919.0
                Adolescents        3043.0
                Adults             4466.0
  

### Example of retrieving information for one country

In [122]:
# `answer` is indexed by (Country, Pop Class); selecting 'Austria' leaves a
# Series keyed by Pop Class, printed here as a plain dict.
print(answer['Austria'].to_dict())
# print(answer['Toddlers'])

{'Adults': 3839.0, 'Very elderly': 3545.0, 'Elderly': 3058.0, 'Other children': 2410.0, 'Adolescents': 2089.0}


## Misc Code

In [None]:
def sum_consumption_by_country(df, country_key):
    '''
    Average per-survey total of 'Mean' per Pop Class for one country.

    df          : dataframe with 'Country', 'Survey', 'Pop Class' and 'Mean' columns
    country_key : value of 'Country' to keep

    Returns a Series indexed by Pop Class (sorted by label), rounded to 0 decimals.
    '''
    country_rows = df[df['Country'] == country_key]

    # Total 'Mean' within each survey / population class of the country ...
    survey_totals = country_rows.groupby(
        ['Country', 'Survey', 'Pop Class'], sort = False
    )['Mean'].sum()

    # ... then average those totals across surveys for each population class.
    class_means = survey_totals.groupby('Pop Class', sort = False).mean()

    return class_means.sort_index().round(0)


In [119]:
# Per-population-class totals for a single country (Denmark).
answer = sum_consumption_by_country(df=df1, country_key='Denmark')
print(answer.to_dict())
print(answer['Toddlers'])

{'Adults': 3745.0, 'Toddlers': 1482.0, 'Infants': 1253.0, 'Very elderly': 3516.0, 'Adolescents': 2670.0, 'Elderly': 3512.0, 'Other children': 2176.0}
1482.0


In [None]:
#df2.head()
#df3.head()
#df4.head()
# Quick look at the loaded sheet: its column labels, the distinct L1 food
# groups (count, then the values themselves), and the distinct survey names.
print(df1.columns)
L1_foodgroups = df1['Foodex L1' ].unique()
print('L1 Food Groups ', len(L1_foodgroups))
pp.pprint(L1_foodgroups)
print(df1['Survey'].unique())

In [79]:
# df1[ (df1['Country'] == 'Denmark') & (df1['Pop Class'] =='Very elderly') ]

In [86]:
# Let pandas render up to 100 rows of a result.
pd.set_option('display.max_rows', 100)
# df1[ (df1['Country'] == 'Denmark') & (df1['Survey'] == 'Danish Dietary Survey')][['Pop Class','Nr Consumers', 'Foodex L1']]      ##.groupby('Survey').max()

# Unrounded version of the per-(Country, Pop Class) figures: sum 'Mean' within
# each survey, then average the survey totals.
g_stats = (
    df1
    .groupby(['Country', 'Survey', 'Pop Class'], sort=False)['Mean']
    .sum()
    .groupby(['Country', 'Pop Class'], sort=False)
    .mean()
)
g_stats

Country         Pop Class      
Austria         Adults             3839.437782
                Elderly            3057.579458
                Very elderly       3544.986866
                Other children     2409.847723
                Adolescents        2088.809800
Belgium         Adolescents        3486.831708
                Adults             3613.969194
                Elderly            3241.531193
                Very elderly       2975.445205
                Toddlers           1798.948146
                Other children     1867.366346
Bulgaria        Infants            1257.148264
                Toddlers           1625.762263
                Other children     1904.010407
Cyprus          Adolescents        1740.196706
Czech Republic  Other children     2420.936852
                Adolescents        3237.228492
                Adults             4245.855166
Germany         Infants            1264.790288
                Toddlers           1510.009145
                Other childr

In [37]:
# NOTE(review): the saved output below lists DataFrame columns (Nr Consumers,
# % Consumers, Mean, STD, P5, ...), so `answer` presumably held a DataFrame when
# this cell ran (In [37]). Every assignment to `answer` visible in this notebook
# produces a Series - confirm what `answer` should be before re-running.
answer.groupby('Pop Class', sort = False).mean()

Unnamed: 0_level_0,Nr Consumers,% Consumers,Mean,STD,P5,P10,Median,P95,P97.5,P99
Pop Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Other children,6048.0,15.376962,2175.527104,1296.8472,591.170571,808.258643,1918.410464,4620.230071,5286.549929,6479.363
Adolescents,6604.0,15.437793,2670.35711,1946.447901,528.753726,768.606071,2201.432393,6349.908929,7657.122464,9999.237714
Adults,36479.0,16.041058,3745.038567,2668.616688,628.394405,985.688857,3194.205179,8620.107857,10348.450143,12585.850143
Elderly,4667.5,16.032823,3511.660884,2374.17332,709.479214,1044.611571,2992.090964,7783.178786,9593.492429,11106.734429
Very elderly,250.5,15.741667,3516.182687,2162.502378,870.488143,1178.561071,3146.006036,7613.816786,8328.061786,8328.061786
Infants,10652.0,12.895884,1252.9266,992.12396,124.130143,233.431571,1038.443857,3249.442,3684.190714,4441.557857
Toddlers,13680.0,14.918212,1481.534565,863.0298,447.648714,616.361571,1306.977286,3016.441,3495.953857,4195.454714
