In [None]:
# Standard libraries
import sys
import os
import json
import importlib
import string
import math
import re
import pprint
# Third party libraries
import pandas as pd
import numpy as np
import plotly
import plotly.plotly as py
import plotly.graph_objs as go

# Shared pretty-printer; a later cell uses it to dump the array of L1 food groups.
pp = pprint.PrettyPrinter(indent=2, width=100)
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's offline notebook rendering (see the plotly.offline
# init_notebook_mode documentation for the exact effect of connected=True).
init_notebook_mode(connected=True)
# Last expression of the cell, so the notebook displays the plotly version in use.
plotly.__version__

In [2]:
# Load the L1 "consumers only, g/day" sheet of the workbook. Rows 0 and 1 of the
# sheet are skipped (presumably banner rows above the real header - confirm
# against the spreadsheet).
df1 = pd.read_excel(
    "./chronicgdayconsumers.xlsx",
    sheet_name='L1_Consumers_only_g_day',
    skiprows=[0, 1],
)

In [3]:
# Preview the first 20 rows of the loaded sheet.
df1.head(n=20)

Unnamed: 0,Country,Survey,Pop Class,Foodex L1,Metrics,Nr Consumers,% Consumers,Mean,STD,P5,P10,Median,P95,P97.5,P99,Comment
0,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Grains and grain-based products,A.01.000001,305.0,0.99026,248.263443,120.639687,75.0,100.0,241.0,477.25,527.75,578.25,
1,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Vegetables and vegetable products (including f...,A.01.000317,246.0,0.798701,112.080081,92.801409,10.0,20.0,93.6875,303.0,356.5,431.0,
2,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Starchy roots and tubers,A.01.000467,109.0,0.353896,88.383028,53.13549,32.5,35.0,70.5,219.5,243.75,250.0,
3,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,"Legumes, nuts and oilseeds",A.01.000486,71.0,0.230519,34.176056,40.927405,3.0,5.0,20.0,100.0,190.5,270.0,
4,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Fruit and fruit products,A.01.000544,263.0,0.853896,191.564544,149.530116,25.0,35.0,152.5,447.0,554.25,809.0,
5,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Meat and meat products (including edible offal),A.01.000727,251.0,0.814935,101.658665,92.67572,12.5,21.5,86.0,258.0,320.5,407.5,
6,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,"Fish and other seafood (including amphibians, ...",A.01.000876,64.0,0.207792,75.154687,43.44134,10.5,15.0,77.75,142.5,148.0,211.0,
7,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Milk and dairy products,A.01.000948,294.0,0.954545,157.444048,137.73337,20.0,25.0,123.5,390.0,528.5,637.5,
8,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Eggs and egg products,A.01.001252,33.0,0.107143,31.515152,10.36331,15.0,30.0,30.0,60.0,60.0,60.0,
9,Austria,Austrian Study on Nutritional Status 2010-12 -...,Adults,Sugar and confectionary,A.01.001267,223.0,0.724026,24.166749,24.008705,2.5,4.15,16.65,74.5,88.8,114.5,


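### Function call

*Run the cell that defines `plot_by_L1_and_population()` (next section) before this one; on a fresh kernel the call below otherwise fails with a `NameError`.*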

In [8]:
# Selection for the plot below.
# NOTE(review): country_key is assigned here but is not passed to the call.
country_key = 'Austria'
population_key = 'Adults'
L1_key = 'Grains and grain-based products'

# One box per country for the chosen food group / population class, no mean line.
plot_by_L1_and_population(
    df=df1,
    L1_key=L1_key,
    population_key=population_key,
    display_mean=False,
)

###  `plot_by_L1_and_population()` function

In [5]:
def plot_by_L1_and_population(df, L1_key, population_key, display_mean = False):
    '''
    Draw one box per country for the consumption of a single L1 food group
    within a single population class.

    df            :  L1 dataframe from spreadsheet; needs the columns 'Country',
                     'Pop Class', 'Foodex L1', 'P5', 'P10', 'Median', 'P95', 'P99'
    L1_key        :  L1 Food group to filter on
    population_key:  Population Class to filter on
    display_mean  :  Display mean trend (adds a line through the per-country
                     mean of the plotted values)

    Returns whatever ``py.iplot`` returns for the assembled figure.
    Raises KeyError if a required column is missing from ``df``.
    '''
    percentile_cols = ['P5', 'P10', 'Median', 'P95', 'P99']
    # NOTE(review): P10 and P95 are deliberately listed twice, as in the original
    # code - presumably so the quartiles plotly derives from these seven points
    # land on P10 / P95. Confirm before changing.
    box_cols = ['P5', 'P10', 'P10', 'Median', 'P95', 'P95', 'P99']

    required = ['Country', 'Pop Class', 'Foodex L1'] + percentile_cols
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError('df is missing required column(s): ' + ', '.join(missing))

    # Filter the frame that was passed in (not a notebook-level global) on the
    # requested food group and population class.
    population_filter = (df['Pop Class'] == population_key)
    L1_filter         = (df['Foodex L1'] == L1_key)
    stats             = df[L1_filter & population_filter]

    # Per-country maximum of each percentile column, keeping the countries in
    # order of first appearance. Only the numeric percentile columns are
    # aggregated, so text columns cannot interfere with max().
    g_stats = stats.groupby('Country', sort = False)[percentile_cols].max()

    # One column per country, seven rows of box-defining values.
    y_vals = {
        country: [row[col] for col in box_cols]
        for country, row in g_stats.iterrows()
    }
    results = pd.DataFrame(y_vals)

    data = []
    for col in results.columns:
        data.append(  go.Box( y=results[col], name=col, showlegend=False ) )

    if display_mean:
        data.append( go.Scatter( x = results.columns, y = results.mean(), mode='lines', name='mean' ) )

    # NOTE(review): the axis titles are still the generic placeholders.
    layout = go.Layout(
        title='Consumption of '+L1_key+' for '+population_key,
        xaxis=dict(
            title='x Axis',
            titlefont=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        ),
        yaxis=dict(
            title='y Axis',
            titlefont=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )
    )
    fig = go.Figure(data=data, layout=layout)

    # IPython notebook
    return py.iplot(fig, filename='pandas-box-plot')

    # To get a URL instead (plain Python, outside a notebook):
    # url = py.plot(data, filename='pandas-box-plot')
    

In [None]:
# Quick inventory of the loaded sheet: column labels first...
print(df1.columns)

# ...then the distinct L1 food groups (count followed by the values)...
L1_foodgroups = df1['Foodex L1'].unique()
print('L1 Food Groups ', len(L1_foodgroups))
pp.pprint(L1_foodgroups)

# ...and finally the distinct surveys.
print(df1['Survey'].unique())