In [1]:
import pathlib
import pandas as pd
import numpy as np
import plotly
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

path_dataset = pathlib.Path("../dataset/")

# Load the cleaned recipes; the cup-measurement columns are the ones tagged "(c.)".
df_after = pd.read_csv(path_dataset / "recipenlg-cookies_cleaned.csv")
col_ingd_cup = [name for name in df_after.columns if "(c.)" in name]

# Keep the rows whose to_drop flag is 0 and only the descriptive + cup columns,
# force the chocolate-chip column to float, and treat missing amounts as 0 cups.
df_after = (
    df_after
    .loc[df_after["to_drop"] == 0, ["title", "ingredients", "link", *col_ingd_cup]]
    .reset_index(drop=True)
    .astype({"chocolate chips (c.)": "float"})
    .fillna(dict.fromkeys(col_ingd_cup, 0))
)

# Express every cup amount as a fraction of that recipe's total cups and
# append the result as "(%)" columns next to the "(c.)" ones.
df_after_pct = (
    df_after[col_ingd_cup]
    .div(df_after[col_ingd_cup].sum(axis=1), axis=0)
    .rename(columns=lambda name: name.replace("(c.)", "(%)"))
)
col_ingd_pct = df_after_pct.columns
df_after = pd.concat([df_after, df_after_pct], axis=1)
del df_after_pct
df_after.head()

Unnamed: 0,title,ingredients,link,white sugar (c.),brown sugar (c.),flour (c.),egg (c.),vanilla (c.),chocolate chips (c.),salt (c.),...,butter (%),baking powder (%),nuts (%),shortening (%),margarine (%),milk (%),water (%),oats (%),cinnamon (%),peanut butter (%)
0,Chicago Crunchy Chocolate Chip Cookies,"['3 1/2 c. flour', '3 tsp. baking soda', '1 c....",www.cookbooks.com/Recipe-Details.aspx?id=758790,1.0,1.0,3.5,0.2,0.125,1.5,0.0208,...,0.055736,0.0,0.0,0.0,0.0,0.006967,0.0,0.111473,0.0,0.0
1,Oatmeal Chocolate Chip Cookies,"['1 c. butter flavored shortening', '1 1/4 c. ...",www.cookbooks.com/Recipe-Details.aspx?id=288543,0.5,1.25,1.75,0.4,0.0417,2.0,0.0,...,0.0,0.0,0.123648,0.123648,0.0,0.015456,0.0,0.0,0.0,0.0
2,Chocolate Chip Cookies,"['1 1/2 c. salad oil', '1 c. sugar', '2 c. bro...",www.cookbooks.com/Recipe-Details.aspx?id=13529,1.0,2.0,4.5,0.8,0.0417,2.0,0.0417,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,The Best Chocolate Chip Cookies,"['1/2 lb. real butter', '1 c. brown sugar', '1...",www.cookbooks.com/Recipe-Details.aspx?id=1050198,1.0,1.0,2.0,0.4,0.0208,4.0,0.0208,...,0.105449,0.002197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,White Chocolate Chip Cookies,"['2 1/4 c. flour, sifted', '1 tsp. baking soda...",www.cookbooks.com/Recipe-Details.aspx?id=997342,0.75,0.75,2.25,0.6,0.0,1.5,0.0208,...,0.130863,0.0,0.098147,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Mean share of each ingredient across all recipes, largest first, as a
# two-column frame (ingredient, pct) with the " (%)" suffix stripped.
df_avg = (
    df_after[col_ingd_pct]
    .mean(axis=0)
    .sort_values(ascending=False)
    .rename_axis("ingredient")
    .reset_index(name="pct")
)
df_avg["ingredient"] = df_avg["ingredient"].str.replace(" (%)", "", regex=False)

# Lookup table assigning every ingredient to a broader category.
ingd_to_cate = pd.DataFrame(
    [
        ("white sugar", "sweeteners"),
        ("brown sugar", "sweeteners"),
        ("flour", "flour"),
        ("egg", "eggs"),
        ("vanilla", "flavorings"),
        ("chocolate chips", "toppings"),
        ("salt", "flavorings"),
        ("baking soda", "leaveners"),
        ("butter", "fats"),
        ("baking powder", "leaveners"),
        ("nuts", "toppings"),
        ("shortening", "fats"),
        ("margarine", "fats"),
        ("milk", "liquids"),
        ("water", "liquids"),
        ("oats", "toppings"),
        ("cinnamon", "flavorings"),
        ("peanut butter", "flavorings"),
    ],
    columns=["ingredient", "category"],
)

# Attach the category to each averaged ingredient (left join keeps df_avg's order).
df_avg = df_avg.merge(ingd_to_cate, how="left", on="ingredient")
df_avg.head()

Unnamed: 0,ingredient,pct,category
0,flour,0.329321,flour
1,chocolate chips,0.214697,toppings
2,brown sugar,0.107879,sweeteners
3,white sugar,0.09123,sweeteners
4,butter,0.064508,fats


## Sunburst chart (multilevel pie charts)

> Reference: [Plotly sunburst charts tutorial](https://plotly.com/python/sunburst-charts/)

In [65]:
# Two-level sunburst: inner ring = category, outer ring = ingredient.
# The averaged fractions are scaled by 100 so the hover text reads as percent.
fig = px.sunburst(
    df_avg.assign(pct=lambda frame: frame["pct"] * 100),
    path=["category", "ingredient"],
    values="pct",
    custom_data=["pct"],
    color="category",
    color_discrete_sequence=px.colors.qualitative.Antique,
    title="Chocolate chip cookies",
    width=600,
    height=600,
).update_traces(
    textinfo="percent entry+label",
    hovertemplate='%{label}<br>pct=%{customdata[0]:5.2f}%<extra></extra>',
)
fig.show()

## Histogram

In [67]:
# Preview the per-recipe ingredient fractions that feed the histograms.
df_after.loc[:, col_ingd_pct].head()

Unnamed: 0,white sugar (%),brown sugar (%),flour (%),egg (%),vanilla (%),chocolate chips (%),salt (%),baking soda (%),butter (%),baking powder (%),nuts (%),shortening (%),margarine (%),milk (%),water (%),oats (%),cinnamon (%),peanut butter (%)
0,0.111473,0.111473,0.390155,0.022295,0.013934,0.167209,0.002319,0.006967,0.055736,0.0,0.0,0.0,0.0,0.006967,0.0,0.111473,0.0,0.0
1,0.061824,0.15456,0.216383,0.049459,0.005156,0.247295,0.0,0.002572,0.0,0.0,0.123648,0.123648,0.0,0.015456,0.0,0.0,0.0,0.0
2,0.095922,0.191845,0.431651,0.076738,0.004,0.191845,0.004,0.004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.105449,0.105449,0.210899,0.04218,0.002193,0.421797,0.002193,0.002193,0.105449,0.002197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.098147,0.098147,0.294441,0.078518,0.0,0.196294,0.002722,0.002722,0.130863,0.0,0.098147,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
from ipywidgets import interact, Dropdown

# Stand-alone demo of two linked dropdowns. It only uses the toy `geo` mapping
# defined here and none of the recipe data loaded elsewhere in the notebook.
geo = {'USA':['CHI','NYC'],'Russia':['MOW','LED']}
countryW = Dropdown(options = geo.keys())
# Created without any options; they are filled in from inside print_city.
cityW = Dropdown()

@interact(country = countryW, city = cityW)
def print_city(country, city):
    """Print the selected country and city, refreshing the city choices first.

    Args:
        country: a key of `geo`, taken from the country dropdown.
        city: the current value of the city dropdown.

    NOTE(review): cityW starts with no options, so `city` is presumably None
    on the very first call, and assigning `cityW.options` here presumably
    re-triggers this callback -- confirm against the ipywidgets docs.
    """
    cityW.options = geo[country] # The trick: repopulate cityW.options from the selected country (i.e. countryW.value).
    print(country, city)

interactive(children=(Dropdown(description='country', options=('USA', 'Russia'), value='USA'), Dropdown(descri…

In [23]:
from ipywidgets import interact, Dropdown

# Map each category to the list of its ingredients. Grouping with sort=False
# keeps the categories in the order they first appear in ingd_to_cate.
# Iterating a set() of strings instead yields an order that can differ between
# kernel sessions, which reshuffles the dropdown and its default selection.
dict_cate = {
    cate: members.tolist()
    for cate, members in ingd_to_cate.groupby("category", sort=False)["ingredient"]
}

cateWidget = Dropdown(options=list(dict_cate))


@interact(category=cateWidget)
def plot_hist(category=cateWidget):
    """Plot one histogram trace per ingredient of the selected category.

    Each trace shows how that ingredient's share of the recipe (the
    "<ingredient> (%)" column of df_after, a fraction of the recipe's total
    cups) is distributed across recipes.

    Args:
        category: a category name from ingd_to_cate. The default is the
            dropdown widget itself, in which case its current value is used.
            An unknown category produces an empty figure.
    """
    # Allow a direct plot_hist() call: resolve the widget default to its value
    # instead of comparing category names against a widget object.
    if isinstance(category, Dropdown):
        category = category.value

    fig = go.Figure()
    for ingd in dict_cate.get(category, []):
        fig.add_trace(go.Histogram(
            x=df_after[f"{ingd} (%)"],
            name=ingd,
            xbins=dict(start=0)  # start the bins at 0 for every trace
        ))

    fig.update_layout(
        title=f"Distribution of ingredient share: {category}",
        xaxis_title="share of recipe (fraction of total cups)",
        yaxis_title="number of recipes",
        bargap=0.2,
        width=800,
        height=500
    )
    fig.update_traces(opacity=0.75)
    fig.show()

interactive(children=(Dropdown(description='category', options=('sweeteners', 'leaveners', 'flour', 'liquids',…