In [None]:
# Instalación de las librerías (necesario en Colab)
!pip install dash==2.0.0
!pip install dash-html-components
!pip install dash-core-components
!pip install dash-table
!pip install jupyter-dash
!pip install dash-bootstrap-components
!pip install --upgrade plotly

In [None]:
#Descargamos los datasets

#Información de las recetas
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1iWapfGtNyn98RHnvX46wmI_ret8_TBYl' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1iWapfGtNyn98RHnvX46wmI_ret8_TBYl" -O clean_recipes_sample.pkl
#Información nutrimental
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1XgWyi71W4Y7GFybFOMHQrkzRWrRCqn9X' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1XgWyi71W4Y7GFybFOMHQrkzRWrRCqn9X" -O clean_nutrition_sample.pkl
#Información nutrimental rango 2
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1Q_hH3BpNiEfid3U0--Pbu884dryFicoO' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1Q_hH3BpNiEfid3U0--Pbu884dryFicoO" -O clean_nutrition2d_sample.pkl
#Información de los ingredientes rango 2
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1TlcJsXX-EiPANql4AA7rKelExawmJW4P' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1TlcJsXX-EiPANql4AA7rKelExawmJW4P" -O clean_ingredients2d_sample.pkl
#Borramos las cookies
!  rm -rf /tmp/cookies.txt


In [None]:
#Importamos las librerías necesarias
import dash
from dash import dash_table
from dash import dcc,html
from dash.dependencies import Input, Output 
import dash_bootstrap_components as dbc
from jupyter_dash import JupyterDash
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import seaborn as sns

# ReciPy
#### Jazmín López Chacón

En este archivo nos enfocaremos en la visualización.

In [None]:
# Load the four pickled DataFrames fetched by the download cell.
# NOTE(review): unpickling executes arbitrary code, so only load files from a
# source you trust.
# Recipe information; later cells read its `name`, `description`,
# `ingredients` and `steps` fields.
recipes = pd.read_pickle('clean_recipes_sample.pkl')
# Reduced ingredient data; later cells read its `x`, `y` and `cluster` columns.
ingr_reducedDF = pd.read_pickle('clean_ingredients2d_sample.pkl')
# Nutrition information; every column except the last is shown to the user.
nutrition = pd.read_pickle('clean_nutrition_sample.pkl')
# Reduced nutrition data; later cells read its `x`, `y` and `cluster` columns.
nutri_reducedDF = pd.read_pickle('clean_nutrition2d_sample.pkl')

In [None]:
# Define the colours: one semi-transparent RGBA string per cluster.
paleta = sns.color_palette('gist_heat',4)
# Scale each (r, g, b) triple to 0-255 and append the alpha channel.
# `.tolist()` turns the NumPy scalars into plain Python floats before the
# tuple is stringified: str(tuple) uses the repr of each element, and on
# NumPy >= 2 that repr is `np.float64(...)`, which would end up inside the
# colour string and make it invalid.
colores = ['rgba'+str(tuple(np.append(255*np.array(c),0.65).tolist())) for c in paleta]

In [None]:
# Display labels (shown to the user, hence in Spanish) for the nutrition
# columns, listed in the same order as the columns they describe.
cols_info = [
    'calorías',
    'grasas totales (PDV)',
    'azúcares (PDV)',
    'sodio (PDV)',
    'proteína(PDV)',
    'grasas saturadas (PDV)',
    'carbohidratos (PDV)',
]
# Map every nutrition column except the last one to its display label.
colsDict = dict(zip(nutrition.columns[:-1], cols_info))

In [None]:
# Box plots of one nutrition column, optionally split by cluster
def get_distribution(column,dataframe=None):
    """Build a box-plot figure for one nutrition column.

    Parameters
    ----------
    column : str
        Column of ``nutrition`` to plot; must be a key of ``colsDict``
        (a ``KeyError`` is raised otherwise).
    dataframe : str or None
        Which clustering splits the recipes into boxes:
        ``None`` draws a single box with every recipe, ``'nutri'`` uses the
        clusters in ``nutri_reducedDF``, and any other value uses the
        clusters in ``ingr_reducedDF``.

    Returns
    -------
    plotly.graph_objs.Figure
        One ``Box`` trace per group. Each point carries the recipe name as
        ``text`` and the recipe index as ``customdata``.
    """
    fig = go.Figure()

    full_title = 'Distribuciones de '+colsDict[column]+' de recetas de Food.com'

    if dataframe is None:
        # `.assign` returns a new frame, so the shared `nutrition` frame is
        # never mutated; recipe names are aligned on the index.
        df = nutrition.assign(name=recipes['name'])
        fig.add_trace(go.Box(
            y=df[column], text=df.name,
            marker_color='goldenrod',
            name="Todas las recetas",
            customdata=df.index
        ))
    else:
        # The cluster frames are only read, so no defensive copy is needed.
        if dataframe=='nutri':
            cluster_df = nutri_reducedDF
            sub_ttl = "la información nutrimental"
        else:
            cluster_df = ingr_reducedDF
            sub_ttl = "los ingredientes"

        # One box per cluster, coloured with the matching palette entry.
        for i, color in enumerate(colores):
            idxs = cluster_df[cluster_df.cluster==i].index
            # Build the per-cluster frame with `.assign` instead of writing a
            # column into a `.loc` slice (avoids SettingWithCopyWarning).
            df = nutrition.loc[idxs].assign(name=recipes['name'])
            fig.add_trace(go.Box(
                y=df[column], text=df.name,
                name='Grupo '+str(i),
                marker_color=color,
                customdata=df.index
            ))

        full_title += (
            "<br><sup>Agrupamiento de recetas de Food.com usando "
            + sub_ttl + "</sup>"
        )

    fig.update_layout(title=full_title,
                      yaxis=dict(title=column))

    return fig

In [None]:
# Scatter plot of the recipes, coloured by cluster
def crear_visualizacion(representacion, clasificacion):
    """Build a scatter plot of the recipes coloured by cluster.

    Parameters
    ----------
    representacion : str
        Which reduced frame supplies the point coordinates: ``'nutri'``
        uses ``nutri_reducedDF``; any other value uses ``ingr_reducedDF``.
    clasificacion : str
        Which clustering colours the points: ``'nutri_cls'`` uses the
        clusters in ``nutri_reducedDF``; any other value uses the clusters in
        ``ingr_reducedDF``.

    Returns
    -------
    plotly.graph_objs.Figure
        One ``Scattergl`` trace per cluster. Each point carries the recipe
        name as ``text`` and the recipe index as ``customdata``.

    Notes
    -----
    The title names the clustering criterion and the subtitle names the
    representation. The previous implementation labelled the combination
    "ingredients representation + nutrition clustering" incorrectly: it
    produced the same title as "nutrition representation + ingredients
    clustering".
    """
    # Representation: where the x/y coordinates come from.
    if representacion=='nutri':
        coords_df = nutri_reducedDF
        subtitle = 'Representación de la información nutrimental'
    else:
        coords_df = ingr_reducedDF
        subtitle = 'Representación de los ingredientes'

    # Classification: where the cluster labels come from.
    if clasificacion=='nutri_cls':
        cluster_df = nutri_reducedDF
        criterio = "la información nutrimental"
    else:
        cluster_df = ingr_reducedDF
        criterio = "los ingredientes"

    fig = go.Figure()
    for i in range(4):
        # Recipes belonging to cluster i under the chosen clustering.
        # NOTE(review): assumes the frames share a unique recipe index, as
        # the original cross-frame `.loc[idx]` lookups did - confirm.
        idx = cluster_df[cluster_df.cluster==i].index
        puntos = coords_df.loc[idx]
        fig.add_trace(go.Scattergl(
            x=puntos.x, y=puntos.y,
            # Names are looked up directly rather than written into a slice
            # of the frame, which triggered SettingWithCopyWarning.
            text=recipes.loc[idx,'name'],
            name='Grupo '+str(i), mode='markers',
            marker_color=colores[i],
            customdata=idx
        ))

    fig.update_layout(
        title="Agrupamiento de recetas de Food.com usando "
              + criterio + "<br><sup>" + subtitle + "</sup>",
        xaxis=dict(title='Componente Principal 1'),
        yaxis=dict(title='Componente Principal 2')
    )
    return fig

In [None]:
# Controls shown on the scatter-plot tab: two radio groups whose ids
# ("repre" and "info") feed the graph callback.
scatter_controls = dbc.Card(
    [
        html.Div(
            [
                # Which reduced frame supplies the point coordinates.
                dbc.Label("Representación de recetas"),
                dcc.RadioItems(
                    id="repre",
                    options=[{"label":'Información Nutrimental', "value":'nutri'},
                             {"label":'Ingredientes', "value":'ingr'}],
                    labelStyle={'display': 'block'},
                    value="nutri",
                    # Dash style dictionaries use camelCased CSS property
                    # names (marginBottom, not margin-bottom).
                    style={"marginBottom": "15px"},
                ),
            ]
        ),
        html.Div(
            [
                # Which clustering colours the points.
                dbc.Label("Clasificación a visualizar"),
                dcc.RadioItems(
                    id="info",
                    options=[{"label":'Información Nutrimental', "value":'nutri_cls'},
                             {"label":'Ingredientes', "value":'ingr_cls'}],
                    labelStyle={'display': 'block'},
                    value="nutri_cls",
                ),
            ]
        )

    ],
    body=True,
)

In [None]:
# Controls shown on the box-plot tab. They reuse the ids "repre" and "info"
# of the scatter controls, so the same graph callback reads both sets.
distribution_controls = dbc.Card(
    [
        html.Div(
            [
                # Clustering used to split the boxes; None means a single
                # box containing every recipe.
                dbc.Label("Agrupamiento"),
                dcc.Dropdown(
                    id="repre",
                    options=[
                        {"label": "Información Nutrimental", "value": "nutri"},
                        {"label": "Ingredientes", "value": "ingr"},
                    ],
                    value=None,
                ),
                # Nutrition column to plot: one option per column (all but
                # the last), labelled with its display name.
                dbc.Label("Información a mostrar"),
                dcc.Dropdown(
                    id="info",
                    options=[
                        {"label": etiqueta, "value": columna}
                        for etiqueta, columna in zip(cols_info, nutrition.columns[:-1])
                    ],
                    # NOTE(review): presumably "cal" is one of the nutrition
                    # columns - confirm against the dataset.
                    value="cal",
                ),
            ]
        ),
    ],
    body=True,
)

In [None]:
# Build the recipe detail panel (heading + four tables) from a graph click
def _recipe_table(data, columns, list_view=True, wrap_text=True):
    """Create a DataTable with the look shared by all the detail tables."""
    kwargs = dict(
        data=data, columns=columns,
        style_as_list_view=list_view,
        merge_duplicate_headers=True,
        style_header={'fontWeight': 'bold'},
    )
    # Only pass `style_data` when wrapping is wanted, so the component keeps
    # its own default otherwise.
    if wrap_text:
        kwargs['style_data'] = {'whiteSpace': 'normal', 'height': 'auto'}
    return dash_table.DataTable(**kwargs)


def get_recipe_info(clickdata):
    """Return the detail widgets for the recipe selected on the graph.

    Parameters
    ----------
    clickdata : dict or None
        The ``clickData`` property of the graph. The first entry of
        ``clickdata['points']`` is expected to carry the recipe index in
        ``customdata`` and the recipe name in ``text``.

    Returns
    -------
    tuple
        ``(heading, nutri_tab, ingr_tab, step_tab, cluster_tab)`` where
        ``heading`` is a list ``[H3(name), Hr(), H6(description)]`` and the
        rest are ``DataTable`` components. When nothing usable was clicked,
        placeholder content is returned instead.
    """
    point = None
    if clickdata and clickdata.get('points'):
        point = clickdata['points'][0]

    # A click that does not land on an individual recipe point carries no
    # `customdata`; treat it as "nothing selected" instead of raising
    # KeyError inside the callback.
    # NOTE(review): presumably this happens when clicking the body of a box
    # trace - confirm against the plotly click payload.
    if point is None or 'customdata' not in point:
        recipe_name = 'Ninguna receta seleccionada'
        desc = ''
        # Nutrition information
        data_nutri = [{"atr":name, "value": 0} for name in colsDict.values()]
        # Ingredients
        data_ingr = [{"counter":0,"ingr":''}]
        # Steps
        data_step = [{"counter":0,"step":''}]
        # Cluster membership
        data_cluster = [{"criteria":'Nutrientes',"group":-1},
                        {"criteria":'Ingredientes',"group":-1}]
    else:
        idx = point['customdata']
        recipe_info = recipes.loc[idx]
        # Fall back to the stored name if the click payload has no text.
        recipe_name = point.get('text', recipe_info['name'])
        desc = recipe_info.description

        # Nutrition information (every column but the last)
        nutri_info = nutrition.loc[idx,nutrition.columns[:-1]]
        data_nutri = [{"atr":name, "value": nutri_info[col]} for col,name in colsDict.items()]

        # Ingredients, numbered from 1
        data_ingr = [{"ingr":ing,"counter":i+1} for i,ing in enumerate(recipe_info.ingredients)]

        # Steps, numbered from 1
        data_step = [{"counter":i+1,"step":stp} for i,stp in enumerate(recipe_info.steps)]

        # Cluster assigned to the recipe under each criterion
        g_nutri = nutri_reducedDF.loc[idx]['cluster']
        g_ingr = ingr_reducedDF.loc[idx]['cluster']
        data_cluster = [{"criteria":'Nutrientes',"group":g_nutri},
                        {"criteria":'Ingredientes',"group":g_ingr}]

    # Both columns share one name so the merged header spans the table.
    nutri_tab = _recipe_table(
        data_nutri,
        [{"name": 'Información nutrimental', "id":'atr'},
         {"name":'Información nutrimental',"id":'value'}],
        wrap_text=False,
    )

    ingr_tab = _recipe_table(
        data_ingr,
        [{"name":'Ingredientes', "id":"counter"},
         {"name": 'Ingredientes', "id":'ingr'}],
    )

    step_tab = _recipe_table(
        data_step,
        [{"name":'Pasos', "id":"counter"},
         {"name": 'Pasos', "id":'step'}],
    )

    # Two-level header: "Agrupamiento" over "Criterio" / "Grupo".
    cluster_tab = _recipe_table(
        data_cluster,
        [{"name":['Agrupamiento','Criterio'], "id":"criteria"},
         {"name": ['Agrupamiento','Grupo'], "id":'group'}],
        list_view=False,
    )

    return [html.H3(recipe_name),html.Hr(), html.H6(desc)],nutri_tab,ingr_tab,step_tab, cluster_tab

In [None]:
# App: create the Dash application and declare its static layout
exS = [dbc.themes.BOOTSTRAP]

# The "repre" and "info" inputs used by the graph callback are inserted into
# the "controls" column by a callback, so they are absent from the initial
# layout. `suppress_callback_exceptions=True` tells Dash not to report those
# ids as nonexistent.
app = JupyterDash(
    __name__,
    external_stylesheets=exS,
    suppress_callback_exceptions=True,
)



app.layout = dbc.Container(
    [
        # Tab selector: scatter plots vs. box plots
        dcc.Tabs(
            id="tabs",
            value ="scatter",
            children =[ 
            dcc.Tab(label='Agrupamiento', value='scatter'),
            dcc.Tab(label='Distribuciones', value='distributions'),
            ]
        ),
        html.H1("Comparación de K-medias para recetas de Food.com"),
        html.Hr(),
        # Controls (filled in by a callback) next to the main graph
        dbc.Row(
            [
                dbc.Col(
                    id="controls",md=4
                ),
                dbc.Col(dcc.Graph(id="graph",  figure={}), md=8),
            ],
            align="center",
        ),
        # Heading of the selected recipe
        dbc.Row([
                 dbc.Col(id="recipe-name")
        ]),
        # Detail tables of the selected recipe
        dbc.Row(
            [
                dbc.Col(
                    id="nutri_info",md=3
                ),
                dbc.Col(
                    id="ingrs_info",md=3
                ),
                dbc.Col(
                    id="steps_info",md=4
                ),
                dbc.Col(
                    id="cluster_info",md=2
                )
            ]
        )
    ],
    fluid=True,
)
@app.callback(
    Output("controls","children"),
    Input("tabs", "value")
)
def set_controls(tab):
    """Return the control card for the active tab.

    tab: str, value of the selected tab. "scatter" selects the scatter-plot
    controls; any other value selects the box-plot controls.
    """
    return scatter_controls if tab=="scatter" else distribution_controls

@app.callback(
    Output("graph", "figure"),
    [
        Input("tabs", "value"),
        Input("repre", "value"),
        Input("info", "value")
    ],
)
def do_graph(tab,repre,info):
    """Redraw the main graph whenever the tab or a control changes.

    tab: str, value of the selected tab ("scatter" or anything else for the
        box plots).
    repre: str or None, value of the "repre" control of the active tab.
    info: str or None, value of the "info" control of the active tab.

    Returns the new figure, or ``dash.no_update`` when the box-plot tab is
    active but ``info`` is not a known nutrition column.
    """
    if tab=="scatter":
        return crear_visualizacion(repre, info)

    # `info` may be None (dropdown cleared) or may still hold a value from
    # the scatter controls when the tab has just changed. Keep the current
    # figure rather than failing with KeyError in the title lookup.
    if info not in colsDict:
        return dash.no_update

    # `repre` is None when no clustering is selected; get_distribution
    # treats None as "all recipes in one box", so no special case is needed.
    return get_distribution(info, repre)


# Fill the recipe heading and the four detail columns whenever a point of the
# graph is clicked.
@app.callback(
    [
     Output("recipe-name","children"),
     Output("nutri_info", "children"),
     Output("ingrs_info", "children"),
     Output("steps_info", "children"),
     Output("cluster_info", "children"),
    ],
    [
        Input("graph", "clickData")
    ],
)
def info(clickdata):
    """Forward the graph's clickData to get_recipe_info.

    The five values returned by get_recipe_info map, in order, onto the five
    outputs declared in the decorator.
    """
    return get_recipe_info(clickdata)

In [None]:
# Start the app server in debug mode on port 1010.
# NOTE(review): 1010 is below 1024, so binding may need elevated privileges
# outside Colab - confirm for local runs.
app.run_server(debug=True,port=1010)