In [1]:
from itertools import combinations
from itertools import product

import networkx as nx
import pandas as pd

In [2]:
recipes_info_df = pd.read_csv('out/recipes_info.csv')

In [3]:
recipes_data_df = pd.read_csv('out/recipes_data.csv')

In [4]:
top_ingredients = [
    'i_agar',
    'i_agua',
    'i_agar-agar en polvo',
    'i_azúcar',
    'i_sal',
    'i_hoja de gelatina',
    'i_perifollo',
    'i_aceite de oliva',
    'i_sal maldon',
    'i_glucosa',
    'i_nata líquida',
    'i_mantequilla',
    'i_aceite',
    'i_cebollino fresco',
    'i_aceite de girasol',
    'i_nata',
    'i_harina de trigo',
    'i_limón',
    'i_claras de huevo',
    'i_isomalt',
]

In [5]:
all_ingredients = [c for c in recipes_data_df.columns if c.startswith('i_')]

In [6]:
ingr_combinations = [
    (x, y)
    for x, y in combinations(all_ingredients, 2)
    if x in top_ingredients or y in top_ingredients
]

In [7]:
%%time

ls = []
for j, x in recipes_data_df.iterrows():
    df = pd.DataFrame(
        [[recipes_info_df.year[j]] + [int(x[i1] == 1 and x[i2] == 1) for i1, i2 in ingr_combinations]],
        columns=['year'] + ingr_combinations,
    )
    ls.append(df)
ingr_combs_df = pd.concat(ls, ignore_index=True)

CPU times: user 44min 51s, sys: 1.94 s, total: 44min 53s
Wall time: 44min 54s


In [8]:
top_techniques = [
    't_hervir',
    't_pasar',
    't_cocer',
    't_horno',
    't_triturar',
    't_mezclar',
    't_estirar',
    't_reposar',
    't_pelar',
    't_colar',
    't_secar',
    't_escurrir',
    't_disolver',
    't_escaldar',
    't_sal',
    't_puré',
    't_agua',
    't_sartén',
    't_espuma',
    't_montar',
]

In [9]:
all_techniques = [c for c in recipes_data_df.columns if c.startswith('t_')]

In [10]:
tech_combinations = [
    (x, y)
    for x, y in combinations(all_techniques, 2)
    if x in top_techniques or y in top_techniques
]

In [11]:
%%time

ls = []
for j, x in recipes_data_df.iterrows():
    df = pd.DataFrame(
        [[recipes_info_df.year[j]] + [int(x[t1] == 1 and x[t2] == 1) for t1, t2 in tech_combinations]],
        columns=['year'] + tech_combinations,
    )
    ls.append(df)
tech_combs_df = pd.concat(ls, ignore_index=True)

CPU times: user 4min 28s, sys: 72 ms, total: 4min 28s
Wall time: 4min 28s


In [12]:
ingr_tech_combinations = [
    (x, y)
    for x, y in product(all_ingredients, all_techniques)
    if x in top_ingredients or y in top_techniques
]

In [13]:
%%time

ls = []
for j, x in recipes_data_df.iterrows():
    df = pd.DataFrame(
        [[recipes_info_df.year[j]] + [int(x[i] == 1 and x[t] == 1) for i, t in ingr_tech_combinations]],
        columns=['year'] + ingr_tech_combinations,
    )
    ls.append(df)
ingr_tech_combs_df = pd.concat(ls, ignore_index=True)

CPU times: user 47min, sys: 1.36 s, total: 47min 1s
Wall time: 47min 1s


In [14]:
sum_ingr_combs_by_year = ingr_combs_df.groupby('year').sum()

In [15]:
for c in sum_ingr_combs_by_year.columns:
    if sum_ingr_combs_by_year[c].sum() == 0:
        sum_ingr_combs_by_year = sum_ingr_combs_by_year.drop(c, axis=1)

In [16]:
sum_tech_combs_by_year = tech_combs_df.groupby('year').sum()

In [17]:
for c in sum_tech_combs_by_year.columns:
    if sum_tech_combs_by_year[c].sum() == 0:
        sum_tech_combs_by_year = sum_tech_combs_by_year.drop(c, axis=1)

In [18]:
sum_ingr_tech_combs_by_year = ingr_tech_combs_df.groupby('year').sum()

In [19]:
for c in sum_ingr_tech_combs_by_year.columns:
    if sum_ingr_tech_combs_by_year[c].sum() == 0:
        sum_ingr_tech_combs_by_year = sum_ingr_tech_combs_by_year.drop(c, axis=1)

In [20]:
sum_ingr_combs_by_year.to_csv('out/ingr_combs.csv')

In [21]:
sum_tech_combs_by_year.to_csv('out/tech_combs.csv')

In [22]:
sum_ingr_tech_combs_by_year.to_csv('out/ingr_tech_combs.csv')