In [13]:
import pandas as pd
import json
import pyarrow
from statistics import mean

In [14]:
# Load the raw inputs. Further down, `diary` is iterated as a sequence of
# entries read through their "name" and "quantity" keys, and `all_tags` is
# used as a mapping of meal name -> list of tags.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of depending on the platform's locale default.
with open("./data/diary.json", "r", encoding="utf-8") as file:
    diary = json.load(file)

with open("./data/tags.json", "r", encoding="utf-8") as file:
    all_tags = json.load(file)

In [15]:
# Weight given to the overall average in compute_vote: it behaves like this
# many extra votes cast at the overall average, so a meal with few recorded
# votes is pulled toward that average.
COMMON_FOODS_CONSTANT = 3

# Not referenced by any of the cells visible here.
MINIMUM_VOTES = 1
# Original (misspelled) name, kept as an alias so existing references keep working.
MINUMUM_VOTES = MINIMUM_VOTES

# Vote assigned to each quantity code; position i holds the vote for quantity i + 1.
_VOTE_BY_QUANTITY = (10, 7, 3, 0)


def quantity_to_vote(quantity):
    """Map a 1-based quantity code to its vote.

    The mapping is 1 -> 10, 2 -> 7, 3 -> 3, 4 -> 0.

    Args:
        quantity: integer code in the range 1..4.

    Returns:
        The vote associated with ``quantity``.

    Raises:
        IndexError: if ``quantity`` is outside 1..4. Without the explicit
            check, 0 and small negative values would wrap around through
            Python's negative indexing and silently return a wrong vote.
    """
    if not 1 <= quantity <= len(_VOTE_BY_QUANTITY):
        raise IndexError(
            f"quantity must be between 1 and {len(_VOTE_BY_QUANTITY)}, got {quantity!r}"
        )
    return _VOTE_BY_QUANTITY[quantity - 1]

def compute_vote(all_qty, total_avg, prior_weight=None):
    """Blend a meal's own average vote with the overall average.

    With ``n = len(all_qty)`` and ``w = prior_weight`` the result is

        n / (n + w) * mean(all_qty)  +  w / (n + w) * total_avg

    so a meal with few votes stays close to ``total_avg`` while a meal with
    many votes approaches its own average (the usual "Bayesian average"
    weighting).

    Args:
        all_qty: votes recorded for a single meal.
        total_avg: average vote used as the fallback/prior value.
        prior_weight: weight ``w`` of ``total_avg``. When omitted, the
            module-level COMMON_FOODS_CONSTANT is looked up at call time.

    Returns:
        The blended vote, or -1 when ``all_qty`` is empty.
    """
    if not all_qty:
        return -1
    if prior_weight is None:
        prior_weight = COMMON_FOODS_CONSTANT
    n_votes = len(all_qty)
    total_weight = n_votes + prior_weight
    # Same operation order as before (ratio first, then scale) so results
    # stay bit-for-bit identical for the default weight.
    votes_part = n_votes / total_weight * mean(all_qty)
    population_part = prior_weight / total_weight * total_avg
    return votes_part + population_part

In [16]:
# Convert every diary entry to a vote exactly once, grouping the votes by
# lower-cased meal name so that each meal below is a dictionary lookup
# instead of another pass over the whole diary.
all_votes = []
votes_by_meal = {}
for entry in diary:
    entry_vote = quantity_to_vote(entry["quantity"])
    all_votes.append(entry_vote)
    votes_by_meal.setdefault(entry["name"].lower(), []).append(entry_vote)

# Average vote over the whole diary. With an empty diary no meal has votes,
# so compute_vote returns before reading this value; -1 matches the
# "no data" marker used for `avg` below.
total_avg = mean(all_votes) if all_votes else -1

# NOTE(review): diary names are lower-cased but the keys of all_tags are used
# as-is, so a key containing uppercase letters never matches — confirm the
# keys in tags.json are all lower-case.
meals = []
for meal, tags in all_tags.items():
    all_qty = votes_by_meal.get(meal, [])
    avg = mean(all_qty) if all_qty else -1  # -1 marks a meal with no diary entries
    vote = compute_vote(all_qty, total_avg)
    meals.append({"tags": tags, "avg": avg, "vote": vote, "meal": meal})

In [17]:
# Every distinct tag used by at least one meal. Sorted so the list comes out
# in the same order on every run; iterating a set of strings directly can
# yield a different order in each interpreter session.
all_used_tags = sorted({tag for tags in all_tags.values() for tag in tags})

In [18]:
# One row per meal; the columns come from the dict keys built above
# ("tags", "avg", "vote", "meal"), with "tags" holding a list per row.
df = pd.DataFrame(meals)

In [19]:
# Preview with one row per (meal, tag) pair. The result is only displayed,
# not assigned, so `df` keeps its list-valued "tags" column.
df.explode("tags")

Unnamed: 0,tags,avg,vote,meal
0,pasta,-1.0,-1.000000,pasta con gli aromi d'estate
0,primo,-1.0,-1.000000,pasta con gli aromi d'estate
0,carboidrati,-1.0,-1.000000,pasta con gli aromi d'estate
1,uova,10.0,9.229972,frittata
1,parmigiano,10.0,9.229972,frittata
...,...,...,...,...
129,spinacino,0.0,2.392188,insalata spinacino
129,verdura,0.0,2.392188,insalata spinacino
129,contorno,0.0,2.392188,insalata spinacino
130,arancia,0.0,1.345606,arancia


In [20]:
# Display the per-meal frame (one row per meal, tags still as lists).
df

Unnamed: 0,tags,avg,vote,meal
0,"[pasta, primo, carboidrati]",-1.00,-1.000000,pasta con gli aromi d'estate
1,"[uova, parmigiano, secondo, proteine]",10.00,9.229972,frittata
2,"[fagiolini, contorno, verdura]",-1.00,-1.000000,fagiolini all'olio
3,[frutta],10.00,8.588283,frutta fresca
4,"[pane, carboidrati]",-1.00,-1.000000,pane
...,...,...,...,...
126,"[zucca, contorno, verdura]",0.75,3.504242,zucca al forno
127,"[verza, mela, contorno, verdura]",0.00,2.691212,verza stufata con mele renette a pezzetti e ch...
128,"[verza, mela, contorno, verdura]",-1.00,-1.000000,verza stufata con mele renette a pezzi e chiod...
129,"[spinacino, verdura, contorno]",0.00,2.392188,insalata spinacino


In [21]:
# Persist the per-meal frame. pandas needs a parquet engine for this; the
# notebook imports pyarrow at the top.
df.to_parquet("./data/meals.parquet")