In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from pqdm.processes import pqdm

# Show floats with 16 digits after the decimal point when frames are displayed.
pd.options.display.precision = 16

In [29]:
# Source tables used by the rest of the notebook:
#   food     -- filtered by 'fdc_id' and read for 'name' in Food.from_fdc_id
#   nutrient -- filtered by 'fdc_id' / 'nutrient_id' and read for 'amount_per_100g'
food, nutrient = (pd.read_csv(csv_name) for csv_name in ("food.csv", "nutrient.csv"))

In [30]:
# Profiles: convert intake_config into a form that lines up with the CSV data
# Join the CSVs into one table
# Function (params: profile, current_state) -> list of items (sorted)
# Function to score overall (profile, total list of the day) -> dd%

In [31]:
import json
import os
import matplotlib.pyplot as plt

# Load the nutrient catalogue and the intake profiles. Context managers close
# the file handles as soon as parsing is done instead of leaving them to the
# garbage collector.
# NOTE(review): the files are opened with the platform default text encoding,
# as before; if they contain non-ASCII unit symbols, confirm the encoding.
with open("nutrient_definitions.json") as definitions_file:
    nutrient_definitions = json.load(definitions_file)
with open("intake_config.json") as intake_file:
    intake_config = json.load(intake_file)

class Food:
    """One food item together with its nutrient amounts per 100 g.

    Attributes (all set by ``__init__``):
        fdc_id:    identifier used to look the food up in the ``food`` table.
        name:      display name of the food.
        nutrients: list of nutrient records. ``from_fdc_id`` builds each record
                   as a dict with the keys 'name', 'amount_per_100g', 'id' and
                   'unit'; ``macro_ratio`` reads 'id' and 'amount_per_100g'.
    """

    def __init__(self, fdc_id, name, nutrients):
        self.fdc_id = fdc_id
        self.name = name
        self.nutrients = nutrients

    @staticmethod
    def from_fdc_id(fdc_id):
        """Build a ``Food`` from the module-level ``food`` and ``nutrient`` tables.

        One nutrient record is produced per entry of ``nutrient_definitions``,
        in that order.

        Raises:
            IndexError: if ``fdc_id`` is not present in ``food``, or if the food
                has no row in ``nutrient`` for one of the defined nutrient ids
                (``.iloc[0]`` on an empty selection).
        """
        relevant_food = food[food['fdc_id'] == fdc_id].iloc[0]
        nutrient_rows = nutrient[nutrient['fdc_id'] == fdc_id]
        nutrient_objects = []
        for n_def in nutrient_definitions:
            relevant_nutrient = nutrient_rows[nutrient_rows['nutrient_id'] == n_def['id']].iloc[0]
            nutrient_objects.append({
                'name': n_def['name'],
                'amount_per_100g': relevant_nutrient['amount_per_100g'],
                'id': n_def['id'],
                'unit': n_def['unitName']
            })
        return Food(fdc_id, name=relevant_food['name'], nutrients=nutrient_objects)

    def macro_ratio(self):
        """Draw a pie chart of the amounts stored under nutrient ids 255, 203, 204, 205.

        The slices are labelled "water", "protein", "fat" and "carb" in that
        order. The chart is drawn on the current matplotlib figure; nothing is
        returned.

        Raises:
            IndexError: if one of the four nutrient ids is missing from
                ``self.nutrients``.
        """
        def amount_of(nutrient_id):
            # First record with a matching id; [0] fails loudly if there is none.
            return [n for n in self.nutrients if n['id'] == nutrient_id][0]['amount_per_100g']

        shares = [amount_of(nutrient_id) for nutrient_id in (255, 203, 204, 205)]
        plt.pie(shares, labels=["water", "protein", "fat", "carb"], autopct='%1.1f%%')

    def __repr__(self):
        return f"{self.name} has {len(self.nutrients)} nutrients"

    def __str__(self):
        # repr(self) binds self once; the previous self.__repr__(self) passed
        # it twice and raised TypeError on every str()/print() of a Food.
        return repr(self)

In [32]:
def construct(food_id):
    """Build a one-row DataFrame describing the food with id ``food_id``.

    The frame has a 'name' column followed by one column per nutrient (named
    after the nutrient). Amounts whose unit is micrograms are divided by
    1,000,000 and amounts in milligrams by 1,000; every other unit (including
    'kcal') is copied through unchanged.

    Args:
        food_id: id forwarded to ``Food.from_fdc_id``.

    Returns:
        pandas.DataFrame with exactly one row.
    """
    # Unicode has two look-alike micro prefixes (U+00B5 micro sign and U+03BC
    # Greek small mu); accept both so the scaling does not depend on which one
    # the data source happens to use.
    unit_divisors = {
        '\u00b5g': 1000000,
        '\u03bcg': 1000000,
        'mg': 1000,
    }

    item = Food.from_fdc_id(food_id)

    # Collect the whole row first and create the frame once, rather than
    # inserting columns into an existing DataFrame one at a time.
    row = {"name": [item.name]}
    for entry in item.nutrients:
        amount = entry['amount_per_100g']
        divisor = unit_divisors.get(entry['unit'])
        row[entry['name']] = [amount if divisor is None else amount / divisor]
    return pd.DataFrame(row)

In [33]:
# One single-row frame per food id (progress shown by tqdm), stacked with a
# single concat into the table the recommender scores against.
frames = [construct(fdc_id) for fdc_id in tqdm(food.fdc_id)]
data = pd.concat(frames)

100%|███████████████████████████████████████| 5624/5624 [01:56<00:00, 48.40it/s]


In [34]:
def choose_profile(stage_gender, age_in_years):
    """Return the first ``intake_config`` entry matching a life stage and age.

    An entry matches when its 'lifeStageGroup' equals ``stage_gender`` and
    ``age_in_years`` lies within its age range, both ends inclusive. The range
    bounds are given in years plus months and are converted to fractional years.

    Returns:
        The matching entry, or ``None`` when no entry matches.
    """
    def covers(entry):
        bounds = entry['profile']
        youngest = (bounds['minAgeMonths'] / 12) + bounds['minAgeYears']
        oldest = (bounds['maxAgeMonths'] / 12) + bounds['maxAgeYears']
        return youngest <= age_in_years <= oldest and bounds['lifeStageGroup'] == stage_gender

    return next((entry for entry in intake_config if covers(entry)), None)

def parse_profile(stage_gender, age_in_years):
    """Return the matching intake profile as a one-row DataFrame.

    Each entry of the profile's 'RDIs' mapping is matched against the names in
    ``nutrient_definitions``: the first word of the entry label (the first two
    words when the label contains "vitamin" or "total", case-insensitively)
    must occur inside the nutrient name. The unit is read from the single
    parenthesised group left after "/d" is removed from the label. Microgram
    amounts are divided by 1,000,000, milligram amounts by 1,000 and gram
    amounts are kept as they are; entries with any other unit, or with more or
    fewer than one parenthesised group, are skipped.

    Args:
        stage_gender: life-stage group forwarded to ``choose_profile``.
        age_in_years: age forwarded to ``choose_profile``.

    Returns:
        pandas.DataFrame with one column per matched nutrient definition
        (named after the definition) and a single row; empty if nothing matched.

    Raises:
        ValueError: if ``choose_profile`` finds no profile for the arguments.
    """
    import re  # local import: none of the notebook's import cells provide `re`

    # Unit tag -> divisor. None means "keep the amount unchanged", so integer
    # gram amounts are not turned into floats. Both Unicode micro prefixes
    # (U+03BC Greek small mu, U+00B5 micro sign) are accepted.
    divisors = {
        '(\u03bcg)': 1000000,
        '(\u00b5g)': 1000000,
        '(mg)': 1000,
        '(g)': None,
    }

    selected = choose_profile(stage_gender, age_in_years)
    if selected is None:
        # Previously this surfaced as "'NoneType' object is not subscriptable".
        raise ValueError(
            f"no intake profile for lifeStageGroup={stage_gender!r}, "
            f"age_in_years={age_in_years!r}"
        )

    # Parse every profile label once up front; this does not depend on the
    # nutrient definition being examined.
    targets = []  # (search text, raw amount, divisor)
    for label, amount in selected['RDIs'].items():
        # Parenthesised groups with no nested parentheses or '*' inside.
        groups = re.findall(r"\([^()*]*\)", label.replace("/d", ""))
        if len(groups) != 1 or groups[0] not in divisors:
            continue
        word_count = 2 if any(w in label.lower() for w in ("vitamin", "total")) else 1
        search_for = " ".join(label.split(" ")[:word_count])
        targets.append((search_for, amount, divisors[groups[0]]))

    row = {}
    for n_def in nutrient_definitions:
        for search_for, amount, divisor in targets:
            if search_for in n_def['name']:
                # A later matching entry overwrites an earlier one for the
                # same nutrient definition.
                row[n_def['name']] = [amount if divisor is None else amount / divisor]
    return pd.DataFrame(row)


In [99]:
import math

def find_best_vector(current_state, target_vector, sample):
    """Rank candidate vectors by how close they bring the state to the target.

    For each candidate ``v`` the Euclidean distance between ``target_vector``
    and ``current_state + v`` is computed. Distances are rescaled so the
    closest candidate scores 1 and the farthest 0, then normalised so the
    scores add up to 1.

    Args:
        current_state: sequence of numbers, the amounts already accumulated.
        target_vector: sequence of numbers, the amounts aimed for.
        sample: iterable of candidate vectors (sequences of numbers).

    Returns:
        List of ``(vector, share)`` tuples sorted by ``share`` in descending
        order; candidates with equal shares keep their input order. An empty
        sample yields an empty list. When every candidate is equally distant
        (including a single candidate) each gets the same share
        ``1 / len(sample)``.
    """
    candidates = list(sample)
    if not candidates:
        return []

    distances = [
        math.sqrt(sum((t - (a + v)) ** 2 for t, a, v in zip(target_vector, current_state, vector)))
        for vector in candidates
    ]
    nearest = min(distances)
    farthest = max(distances)
    spread = farthest - nearest

    if spread == 0:
        # All candidates tie; the min-max rescaling below would divide by zero.
        shares = [1 / len(candidates)] * len(candidates)
    else:
        closeness = [(farthest - d) / spread for d in distances]
        total = sum(closeness)  # >= 1: the nearest candidate contributes exactly 1
        shares = [c / total for c in closeness]

    return sorted(zip(candidates, shares), key=lambda pair: pair[1], reverse=True)

def _recommend(curr_state, stage_gender, age_in_years):
    ret_stack = pd.DataFrame()
    ret = []
    profile = parse_profile(stage_gender, age_in_years)
    
    ucm = curr_state[profile.columns.values].sum(axis=0)
    dbm = data[profile.columns.values]
    profile = profile[dbm.columns.values]
    profile = profile[ucm.index.values]
    
    profile = profile.fillna(value=0)
    dbm = dbm.fillna(value=0)
    ucm.fillna(value=0, inplace=True)


    needed = (profile.values - ucm.values)
    
    
    # find and optimize for needed
    for i in tqdm(range(len(dbm))):
        r = pd.DataFrame()
        r['food_name'] = [data.iloc[i]['name']]
        r['calories'] = [data.iloc[i]['Energy']]
        r['protien'] = [data.iloc[i]['Protein']]
        r['carbohydrates'] = [data.iloc[i]['Carbohydrate, by difference']]
        r['fats'] = [data.iloc[i]['Total lipid (fat)']]
        r['water_content'] = [data.iloc[i]['Water']]
        dist_from_perf = (((abs(needed - dbm.iloc[i].values))))
        r['nutrient_metrics'] = [sum(dist_from_perf[0])]
        ret.append(r)
    
    ret_stack = pd.concat(ret)
    ret_stack = ret_stack.sort_values(by=['nutrient_metrics'])

    return ret_stack

def recommend(curr_state, stage_gender, age_in_years, cal, high_prot, drink):
    """Narrow the scored foods from ``_recommend`` down by user preferences.

    Foods with 30 or more in 'fats' are dropped first. Three stages follow,
    and after each one only the first quarter of the remaining rows is kept:

    1. calories: sorted descending when ``cal > 1``, ascending when
       ``cal < 1``, left in the incoming order otherwise;
    2. protein: sorted descending when ``high_prot`` is truthy;
    3. water: when ``drink`` is truthy, only rows with 'water_content' above
       70 are kept and sorted descending; otherwise rows are sorted ascending.

    Returns:
        The rows that survive all three stages.
    """
    def first_quarter(frame):
        return frame[:int(len(frame) / 4)]

    shortlist = _recommend(curr_state, stage_gender, age_in_years)
    shortlist = shortlist[shortlist.fats < 30]

    # Stage 1: calorie preference.
    if cal > 1:
        shortlist = shortlist.sort_values(by=['calories'], ascending=False)
    elif cal < 1:
        shortlist = shortlist.sort_values(by=['calories'], ascending=True)
    shortlist = first_quarter(shortlist)

    # Stage 2: protein preference.
    if high_prot:
        shortlist = shortlist.sort_values(by=['protien'], ascending=False)
    shortlist = first_quarter(shortlist)

    # Stage 3: drinks versus solid foods, judged by water content.
    if drink:
        watery = shortlist[shortlist.water_content > 70]
        shortlist = watery.sort_values(by=['water_content'], ascending=False)
    else:
        shortlist = shortlist.sort_values(by=['water_content'], ascending=True)

    return first_quarter(shortlist)

# Example run: rows 1000-1014 of `data` stand in for the foods already eaten.
h = recommend(
    curr_state=data.iloc[1000:1015],
    stage_gender='male',
    age_in_years=25,
    cal=1,
    high_prot=False,
    drink=False,
)
    
    

100%|██████████████████████████████████████| 5624/5624 [00:09<00:00, 595.72it/s]


In [100]:
# Display the frame returned by the recommend(...) call that was assigned to h.
h

Unnamed: 0,food_name,calories,protien,carbohydrates,fats,water_content,nutrient_metrics
0,Nutritional powder mix (EAS Soy Protein Powder),405.0,47.6199999999999974,43.9399999999999977,3.5699999999999998,2.8300000000000001,85.9146129000000087
0,"Milk, dry, not reconstituted, fat free (skim)",362.0,36.1599999999999966,51.9799999999999969,0.7700000000000000,3.1600000000000001,86.3422900999999996
0,"Milk, dry, not reconstituted, NS as to fat con...",362.0,36.1599999999999966,51.9799999999999969,0.7700000000000000,3.1600000000000001,86.3422900999999996
0,"Papad, grilled or broiled",371.0,25.5599999999999987,59.8699999999999974,3.2500000000000000,3.4900000000000002,95.2230947999999842
0,Nutritional powder mix (Kellogg's Special K20 ...,380.0,35.2000000000000028,58.3999999999999986,0.6000000000000000,3.9500000000000002,89.3354219000000001
...,...,...,...,...,...,...,...
0,"Bread, cinnamon, toasted",293.0,10.3599999999999994,54.0700000000000003,3.9399999999999999,29.3399999999999999,87.7332431999999898
0,"Bread, whole wheat, made from home recipe or p...",293.0,11.4000000000000004,51.6099999999999994,4.6799999999999997,29.3399999999999999,85.9318738999999994
0,"Bread, NS as to major flour, toasted",293.0,10.3599999999999994,54.0700000000000003,3.9399999999999999,29.3399999999999999,87.7332431999999898
0,"Bread, pita with fruit",279.0,8.1600000000000001,59.5099999999999980,1.0500000000000000,29.4200000000000017,90.2574350000000010


In [101]:
# Sum of the 'Energy' column over rows 1000-1014 of `data`.
data.iloc[1000:1015]["Energy"].sum()

1871.0