In [1]:
import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize

In [None]:
# !head data/mfp-diaries.tsv

## Exploratory Data Analysis

In [2]:
# Load the tab-separated diary export. The file carries no header row
# (header=None), so the four column names are supplied explicitly.
df = pd.read_csv(
    'data/mfp-diaries.tsv',
    header=None,
    names=['userId', 'diary_date', 'food_entries', 'daily_goal'],
    sep='\t',
)

In [3]:
# Preview the first five diary rows.
df.head(n=5)

Unnamed: 0,userId,diary_date,food_entries,daily_goal
0,1,2014-09-14,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2924}..."
1,1,2014-09-15,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2430}..."
2,1,2014-09-16,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 1862}..."
3,1,2014-09-17,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2251}..."
4,1,2014-09-18,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2001}..."


In [4]:
# Number of distinct userId values; dropna=False keeps a missing id counted
# as its own value, exactly as len(Series.unique()) does.
df['userId'].nunique(dropna=False)

9896

In [None]:
# entry = json.loads(df.iloc[2, 2])
# entry

In [None]:
# entry[0].keys()

## UserId, Entry_Date, Daily_Goal  (Need to Parse Data)

In [5]:
# Keep every column except the raw food_entries JSON.
df_goal = df.drop('food_entries', axis='columns')

In [6]:
df_goal.head()

Unnamed: 0,userId,diary_date,daily_goal
0,1,2014-09-14,"{""total"": [{""name"": ""Calories"", ""value"": 2924}..."
1,1,2014-09-15,"{""total"": [{""name"": ""Calories"", ""value"": 2430}..."
2,1,2014-09-16,"{""total"": [{""name"": ""Calories"", ""value"": 1862}..."
3,1,2014-09-17,"{""total"": [{""name"": ""Calories"", ""value"": 2251}..."
4,1,2014-09-18,"{""total"": [{""name"": ""Calories"", ""value"": 2001}..."


In [7]:
# Strip the identifying columns so only the columns still to be parsed remain.
parse_df = df_goal.drop(['userId', 'diary_date'], axis='columns')
parse_df.tail(n=5)

Unnamed: 0,daily_goal
587182,"{""total"": [{""name"": ""Calories"", ""value"": 1979}..."
587183,"{""total"": [{""name"": ""Calories"", ""value"": 2141}..."
587184,"{""total"": [{""name"": ""Calories"", ""value"": 543},..."
587185,"{""total"": [{""name"": ""Calories"", ""value"": 2024}..."
587186,"{""total"": [{""name"": ""Calories"", ""value"": 1496}..."


In [8]:
# Decode the daily_goal JSON of the row labelled 0 to inspect its structure.
first_goal_json = parse_df.loc[0, 'daily_goal']
tg_dict = json.loads(first_goal_json)
tg_dict

{'total': [{'name': 'Calories', 'value': 2924},
  {'name': 'Carbs', 'value': 340},
  {'name': 'Fat', 'value': 114},
  {'name': 'Protein', 'value': 186},
  {'name': 'Sodium', 'value': 3658},
  {'name': 'Sugar', 'value': 109}],
 'goal': [{'name': 'Calories', 'value': 3173},
  {'name': 'Carbs', 'value': 396},
  {'name': 'Fat', 'value': 105},
  {'name': 'Protein', 'value': 160},
  {'name': 'Sodium', 'value': 2300},
  {'name': 'Sugar', 'value': 119}]}

## Parse Function

In [9]:
"""Creates list of keys and values from the first dictionary, and zips them together"""
def daily_totals(tg_dict):
    """Flatten the 'total' section of a parsed daily_goal record.

    Parameters
    ----------
    tg_dict : dict
        Parsed daily_goal JSON. Its 'total' value is a list of
        ``{'name': ..., 'value': ...}`` entries.

    Returns
    -------
    dict
        Maps ``'total_<lower-cased name>'`` to the entry's value. If a name
        appears more than once, the last entry wins.

    Raises
    ------
    KeyError
        If 'total' is missing, or an entry lacks 'name' or 'value'.
    """
    # The prefix is spelled out instead of being read from the dict's first
    # key: the values always come from tg_dict['total'], so the labels must
    # say "total" regardless of the order in which the JSON listed its keys.
    return {
        "total_" + entry['name'].lower(): entry['value']
        for entry in tg_dict['total']
    }

"""Creates list of keys and values from the second dictionary, and zips them together"""
def daily_goals(tg_dict):
    """Flatten the 'goal' section of a parsed daily_goal record.

    Parameters
    ----------
    tg_dict : dict
        Parsed daily_goal JSON. Its 'goal' value is a list of
        ``{'name': ..., 'value': ...}`` entries.

    Returns
    -------
    dict
        Maps ``'goal_<lower-cased name>'`` to the entry's value. If a name
        appears more than once, the last entry wins.

    Raises
    ------
    KeyError
        If 'goal' is missing, or an entry lacks 'name' or 'value'.
    """
    # The prefix is spelled out instead of being read from the dict's second
    # key: the values always come from tg_dict['goal'], so the labels must
    # say "goal" regardless of the order in which the JSON listed its keys.
    return {
        "goal_" + entry['name'].lower(): entry['value']
        for entry in tg_dict['goal']
    }

"""Combines new keys and values for 'Total' and 'Goals' into single dictionary"""
def Merge(dict1, dict2):
    """Return a new dict holding dict1's items overlaid with dict2's items.

    Neither input is modified. When a key is present in both, the value
    from dict2 is kept.
    """
    return {
        key: value
        for mapping in (dict1, dict2)
        for key, value in mapping.items()
    }

"""Iterrates through entire parsed dataframe and appends merged rows"""
def final_parsed_dict(rows=None):
    """Parse daily_goal JSON strings into flat per-row dictionaries.

    Parameters
    ----------
    rows : iterable of str, optional
        JSON strings to parse. When omitted, the values of the
        notebook-level ``parse_df['daily_goal']`` column are used, which is
        what this function always did before the parameter existed.

    Returns
    -------
    list of dict
        One dict per input row: the output of ``daily_goals`` merged with
        the output of ``daily_totals`` for that row.
    """
    if rows is None:
        # Fall back to the global frame so existing zero-argument calls
        # keep working unchanged.
        rows = parse_df['daily_goal'].values

    results = []
    for raw_json in rows:
        tg_dict = json.loads(raw_json)
        results.append(Merge(daily_goals(tg_dict), daily_totals(tg_dict)))
    return results

In [10]:
# One row per diary entry, one column per parsed goal_*/total_* key.
f = pd.DataFrame(data=final_parsed_dict())

In [11]:
# Transposed so the many parsed columns read as rows.
f.head(n=5).transpose()

Unnamed: 0,0,1,2,3,4
goal_calcium,,,,,
goal_calories,3173.0,1572.0,1832.0,1685.0,1597.0
goal_carbs,396.0,196.0,229.0,210.0,199.0
goal_chol,,,,,
goal_fat,105.0,52.0,61.0,56.0,53.0
goal_fiber,,,,,
goal_iron,,,,,
goal_mon fat,,,,,
goal_ply fat,,,,,
goal_potass.,,,,,


In [14]:
# Place the parsed columns beside userId/diary_date, then discard the raw
# daily_goal JSON column now that it has been expanded.
final_final = pd.concat([df_goal, f], axis='columns')
final_final2 = final_final.drop('daily_goal', axis='columns')

In [15]:
# Transposed preview of the combined frame.
final_final2.head(n=5).transpose()

Unnamed: 0,0,1,2,3,4
userId,1,1,1,1,1
diary_date,2014-09-14,2014-09-15,2014-09-16,2014-09-17,2014-09-18
goal_calcium,,,,,
goal_calories,3173,1572,1832,1685,1597
goal_carbs,396,196,229,210,199
goal_chol,,,,,
goal_fat,105,52,61,56,53
goal_fiber,,,,,
goal_iron,,,,,
goal_mon fat,,,,,


In [18]:
# Sum of the first parsed row over column positions 0-16
# (presumably the goal_* columns -- TODO confirm against f.columns).
first_row = f.iloc[0]
g = first_row.iloc[:17].sum()
g

6253.0

In [19]:
# Sum of the first parsed row over column positions 18-33
# (presumably the total_* columns -- TODO confirm against f.columns).
# NOTE(review): position 17 falls in neither this slice nor the `:17` slice
# used for `g`; confirm that skipping it is intended.
first_row = f.iloc[0]
t = first_row.iloc[18:34].sum()
t

7331.0

In [None]:
column = []  # not used by met_goal; kept in case another cell still refers to it


def met_goal(row):
    """Return 1 when a row's summed goal values exceed its summed totals.

    The goal and total values are picked by label prefix ('goal_' and
    'total_') rather than by column position, so the result does not depend
    on how the parsed columns happen to be ordered, and any other labels in
    the row (ids, dates) are ignored.

    Parameters
    ----------
    row : pandas.Series
        One row of the parsed frame, e.g. as passed by
        ``DataFrame.apply(met_goal, axis=1)``.

    Returns
    -------
    int
        1 if sum(goal_*) > sum(total_*), otherwise 0. Missing values are
        skipped by ``Series.sum``.
    """
    goal_labels = [label for label in row.index if str(label).startswith('goal_')]
    total_labels = [label for label in row.index if str(label).startswith('total_')]

    goal_sum = row[goal_labels].sum()
    total_sum = row[total_labels].sum()

    return 1 if goal_sum > total_sum else 0

### Goal:
Create one column for each nutrient entry listed under the `total` key and one for each entry listed under the `goal` key:

- total
    + total_calories
    + total_carbs
    + total_fat
    + total_protein
    + total_sodium
    + total_sugar
- goal
    + goal_calories
    + goal_carbs
    + goal_fat
    + goal_protein
    + goal_sodium
    + goal_sugar

In [None]:
# json_dict.keys()[0]
# Raises an error: 'dict_keys' object does not support indexing. Convert the keys view to a list before indexing, e.g. list(json_dict.keys())[0].