In [1]:
#Dependencies
import pandas as pd
import json
from pandas.io.json import json_normalize

In [2]:
# reading the JSON data using json.load()
file = '../data/HealthyAll.json'
with open(file) as train_file:
    dict_train = json.load(train_file)

#convert from JSON to pandas DataFrame    
health_df = json_normalize(dict_train)

#Create dataframe with only relevant columns (totalNutrients quantities and the serving size)
reduced = health_df[['recipe.yield', 'recipe.totalNutrients.ENERC_KCAL.quantity', 'recipe.totalNutrients.FAT.quantity', 
                     'recipe.totalNutrients.FASAT.quantity','recipe.totalNutrients.FATRN.quantity', 
                     'recipe.totalNutrients.FAMS.quantity', 'recipe.totalNutrients.FAPU.quantity',
                     'recipe.totalNutrients.CHOCDF.quantity','recipe.totalNutrients.FIBTG.quantity', 
                     'recipe.totalNutrients.SUGAR.quantity', 'recipe.totalNutrients.PROCNT.quantity', 
                     'recipe.totalNutrients.CHOLE.quantity',
                     'recipe.totalNutrients.NA.quantity', 'recipe.totalNutrients.CA.quantity', 
                     'recipe.totalNutrients.MG.quantity', 'recipe.totalNutrients.K.quantity', 
                     'recipe.totalNutrients.FE.quantity', 'recipe.totalNutrients.ZN.quantity', 
                     'recipe.totalNutrients.P.quantity', 'recipe.totalNutrients.VITA_RAE.quantity',
                     'recipe.totalNutrients.VITC.quantity', 'recipe.totalNutrients.THIA.quantity', 
                     'recipe.totalNutrients.RIBF.quantity', 'recipe.totalNutrients.NIA.quantity',
                     'recipe.totalNutrients.VITB6A.quantity', 'recipe.totalNutrients.FOLDFE.quantity',
                     'recipe.totalNutrients.FOLFD.quantity', 'recipe.totalNutrients.VITB12.quantity', 
                     'recipe.totalNutrients.VITD.quantity', 'recipe.totalNutrients.TOCPHA.quantity',
                     'recipe.totalNutrients.VITK1.quantity', 'recipe.totalNutrients.WATER.quantity']].copy()

#Rename columns
newNames = ['Servings', 'Calories', 'Fat', 'Sat_Fat', 'Trans_Fat', 'Mono_Fat', 'Poly_Fat', 'Carbs', 'Fiber',
           'Sugars', 'Protein', 'Cholesterol', 'Sodium', 'Calcium', 'Magnesium', 'Potassium', 'Iron', 'Zinc', 'Phosphorus',
           'Vit_A', 'Vit_C', 'B1', 'B2', 'B3', 'B6', 'Folate_eq', 'Folate_food', 'B12', 'Vit_D', 'Vit_E', 'Vit_K', 'Water']

reduced.columns = newNames

In [3]:
#Build new dataframe (reduced2) with totalNutrients divided by serving size. To standardize the nutrition to 'per serving' values

reduced2 = reduced.copy()

#Adds copies of all columns/serving size
for i in newNames:
    
        reduced2[f'{i}/s'] = reduced2[i]/reduced2['Servings']
        
#Removes original columns
for i in newNames:
    reduced2 = reduced2.drop([i], axis=1)

#Set NaN values to 0
reduced2 = reduced2.fillna(0)


In [4]:
#Add column to categorize 'Healthy status': 1 = healthy, 0 = unhealthy

healthColumn = []
healthLabel = []
for i in range(len(reduced2)):
    healthColumn.append(1)
    healthLabel.append('healthy')
reduced2['Healthy'] = healthColumn
reduced2['HealthyLabel'] = healthLabel
reduced2['Recipe'] = health_df['recipe.label']

In [5]:
reduced2.head()

Unnamed: 0,Servings/s,Calories/s,Fat/s,Sat_Fat/s,Trans_Fat/s,Mono_Fat/s,Poly_Fat/s,Carbs/s,Fiber/s,Sugars/s,...,Folate_eq/s,Folate_food/s,B12/s,Vit_D/s,Vit_E/s,Vit_K/s,Water/s,Healthy,HealthyLabel,Recipe
0,1.0,100.783428,3.706567,0.478814,0.001744,1.19011,1.662028,12.935162,2.35161,0.802769,...,91.229498,91.229498,2.2e-05,0.004374,0.598908,6.954946,58.288488,1,healthy,5-Ingredient Quinoa Salad with Edamame and Car...
1,1.0,347.14,10.43595,5.1867,0.0028,0.836965,1.37193,50.16695,5.145125,11.966875,...,210.80625,45.32625,0.9478,19.04,0.67375,0.252,180.252638,1,healthy,5-Minute Honey Yogurt Quinoa Parfait recipes
2,1.0,186.432627,7.025285,0.571038,0.047022,4.549795,1.417736,27.970713,7.459813,2.474852,...,46.731444,46.731444,0.0,0.0,1.843758,20.966975,124.783506,1,healthy,African-Spiced Black Barley With Okra And Toma...
3,1.0,137.67575,5.925203,0.385312,0.0,3.360537,1.417505,19.81507,3.0048,13.54949,...,73.1375,73.1375,1.5,50.4,5.95085,145.159,177.116418,1,healthy,Almond Butter Spinach Smoothie
4,1.0,198.456,8.087615,1.228474,0.00042,5.501382,0.972848,29.50635,7.25365,4.189283,...,104.624,104.624,0.0,0.0,1.559845,32.576975,229.906301,1,healthy,Aloo Gobhi (Potato and Cauliflower) recipes


In [6]:
#Repeat steps for unhealthy recipes then combine(concat) the two

# reading the JSON data using json.load()
file = '../data/UnHealthyAll.json'
with open(file) as train_file:
    dict_train = json.load(train_file)

    
unhealth_df = json_normalize(dict_train)

unreduced = unhealth_df[[ 'recipe.yield', 'recipe.totalNutrients.ENERC_KCAL.quantity', 'recipe.totalNutrients.FAT.quantity', 
                     'recipe.totalNutrients.FASAT.quantity','recipe.totalNutrients.FATRN.quantity', 
                     'recipe.totalNutrients.FAMS.quantity', 'recipe.totalNutrients.FAPU.quantity',
                     'recipe.totalNutrients.CHOCDF.quantity','recipe.totalNutrients.FIBTG.quantity', 
                     'recipe.totalNutrients.SUGAR.quantity', 'recipe.totalNutrients.PROCNT.quantity', 
                     'recipe.totalNutrients.CHOLE.quantity',
                     'recipe.totalNutrients.NA.quantity', 'recipe.totalNutrients.CA.quantity', 
                     'recipe.totalNutrients.MG.quantity', 'recipe.totalNutrients.K.quantity', 
                     'recipe.totalNutrients.FE.quantity', 'recipe.totalNutrients.ZN.quantity', 
                     'recipe.totalNutrients.P.quantity', 'recipe.totalNutrients.VITA_RAE.quantity',
                     'recipe.totalNutrients.VITC.quantity', 'recipe.totalNutrients.THIA.quantity', 
                     'recipe.totalNutrients.RIBF.quantity', 'recipe.totalNutrients.NIA.quantity',
                     'recipe.totalNutrients.VITB6A.quantity', 'recipe.totalNutrients.FOLDFE.quantity',
                     'recipe.totalNutrients.FOLFD.quantity', 'recipe.totalNutrients.VITB12.quantity', 
                     'recipe.totalNutrients.VITD.quantity', 'recipe.totalNutrients.TOCPHA.quantity',
                     'recipe.totalNutrients.VITK1.quantity', 'recipe.totalNutrients.WATER.quantity']].copy()

unreduced.columns = newNames

unreduced2 = unreduced.copy()

for i in newNames:
    unreduced2[f'{i}/s'] = unreduced2[i]/unreduced2['Servings']
 
for i in newNames:
    unreduced2 = unreduced2.drop([i], axis=1)
    
unreduced2 = unreduced2.fillna(0)

#------changed healthColumn to [0,1]...unhealthy recipes!-------
healthColumn = []
healthLabel = []
for i in range(len(unreduced2)):
    healthColumn.append(0)
    healthLabel.append('unhealthy')
    
unreduced2['Healthy'] = healthColumn
unreduced2['HealthyLabel'] = healthLabel
unreduced2['Recipe'] = health_df['recipe.label']

In [7]:
#Combine the healthy and unhealthy dataframes and reset the index
combined_df = pd.concat([reduced2, unreduced2])
combined_df = combined_df.reset_index()
   #drop second 'index' column produced from the concat 
combined_df = combined_df.drop('index', axis=1)
combined_df.tail()

Unnamed: 0,Servings/s,Calories/s,Fat/s,Sat_Fat/s,Trans_Fat/s,Mono_Fat/s,Poly_Fat/s,Carbs/s,Fiber/s,Sugars/s,...,Folate_eq/s,Folate_food/s,B12/s,Vit_D/s,Vit_E/s,Vit_K/s,Water/s,Healthy,HealthyLabel,Recipe
1844,1.0,885.555968,53.018457,21.638624,0.465476,22.651813,3.703198,41.120828,3.09277,5.914407,...,151.388358,73.868358,2.002937,8.52,2.797755,43.488101,288.697325,0,unhealthy,Stirfried Kale with split mung & coconut
1845,1.0,1199.745198,52.064223,21.695331,0.055055,20.98781,6.19358,132.531893,9.903497,10.157808,...,698.420402,95.898856,0.8554,20.16,3.487766,9.842727,299.033738,0,unhealthy,Strascinati with broccoli
1846,1.0,575.813376,29.716701,17.255565,0.668482,7.652011,1.418145,48.349441,2.459127,2.626872,...,34.096367,34.096367,0.675327,18.282771,0.563872,2.312221,120.323226,0,unhealthy,Strawberry Arugula Quinoa Salad
1847,1.0,223.868965,8.640837,3.993991,0.006804,2.284186,1.262668,26.627345,5.083299,11.188909,...,63.463372,40.945372,0.227013,4.082331,3.523929,25.027532,204.756052,0,unhealthy,Stuffed Potatoes
1848,1.0,453.05303,29.118689,15.106494,0.08253,8.686451,1.648327,34.804224,2.571149,5.466125,...,40.777834,40.777834,0.306823,18.406219,0.9251,12.385577,137.497637,0,unhealthy,Sugar Snap Pea & Barley Salad


In [8]:
#Write to csv. Change ModelData file name for new API data
combined_df.to_csv("../data/ModelData_1.csv", encoding="utf-8", index=False)

In [9]:
#import data
data = pd.read_csv("../data/ModelData_2.csv")
data.head()

Unnamed: 0,Servings/s,Calories/s,Fat/s,Sat_Fat/s,Trans_Fat/s,Mono_Fat/s,Poly_Fat/s,Carbs/s,Fiber/s,Sugars/s,...,Vit_D/s,Vit_E/s,Vit_K/s,Water/s,CarbScore,CholoScore,TotalFatScore,SugarScore,HealthyGradScore,HealthyScore
0,1,100.783428,3.706567,0.478814,0.001744,1.19011,1.662028,12.935162,2.35161,0.802769,...,0.004374,0.598908,6.954946,58.288488,1,1,1,1,4,1
1,1,93.527653,3.372725,1.217548,0.058185,1.620999,0.330805,14.52459,4.714058,6.700123,...,1.065,1.597009,50.059982,176.442965,1,1,1,1,4,1
2,1,101.527163,3.950959,0.675617,0.0,2.540638,0.436856,14.24266,2.691121,3.243868,...,0.28125,1.147884,27.677754,119.293643,1,1,1,1,4,1
3,1,81.389119,3.745693,0.558064,0.0,2.499658,0.50154,10.870846,4.103485,5.607749,...,0.0,2.261851,62.910781,214.230832,1,1,1,1,4,1
4,1,76.27423,3.713546,0.424643,0.0,1.780703,1.329019,9.893677,3.403681,5.222974,...,0.0,1.356905,47.798536,137.342582,1,1,1,1,4,1


Unnamed: 0,Servings/s,Calories/s,Fat/s,Sat_Fat/s,Trans_Fat/s,Mono_Fat/s,Poly_Fat/s,Carbs/s,Fiber/s,Sugars/s,...,Folate_eq/s,Folate_food/s,B12/s,Vit_D/s,Vit_E/s,Vit_K/s,Water/s,Healthy,HealthyLabel,Recipe
0,1.0,100.783428,3.706567,0.478814,0.001744,1.19011,1.662028,12.935162,2.35161,0.802769,...,91.229498,91.229498,2.2e-05,0.004374,0.598908,6.954946,58.288488,1,healthy,5-Ingredient Quinoa Salad with Edamame and Car...
1,1.0,347.14,10.43595,5.1867,0.0028,0.836965,1.37193,50.16695,5.145125,11.966875,...,210.80625,45.32625,0.9478,19.04,0.67375,0.252,180.252638,1,healthy,5-Minute Honey Yogurt Quinoa Parfait recipes
2,1.0,186.432627,7.025285,0.571038,0.047022,4.549795,1.417736,27.970713,7.459813,2.474852,...,46.731444,46.731444,0.0,0.0,1.843758,20.966975,124.783506,1,healthy,African-Spiced Black Barley With Okra And Toma...
3,1.0,137.67575,5.925203,0.385312,0.0,3.360537,1.417505,19.81507,3.0048,13.54949,...,73.1375,73.1375,1.5,50.4,5.95085,145.159,177.116418,1,healthy,Almond Butter Spinach Smoothie
4,1.0,198.456,8.087615,1.228474,0.00042,5.501382,0.972848,29.50635,7.25365,4.189283,...,104.624,104.624,0.0,0.0,1.559845,32.576975,229.906301,1,healthy,Aloo Gobhi (Potato and Cauliflower) recipes


In [11]:
newcindy = combined_df.merge(data, left_on='Calories/s', right_on='Calories/s')

In [12]:
newcindy.head()

Unnamed: 0,Servings/s_x,Calories/s,Fat/s_x,Sat_Fat/s_x,Trans_Fat/s_x,Mono_Fat/s_x,Poly_Fat/s_x,Carbs/s_x,Fiber/s_x,Sugars/s_x,...,Vit_D/s_y,Vit_E/s_y,Vit_K/s_y,Water/s_y,CarbScore,CholoScore,TotalFatScore,SugarScore,HealthyGradScore,HealthyScore
0,1.0,137.67575,5.925203,0.385312,0.0,3.360537,1.417505,19.81507,3.0048,13.54949,...,50.4,5.95085,145.159,177.116418,0,1,1,1,3,1
1,1.0,292.684,12.251076,7.4454,0.465476,3.03882,0.765798,38.538953,10.587028,21.137642,...,8.52,1.805587,83.558444,245.481163,0,1,1,1,3,1
2,1.0,355.75825,9.909223,5.521155,0.316005,2.331924,0.865184,54.879566,4.15625,6.61916,...,13.7325,1.393608,34.685063,131.297206,0,1,1,1,3,1
3,1.0,81.389119,3.745693,0.558064,0.0,2.499658,0.50154,10.870846,4.103485,5.607749,...,0.0,2.261851,62.910781,214.230832,1,1,1,1,4,1
4,1.0,74.442119,3.612578,0.527582,0.0,2.467856,0.457903,9.490296,2.92936,2.637884,...,0.0,1.786836,49.450356,121.611842,1,1,1,1,4,1
