In [1]:
# conda create --no-default-packages -n co2e python=3.7 pandas numpy scipy scikit-learn matplotlib requests
# source activate co2e 
# pip install jupyterlab pulp
# python -m ipykernel install --user --name co2e --display-name "CO2e 3.7"

In [2]:
import sys
import os
import re

import glob
import numpy as np
import pandas as pd

import requests 
import json

from pulp import LpProblem, LpMinimize, LpVariable, lpSum,  LpStatus

# Local settings live in config.json (resolved relative to the working
# directory) so that the export location and the API key are not written
# into the notebook itself.
with open('config.json') as config_file:
    conf = json.load(config_file)
    
# Both keys are required; a missing one raises KeyError right here.
PATH_TO_ATRIFY_EXPORT = conf['PATH_TO_ATRIFY_EXPORT']
ATRIFY_API_KEY = conf['ATRIFY_API_KEY']

# pandas display options: None removes the truncation limit, so frames are
# rendered with all of their columns and rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Get German products

In [4]:
# Collect the path of every product JSON file in the Atrify export.
# The export root is expected to contain sub-folders, each holding *.json files.
path_to_folders = PATH_TO_ATRIFY_EXPORT

json_files_all = []
# os.listdir() returns entries in arbitrary order; sorting makes the resulting
# file list (and everything derived from it) reproducible between runs.
for folder in sorted(os.listdir(path_to_folders)):
    path_to_jsons = os.path.join(path_to_folders, folder)

    # Stray files in the export root are not folders; calling os.listdir()
    # on them would raise NotADirectoryError.
    if not os.path.isdir(path_to_jsons):
        continue

    json_files = [os.path.join(path_to_jsons, pos_json)
                  for pos_json in sorted(os.listdir(path_to_jsons))
                  if pos_json.endswith('.json')]
    # extend() appends in place instead of rebuilding the whole list each time.
    json_files_all.extend(json_files)

# Total number of JSON files found (kept for quick inspection).
c = len(json_files_all)


# Product category CO2 lookup

In [5]:
# CO2e factor per GPC product category, keyed by the category name exactly as
# it appears in the export's "gpcCategoryName" field.
# NOTE(review): values look like kg CO2e per kg of product (they are multiplied
# by the product weight in kg further down) - confirm the unit.
#
# Based on product-category footprints from UK and Danish sources:
# - Tesco Supermarkets
# - The Danish Ministry of Food, Agriculture and Fisheries
# - https://www.taylorfrancis.com/books/e/9781351208475/chapters/10.1201/9781351208475-22
# - Stephen Clune, Enda Crossin, Karli Verghese (2016-04-19), "Systematic review
#   of greenhouse gas emissions for different fresh food categories"
co2e_kg = {
    "Alternative Meat/Poultry/Other Animal Sausages - Prepared/Processed": 6,
    "Alternative Meat/Poultry/Other Animals Species - Prepared/Processed": 6,
    "Baking/Cooking Supplies (Frozen)": 1.5,
    "Baking/Cooking Supplies (Shelf Stable)": 1.5,
    "Beef - Prepared/Processed": 18,
    "Biscuits/Cookies (Frozen)": 2.5,
    "Biscuits/Cookies (Shelf Stable)": 2.5,
    "Bread (Frozen)": 1.25,
    "Bread/Bakery Products Variety Packs": 1.25,
    "Cereal/Grain/Pulse Products Variety Packs": 1.25,
    "Chicken - Prepared/Processed": 1.476,
    "Chicken - Unprepared/Unprocessed": 2.65,
    "Chocolate and Chocolate/Sugar Candy Combinations - Confectionery": 5,
    "Confectionery Based Spreads (Shelf Stable)": 5,
    "Confectionery Products Variety Packs": 5,
    "Desserts (Frozen)": 5,
    "Dough Based Products / Meals - Not Ready to Eat - Savoury (Frozen)": 2,
    "Dried Breads (Shelf Stable)": 1.1105,
    "Egg Based Products / Meals - Not Ready to Eat (Frozen)": 6.9,
    "Fish - Prepared/Processed (Frozen)": 6.9,
    "Fish - Prepared/Processed (Perishable)": 6.9,
    "Flour - Cereal/Pulse (Shelf Stable)": 1.1,
    "Food/Beverage/Tobacco Variety Packs": 2.5,
    "Fruit - Unprepared/Unprocessed (Frozen)": 2.13,
    "Fruit/Nuts/Seeds Mixes - Prepared/Processed (Shelf Stable)": 1,
    "Grain Based Products / Meals - Not Ready to Eat - Savoury (Frozen)": 1,
    "Grain Based Products / Meals - Not Ready to Eat - Savoury (Shelf Stable)": 1,
    "Ice Cream/Ice Novelties (Frozen)": 1,
    "Meat Substitutes (Frozen)": 2.8,
    "Oils/Fats Edible Variety Packs": 9.5,
    "Other Sauces Dipping/Condiments/Savoury Toppings/Savoury Spreads/Marinades (Perishable)": 2,
    "Other Sauces Dipping/Condiments/Savoury Toppings/Savoury Spreads/Marinades (Shelf Stable)": 2,
    "Pasta/Noodles - Not Ready to Eat (Frozen)": 2.8,
    "Pickled Vegetables": 1.5,
    "Pies/Pastries - Sweet (Frozen)": 3,
    "Pies/Pastries - Sweet (Shelf Stable)": 3,
    "Pies/Pastries/Pizzas/Quiches - Savoury (Frozen)": 5,
    "Pork - Prepared/Processed": 3.6,
    "Pork Sausages - Prepared/Processed": 3.6,
    "Potatoes": 0.37,
    "Prepared/Preserved Foods Variety Packs": 3,
    "Sauces - Cooking (Shelf Stable)": 2.25,
    "Snacks Variety Packs": 2,
    "Soups - Prepared (Shelf Stable)": 2,
    "Storage/Haulage Boxes (Empty)": 3,
    "Sugar/Sugar Substitutes (Shelf Stable)": 1,
    "Sweet Potatoes": 0.37,
    "Tomato Ketchup/Ketchup Substitutes (Shelf Stable)": 2.25,
    "Turkey - Prepared/Processed": 1.476,
    "Veal - Prepared/Processed": 19,
    "Vegetable Based Products / Meals - Not Ready to Eat (Frozen)": 3,
    "Vegetable Based Products / Meals - Ready to Eat (Perishable)": 1.17,
    "Vegetables - Prepared/Processed (Frozen)": 2.13,
    "Vegetables - Unprepared/Unprocessed (Frozen)": 1.25,
    "Vinegars": 2.24,
}

In [None]:
# Build one flat record per exported trade item. Identifiers, category,
# packaging, organic / gluten-free claims and weight are read from the export
# JSON; name, Nutri-Score grade and fruit & veg percentage come from the Atrify
# nutrition API; the CO2e estimate is weight (kg) x the category factor in co2e_kg.

head = {"accept": "application/json", "apikey": ATRIFY_API_KEY}

NUTRITION_API_URL = "https://nutritions.lab.atrify.com/nutrition/{}"
# requests applies no timeout unless asked to; without one a single stalled
# connection would block the whole loop indefinitely.
NUTRITION_API_TIMEOUT_S = 10

# Raised when an optional field is absent or the JSON has an unexpected shape.
# Catching only these (instead of a bare `except:`) lets KeyboardInterrupt and
# genuine programming errors propagate.
_MISSING_FIELD_ERRORS = (KeyError, IndexError, TypeError)

_GLUTEN_FREE_ATTRIBUTE = {"_text": "Gluten Free Claim"}


def _dig(node, *path):
    """Follow ``path`` (dict keys / list indices) into nested JSON data.

    Raises KeyError, IndexError or TypeError when a step cannot be taken.
    """
    for step in path:
        node = node[step]
    return node


def _has_gluten_free_claim(node):
    """Return True if any dict in ``node`` maps "gpcAttributeTypeName" to
    exactly {"_text": "Gluten Free Claim"}.

    Walks the parsed structure directly rather than serialising the whole
    document and searching the resulting string.
    """
    if isinstance(node, dict):
        if node.get("gpcAttributeTypeName") == _GLUTEN_FREE_ATTRIBUTE:
            return True
        return any(_has_gluten_free_claim(child) for child in node.values())
    if isinstance(node, list):
        return any(_has_gluten_free_claim(child) for child in node)
    return False


def _read_weight_kg(trade_item):
    """Return the trade item's weight in kilograms.

    Net weight (product without packaging) is preferred; gross weight is used
    when the net weight value or its unit is missing. If neither is present
    the lookup error propagates, because no footprint can be computed.
    """
    weights = _dig(trade_item, "tradeItemInformation", "extension",
                   "tradeItemMeasurementsModule", "tradeItemMeasurements",
                   "tradeItemWeight")
    try:
        unit_code = weights["netWeight"]["@measurementUnitCode"]
        value = weights["netWeight"]["_text"]
    except _MISSING_FIELD_ERRORS:
        unit_code = weights["grossWeight"]["@measurementUnitCode"]
        value = weights["grossWeight"]["_text"]

    if unit_code == 'GRM':
        return float(value) / 1000
    # Every other unit code is taken as-is.
    # NOTE(review): this assumes the remaining codes are all kilograms - confirm.
    return float(value)


def _fetch_nutrition(gtin, headers):
    """Look up (name, Nutri-Score grade, fruit & veg percentage) for ``gtin``.

    Falls back to ("Unknown", "N/A", 0) when the request fails or when the
    response is not JSON / lacks the expected fields.
    """
    fallback = ("Unknown", "N/A", 0)
    try:
        response = requests.get(NUTRITION_API_URL.format(gtin),
                                headers=headers,
                                timeout=NUTRITION_API_TIMEOUT_S)
    except requests.RequestException as error:
        # A transient network problem should not abort the whole run, but it
        # must stay visible so affected records can be refreshed later.
        print("Nutrition lookup failed for gtin {}: {}".format(gtin, error),
              file=sys.stderr)
        return fallback

    try:
        resp = json.loads(response.text)
        return (resp["name"],
                resp["nutriScoreGrade"],
                resp["fruitAndVegetablePercentage"])
    except (ValueError, KeyError, TypeError):
        # Per the original note, this is what happens for non-food items.
        return fallback


def _build_product_record(doc, headers):
    """Turn one parsed export document into the flat product record.

    Raises KeyError when the product's GPC category has no entry in co2e_kg.
    """
    trade_item = doc["tradeItem"]

    # unique product identifier
    gtin = trade_item["gtin"]["_text"]
    # manufacturer identifier, use together with gtin and locale to retrieve
    # product data via api
    provider = trade_item["informationProviderOfTradeItem"]
    gln = provider["gln"]["_text"]
    provider_name = provider["partyName"]["_text"]
    # target market
    tm = trade_item["targetMarket"]["targetMarketCountryCode"]["_text"]

    # product category code - maps to product hierarchy in api
    classification = trade_item["gdsnTradeItemClassification"]
    gpc_category_code = classification["gpcCategoryCode"]["_text"]
    gpc_category_name = classification["gpcCategoryName"]["_text"]

    # Check the category before doing any network I/O, and say which product
    # and category are affected instead of raising a bare KeyError afterwards.
    if gpc_category_name not in co2e_kg:
        raise KeyError("No CO2e factor for GPC category {!r} (gtin {}); "
                       "add it to co2e_kg".format(gpc_category_name, gtin))

    # "Gluten Free Claim" - to be replaced later by
    # nutritionalClaimNutrientElementCode + nutritionalClaimTypeCode
    if _has_gluten_free_claim(doc):
        gluten_claim = "Gluten Free Claim"
    else:
        gluten_claim = "No Gluten Free Claim"

    # Packaging: only the first packaging entry is read; the original author
    # notes that this data is partly wrong. Placeholder to be replaced with an
    # average.
    try:
        packaging_code = _dig(trade_item, "tradeItemInformation", "extension",
                              "packagingInformationModule", "packaging", 0,
                              "packagingMaterial", "packagingMaterialTypeCode",
                              "_text")
    except _MISSING_FIELD_ERRORS:
        packaging_code = "No info"

    # Organic: the default stays the integer 0 (present values are strings) so
    # that the saved records keep their existing format.
    try:
        organic_claim = _dig(trade_item, "tradeItemInformation", "extension",
                             "farmingAndProcessingInformationModule",
                             "tradeItemOrganicInformation", "organicClaim",
                             "organicTradeItemCode", "_text")
    except _MISSING_FIELD_ERRORS:
        organic_claim = 0

    net_weight_kg = _read_weight_kg(trade_item)

    # product name and nutriscore
    name, nutri_score, fruit_veg_pct = _fetch_nutrition(gtin, headers)

    # plastic
    packaging_lower = packaging_code.lower()
    if "plastic" in packaging_lower or "polymer" in packaging_lower:
        plastic = "Plastic"
    else:
        plastic = "No plastic"

    return {"gtin": gtin,
            "name": name,
            "gln": gln,
            "informationProviderOfTradeItem": provider_name,
            "tm": tm,
            "gpcCategoryCode": gpc_category_code,
            "gpcCategoryName": gpc_category_name,
            "packagingMaterialTypeCode": packaging_code,
            "organicClaim": organic_claim,
            "gpcAttributeTypeName": gluten_claim,
            "netWeight": net_weight_kg,
            "co2Category": net_weight_kg * co2e_kg[gpc_category_name],
            "nutriScoreScore": nutri_score,
            "fruitAndVegetablePercentage": fruit_veg_pct,
            "plastic": plastic
            }


product_data = []
product_data2 = []  # not filled in this cell; kept in case other cells use it
for file in json_files_all:

    # Read and close the file before the (slow) API call.
    # NOTE(review): assumes the export is UTF-8 encoded - confirm.
    with open(file, encoding="utf-8") as json_file:
        f = json.load(json_file)

    product_data_point = _build_product_record(f, head)
    product_data.append(product_data_point)


In [10]:
# Sanity check: show the first assembled record to eyeball its fields.
print(product_data[0])

{'gtin': '04260429002634', 'name': {'de': 'Naturreis (Langkorn)'}, 'gln': '4260429410002', 'informationProviderOfTradeItem': 'Nola GmbH', 'tm': '276', 'gpcCategoryCode': '10000601', 'gpcCategoryName': 'Cereal/Grain/Pulse Products Variety Packs', 'packagingMaterialTypeCode': 'POLYMER_LDPE', 'organicClaim': '5', 'gpcAttributeTypeName': 'No Gluten Free Claim', 'netWeight': 5.0, 'co2Category': 6.25, 'nutriScoreScore': 'A', 'fruitAndVegetablePercentage': 0, 'plastic': 'Plastic'}


In [11]:
# Persist the assembled records as JSON.
# The destination can be set with the optional "PATH_TO_PRODUCT_DATA" key in
# config.json, so the notebook also runs on other machines; when the key is
# absent the original location is used.
product_data_path = conf.get('PATH_TO_PRODUCT_DATA',
                             '/home/elena/Dropbox/Plan8/GS1 Hack/product_data_v4.json')
with open(product_data_path, 'w') as fout:
    json.dump(product_data, fout)

# Product Recommendations

# Minimise CO2e for a given nutritional profile

When we collect a month of purchase data, we can help users achieve their personal goals, such as: 

* reduce CO2 footprint whilst maintaining nutritional balance
* reduce calories whilst maintaining nutritional balance
* increase the share (%) of fruit & veg
* reduce plastic and meat consumption whilst maintaining protein levels

How? By giving recommendations on products and on category proportions.

After collecting several months of data, we will be able to see progress over time.

In [67]:
# Linear program that minimises CO2e subject to the nutritional constraints
# added in the following cells.
# PuLP does not allow spaces in problem names (it warns and replaces them with
# underscores), so the name is written with underscores from the start.
prob = LpProblem("Diet_Nutri_CO2e_Problem", LpMinimize)

In [68]:
# Product identifiers (GTINs) of all food items, in DataFrame row order.
# NOTE(review): `df` is not created in any cell visible here - presumably a
# DataFrame built from product_data; confirm it exists on a fresh kernel.
food_items = df['gtin'].tolist()

In [69]:
# Map each GTIN to its CO2e value; these are the "costs" the LP minimises.
costs = {
    gtin: co2e
    for gtin, co2e in zip(df['gtin'], df['co2Category'])
}

In [70]:
# Map each GTIN to its "calories" value.
# Placeholder: real calorie data is not wired in yet, so the numeric
# netWeight column stands in for testing.
calories = {
    gtin: weight
    for gtin, weight in zip(df['gtin'], df['netWeight'])
}

In [None]:
# # Create a dictionary of total fat for all food items
# fat = dict(zip(food_items,df['Total_Fat (g)']))

In [None]:
# # Create a dictionary of carbohydrates for all food items
# carbs = dict(zip(food_items,df['Carbohydrates (g)']))

In [71]:
# One continuous decision variable per food item; the lower bound of 0 keeps
# the optimal amounts from going negative.
food_vars = LpVariable.dicts(
    "Food",
    food_items,
    lowBound=0,
    cat='Continuous',
)

In [72]:
# Objective: minimise the total CO2e of the basket, i.e. the sum over all
# items of (item's CO2e value) x (amount of the item chosen).
prob += lpSum([
    costs[gtin] * food_vars[gtin]
    for gtin in food_items
])

In [73]:
# Keep the basket's total "calories" between 1000 and 2000.
# TODO: swap the fixed bounds for values derived from the user's shopping.
total_calories = lpSum([calories[gtin] * food_vars[gtin] for gtin in food_items])
prob += total_calories >= 1000.0
prob += total_calories <= 2000.0

In [None]:
# add 20% constraints on food groups (sum by prod category) - 20%*(sum by prod category)
# because users are unlikely to dramatically change their behaviour

In [None]:
# # Fat
# prob += lpSum([fat[f] * food_vars[f] for f in food_items]) >= 20.0, "FatMinimum"
# prob += lpSum([fat[f] * food_vars[f] for f in food_items]) <= 50.0, "FatMaximum"

# # Carbs
# prob += lpSum([carbs[f] * food_vars[f] for f in food_items]) >= 130.0, "CarbsMinimum"
# prob += lpSum([carbs[f] * food_vars[f] for f in food_items]) <= 200.0, "CarbsMaximum"

# # Fiber
# prob += lpSum([fiber[f] * food_vars[f] for f in food_items]) >= 60.0, "FiberMinimum"
# prob += lpSum([fiber[f] * food_vars[f] for f in food_items]) <= 125.0, "FiberMaximum"

# # Protein
# prob += lpSum([protein[f] * food_vars[f] for f in food_items]) >= 100.0, "ProteinMinimum"
# prob += lpSum([protein[f] * food_vars[f] for f in food_items]) <= 150.0, "ProteinMaximum"


In [74]:
# Solve the LP, then report the outcome; LpStatus turns the numeric status
# stored on the problem into its text label.
prob.solve()
print("Status: {}".format(LpStatus[prob.status]))

Status: Optimal


In [75]:
# List only the items the solver actually selected (strictly positive amount).
# varValue is None for a variable that has not been assigned a value (for
# example when no solution was found); `None > 0` raises TypeError on
# Python 3, so such variables are skipped explicitly.
for v in prob.variables():
    if v.varValue is not None and v.varValue > 0:
        print(v.name, "=", v.varValue)

Food_NAS_Whole_Orange_Squash_3_litre = 0.033333333
