# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

#### Created by Silvia Huang, CFFS Data Analyst
***

## Part III: Update Information and Mapping

# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

#### Created by Silvia Huang, CFFS Data Analyst
***

## Part III: Update Information and Mapping

## Set up and Import Libraries

In [1]:
#pip install -r requirements.txt

In [1]:
import numpy as np
import pandas as pd
import pdpipe as pdp
import matplotlib.pyplot as plt
import glob
import os
import csv
from itertools import islice
from decimal import Decimal
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
import openpyxl
import pytest
from datetime import datetime

  from tqdm.autonotebook import tqdm


In [2]:
# Set the project root and change the current working directory to it, so that
# the relative "data/..." paths used in later cells resolve inside the project.
# The root can be overridden per machine with the CFFS_ROOT environment variable;
# when that variable is unset the original default location is used.
path = os.environ.get("CFFS_ROOT", "C:/Users/smvan/CFFS-S23/CFFS-22-23")
# path = os.getcwd()
os.chdir(path)

In [3]:
# Lift pandas' display limits so whole tables can be read in the scrolling output window
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

***
## Import Preprocessed Datasets

In [4]:
# Load the unit-cleaned preps table from the cleaning folder and preview it
Preps = pd.read_csv(
    os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_Unit_Cleaned.csv")
)
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-17305,2022 Hummus prep,1600.0,g,N,1600.0,g
1,P-16793,2022 Pulled Pork Prep,6.0,Kg,Y,6000.0,g
2,P-18380,2023 Babaganoush Prep,750.0,g,N,750.0,g
3,P-18458,2023 Fresh burger Patty prep,2.6,Kg,Y,2600.0,g
4,P-18575,2023 Wings Hot sauce prep,1650.0,g,Y,1650.0,g


In [5]:
# Load the greenhouse-gas emission factors (one row per food category) and preview them
ghge_factors = pd.read_csv(
    os.path.join(os.getcwd(), "data", "external", "ghge_factors.csv")
)
ghge_factors.head()

Unnamed: 0,Category ID,Food Category,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,1,beef & buffalo meat,41.3463
1,2,lamb/mutton & goat meat,41.6211
2,3,pork (pig meat),9.8315
3,4,"poultry (chicken, turkey)",4.3996
4,5,butter,11.4316


In [6]:
# Load the nitrogen-loss factors (one row per food category) and preview them
nitro_factors = pd.read_csv(
    os.path.join(os.getcwd(), "data", "external", "nitrogen_factors.csv")
)
nitro_factors.head()

Unnamed: 0,Category ID,Food Category,g N lost/kg product
0,1,beef & buffalo meat,329.5
1,2,lamb/mutton & goat meat,231.15
2,3,pork (pig meat),132.8
3,4,"poultry (chicken, turkey)",116.8
4,5,butter,100.35


In [7]:
# Load the water-use factors (one row per food category) and preview them
water_factors = pd.read_csv(
    os.path.join(os.getcwd(), "data", "external", "water_factors.csv")
)
water_factors.head()

Unnamed: 0,Category ID,Food Category,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,1,beef & buffalo meat,1677.2,61309.0
1,2,lamb/mutton & goat meat,461.2,258.9
2,3,pork (pig meat),1810.3,54242.7
3,4,"poultry (chicken, turkey)",370.3,333.5
4,5,butter,1010.176,50055.168


In [8]:
# Load the land-use factors, rename the raw column to the label used in the
# rest of the notebook, and rescale its values by 1000.
# NOTE(review): the raw column is labelled km^2 per kg; a factor of 1000 would
# correspond to m^2 per g rather than m^2 per kg - confirm the intended unit.
land_factors = pd.read_csv(
    os.path.join(os.getcwd(), "data", "external", "land_factors.csv")
).rename(columns={'km^2 land use/kg product': 'Land Use (m^2)'})
land_factors['Land Use (m^2)'] = land_factors['Land Use (m^2)'] * 1000
land_factors.head()

Unnamed: 0,Category ID,Food Category,Land Use (m^2)
0,1,beef & buffalo meat,0.12645
1,2,lamb/mutton & goat meat,0.1432
2,3,pork (pig meat),0.02102
3,4,"poultry (chicken, turkey)",0.01151
4,5,butter,0.01395


In [9]:
# Read the current items list, which already carries an emission-factor CategoryID per item
Items_Assigned = pd.read_csv(
    os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv")
)
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N


In [10]:
# Earlier input files, kept for reference:
# new_items = pd.read_csv("data/mapping/new items added/New_Items_Added_11.csv")
# new_items = pd.read_csv("data/mapping/new items added/New_Items_2023/New_Items_Added_2023_08_01.csv")

# CHECK FOR CHANGES REQUIRED: the file name below is dated and may need updating
new_items = pd.read_csv(
    "data/mapping/new items added/AMS_data/New_Items_2023/New_Items_Added_2023-11-28.csv"
)
new_items

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N


In [11]:
# Load the items recorded with non-standard units and display them
item_nonstd = pd.read_csv(
    "data/cleaning/AMS_data/Items_Nonstd.csv"
)
item_nonstd

Unnamed: 0,IngredientId,Qty,Uom,Recipe,Description
0,I-1971,0.25,ea,R-17284,LIMES
1,I-15803,1.0,can,R-17284,Red Bull Watermelon
2,I-5505,0.25,HEAD,R-18292,Lettuce - Romaine
3,I-11706,1.0,ea,R-18292,Glry Side Fries 2023
4,I-13308,1.0,ea,R-18292,"TORTILLA 12"" FLOUR PRESSED"
5,I-15514,0.5,slice,R-18292,VEG BACON VEGAN FZN
6,I-4082,3.0,ea,R-15423,bread bun slider brioche
7,I-1789,1.0,HEAD,P-14356,CUCUMBER LONG ENGLISH MED
8,I-2919,6.0,ea,P-14356,Yaki Nori half Cut
9,I-13956,6.0,PORT,P-14356,Sushi Rice


In [12]:
# Load the preps recorded with non-standard units of measure and display them
preps_nonstd = pd.read_csv(
    "data/cleaning/AMS_data/Preps_NonstdUom.csv"
)
preps_nonstd

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-14356,[PREP KAPPA MAKI,6.0,PORT,N,,
1,P-14560,2022 Caesar Wrap prep,1.0,ea,N,,
2,P-9003,2022 Gallery Burger prep,1.0,ea,N,,
3,P-17358,2022 Poutine Prep,1.0,PORT,N,,
4,P-15006,2022 Power Punch Salad prep,1.0,PORT,N,,
5,P-16795,2022 Pulled Pork Sandwich prep,1.0,PORT,N,,
6,P-14552,2022 Vegan Pulled Pork Prep,1.0,ea,N,,
7,P-18327,2023 Appi Platter prep,1.0,PORT,N,,
8,P-18453,2023 GM Tempeh curry prep,1.0,PORT,N,,
9,P-18451,2023 Gm truffle Fries prep,1.0,PORT,N,,


In [13]:
# Load the table of updated unit conversions and display it
conv_updatecov = pd.read_csv(
    "data/cleaning/update/AMS_data/Conv_UpdateConv.csv"
)
conv_updatecov

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.25,ea,16.75,g
1,I-15803,0.004,1.0,can,250.0,ml
2,I-5505,0.001263,0.25,HEAD,49.5,g
3,I-11706,0.008547,1.0,ea,117.0,g
4,I-13308,0.02454,1.0,ea,40.75,g
5,I-15514,0.025,0.5,slice,20.0,g
6,I-4082,0.025,3.0,ea,120.0,g
7,I-1789,0.004975,1.0,HEAD,201.0,g
8,I-2919,0.769231,6.0,ea,7.8,g
9,I-13956,0.011494,6.0,PORT,522.0,g


# Update Conversions_Added.csv

In [18]:
# Load the conversions added so far from the update folder and display them
conversions_added = pd.read_csv(
    os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Conversions_Added.csv")
)
conversions_added

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom


In [19]:
# Stack the updated conversions on top of the previously added ones,
# renumbering the rows from zero
combined_df = pd.concat(
    objs=[conv_updatecov, conversions_added],
    ignore_index=True,
)
combined_df

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.25,ea,16.75,g
1,I-15803,0.004,1.0,can,250.0,ml
2,I-5505,0.001263,0.25,HEAD,49.5,g
3,I-11706,0.008547,1.0,ea,117.0,g
4,I-13308,0.02454,1.0,ea,40.75,g
5,I-15514,0.025,0.5,slice,20.0,g
6,I-4082,0.025,3.0,ea,120.0,g
7,I-1789,0.004975,1.0,HEAD,201.0,g
8,I-2919,0.769231,6.0,ea,7.8,g
9,I-13956,0.011494,6.0,PORT,522.0,g


In [20]:
# Save the combined conversions as the Conversions_Added master file (header row, no index column)
# NOTE(review): this writes under data/cleaning/AMS_data, whereas conversions_added
# was read from data/cleaning/update/AMS_data - confirm that is intended.
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Conversions_Added.csv")
combined_df.to_csv(path_or_buf=path, header=True, index=False)

***
## Import Update Info

In [21]:
# Load the preps whose unit of measure had to be converted to a standard unit by hand;
# StdQty / StdUom hold the manually determined values
Manual_PrepU = pd.read_csv(
    os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Preps_UpdateUom.csv")
)
Manual_PrepU.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-14356,[PREP KAPPA MAKI,6,PORT,N,1511.82,g
1,P-14560,2022 Caesar Wrap prep,1,ea,N,433.59,g
2,P-9003,2022 Gallery Burger prep,1,ea,N,501.82,g
3,P-17358,2022 Poutine Prep,1,PORT,N,705.8,g
4,P-15006,2022 Power Punch Salad prep,1,PORT,N,416.73,g


In [22]:
# Show the row(s) of the DataFrame whose PrepId is P-15006
Manual_PrepU[Manual_PrepU["PrepId"].eq("P-15006")]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
4,P-15006,2022 Power Punch Salad prep,1,PORT,N,416.73,g


In [23]:
# Choose the file holding the new items that already have a category id.
# Earlier input, kept for reference:
# New_Items_Added = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "new items added", "New_Items_Added_10.csv"))
# New_Items_Added.tail(15)

# CHECK FOR CHANGES REQUIRED: the file name below is dated and may need updating
New_Items_Added = pd.read_csv(
    os.path.join(
        os.getcwd(),
        "data", "mapping", "new items added", "AMS_data", "New_Items_2023",
        "New_Items_Added_2023-11-28.csv",
    )
)
New_Items_Added.tail(15)

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
441,I-1052,55,BACARDI LIGHT RUM,200.0,ml,18.5,fl oz,Y
442,I-1945,55,JUICE PINEAPPLE CAN,350.0,ml,18.5,ml,Y
443,I-4825,32,smirnoff green apple,200.0,ml,18.5,fl oz,Y
444,I-5960,55,St Remy Brandy,150.0,ml,18.5,fl oz,Y
445,I-1869,57,VINEGAR FUJI,20.0,L,40.0,fl oz,N
446,I-3251,17,Dashi Kombu dried kelp,3.0,oz,40.0,oz,N
447,I-1093,55,MALIBU liquor,375.0,ml,18.0,fl oz,Y
448,I-1109,55,Meagher's Triple Sec,375.0,ml,18.0,fl oz,Y
449,I-16461,31,Apricot Brandy 375mL,375.0,ml,18.0,fl oz,Y
450,I-16601,55,Sawmill Creek -Rose 4L,8.0,L,18.0,fl oz,Y


In [24]:
# # Import list of items that adjusted GHGe factor manually
# Manual_Factor = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "Manual_Adjust_Factors.csv"))
# Manual_Factor['Land Use (m^2)'] = 0
# Manual_Factor.head()

### Update Correct Uom for Preps

In [25]:
# Copy each manually determined standard quantity and unit into the prep list,
# matching rows on PrepId
for prep_id, std_qty, std_uom in zip(
    Manual_PrepU['PrepId'], Manual_PrepU['StdQty'], Manual_PrepU['StdUom']
):
    is_this_prep = Preps['PrepId'] == prep_id
    Preps.loc[is_this_prep, 'StdQty'] = std_qty
    Preps.loc[is_this_prep, 'StdUom'] = std_uom

In [26]:
# Keep only the first row for each PrepId
Preps.drop_duplicates(subset='PrepId', keep='first', inplace=True)

In [27]:
# Preview the prep list after the unit update and de-duplication
Preps.head(n=5)

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-17305,2022 Hummus prep,1600.0,g,N,1600.0,g
1,P-16793,2022 Pulled Pork Prep,6.0,Kg,Y,6000.0,g
2,P-18380,2023 Babaganoush Prep,750.0,g,N,750.0,g
3,P-18458,2023 Fresh burger Patty prep,2.6,Kg,Y,2600.0,g
4,P-18575,2023 Wings Hot sauce prep,1650.0,g,Y,1650.0,g


In [28]:
# (rows, columns) of the prep list after the manual unit update and de-duplication
Preps.shape

(24, 7)

In [29]:
# Save the updated prep list as Preps_List_Cleaned.csv (header row, no index column)
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_List_Cleaned.csv")
Preps.to_csv(path_or_buf=path, header=True, index=False)

In [30]:
# Preview the existing item-to-category assignments
Items_Assigned.head(n=5)

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N


In [31]:
# Preview the newly added items with their assigned category ids
New_Items_Added.head(n=5)

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N


### Import List of New Items with Emission Factors Category ID Assigned

In [33]:
# Append the newly categorised items to the existing assignment list.
# Exact duplicate rows are removed (first occurrence kept) and the index is
# rebuilt afterwards, so the result carries a contiguous 0..n-1 index.
# Resetting the index before de-duplicating left gaps wherever a row was dropped.
frames = [Items_Assigned, New_Items_Added]
Items_Assigned_Updated = (
    pd.concat(frames, ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)
Items_Assigned_Updated.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N


In [34]:
# Display the combined (existing + new) item assignment list
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N


In [35]:
# (rows, columns) of the combined item assignment list
Items_Assigned_Updated.shape

(456, 8)

In [36]:
# Convert the CategoryID column to a numeric dtype with pd.to_numeric;
# this column is the join key matched against the factor tables' 'Category ID' below
Items_Assigned_Updated['CategoryID'] = pd.to_numeric(Items_Assigned_Updated['CategoryID'])

In [37]:
# Display the item assignment list after the CategoryID numeric conversion
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N


In [38]:
# Write the updated assignment list to Items_List_Assigned.csv, the same file
# Items_Assigned was loaded from (header row, no index column)
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv")
Items_Assigned_Updated.to_csv(path_or_buf=path, header=True, index=False)

## Mapping Items to Footprint Factors

In [39]:
# DEFINITION OF MAPPING: assigning certain attributes to data points based on criteria or predefined rules.

# Map GHG footprint factors
# Left-join the item list onto the GHG factor table: every item is kept, and the
# factor is looked up by matching the items' 'CategoryID' against the factor
# table's 'Category ID'. The result is stored in mapping.
ghge_col = 'Active Total Supply Chain Emissions (kg CO2 / kg food)'
mapping = pd.merge(
    Items_Assigned_Updated,
    ghge_factors.loc[:, ['Category ID', ghge_col]],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)
# Items whose CategoryID has no row in the factor table come back with a NaN
# 'Category ID'; their emission factor is set to zero in one vectorised
# assignment instead of a per-row Python loop.
mapping.loc[mapping['Category ID'].isna(), ghge_col] = 0
# The right-hand join key is only needed for the check above
mapping = mapping.drop(columns=['Category ID'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,0.3942
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,0.0
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,0.4306
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,0.622
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,0.0
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N,1.5225
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N,1.6042
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N,1.6042
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N,1.6042
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N,1.5225


In [40]:
# Map nitrogen footprint factors
# Left-join mapping onto the nitrogen factor table by category id.
nitro_col = 'g N lost/kg product'
mapping = pd.merge(
    mapping,
    nitro_factors.loc[:, ['Category ID', nitro_col]],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)
# Items with no matching category (NaN 'Category ID' after the join) get a factor
# of zero, set in one vectorised assignment instead of a per-row Python loop.
mapping.loc[mapping['Category ID'].isna(), nitro_col] = 0
# The right-hand join key is only needed for the check above
mapping = mapping.drop(columns=['Category ID'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,0.3942,2.7
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,0.0,0.0
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,0.4306,2.7
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,0.622,7.9
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,0.0,6.75
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N,1.5225,14.8
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N,1.6042,5.9
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N,1.6042,5.9
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N,1.6042,5.9
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N,1.5225,14.8


In [41]:
# Map land footprint factors
# Left-join mapping onto the land-use factor table by category id.
land_col = 'Land Use (m^2)'
mapping = pd.merge(
    mapping,
    land_factors.loc[:, ['Category ID', land_col]],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)
# Items with no matching category (NaN 'Category ID' after the join) get a factor
# of zero, set in one vectorised assignment instead of a per-row Python loop.
mapping.loc[mapping['Category ID'].isna(), land_col] = 0
# The right-hand join key is only needed for the check above
mapping = mapping.drop(columns=['Category ID'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,0.3942,2.7,0.00042
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,0.0,0.0,0.0
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,0.4306,2.7,0.00071
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,0.622,7.9,0.0023
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,0.0,6.75,0.0
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N,1.5225,14.8,0.00489
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N,1.6042,5.9,0.00961
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N,1.6042,5.9,0.00961
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N,1.6042,5.9,0.00961
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N,1.5225,14.8,0.00489


In [42]:
# Map water footprint factors
# mapping: DataFrame that is used to assign sustainability-related factors (greenhouse gas
# emissions, nitrogen loss, land use and water footprint) to food items based on their category.
water_cols = ['Freshwater Withdrawals (L/FU)', 'Stress-Weighted Water Use (L/FU)']
mapping = pd.merge(
    mapping,
    water_factors.loc[:, ['Category ID'] + water_cols],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)
# Items with no matching category (NaN 'Category ID' after the join) get both water
# factors set to zero in one vectorised assignment instead of a per-row Python loop.
mapping.loc[mapping['Category ID'].isna(), water_cols] = 0
# Drop the right-hand join key, then keep only the first row for each ItemId
mapping = mapping.drop(columns=['Category ID'])
mapping.drop_duplicates(subset=["ItemId"], inplace=True)
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,0.3942,2.7,0.00042,37.4,1345.5
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,0.0,0.0,0.0,1.0,1.0
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,0.4306,2.7,0.00071,3.5,4.7
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,0.622,7.9,0.0023,54.5,2483.4
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,0.0,6.75,0.0,20.225,1134.925
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N,1.5225,14.8,0.00489,419.2,12821.7
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N,1.6042,5.9,0.00961,0.0,0.0
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N,1.6042,5.9,0.00961,0.0,0.0
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N,1.6042,5.9,0.00961,0.0,0.0
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N,1.5225,14.8,0.00489,419.2,12821.7


In [43]:
# Spot-check the mapped factors for item I-1874
mapping.loc[mapping["ItemId"].eq("I-1874")]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
16,I-1874,38,GARLIC WHOLE PEELED,200.0,g,30.0,oz,N,0.3062,7.9,0.00032,9.9,37.9


In [44]:
# Count the items that have no CategoryID
mapping["CategoryID"].isna().sum()

0

### Manually Adjust Footprint Factor for Specific Items

In [45]:
# # For Manual_Factor: dataframe with items that adjusted GHGe factor manually.
# # It takes the id for Manual_Factor for the index being iterated and if it is equal to item id of mapping dataframe then it sets
# # the values of the columns to the manually adjusted values
# # Note: the values for the columns in mapping DataFrame is adjusted and not Manual_Factor DataFrame
# for index, row in Manual_Factor.iterrows():
#     itemId = Manual_Factor.loc[index, 'ItemId']
#     ghge = Manual_Factor.loc[index, 'Active Total Supply Chain Emissions (kg CO2 / kg food)']
#     nitro = Manual_Factor.loc[index, 'g N lost/kg product']
#     water = Manual_Factor.loc[index, 'Freshwater Withdrawals (L/FU)']
#     land = Manual_Factor.loc[index, 'Land Use (m^2)']
#     str_water = Manual_Factor.loc[index, 'Stress-Weighted Water Use (L/FU)']
#     mapping.loc[mapping['ItemId'] == itemId, 'Active Total Supply Chain Emissions (kg CO2 / kg food)'] = ghge
#     mapping.loc[mapping['ItemId'] == itemId, 'g N lost/kg product'] = nitro
#     mapping.loc[mapping['ItemId'] == itemId, 'Freshwater Withdrawals (L/FU)'] = water
#     mapping.loc[mapping['ItemId'] == itemId, 'Stress-Weighted Water Use (L/FU)'] = str_water
#     mapping.loc[mapping['ItemId'] == itemId, 'Land Use (m^2)'] = land

In [46]:
# Keep only the first row for each ItemId, then list each column's dtype
mapping.drop_duplicates(subset='ItemId', keep='first', inplace=True)
mapping.dtypes

ItemId                                                     object
CategoryID                                                  int64
Description                                                object
CaseQty                                                   float64
CaseUOM                                                    object
PakQty                                                    float64
PakUOM                                                     object
InventoryGroup                                             object
Active Total Supply Chain Emissions (kg CO2 / kg food)    float64
g N lost/kg product                                       float64
Land Use (m^2)                                            float64
Freshwater Withdrawals (L/FU)                             float64
Stress-Weighted Water Use (L/FU)                          float64
dtype: object

In [47]:
# (rows, columns) of the final mapping table
mapping.shape

(456, 13)

In [48]:
# Display the final mapping table (items with all footprint factors attached)
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,0.3942,2.7,0.00042,37.4,1345.5
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,0.0,0.0,0.0,1.0,1.0
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,0.4306,2.7,0.00071,3.5,4.7
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,0.622,7.9,0.0023,54.5,2483.4
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,0.0,6.75,0.0,20.225,1134.925
5,I-13308,24,"TORTILLA 12"" FLOUR PRESSED",1.0,ea,1.0,ea,N,1.5225,14.8,0.00489,419.2,12821.7
6,I-15514,16,VEG BACON VEGAN FZN,0.5,slice,1.0,slice,N,1.6042,5.9,0.00961,0.0,0.0
7,I-16699,16,CHICKEN TENDER Vegan,100.0,g,1.0,g,N,1.6042,5.9,0.00961,0.0,0.0
8,I-17976,16,SO-Cheese Vegan Parm Shred,20.0,g,1.0,g,N,1.6042,5.9,0.00961,0.0,0.0
9,I-4082,24,bread bun slider brioche,3.0,ea,1.0,ea,N,1.5225,14.8,0.00489,419.2,12821.7


In [49]:
# Load the preprocessed ingredients list and display it
ingredients = pd.read_csv(
    "data/preprocessed/AMS_data/Ingredients_List.csv"
)
ingredients

Unnamed: 0,IngredientId,Qty,Uom,Recipe
0,I-1971,0.25,ea,R-17284
1,I-8228,1.0,fl oz,R-17284
2,I-15803,1.0,can,R-17284
3,I-5505,0.25,HEAD,R-18292
4,I-8667,2.0,fl oz,R-18292
5,I-11706,1.0,ea,R-18292
6,I-13308,1.0,ea,R-18292
7,I-15514,0.5,slice,R-18292
8,I-16699,100.0,g,R-18292
9,I-16780,1.5,fl oz,R-18292


In [50]:
# Find the ingredient ids that have no row in mapping, in first-seen order.
map_list = mapping["ItemId"].unique()
# A set gives constant-time membership checks; testing against the array
# rescanned every mapped id for each ingredient.
mapped_ids = set(map_list)
absent_list = [
    item for item in ingredients["IngredientId"].unique() if item not in mapped_ids
]

# absent_list contains the IngredientIds that are not present in mapping but are present in the ingredients DataFrame
print(absent_list)

['I-11706', 'I-16780', 'I-14715', 'I-15427', 'I-13956', 'P-14372', 'P-15606', 'I-16221', 'I-16572', 'I-6243', 'I-17039', 'I-17040', 'I-9013', 'I-13414', 'I-17486', 'I-16570', 'I-16574', 'I-16575', 'I-12266', 'I-17316', 'I-16787', 'P-5814', 'I-17037', 'I-11125', 'I-14905', 'I-15024', 'I-14826', 'I-16856', 'I-16748', 'P-18335', 'I-16844', 'I-17314', 'I-16223', 'I-14504', 'I-15477', 'I-9017', 'P-15013', 'I-16760', 'I-16778', 'I-17352', 'P-15419', 'P-17305', 'I-17378', 'I-17360', 'I-15051', 'I-16571', 'P-16793', 'I-15007', 'I-15008', 'I-15010', 'I-16794', 'I-16860', 'I-16784', 'I-16785', 'I-16786', 'I-16834', 'I-12209', 'I-16789', 'I-18349', 'I-16857', 'I-17350', 'I-15001', 'P-6811', 'I-15486', 'I-16855', 'P-14551', 'I-15401', 'I-16777', 'I-5816', 'P-18381', 'I-15037', 'I-16862', 'I-17013', 'I-17014', 'P-18336', 'I-17941', 'P-18380', 'P-18458', 'I-18274', 'P-18530', 'P-18052', 'I-15038', 'I-15412', 'I-14984', 'I-16994', 'I-18317', 'I-18268', 'I-14150', 'P-18321', 'P-18329', 'I-7119', 'I-14

In [52]:
# Save the mapping DataFrame as Mapping.csv (header row, no index column)
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Mapping.csv")
mapping.to_csv(path_or_buf=path, header=True, index=False)