![](ubc_header.png)

# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

#### Created by Silvia Huang
****

## Set up and Import Libraries

In [1]:
#install libraries if needed
#!pip3 install pdpipe
#!pip install watermark

In [2]:
import numpy as np
import pandas as pd
import pdpipe as pdp
import matplotlib.pyplot as plt
import glob
import os
import csv
from itertools import islice
from decimal import Decimal
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
import openpyxl
import pytest

In [3]:
#set the root path, change the directory into the project folder
os.chdir("/Users/silvia/cffs-label")

In [4]:
#enable reading data in the scrolling window 
pd.set_option("display.max_rows", None, "display.max_columns", None)

## Load Data Files

### Set Data File Path

In [5]:
#selecting data file path for the chosen venue ('Open Kitchen', 'Mercante')
filepath_list = glob.glob(os.path.join(os.getcwd(), "data", "raw", "Open Kitchen","*.oc"))

In [6]:
filepath_list

['/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06182021_0938.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06232021_0918.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06182021_0918.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06232021_1141.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06182021_1001.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06232021_1155.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06182021_0927.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06232021_0956.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06232021_1202.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06232021_1111.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06182021_0933.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR_Export_06232021_1150.oc',
 '/Users/silvia/cffs-label/data/raw/Open Kitchen/IPR

### Import Items List

In [7]:
ItemId = []
Description = []
CaseQty = []
CaseUOM = []
PakQty = []
PakUOM = []
InventoryGroup = []

for filepath in filepath_list:
    path = filepath + '/items.xml'
    if os.path.isfile(path):
        xtree = et.parse(path)
        xroot = xtree.getroot()
        for item in xtree.iterfind('Item'):
            ItemId.append(item.attrib['id'])
            Description.append(item.findtext('Description'))
            CaseQty.append(item.findtext('CaseQty'))
            CaseUOM.append(item.findtext('CaseUOM'))
            PakQty.append(item.findtext('PakQty'))
            PakUOM.append(item.findtext('PakUOM'))
            InventoryGroup.append(item.findtext('InventoryGroup'))

        
Items = pd.DataFrame({'ItemId': ItemId, 'Description': Description, 'CaseQty': CaseQty, 
                      'CaseUOM': CaseUOM, 'PakQty': PakQty, 'PakUOM': PakUOM, 'InventoryGroup': InventoryGroup}
                    ).drop_duplicates()

Items.reset_index(drop=True, inplace=True)

In [8]:
Items

Unnamed: 0,ItemId,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-4271,APPLES GRANNY SMITH,113.0,ea,1.0,CT,PRODUCE
1,I-4971,ARTICHOKE 1/4 SALAD CUT TFC,6.0,LG CAN,2.5,Kg,PRODUCE
2,I-2305,BACON PANCETTA,1.0,Kg,1.0,Kg,MEAT
3,I-1207,BAGUETTE FRENCH,24.0,each,1.0,CT,BREAD
4,I-17203,BALSAMIC GLAZE,2.0,bottle,2.0,L,FOOD - GROCERY
5,I-3141,BASE VEG CONC LIQ G/FREE,4.0,pak,946.0,ml,FOOD - GROCERY
6,I-3619,BAY LEAF WHL SHAKER TFC,8.0,each,84.0,g,SPICES
7,I-59287,BEEF GRD MUSH/ONION BLEND,5.0,Kg,1.0,Kg,MEAT
8,I-4521,BEETS BC,25.0,lb,1.0,lb,PRODUCE
9,I-13050,BEETS GOLDEN UBC,1.0,lb,1.0,lb,PRODUCE


In [9]:
Items.shape

(579, 7)

In [10]:
Items.dtypes

ItemId            object
Description       object
CaseQty           object
CaseUOM           object
PakQty            object
PakUOM            object
InventoryGroup    object
dtype: object

In [11]:
path = os.path.join(os.getcwd(), "data", "preprocessed", "Items_List.csv")
Items.to_csv(path, index = False, header = True)

### Import Ingredients List

In [12]:
IngredientId = []
Conversion = []
InvFactor = []
Qty = []
Recipe = []
Uom = []

for filepath in filepath_list:
    path = filepath + '/Ingredients.xml'
    if os.path.isfile(path):
        xtree = et.parse(path)
        xroot = xtree.getroot()
        for x in xtree.iterfind('Ingredient'):
            IngredientId.append(x.attrib['ingredient'])
            Conversion.append(x.attrib['conversion'])
            InvFactor.append(x.attrib['invFactor'])
            Qty.append(x.attrib['qty'])
            Recipe.append(x.attrib['recipe'])
            Uom.append(x.attrib['uom'])
    
Ingredients = pd.DataFrame({'IngredientId': IngredientId, 'Qty': Qty,'Uom': Uom, 'Conversion': Conversion, 
                      'InvFactor': InvFactor,'Recipe': Recipe}).drop_duplicates()

Ingredients.reset_index(drop=True, inplace=True)

In [13]:
Ingredients

Unnamed: 0,IngredientId,Qty,Uom,Conversion,InvFactor,Recipe
0,P-18746,1.0,Kg,1.0,1.0,P-10241
1,I-3388,1.0,L,1.0,0.3058,P-10496
2,I-4660,2.27,Kg,2.20462,0.6942,P-10496
3,I-3451,2.56,L,1.0,1.28,P-13933
4,I-4679,1.0,BUNCH,1.0,0.0063,P-18318
5,I-4793,10.0,Kg,2.20462,1.2048,P-18746
6,I-3643,225.0,g,0.001,0.1837,P-18907
7,I-6026,1000.0,g,1.0,0.8163,P-18907
8,I-6807,2.27,Kg,2.20462,1.4188,P-19175
9,I-4626,10.0,CT,1.0,2.6316,P-21077


In [14]:
Ingredients.shape

(5240, 6)

In [15]:
Ingredients.dtypes

IngredientId    object
Qty             object
Uom             object
Conversion      object
InvFactor       object
Recipe          object
dtype: object

In [16]:
path = os.path.join(os.getcwd(), "data", "preprocessed", "Ingredients_List.csv")
Ingredients.to_csv(path, index = False, header = True)

### Import Preps List

In [17]:
PrepId = []
Description = []
PakQty = []
PakUOM = []
InventoryGroup = []

for filepath in filepath_list:
    path = filepath + '/Preps.xml'
    if os.path.isfile(path):
        xtree = et.parse(path)
        xroot = xtree.getroot()
        for x in xtree.iterfind('Prep'):
            PrepId.append(x.attrib['id'])
            Description.append(x.findtext('Description'))
            PakQty.append(x.findtext('PakQty'))
            PakUOM.append(x.findtext('PakUOM'))
            InventoryGroup.append(x.findtext('InventoryGroup'))
    
Preps = pd.DataFrame({'PrepId': PrepId, 'Description': Description,
                  'PakQty': PakQty, 'PakUOM':PakUOM, 'InventoryGroup': InventoryGroup}).drop_duplicates()

Preps.reset_index(drop=True, inplace=True)

In [18]:
Preps

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup
0,P-55516,BAKED|Lasagna|Spin Mushroom,5.55,Kg,
1,P-54666,BAKED|Pasta|Chicken Alfredo,6.176,Kg,
2,P-54664,BAKED|Pasta|Chorizo Penne,7.36,Kg,
3,P-56502,BAKED|Pasta|Shrimp Pesto,5.76,Kg,
4,P-56433,BATCH|Shrimp Remoulade,1.6,Kg,
5,P-50500,CHIFFONADE|Basil,190.0,g,
6,P-24750,CHOPPED|Cilantro,0.5,Kg,
7,P-53735,CHOPPED|pickle,900.0,g,
8,P-58450,COOKED|Bahn Mi Chicken,8.5,Kg,
9,P-26068,COOKED|Caramelized Onion,1.2,Kg,PREP


In [19]:
Preps.shape

(742, 5)

In [20]:
Preps.dtypes

PrepId            object
Description       object
PakQty            object
PakUOM            object
InventoryGroup    object
dtype: object

In [21]:
path = os.path.join(os.getcwd(), "data", "preprocessed", "Preps_List.csv")
Preps.to_csv(path, index = False, header = True)

### Import Products List

In [22]:
ProdId = []
Description = []
SalesGroup = []

for filepath in filepath_list:
    path = filepath + '/Products.xml'
    if os.path.isfile(path):
        xtree = et.parse(path)
        xroot = xtree.getroot()
        for x in xtree.iterfind('Prod'):
            ProdId.append(x.attrib['id'])
            Description.append(x.findtext('Description'))
            SalesGroup.append(x.findtext('SalesGroup'))
        
Products = pd.DataFrame({'ProdId': ProdId, 'Description': Description, 'SalesGroup': SalesGroup}).drop_duplicates()

Products.reset_index(drop=True, inplace=True)

In [23]:
Products

Unnamed: 0,ProdId,Description,SalesGroup
0,R-61778,ALF|Flatbread|4 Cheese,OK - AL FORNO
1,R-61780,ALF|Flatbread|Apple & Pancetta,OK - AL FORNO
2,R-61749,ALF|Flatbread|BBQ Chicken,OK - AL FORNO
3,R-50859,ALF|Flatbread|Bruschetta,OK - AL FORNO
4,R-50788,ALF|Flatbread|Caprese,OK - AL FORNO
5,R-61781,ALF|Flatbread|Chick Artichoke,OK - AL FORNO
6,R-50858,ALF|Flatbread|Greek,OK - AL FORNO
7,R-61498,ALF|Flatbread|Hawaii Five-0,OK - AL FORNO
8,R-50562,ALF|Flatbread|Italian,OK - AL FORNO
9,R-56337,ALF|Flatbread|Mediterranean,OK - AL FORNO


In [24]:
Products.shape

(441, 3)

In [25]:
Products.dtypes

ProdId         object
Description    object
SalesGroup     object
dtype: object

In [26]:
path = os.path.join(os.getcwd(), "data", "preprocessed", "Products_List.csv")
Products.to_csv(path, index = False, header = True)

### Import Conversions List

In [27]:
ConversionId = []
Multiplier = []
ConvertFromQty = []
ConvertFromUom = []
ConvertToQty = []
ConvertToUom = []

for filepath in filepath_list:
    path = filepath + '/Conversions.xml'
    if os.path.isfile(path):
        xtree = et.parse(path)
        xroot = xtree.getroot()
        for x in xtree.iterfind('Conversion'):
            ConversionId.append(x.attrib['id'])
            Multiplier.append(x.attrib['multiplier'])
            ConvertFromQty.append(x.find('ConvertFrom').attrib['qty'])
            ConvertFromUom.append(x.find('ConvertFrom').attrib['uom'])
            ConvertToQty.append(x.find('ConvertTo').attrib['qty'])
            ConvertToUom.append(x.find('ConvertTo').attrib['uom'])
    
    
Conversions = pd.DataFrame({'ConversionId': ConversionId, 'Multiplier': Multiplier, 'ConvertFromQty': ConvertFromQty,
                           'ConvertFromUom': ConvertFromUom, 'ConvertToQty': ConvertToQty, 'ConvertToUom': ConvertToUom}
                          ).drop_duplicates()

Conversions.reset_index(drop=True, inplace=True)

In [28]:
Conversions

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,,1.0,1.0,XXX,1.0,L
1,,0.87719298,1.0,1.14L,1.14,L
2,,0.66666667,1.0,1.5L,1.5,L
3,,0.57142857,1.0,1.75 L,1.75,L
4,,0.5,1.0,2L,2.0,L
5,,0.25,1.0,4L,4.0,L
6,,0.08333333,1.0,FOOT,12.0,INCH
7,,0.0625,1.0,16L,16.0,L
8,,0.0591716,1.0,1/2LTR,16.9,fl oz
9,,0.03937008,1.0,750ML,25.4,fl oz


In [29]:
Conversions.shape

(294, 6)

In [30]:
Conversions.dtypes

ConversionId      object
Multiplier        object
ConvertFromQty    object
ConvertFromUom    object
ConvertToQty      object
ConvertToUom      object
dtype: object

In [31]:
path = os.path.join(os.getcwd(), "data", "preprocessed", "Conversions_List.csv")
Conversions.to_csv(path, index = False, header = True)