![](ubc_header.png)

# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

#### Created by Silvia Huang
****

## Set up and Import Libraries

In [1]:
# Install third-party libraries if needed: uncomment the lines below and run this cell once.
# (Inside a notebook, `%pip install ...` installs into the running kernel's environment.)
#!pip3 install pdpipe
#!pip install watermark

In [2]:
import numpy as np
import pandas as pd
import pdpipe as pdp
import matplotlib.pyplot as plt
import glob
import os
import csv
from itertools import islice
from decimal import Decimal
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
import openpyxl
import pytest

In [3]:
# Change into the project folder; later cells build their paths from os.getcwd().
# The location can be overridden with the CFFS_ROOT environment variable so the
# notebook is not tied to one machine; when the variable is unset, the original
# author's local checkout path is used, exactly as before.
os.chdir(os.environ.get("CFFS_ROOT", "/Users/silvia/cffs-label"))

In [4]:
# Lift pandas' display limits so tables are rendered in full (scrollable output)
# instead of being truncated to a preview.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

****

## Load Data Files

### Set Data File Path

In [5]:
# Collect the "*.oc" entries for the chosen venue and time range from
# <cwd>/data/raw/OK 21-22 Sep-Dec.
# NOTE(review): glob returns matches in filesystem order; wrap the call in
# sorted() if a stable order across machines is needed.
raw_dir = os.path.join(os.getcwd(), "data", "raw", "OK 21-22 Sep-Dec")
filepath_list = glob.glob(os.path.join(raw_dir, "*.oc"))
filepath_list

['/Users/silvia/cffs-label/data/raw/OK 21-22 Sep-Dec/OK Al Forno_Custom Kitchen_Dim Sum_Global.oc',
 '/Users/silvia/cffs-label/data/raw/OK 21-22 Sep-Dec/OK Square Meal.oc',
 '/Users/silvia/cffs-label/data/raw/OK 21-22 Sep-Dec/OK Sandwich Kitchen_Sides_Soup.oc',
 '/Users/silvia/cffs-label/data/raw/OK 21-22 Sep-Dec/OK Grill Kitchen Break_Grill Kitchen Day_Grill Kitchen Features.oc']

### Import Items List

In [6]:
# Build the Items table from the items.xml file found inside each entry of
# filepath_list. Every <Item> element yields one row: its `id` attribute plus
# the text of the child elements named in item_fields (None when a child is absent).
item_fields = ['Description', 'CaseQty', 'CaseUOM', 'PakQty', 'PakUOM', 'InventoryGroup']
item_records = []

for filepath in filepath_list:
    path = filepath + '/items.xml'
    # Entries that do not contain the file are skipped without error.
    if not os.path.isfile(path):
        continue
    for item in et.parse(path).iterfind('Item'):
        item_records.append({
            'ItemId': item.attrib['id'],
            **{field: item.findtext(field) for field in item_fields},
        })

# One record per row keeps all fields of an item together (no parallel lists to
# keep in sync). Passing `columns` fixes the column order and preserves the
# schema even when no file was found. Exact duplicate rows are dropped
# (presumably the same item exported for several venues -- TODO confirm).
Items = (
    pd.DataFrame(item_records, columns=['ItemId'] + item_fields)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [7]:
# Display the assembled items table.
Items

Unnamed: 0,ItemId,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-7631,5 SPICE POWDER,1.0,ea,1.0,lb,SPICES
1,I-4971,ARTICHOKE 1/4 SALAD CUT TFC,6.0,LG CAN,2.5,Kg,PRODUCE
2,I-4473,AVOCADO (20CT) MX,20.0,CT,1.0,HEAD,PRODUCE
3,I-4973,AVOCADO PULP CHUNKY,12.0,bag,454.0,g,PRODUCE
4,I-4496,BAK CHOY BABY BC,30.0,lb,1.0,lb,PRODUCE
5,I-17203,BALSAMIC GLAZE,2.0,bottle,2.0,L,FOOD - GROCERY
6,I-3141,BASE VEG CONC LIQ G/FREE,4.0,pak,946.0,ml,FOOD - GROCERY
7,I-3619,BAY LEAF WHL SHAKER TFC,8.0,each,84.0,g,SPICES
8,I-2567,BEAN GREEN WHL GRADE A IQF,6.0,bag,1.5,Kg,PRODUCE
9,I-3148,BEANS TURTLE BLK DRY,10.0,Kg,1.0,Kg,FOOD - GROCERY


In [8]:
# (rows, columns) of the items table after dropping duplicates.
Items.shape

(488, 7)

In [9]:
# Column dtypes; the XML values are stored as parsed text, so quantities are not numeric yet.
Items.dtypes

ItemId            object
Description       object
CaseQty           object
CaseUOM           object
PakQty            object
PakUOM            object
InventoryGroup    object
dtype: object

In [10]:
# Write the items table to data/preprocessed/Items_List.csv under the current
# working directory, with a header row and without the index.
path = os.path.join(os.getcwd(), "data", "preprocessed", "Items_List.csv")
# Create the output folder on first run; to_csv does not create missing directories.
os.makedirs(os.path.dirname(path), exist_ok=True)
Items.to_csv(path, index=False, header=True)

### Import Ingredients List

In [11]:
# Build the Ingredients table from the Ingredients.xml file found inside each
# entry of filepath_list. Every <Ingredient> element yields one row made of the
# XML attributes below (output column -> attribute name); a missing attribute
# raises KeyError.
ingredient_attrs = {
    'IngredientId': 'ingredient',
    'Qty': 'qty',
    'Uom': 'uom',
    'Conversion': 'conversion',
    'InvFactor': 'invFactor',
    'Recipe': 'recipe',
}
ingredient_records = []

for filepath in filepath_list:
    path = filepath + '/Ingredients.xml'
    # Entries that do not contain the file are skipped without error.
    if not os.path.isfile(path):
        continue
    for ingredient in et.parse(path).iterfind('Ingredient'):
        ingredient_records.append(
            {column: ingredient.attrib[attr] for column, attr in ingredient_attrs.items()}
        )

# One record per row keeps all attributes of an ingredient line together.
# Passing `columns` fixes the column order and preserves the schema even when
# no file was found. Exact duplicate rows are dropped.
Ingredients = (
    pd.DataFrame(ingredient_records, columns=list(ingredient_attrs))
    .drop_duplicates()
    .reset_index(drop=True)
)

In [12]:
# Display the assembled ingredients table.
Ingredients

Unnamed: 0,IngredientId,Qty,Uom,Conversion,InvFactor,Recipe
0,I-3388,1.0,L,1.0,0.3058,P-10496
1,I-4660,2.27,Kg,2.20462,0.6942,P-10496
2,I-4598,1.0,CT,1.0,0.0013,P-12954
3,I-4679,1.0,BUNCH,1.0,0.0063,P-18318
4,I-4792,10.0,Kg,2.20462,1.2048,P-18746
5,I-3643,225.0,g,0.001,0.1837,P-18907
6,I-6026,1000.0,g,1.0,0.8163,P-18907
7,I-6807,2.27,Kg,2.20462,1.4188,P-19175
8,I-4626,10.0,CT,1.0,2.6316,P-21077
9,I-4381,1.0,lb,1.0,1.1111,P-24452


In [13]:
# (rows, columns) of the ingredients table after dropping duplicates.
Ingredients.shape

(3227, 6)

In [14]:
# Column dtypes; the XML attribute values are stored as text, so quantities are not numeric yet.
Ingredients.dtypes

IngredientId    object
Qty             object
Uom             object
Conversion      object
InvFactor       object
Recipe          object
dtype: object

In [15]:
# Write the ingredients table to data/preprocessed/Ingredients_List.csv under
# the current working directory, with a header row and without the index.
path = os.path.join(os.getcwd(), "data", "preprocessed", "Ingredients_List.csv")
# Create the output folder on first run; to_csv does not create missing directories.
os.makedirs(os.path.dirname(path), exist_ok=True)
Ingredients.to_csv(path, index=False, header=True)

### Import Preps List

In [16]:
# Build the Preps table from the Preps.xml file found inside each entry of
# filepath_list. Every <Prep> element yields one row: its `id` attribute plus
# the text of the child elements named in prep_fields (None when a child is absent).
prep_fields = ['Description', 'PakQty', 'PakUOM', 'InventoryGroup']
prep_records = []

for filepath in filepath_list:
    path = filepath + '/Preps.xml'
    # Entries that do not contain the file are skipped without error.
    if not os.path.isfile(path):
        continue
    for prep in et.parse(path).iterfind('Prep'):
        prep_records.append({
            'PrepId': prep.attrib['id'],
            **{field: prep.findtext(field) for field in prep_fields},
        })

# One record per row keeps all fields of a prep together. Passing `columns`
# fixes the column order and preserves the schema even when no file was found.
# Exact duplicate rows are dropped.
Preps = (
    pd.DataFrame(prep_records, columns=['PrepId'] + prep_fields)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [17]:
# Display the assembled preps table.
Preps

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup
0,P-56398,BATCH|Guacamole,2.75,Kg,PREP
1,P-24750,CHOPPED|Cilantro,0.5,Kg,
2,P-41574,COOKED|Black Beans,30.0,Kg,PREP
3,P-26068,COOKED|Caramelized Onion,1.2,Kg,PREP
4,P-28258,COOKED|Chow Mein,48.081,Kg,PREP
5,P-50795,COOKED|Corn,300.0,g,
6,P-50497,COOKED|Pasta|Spag|WW,4.0,Kg,PREP
7,P-26143,COOKED|Rice|Brown Basmati,4.536,Kg,PREP
8,P-34121,COOKED|StirFryChicken,4.0,Kg,PREP
9,P-61738,CST|Sauces,60.0,ml,


In [18]:
# (rows, columns) of the preps table after dropping duplicates.
Preps.shape

(493, 5)

In [19]:
# Column dtypes; the XML values are stored as parsed text, so quantities are not numeric yet.
Preps.dtypes

PrepId            object
Description       object
PakQty            object
PakUOM            object
InventoryGroup    object
dtype: object

In [20]:
# Write the preps table to data/preprocessed/Preps_List.csv under the current
# working directory, with a header row and without the index.
path = os.path.join(os.getcwd(), "data", "preprocessed", "Preps_List.csv")
# Create the output folder on first run; to_csv does not create missing directories.
os.makedirs(os.path.dirname(path), exist_ok=True)
Preps.to_csv(path, index=False, header=True)

### Import Products List

In [21]:
# Build the Products table from the Products.xml file found inside each entry
# of filepath_list. Every <Prod> element yields one row: its `id` attribute plus
# the text of the child elements named in product_fields (None when a child is absent).
product_fields = ['Description', 'SalesGroup']
product_records = []

for filepath in filepath_list:
    path = filepath + '/Products.xml'
    # Entries that do not contain the file are skipped without error.
    if not os.path.isfile(path):
        continue
    for product in et.parse(path).iterfind('Prod'):
        product_records.append({
            'ProdId': product.attrib['id'],
            **{field: product.findtext(field) for field in product_fields},
        })

# One record per row keeps all fields of a product together. Passing `columns`
# fixes the column order and preserves the schema even when no file was found.
# Exact duplicate rows are dropped.
Products = (
    pd.DataFrame(product_records, columns=['ProdId'] + product_fields)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [22]:
# Display the assembled products table.
Products

Unnamed: 0,ProdId,Description,SalesGroup
0,R-30154,ADD|Crackers,OK - CUSTOM KITCHEN
1,R-56337,ALF|Flatbread|Mediterranean,OK - AL FORNO
2,R-61779,ALF|Flatbread|Mushroom Pesto,OK - AL FORNO
3,R-50590,ALF|Flatbread|OK,OK - AL FORNO
4,R-50494,ALF|Flatbread|Proscuitto,OK - AL FORNO
5,R-61742,ALF|Flatbread|Shrimp Pesto,OK - AL FORNO
6,R-51217,BNO|Burrito|Super Tofu,OK - CUSTOM KITCHEN
7,R-37170,CST|Beef,OK - CUSTOM KITCHEN
8,R-37171,CST|Chicken,OK - CUSTOM KITCHEN
9,R-51828,CST|Fried Rice Friday|Meat,OK - CUSTOM KITCHEN


In [23]:
# (rows, columns) of the products table after dropping duplicates.
Products.shape

(321, 3)

In [24]:
# Column dtypes; the XML values are stored as parsed text.
Products.dtypes

ProdId         object
Description    object
SalesGroup     object
dtype: object

In [25]:
# Write the products table to data/preprocessed/Products_List.csv under the
# current working directory, with a header row and without the index.
path = os.path.join(os.getcwd(), "data", "preprocessed", "Products_List.csv")
# Create the output folder on first run; to_csv does not create missing directories.
os.makedirs(os.path.dirname(path), exist_ok=True)
Products.to_csv(path, index=False, header=True)

### Import Conversions List

In [26]:
# Build the Conversions table from the Conversions.xml file found inside each
# entry of filepath_list. Every <Conversion> element yields one row: its `id`
# and `multiplier` attributes plus the `qty`/`uom` attributes of its
# <ConvertFrom> and <ConvertTo> children.
conversion_columns = ['ConversionId', 'Multiplier', 'ConvertFromQty',
                      'ConvertFromUom', 'ConvertToQty', 'ConvertToUom']
conversion_records = []

for filepath in filepath_list:
    path = filepath + '/Conversions.xml'
    # Entries that do not contain the file are skipped without error.
    if not os.path.isfile(path):
        continue
    for conversion in et.parse(path).iterfind('Conversion'):
        # Look each child up once; a missing child is None and raises
        # AttributeError on the attribute access below.
        convert_from = conversion.find('ConvertFrom')
        convert_to = conversion.find('ConvertTo')
        conversion_records.append({
            'ConversionId': conversion.attrib['id'],
            'Multiplier': conversion.attrib['multiplier'],
            'ConvertFromQty': convert_from.attrib['qty'],
            'ConvertFromUom': convert_from.attrib['uom'],
            'ConvertToQty': convert_to.attrib['qty'],
            'ConvertToUom': convert_to.attrib['uom'],
        })

# One record per row keeps all values of a conversion together. Passing
# `columns` fixes the column order and preserves the schema even when no file
# was found. Exact duplicate rows are dropped.
Conversions = (
    pd.DataFrame(conversion_records, columns=conversion_columns)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [27]:
# Display the assembled conversions table.
Conversions

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,,1.0,1.0,XXX,1.0,L
1,,0.87719298,1.0,1.14L,1.14,L
2,,0.66666667,1.0,1.5L,1.5,L
3,,0.57142857,1.0,1.75 L,1.75,L
4,,0.5,1.0,2L,2.0,L
5,,0.25,1.0,4L,4.0,L
6,,0.08333333,1.0,FOOT,12.0,INCH
7,,0.0625,1.0,16L,16.0,L
8,,0.0591716,1.0,1/2LTR,16.9,fl oz
9,,0.03937008,1.0,750ML,25.4,fl oz


In [28]:
# (rows, columns) of the conversions table after dropping duplicates.
Conversions.shape

(270, 6)

In [29]:
# Column dtypes; the XML attribute values are stored as text, so multipliers and quantities are not numeric yet.
Conversions.dtypes

ConversionId      object
Multiplier        object
ConvertFromQty    object
ConvertFromUom    object
ConvertToQty      object
ConvertToUom      object
dtype: object

In [30]:
# Write the conversions table to data/preprocessed/Conversions_List.csv under
# the current working directory, with a header row and without the index.
path = os.path.join(os.getcwd(), "data", "preprocessed", "Conversions_List.csv")
# Create the output folder on first run; to_csv does not create missing directories.
os.makedirs(os.path.dirname(path), exist_ok=True)
Conversions.to_csv(path, index=False, header=True)

***
## Data Summary

In [31]:
# Summarise the size of every imported table: DataFrame.shape is a
# (rows, columns) pair, reported here as the 'count' and 'columns' columns.
table_by_name = {
    'Items': Items,
    'Preps': Preps,
    'Ingredients': Ingredients,
    'Products': Products,
    'Conversions': Conversions,
}
datasum = pd.DataFrame(
    [table.shape for table in table_by_name.values()],
    index=list(table_by_name),
    columns=['count', 'columns'],
)
datasum

Unnamed: 0,count,columns
Items,488,7
Preps,493,5
Ingredients,3227,6
Products,321,3
Conversions,270,6
