![](ubc_header.png)

# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

#### Created by Silvia Huang
****

## Set up and Import Libraries

In [1]:
# Install the extra libraries if they are missing (uncomment and run once).
# In a notebook, prefer `%pip install <package>` over `!pip`, so the package is
# installed into the environment of the running kernel.
#!pip3 install pdpipe
#!pip install watermark

In [2]:
import numpy as np
import pandas as pd
import pdpipe as pdp
import matplotlib.pyplot as plt
import glob
import os
import csv
from itertools import islice
from decimal import Decimal
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
import openpyxl
import pytest

In [3]:
# Set the project root and make it the working directory; the data paths below
# are all built from os.getcwd().
# The default is the original author's checkout. Set the CFFS_PROJECT_ROOT
# environment variable to run the notebook from another location without
# editing this cell.
PROJECT_ROOT = os.environ.get("CFFS_PROJECT_ROOT", "/Users/silvia/cffs-label")
os.chdir(PROJECT_ROOT)

In [4]:
# Remove pandas' row and column display limits so complete tables are rendered
# (and can be scrolled) in the notebook output.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

## Load Data Files

### Set Data File Path

In [5]:
# Select the data file paths for the chosen venue and time range.
# Each "*.oc" match is used below as a folder holding the XML exports.
# glob returns matches in arbitrary order, so sort them: the load order (and
# therefore which row drop_duplicates keeps first) is then the same on every run.
filepath_list = sorted(glob.glob(os.path.join(os.getcwd(), "data", "raw", "Harvest 21", "*.oc")))
filepath_list

['/Users/silvia/cffs-label/data/raw/Harvest 21/OK Oct 22 Request.oc']

### Import Items List

In [6]:
# Read items.xml from every folder in filepath_list and build the Items dataframe.
# Values are kept as the raw XML text (findtext gives None when a tag is absent).
item_text_fields = ['Description', 'CaseQty', 'CaseUOM', 'PakQty', 'PakUOM', 'InventoryGroup']
item_records = []

for filepath in filepath_list:
    path = os.path.join(filepath, 'items.xml')
    if not os.path.isfile(path):
        # No items file in this folder: nothing to read.
        continue
    for item in et.parse(path).iterfind('Item'):
        # One record per <Item>, so a row's values cannot drift out of alignment.
        record = {'ItemId': item.attrib['id']}
        for field in item_text_fields:
            record[field] = item.findtext(field)
        item_records.append(record)

# Explicit columns keep the schema even when no file was found.
# Exact duplicate rows (e.g. an item present in more than one folder) are dropped.
Items = (
    pd.DataFrame(item_records, columns=['ItemId'] + item_text_fields)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [7]:
# Display the Items dataframe for a visual check of the parsed values.
Items

Unnamed: 0,ItemId,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-4472,AVOCADO MX,20.0,CT,1.0,CT,PRODUCE
1,I-27410,BACON 3MM NATURALLY SMKD,5.0,Kg,1.0,Kg,MEAT
2,I-17203,BALSAMIC GLAZE,2.0,bottle,2.0,L,FOOD - GROCERY
3,I-3619,BAY LEAF WHL SHAKER TFC,8.0,each,84.0,g,SPICES
4,I-3149,BEANS TURTLE BLACK,6.0,LG CAN,2.84,L,FOOD - GROCERY
5,I-4557,CABBAGE RED(50 LBS) BC,1.0,lb,1.0,CT,PRODUCE
6,I-4589,CARROTS JUMBO BC,50.0,lb,1.0,lb,PRODUCE
7,I-7953,CHEESE BLUE CRUMBLED,2.0,BLOCK,2.0,Kg,DAIRY
8,I-14181,CHEESE CHED SLICED 14G,24.0,pak,18.0,slice,DAIRY
9,I-51962,CHEESE CURD POUTINE BULK,5.0,bag,2000.0,g,DAIRY


In [8]:
# (rows, columns) of the de-duplicated Items dataframe.
Items.shape

(78, 7)

In [9]:
# Column dtypes. The values were stored as XML text without numeric conversion,
# so quantity columns are strings here too.
Items.dtypes

ItemId            object
Description       object
CaseQty           object
CaseUOM           object
PakQty            object
PakUOM            object
InventoryGroup    object
dtype: object

In [10]:
# Save the Items dataframe to data/preprocessed/Items_List.csv (header row, no index column).
path = os.path.join(os.getcwd(), "data", "preprocessed", "Items_List.csv")
# Create the output folder if it is missing, so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(path), exist_ok=True)
Items.to_csv(path, index = False, header = True)

### Import Ingredients List

In [11]:
# Read Ingredients.xml from every folder in filepath_list and build the Ingredients dataframe.
# Maps each output column to the <Ingredient> attribute it is read from; the
# dict order is the column order of the dataframe.
ingredient_attributes = {
    'IngredientId': 'ingredient',
    'Qty': 'qty',
    'Uom': 'uom',
    'Conversion': 'conversion',
    'InvFactor': 'invFactor',
    'Recipe': 'recipe',
}
ingredient_records = []

for filepath in filepath_list:
    path = os.path.join(filepath, 'Ingredients.xml')
    if not os.path.isfile(path):
        # No ingredients file in this folder: nothing to read.
        continue
    for ingredient in et.parse(path).iterfind('Ingredient'):
        # Attribute values stay as text; a missing attribute raises KeyError.
        ingredient_records.append(
            {column: ingredient.attrib[attribute]
             for column, attribute in ingredient_attributes.items()}
        )

# Explicit columns keep the schema even when no file was found.
# Exact duplicate rows are dropped and the index renumbered.
Ingredients = (
    pd.DataFrame(ingredient_records, columns=list(ingredient_attributes))
    .drop_duplicates()
    .reset_index(drop=True)
)

In [12]:
# Display the Ingredients dataframe for a visual check of the parsed values.
Ingredients

Unnamed: 0,IngredientId,Qty,Uom,Conversion,InvFactor,Recipe
0,I-11842,2.0,LOAF,1.0,0.8818,P-13648
1,I-3388,60.0,ml,0.001,26.455,P-13648
2,I-3642,5.0,g,1.0,2.2046,P-13648
3,I-6006,60.0,ml,0.001,26.455,P-13648
4,I-6026,5.0,g,1.0,2.2046,P-13648
5,I-3451,2.56,L,1.0,1.28,P-13933
6,I-4793,10.0,Kg,2.20462,1.2048,P-18746
7,I-3643,225.0,g,0.001,0.1837,P-18907
8,I-6026,1000.0,g,1.0,0.8163,P-18907
9,I-4698,10.0,Kg,4.0,1.3514,P-20239


In [13]:
# (rows, columns) of the de-duplicated Ingredients dataframe.
Ingredients.shape

(203, 6)

In [14]:
# Column dtypes. The XML attribute values were stored as text without numeric
# conversion, so Qty, Conversion and InvFactor are strings here too.
Ingredients.dtypes

IngredientId    object
Qty             object
Uom             object
Conversion      object
InvFactor       object
Recipe          object
dtype: object

In [15]:
# Save the Ingredients dataframe to data/preprocessed/Ingredients_List.csv (header row, no index column).
path = os.path.join(os.getcwd(), "data", "preprocessed", "Ingredients_List.csv")
# Create the output folder if it is missing, so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(path), exist_ok=True)
Ingredients.to_csv(path, index = False, header = True)

### Import Preps List

In [16]:
# Read Preps.xml from every folder in filepath_list and build the Preps dataframe.
# Values are kept as the raw XML text (findtext gives None when a tag is absent).
prep_text_fields = ['Description', 'PakQty', 'PakUOM', 'InventoryGroup']
prep_records = []

for filepath in filepath_list:
    path = os.path.join(filepath, 'Preps.xml')
    if not os.path.isfile(path):
        # No preps file in this folder: nothing to read.
        continue
    for prep in et.parse(path).iterfind('Prep'):
        # One record per <Prep>, so a row's values cannot drift out of alignment.
        record = {'PrepId': prep.attrib['id']}
        for field in prep_text_fields:
            record[field] = prep.findtext(field)
        prep_records.append(record)

# Explicit columns keep the schema even when no file was found.
# Exact duplicate rows are dropped and the index renumbered.
Preps = (
    pd.DataFrame(prep_records, columns=['PrepId'] + prep_text_fields)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [17]:
# Display the Preps dataframe for a visual check of the parsed values.
Preps

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup
0,P-26047,BOILED|Hard Boiled Eggs FT,50.0,ea,PREP
1,P-58362,BRINED|Turkey|Breast,9.0,Kg,PREP
2,P-34084,COOKED|Bacon,20.0,g,PREP
3,P-55141,COOKED|Charred Corn,2.0,Kg,
4,P-48870,COOKED|Chicken Stirfry,6.3,Kg,
5,P-51134,COOKED|Hard Boiled Egg,2.0,un,
6,P-46546,CRUMBLED|Feta,2.9,Kg,
7,P-43962,"DICED|Cucumber|1/4""",3.3,Kg,PREP
8,P-38554,"DICED|Cucumber|3/4""",3.3,Kg,PREP
9,P-9765,"DICED|Red Pepper|1/4""",1.0,Kg,


In [18]:
# (rows, columns) of the de-duplicated Preps dataframe.
Preps.shape

(43, 5)

In [19]:
# Column dtypes. The values were stored as XML text without numeric conversion,
# so PakQty is a string here too.
Preps.dtypes

PrepId            object
Description       object
PakQty            object
PakUOM            object
InventoryGroup    object
dtype: object

In [20]:
# Save the Preps dataframe to data/preprocessed/Preps_List.csv (header row, no index column).
path = os.path.join(os.getcwd(), "data", "preprocessed", "Preps_List.csv")
# Create the output folder if it is missing, so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(path), exist_ok=True)
Preps.to_csv(path, index = False, header = True)

### Import Products List

In [21]:
# Read Products.xml from every folder in filepath_list and build the Products dataframe.
# Values are kept as the raw XML text (findtext gives None when a tag is absent).
product_text_fields = ['Description', 'SalesGroup']
product_records = []

for filepath in filepath_list:
    path = os.path.join(filepath, 'Products.xml')
    if not os.path.isfile(path):
        # No products file in this folder: nothing to read.
        continue
    for prod in et.parse(path).iterfind('Prod'):
        # One record per <Prod>, so a row's values cannot drift out of alignment.
        record = {'ProdId': prod.attrib['id']}
        for field in product_text_fields:
            record[field] = prod.findtext(field)
        product_records.append(record)

# Explicit columns keep the schema even when no file was found.
# Exact duplicate rows are dropped and the index renumbered.
Products = (
    pd.DataFrame(product_records, columns=['ProdId'] + product_text_fields)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [22]:
# Display the Products dataframe for a visual check of the parsed values.
Products

Unnamed: 0,ProdId,Description,SalesGroup
0,R-58265,DIM SUM|Chicken Gyoza (3),OK - DIM SUM
1,R-63876,DIMSUM|Shrimp & Pork Wonton,OK - DIM SUM
2,R-22618,GRL|Add Cheese|1 slice,FT GRILL DAY
3,R-28293,GRL|Crispy Chicken Strips,OK - GRILL KITCHEN DAY
4,R-35015,GRL|French Fries,OK - GRILL KITCHEN DAY
5,R-54875,GRL|Poutine|Original,OK - GRILL KITCHEN DAY
6,R-55142,GRL|Salad|Baja,OK - GRILL KITCHEN DAY
7,R-34864,GRL|Salad|Chef,OK - GRILL KITCHEN DAY
8,R-60537,GRL|Salad|Chef's Side,OK - GRILL KITCHEN DAY
9,R-50574,GRL|Salad|Cobb,OK - GRILL BRUNCH


In [23]:
# (rows, columns) of the de-duplicated Products dataframe.
Products.shape

(18, 3)

In [24]:
# Column dtypes of the Products dataframe; the values were stored as XML text.
Products.dtypes

ProdId         object
Description    object
SalesGroup     object
dtype: object

In [25]:
# Save the Products dataframe to data/preprocessed/Products_List.csv (header row, no index column).
path = os.path.join(os.getcwd(), "data", "preprocessed", "Products_List.csv")
# Create the output folder if it is missing, so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(path), exist_ok=True)
Products.to_csv(path, index = False, header = True)

### Import Conversions List

In [26]:
# Read Conversions.xml from every folder in filepath_list and build the Conversions dataframe.
# Attribute values are kept as the raw XML text.
conversion_columns = ['ConversionId', 'Multiplier', 'ConvertFromQty',
                      'ConvertFromUom', 'ConvertToQty', 'ConvertToUom']
conversion_records = []

for filepath in filepath_list:
    path = os.path.join(filepath, 'Conversions.xml')
    if not os.path.isfile(path):
        # No conversions file in this folder: nothing to read.
        continue
    for conversion in et.parse(path).iterfind('Conversion'):
        # Look up each child element once; both carry 'qty' and 'uom' attributes.
        convert_from = conversion.find('ConvertFrom')
        convert_to = conversion.find('ConvertTo')
        conversion_records.append({
            'ConversionId': conversion.attrib['id'],
            'Multiplier': conversion.attrib['multiplier'],
            'ConvertFromQty': convert_from.attrib['qty'],
            'ConvertFromUom': convert_from.attrib['uom'],
            'ConvertToQty': convert_to.attrib['qty'],
            'ConvertToUom': convert_to.attrib['uom'],
        })

# Explicit columns keep the schema even when no file was found.
# Exact duplicate rows are dropped and the index renumbered.
Conversions = (
    pd.DataFrame(conversion_records, columns=conversion_columns)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [27]:
# Display the Conversions dataframe for a visual check of the parsed values.
Conversions

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,,1.0,1.0,XXX,1.0,L
1,,0.87719298,1.0,1.14L,1.14,L
2,,0.66666667,1.0,1.5L,1.5,L
3,,0.57142857,1.0,1.75 L,1.75,L
4,,0.5,1.0,2L,2.0,L
5,,0.25,1.0,4L,4.0,L
6,,0.08333333,1.0,FOOT,12.0,INCH
7,,0.0625,1.0,16L,16.0,L
8,,0.0591716,1.0,1/2LTR,16.9,fl oz
9,,0.03937008,1.0,750ML,25.4,fl oz


In [28]:
# (rows, columns) of the de-duplicated Conversions dataframe.
Conversions.shape

(88, 6)

In [29]:
# Column dtypes. The XML attribute values were stored as text without numeric
# conversion, so Multiplier and the quantity columns are strings here too.
Conversions.dtypes

ConversionId      object
Multiplier        object
ConvertFromQty    object
ConvertFromUom    object
ConvertToQty      object
ConvertToUom      object
dtype: object

In [30]:
# Save the Conversions dataframe to data/preprocessed/Conversions_List.csv (header row, no index column).
path = os.path.join(os.getcwd(), "data", "preprocessed", "Conversions_List.csv")
# Create the output folder if it is missing, so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(path), exist_ok=True)
Conversions.to_csv(path, index = False, header = True)