In [27]:
import requests
import pandas as pd
import json
from stats_can import StatsCan
sc = StatsCan(data_folder='../data_sources/raw_data/')

Note: to install the stats_can library, I used Anaconda Prompt with the following command:

    conda install -c conda-forge stats_can

other documentation on this library: https://stats-can.readthedocs.io/en/latest/ 

In [5]:
# test api access with the url provided in documentation
url = 'https://www150.statcan.gc.ca/t1/wds/rest/getFullTableDownloadCSV/14100287/en'

params = {
}

api_result = requests.get(url=url, params=params)

api_response = api_result.json()

print(api_response)

{'status': 'SUCCESS', 'object': 'https://www150.statcan.gc.ca/n1/tbl/csv/14100287-eng.zip'}


### Retrieve time series data for monthly retail averages (MRA)
- use fulltabledownloadcsv end point with MRA table ID

In [3]:
# retrieve data for monthly average retail prices for selected products (table id # 1810024501)
url1 = 'https://www150.statcan.gc.ca/t1/wds/rest/getFullTableDownloadCSV/18100245/en'

params = {
}

api_result = requests.get(url=url1, params=params)

api_response = api_result.json()

print(api_response)

{'status': 'SUCCESS', 'object': 'https://www150.statcan.gc.ca/n1/tbl/csv/18100245-eng.zip'}


In [4]:
# retrieve data for farm product prices, crops and livestock (table id # 3210007701)
url2 = 'https://www150.statcan.gc.ca/t1/wds/rest/getFullTableDownloadCSV/32100077/en'

params = {
}

api_result = requests.get(url=url2, params=params)

api_response = api_result.json()

print(api_response)

{'status': 'SUCCESS', 'object': 'https://www150.statcan.gc.ca/n1/tbl/csv/32100077-eng.zip'}


In [None]:
# retrieve data for meat, fruits/veg
# no API resource for AgriCanada

### Attempt to download specific time period data using "getDataFromVectorsAndLatestNPeriods" endpoint

In [None]:
# retrieve changed data for specific products (target using VectorID) over a specific time period

# POST URL = 'https://www150.statcan.gc.ca/t1/wds/rest/getDataFromVectorsAndLatestNPeriods'

# POST BODY = [{"vectorId":1353834271, "latestN":3}]


### Attempt to download data using stats_can module instead of the API


In [43]:
# download same MRA table as when using API method above, using full table ID (provincial data, 2017 - 2023)
provincial_MRA = sc.table_to_df("181-002-45-01")

In [44]:
provincial_MRA.head()

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2017-01-01,Canada,2016A000011124,"Beef stewing cuts, per kilogram",Dollars,81,units,0,v1353834271,11.1,12.66,,,,2
1,2017-01-01,Canada,2016A000011124,"Beef striploin cuts, per kilogram",Dollars,81,units,0,v1353834272,11.2,21.94,,,,2
2,2017-01-01,Canada,2016A000011124,"Beef top sirloin cuts, per kilogram",Dollars,81,units,0,v1353834273,11.3,13.44,,,,2
3,2017-01-01,Canada,2016A000011124,"Beef rib cuts, per kilogram",Dollars,81,units,0,v1353834311,11.41,20.17,,,,2
4,2017-01-01,Canada,2016A000011124,"Ground beef, per kilogram",Dollars,81,units,0,v1353834274,11.4,9.12,,,,2


In [45]:
provincial_MRA.tail()

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
99133,2023-10-01,British Columbia,2016A000259,"Sunflower seeds, 400 grams",Dollars,81,units,0,v1458870265,10.109,4.26,,,,2
99134,2023-10-01,British Columbia,2016A000259,"Deodorant, 85 grams",Dollars,81,units,0,v1353834714,10.75,7.42,,,,2
99135,2023-10-01,British Columbia,2016A000259,"Toothpaste, 100 millilitres",Dollars,81,units,0,v1353834715,10.76,4.06,,,,2
99136,2023-10-01,British Columbia,2016A000259,"Shampoo, 400 millilitres",Dollars,81,units,0,v1353834716,10.77,7.14,,,,2
99137,2023-10-01,British Columbia,2016A000259,"Laundry detergent, 4.43 litres",Dollars,81,units,0,v1458870250,10.11,16.48,,,,2


In [31]:
# download specific vector data using a vectorID from the previous table (Beef stewing cuts, per kilogram, Canada)
scdf1 = sc.vectors_to_df_remote("v1353834271", periods = 12)
scdf1

Unnamed: 0_level_0,v1353834271
refPer,Unnamed: 1_level_1
2022-11-01,19.18
2022-12-01,19.02
2023-01-01,17.52
2023-02-01,17.05
2023-03-01,17.08
2023-04-01,18.17
2023-05-01,19.02
2023-06-01,19.63
2023-07-01,19.48
2023-08-01,16.23


In [51]:
# investigate vectorID-based approach

# create list of irrelevant products
irrelevant = ['Baby food', 'Infant formula', 'Deodorant', 'Toothpaste', 'Shampoo', 'Laundry detergent', 'Paper towels', 'tissue', 'Cigarettes', 'gasoline']

# remove rows where the product is irrelevant

for term in irrelevant:
    provincial_MRA = provincial_MRA[~provincial_MRA.Products.str.contains(term)]

provincial_MRA

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2017-01-01,Canada,2016A000011124,"Beef stewing cuts, per kilogram",Dollars,81,units,0,v1353834271,11.1,12.66,,,,2
1,2017-01-01,Canada,2016A000011124,"Beef striploin cuts, per kilogram",Dollars,81,units,0,v1353834272,11.2,21.94,,,,2
2,2017-01-01,Canada,2016A000011124,"Beef top sirloin cuts, per kilogram",Dollars,81,units,0,v1353834273,11.3,13.44,,,,2
3,2017-01-01,Canada,2016A000011124,"Beef rib cuts, per kilogram",Dollars,81,units,0,v1353834311,11.41,20.17,,,,2
4,2017-01-01,Canada,2016A000011124,"Ground beef, per kilogram",Dollars,81,units,0,v1353834274,11.4,9.12,,,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99129,2023-10-01,British Columbia,2016A000259,"Pasta sauce, 650 millilitres",Dollars,81,units,0,v1458870255,10.105,3.18,,,,2
99130,2023-10-01,British Columbia,2016A000259,"Salad dressing, 475 millilitres",Dollars,81,units,0,v1458870257,10.106,3.35,,,,2
99131,2023-10-01,British Columbia,2016A000259,"Almonds, 200 grams",Dollars,81,units,0,v1458870235,10.107,4.80,,,,2
99132,2023-10-01,British Columbia,2016A000259,"Peanuts, 450 grams",Dollars,81,units,0,v1458870256,10.108,3.86,,,,2


In [56]:
# generate a list of unique products in provincial monthly retail averages table
vector_list_provincial = provincial_MRA['VECTOR'].unique().tolist()
vector_list_provincial

['v1353834271',
 'v1353834272',
 'v1353834273',
 'v1353834311',
 'v1353834274',
 'v1353834275',
 'v1353834276',
 'v1353834312',
 'v1353834277',
 'v1353834278',
 'v1353834279',
 'v1353834313',
 'v1353834280',
 'v1353834281',
 'v1458869929',
 'v1458869931',
 'v1353834314',
 'v1353834282',
 'v1458869922',
 'v1353834283',
 'v1353834284',
 'v1353834285',
 'v1458869932',
 'v1458869923',
 'v1353834286',
 'v1353834287',
 'v1458869921',
 'v1353834288',
 'v1353834289',
 'v1353834290',
 'v1353834291',
 'v1353834292',
 'v1353834293',
 'v1353834294',
 'v1353834295',
 'v1353834296',
 'v1353834315',
 'v1353834297',
 'v1353834298',
 'v1458869934',
 'v1353834299',
 'v1353834300',
 'v1353834316',
 'v1353834317',
 'v1353834301',
 'v1353834302',
 'v1353834303',
 'v1353834304',
 'v1353834305',
 'v1353834306',
 'v1353834307',
 'v1353834308',
 'v1353834318',
 'v1353834319',
 'v1353834309',
 'v1353834310',
 'v1458869933',
 'v1458869928',
 'v1353834320',
 'v1353834321',
 'v1353834322',
 'v1353834323',
 'v13538

In [57]:
# count number of items in list
len(vector_list_provincial)

1143

In [48]:
# retrieve table data for national MRA prices (1995 - 2022)
national_MRA = sc.table_to_df("18-10-0002-01")

Downloading and loading table_18100002


18100002-eng.zip: 100%|██████████| 114k/114k [00:00<00:00, 284kB/s] 


In [49]:
national_MRA.head(25)

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,1995-01-01,Canada,2016A000011124,"Round steak, 1 kilogram",Dollars,81,units,0,v735165,1.1,10.17,,,,2
1,1995-01-01,Canada,2016A000011124,"Sirloin steak, 1 kilogram",Dollars,81,units,0,v735176,1.2,11.25,,,,2
2,1995-01-01,Canada,2016A000011124,"Prime rib roast, 1 kilogram",Dollars,81,units,0,v735187,1.3,12.08,,,,2
3,1995-01-01,Canada,2016A000011124,"Blade roast, 1 kilogram",Dollars,81,units,0,v735198,1.4,7.23,,,,2
4,1995-01-01,Canada,2016A000011124,"Stewing beef, 1 kilogram",Dollars,81,units,0,v735209,1.5,7.12,,,,2
5,1995-01-01,Canada,2016A000011124,"Ground beef, 1 kilogram",Dollars,81,units,0,v735220,1.6,3.73,,,,2
6,1995-01-01,Canada,2016A000011124,"Pork chops, 1 kilogram",Dollars,81,units,0,v735221,1.7,8.36,,,,2
7,1995-01-01,Canada,2016A000011124,"Butt roast, 1 kilogram",Dollars,81,units,0,v735222,1.8,4.9,,,t,2
8,1995-01-01,Canada,2016A000011124,"Chicken, 1 kilogram",Dollars,81,units,0,v735223,1.9,3.72,,,,2
9,1995-01-01,Canada,2016A000011124,"Bacon, 500 grams",Dollars,81,units,0,v735166,1.1,3.0,,,,2


In [50]:
national_MRA.tail(25)

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
18491,2022-02-01,Canada,2016A000011124,"Carrots, 1 kilogram",Dollars,81,units,0,v735189,1.31,2.46,,,,2
18492,2022-02-01,Canada,2016A000011124,"Mushrooms, 1 kilogram",Dollars,81,units,0,v735191,1.33,10.42,,,,2
18493,2022-02-01,Canada,2016A000011124,"Onions, 1 kilogram",Dollars,81,units,0,v735192,1.34,2.37,,,,2
18494,2022-02-01,Canada,2016A000011124,"Potatoes, 4.54 kilograms",Dollars,81,units,0,v735193,1.35,10.33,,,,2
18495,2022-02-01,Canada,2016A000011124,"French fried potatoes, frozen, 1 kilogram",Dollars,81,units,0,v735194,1.36,3.43,,,,2
18496,2022-02-01,Canada,2016A000011124,"Baked beans, canned, 398 millilitres",Dollars,81,units,0,v735195,1.37,1.51,,,,2
18497,2022-02-01,Canada,2016A000011124,"Tomatoes, canned, 796 millilitres",Dollars,81,units,0,v735196,1.38,1.62,,,,2
18498,2022-02-01,Canada,2016A000011124,"Tomato juice, 1.36 litres",Dollars,81,units,0,v735197,1.39,2.94,,,,2
18499,2022-02-01,Canada,2016A000011124,"Ketchup, 1 litre",Dollars,81,units,0,v735199,1.4,4.17,,,,2
18500,2022-02-01,Canada,2016A000011124,"Sugar, white, 2 kilograms",Dollars,81,units,0,v735200,1.41,2.67,,,,2


In [52]:
# remove rows where the product is irrelevant
for term in irrelevant:
    national_MRA = national_MRA[~national_MRA.Products.str.contains(term)]

national_MRA

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,1995-01-01,Canada,2016A000011124,"Round steak, 1 kilogram",Dollars,81,units,0,v735165,1.1,10.17,,,,2
1,1995-01-01,Canada,2016A000011124,"Sirloin steak, 1 kilogram",Dollars,81,units,0,v735176,1.2,11.25,,,,2
2,1995-01-01,Canada,2016A000011124,"Prime rib roast, 1 kilogram",Dollars,81,units,0,v735187,1.3,12.08,,,,2
3,1995-01-01,Canada,2016A000011124,"Blade roast, 1 kilogram",Dollars,81,units,0,v735198,1.4,7.23,,,,2
4,1995-01-01,Canada,2016A000011124,"Stewing beef, 1 kilogram",Dollars,81,units,0,v735209,1.5,7.12,,,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18502,2022-02-01,Canada,2016A000011124,"Coffee, instant, 200 grams",Dollars,81,units,0,v735202,1.43,7.10,,,,2
18503,2022-02-01,Canada,2016A000011124,Tea (72 bags),Dollars,81,units,0,v735203,1.44,4.49,,,,2
18504,2022-02-01,Canada,2016A000011124,"Cooking or salad oil, 1 litre",Dollars,81,units,0,v735204,1.45,4.45,,,,2
18505,2022-02-01,Canada,2016A000011124,"Soup, canned, 284 millilitres",Dollars,81,units,0,v735205,1.46,1.28,,,,2


In [53]:
# generate a list of unique products in national monthly retail averages table
vector_list_national = national_MRA['VECTOR'].unique().tolist()
vector_list_national

['v735165',
 'v735176',
 'v735187',
 'v735198',
 'v735209',
 'v735220',
 'v735221',
 'v735222',
 'v735223',
 'v735166',
 'v735167',
 'v735168',
 'v735169',
 'v735170',
 'v735171',
 'v735172',
 'v735173',
 'v735174',
 'v735175',
 'v735177',
 'v735178',
 'v735179',
 'v735180',
 'v735181',
 'v735182',
 'v735183',
 'v735184',
 'v735185',
 'v735186',
 'v735188',
 'v735189',
 'v735190',
 'v735191',
 'v735192',
 'v735193',
 'v735194',
 'v735195',
 'v735196',
 'v735197',
 'v735199',
 'v735200',
 'v735201',
 'v735202',
 'v735203',
 'v735204',
 'v735205',
 'v735207',
 'v735208',
 'v735210',
 'v735211',
 'v123342195',
 'v123342196']

In [54]:
# count number of unique vector IDs
len(vector_list_national)

52

In [58]:
# check for like terms between provincial and national vector lists
res = len(set(vector_list_provincial) & set(vector_list_national))
print(f"Number of common vector IDs : {res}")

Number of common vector IDs : 0


In [41]:
# get all available records for provincial data
viddf = sc.vectors_to_df_remote(vector_list_provincial, periods=12)

In [42]:
viddf

Unnamed: 0_level_0,v1159446976,v1159446977,v1159446978,v1159446979,v1159446980,v1159446981,v1159446982,v1159446983,v1159446984,v1159446985,...,v1458870258,v1458870259,v1458870260,v1458870261,v1458870262,v1458870263,v1458870264,v1458870265,v1458870266,v1458870267
refPer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-11-01,16.78,22.44,14.52,9.69,7.1,8.97,5.89,14.77,8.03,6.76,...,4.89,27.77,3.94,7.37,4.18,3.79,7.09,4.35,3.01,9.04
2022-12-01,14.83,18.62,14.48,10.28,8.56,9.81,6.43,13.44,7.85,7.13,...,4.58,30.26,4.08,8.13,4.17,4.45,7.23,4.45,2.8,9.22
2023-01-01,16.44,29.17,15.28,9.99,8.46,8.54,5.65,13.51,7.61,6.68,...,4.77,30.34,4.36,7.88,4.14,4.28,5.96,4.4,2.65,9.52
2023-02-01,16.46,21.31,17.31,9.86,7.73,8.45,6.49,13.57,8.71,6.64,...,4.76,29.14,4.24,7.5,4.12,4.26,6.1,4.47,3.14,9.28
2023-03-01,15.43,23.49,16.57,10.09,8.13,8.79,6.59,13.74,8.15,6.0,...,4.88,28.01,4.33,7.68,4.18,4.8,5.62,4.59,3.01,9.59
2023-04-01,18.02,26.04,20.88,10.2,8.34,8.81,6.02,15.23,7.9,6.69,...,5.02,28.18,4.52,7.77,4.46,4.98,5.91,4.65,3.1,9.51
2023-05-01,19.08,24.57,19.77,11.02,8.7,8.94,6.21,14.48,9.36,6.57,...,5.01,27.06,4.6,7.81,4.21,5.07,5.19,4.51,2.79,9.91
2023-06-01,19.39,26.4,21.76,11.09,7.7,9.34,6.44,14.51,9.06,6.71,...,4.9,28.14,4.55,7.75,4.47,5.52,5.13,4.21,2.75,9.53
2023-07-01,18.58,37.21,20.14,11.34,8.24,7.94,6.63,15.21,8.98,6.69,...,4.88,27.06,4.64,7.82,4.32,4.99,4.7,4.4,2.85,9.7
2023-08-01,19.93,41.8,25.94,12.06,8.61,8.88,6.04,14.31,8.31,6.82,...,4.71,22.24,4.75,7.65,4.15,4.54,5.11,4.27,2.59,9.68


In [60]:
sc.downloaded_tables

['18100002', '18100245']

Ideas about what functions/features the tool will have:
- predict cost fluctuations in food products
    - Clarify: what LOD? Category or individual products? (individual products would not be a comprehensive resource; not all ingredients are available with StatsCan database)
    - If prediction at the categorical level:
        - users would input their menu items, including ingredients
            - if the user has an ingredient not listed in our database, they can simply select its category
        - we will use Stats Canada retail database to predict monthly price average for its selected products
        
        - our tool would break down their menu into weighted average by category (30% wheat products, 25% protein, etc)

- two stages of analysis:
    - time-series prediction:
        - use historic product price data to predict fluctuations in monthly average cost
    - farm product price data:
        - track changes in supplier sale prices
        - if suppliers start selling at higher/lower price points, that percentage change will also be applied to our tool's predicted product price
