In [2]:
import pandas as pd
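# Reminder (presumably for Windows installs — confirm): prebuilt Fiona wheels, which geopandas may need for file I/O, are listed at https://www.lfd.uci.edu/~gohlke/pythonlibs/#fiona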
import geopandas as gpd
import requests
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

from zipfile import ZipFile
import tempfile
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from collections import defaultdict
from copy import deepcopy
import pickle

## Get FAF5 data

In [None]:
# zip = ZipFile(target_path)
# with tempfile.TemporaryDirectory() as tmpdirname:
#     zip.extractall(path=tmpdirname)
#     faf5_regions = gpd.read_file(Path(tmpdirname) / 'Freight_Analysis_Framework_(FAF5)_Regions.shp')
#     zip.close()

In [3]:
# Location of the locally downloaded FAF5 archive.
# NOTE(review): this is a machine-specific absolute path — point it at your own copy before running.
target_path = 'C:/Users/nicholas.padon/Downloads/FAF5.5.1_2018-2022.zip'

# Read both tables straight out of the archive.
# - The context manager closes the archive even if one of the reads raises.
# - The handle is deliberately not named `zip`, so the builtin zip() remains usable in later cells.
# - Members are streamed via ZipFile.open, so nothing is extracted to disk.
with ZipFile(target_path) as faf_archive:
    target_csvs = [f for f in faf_archive.namelist() if f.endswith('csv')]
    if not target_csvs:
        # Fail with a clear message instead of an IndexError on target_csvs[0].
        raise FileNotFoundError(f"No CSV member found in archive: {target_path}")

    # The first CSV member of the archive is loaded as the FAF5 table.
    with faf_archive.open(target_csvs[0]) as csv_member:
        faf_pdf = pd.read_csv(csv_member)

    # Zone lookup table from the metadata workbook bundled in the same archive.
    with faf_archive.open('FAF5_metadata.xlsx') as metadata_member:
        faf_zone_pdf = pd.read_excel(metadata_member, sheet_name="FAF Zone (Domestic)")

In [4]:
# Report the number of rows loaded into faf_pdf, formatted with thousands separators.
print(f"Records in FAF5 data: {len(faf_pdf):,}")

Records in FAF5 data: 2,311,632


In [5]:
# Human-readable labels for the integer mode codes (1 through 8).
# The labels are listed in code order, so enumerate(..., start=1) pairs
# each label with its code.
faf_mode_map = dict(
    enumerate(
        (
            'Truck',
            'Rail',
            'Water',
            'Air (include truck-air)',
            'Multiple modes & mail',
            'Pipeline',
            'Other and unknown',
            'No domestic mode',
        ),
        start=1,
    )
)

In [6]:
# Boolean row masks over the `dms_mode` code column (labels are in faf_mode_map).
is_air_freight = faf_pdf['dms_mode'].eq(4)       # 4: air
is_pipeline_freight = faf_pdf['dms_mode'].eq(6)  # 6: pipeline
# Original author's note: dms_mode = 8 is for petroleum only (sctg2=16).
is_no_modal_freight = faf_pdf['dms_mode'].eq(8)  # 8: no domestic mode

In [7]:
# Keep only the rows that are none of: air, pipeline, or "no domestic mode".
relevant_faf_pdf = faf_pdf.loc[
    ~is_air_freight & ~is_pipeline_freight & ~is_no_modal_freight
]

In [8]:
# Display the rows whose `fr_orig` value is populated (not NaN).
relevant_faf_pdf.loc[relevant_faf_pdf['fr_orig'].notna()]

Unnamed: 0,fr_orig,dms_orig,dms_dest,fr_dest,fr_inmode,dms_mode,fr_outmode,sctg2,trade_type,dist_band,tons_2018,tons_2019,tons_2020,tons_2021,tons_2022,value_2018,value_2019,value_2020,value_2021,value_2022,current_value_2018,current_value_2019,current_value_2020,current_value_2021,current_value_2022,tmiles_2018,tmiles_2019,tmiles_2020,tmiles_2021,tmiles_2022
522752,801.0,20,20,,1.0,1,,1,2,2,0.250764,0.082484,0.188380,0.150455,0.137360,0.725848,0.199822,0.616593,0.563763,0.343908,0.704324,0.190190,0.538303,0.619535,0.465685,0.051156,0.016827,0.038430,0.030693,0.028022
522753,801.0,61,61,,4.0,1,,1,2,1,0.002533,0.002989,0.005145,0.003592,0.003463,0.115759,0.144741,0.264535,0.179402,0.173543,0.112326,0.137764,0.230947,0.197150,0.234995,0.000087,0.000103,0.000177,0.000124,0.000119
522755,801.0,61,63,,4.0,1,,1,2,2,0.000439,0.000518,0.000891,0.000622,0.000600,0.020567,0.025717,0.047001,0.031875,0.030834,0.019957,0.024477,0.041033,0.035028,0.041752,0.000056,0.000066,0.000114,0.000080,0.000077
522757,801.0,61,65,,4.0,1,,1,2,2,0.000155,0.000182,0.000314,0.000219,0.000211,0.007064,0.008832,0.016142,0.010947,0.010590,0.006854,0.008406,0.014093,0.012030,0.014340,0.000037,0.000044,0.000076,0.000053,0.000051
522758,801.0,61,69,,4.0,1,,1,2,2,0.000651,0.000768,0.001323,0.000923,0.000890,0.029742,0.037189,0.067968,0.046094,0.044589,0.028860,0.035396,0.059338,0.050654,0.060378,0.000152,0.000179,0.000308,0.000215,0.000207
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1442313,808.0,531,539,,3.0,5,,43,2,2,0.002878,0.003261,0.002540,0.004571,0.005726,0.013477,0.016394,0.014413,0.030046,0.025781,0.013636,0.016831,0.012193,0.027840,0.034511,0.000592,0.000671,0.000522,0.000940,0.001178
1442314,808.0,531,560,,3.0,5,,43,2,6,0.000000,0.000000,0.000000,0.008335,0.000000,0.000000,0.000000,0.000000,0.054780,0.000000,0.000000,0.000000,0.000000,0.050758,0.000000,0.000000,0.000000,0.000000,0.008956,0.000000
1442315,808.0,532,532,,7.0,7,,43,2,1,0.002990,0.003130,0.003649,0.003756,0.004246,0.008988,0.009407,0.010968,0.011290,0.012762,0.009094,0.009658,0.009278,0.010461,0.017084,0.000160,0.000168,0.000196,0.000201,0.000228
1442316,808.0,539,539,,7.0,7,,43,2,3,0.351808,0.368205,0.429298,0.441919,0.499551,1.057441,1.106729,1.290355,1.328292,1.501520,1.069964,1.136235,1.091621,1.230771,2.009978,0.101850,0.106598,0.124284,0.127938,0.144623


In [10]:
# Tally missing (True) versus present (False) values in `dms_orig`.
pd.isna(relevant_faf_pdf['dms_orig']).value_counts()

dms_orig
False    1648303
Name: count, dtype: int64

In [9]:
# Tally missing (True) versus present (False) values in `dms_dest`.
pd.isna(relevant_faf_pdf['dms_dest']).value_counts()

dms_dest
False    1648303
Name: count, dtype: int64