# US Seaport Volumes

---
#### (for the top 10 ports / 80% of all TEUs)

Note: TEU means "Twenty-foot [Container] Equivalent Unit"

In [1]:
import pandas as pd
import altair as alt
import numpy as np
import re
from json import loads, JSONDecodeError
from io import BytesIO

from time import sleep
from joblib import Memory

from urllib.request import urlopen, Request
from urllib.error import HTTPError

from tabula import read_pdf

from google.google import search as googleSearch

In [2]:
# Data source page: https://www.oaklandseaport.com/performance/facts-figures/

# sheet_name=None loads every worksheet and returns them as a dict of DataFrames.
df_oakland = pd.read_excel(
    "https://www.oaklandseaport.com/files/PDF/Oakland%20Monthly%20TEUs%20July%201997-2019.xlsx",
    sheet_name=None,
    index_col=0,
    skiprows=5,
)

# Two parallel label rows; each final column name is the pair joined with "-".
labels = [['Full', 'Full', 'Full', 'Empty', 'Empty', 'Empty', 'All'],
          ['Inbound', 'Outbound', 'Total', 'Inbound', 'Outbound', 'Total', 'Total']]

# Stack all worksheets, leaving out the final row of each one.
df_oak_combined = pd.concat(sheet.iloc[:-1] for sheet in df_oakland.values())
df_oak_combined.columns = ["-".join(pair) for pair in zip(*labels)]

# Index labels that do not look like "YYYY-MM-DD 00:00:00" become NaT.
df_oak_combined['dt'] = df_oak_combined.index.map(
    lambda raw: pd.to_datetime(str(raw), format="%Y-%m-%d 00:00:00", errors='coerce')
)

# Discard the first six rows, index by date and average into 2-month bins.
# NOTE(review): doYoyChart compares rows 12 positions apart; on 2-month bins
# that spans 24 months rather than one year -- confirm '2M' is intended.
df_oak_combined = df_oak_combined.iloc[6:].set_index('dt').resample('2M').mean()

## Port of Oakland

In [3]:
def doChart(df, location='Oakland', metric='Full-Total'):
    """Build an Altair line chart of one volume column over time.

    Parameters
    ----------
    df : DataFrame that, after ``reset_index()``, has a ``dt`` column and a
        column named ``metric``.
    location : port name, used only in the chart title.
    metric : name of the column to plot.

    Returns
    -------
    The configured ``alt.Chart`` (750x400, purple line). Rows where ``dt`` or
    ``metric`` is missing are left out.
    """
    plotted = df.reset_index()[['dt', metric]].dropna()
    value_field = '{}:Q'.format(metric)

    lines = alt.Chart(plotted).mark_line(color='purple')
    encoded = lines.encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y(value_field, axis=alt.Axis(title='Volume [TEUs]')),
        tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(value_field)],
    )

    return encoded.properties(
        title='Port of {} {} Container Volume'.format(location, metric),
        width=750,
        height=400,
    )

doChart(df_oak_combined)

In [4]:
def doYoyChart(df, location='Oakland', metric='Full-Total', periods=12):
    """Build an Altair bar chart of the percentage growth of one volume column.

    Each bar is the percentage change of ``metric`` relative to the value
    ``periods`` rows earlier in date order.

    Parameters
    ----------
    df : DataFrame that, after ``reset_index()``, has a ``dt`` column and a
        column named ``metric``.
    location : port name, used only in the chart title.
    metric : name of the column to chart.
    periods : number of rows that make up one year for this data
        (12 for monthly rows, the default).

    Returns
    -------
    The configured ``alt.Chart`` (750x400, blue bars).
    """
    series = df.reset_index()[['dt', metric]].dropna().set_index('dt')

    # pct_change compares by row position, not by date. Sort chronologically
    # first so a source table listed newest-first (or in any other order)
    # still yields growth versus the earlier period.
    df_yoy = (series.sort_index().pct_change(periods) * 100).reset_index()

    value_field = '{}:Q'.format(metric)

    return alt.Chart(df_yoy).mark_bar(width=2, color='blue').encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y(value_field, axis=alt.Axis(title='Year-over-Year Volume Growth [%]')),
        tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(value_field)]
    ).properties(
        title='Port of {} {} Container Volume Growth'.format(location, metric),
        width=750,
        height=400
    )

doYoyChart(df_oak_combined)

In [5]:
# Pass the column by keyword: doChart's second positional parameter is `location`,
# so a positional 'Empty-Total' would only change the title and still plot 'Full-Total'.
doChart(df_oak_combined, metric='Empty-Total')

In [6]:
# Pass the column by keyword: doYoyChart's second positional parameter is `location`,
# so a positional 'Empty-Total' would only change the title and still plot 'Full-Total'.
doYoyChart(df_oak_combined, metric='Empty-Total')

In [7]:
# Pass the column by keyword: doChart's second positional parameter is `location`,
# so a positional 'All-Total' would only change the title and still plot 'Full-Total'.
doChart(df_oak_combined, metric='All-Total')

In [8]:
# Pass the column by keyword: doYoyChart's second positional parameter is `location`,
# so a positional 'All-Total' would only change the title and still plot 'Full-Total'.
doYoyChart(df_oak_combined, metric='All-Total')

## Port of Long Beach

In [9]:
# http://www.polb.com/economics/stats/teus_archive.asp

# Browser-like User-Agent header sent with the scraping requests.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'

req = Request("https://thehelm.polb.com/stellar_custom_table/table71/", data=None, headers={ 'User-Agent': uaString })

# Read the page inside a context manager so the HTTP response is closed
# as soon as the first table has been parsed.
with urlopen(req) as data:
    df_lb_raw = pd.read_html(BytesIO(data.read()), displayed_only=False)[0]

# Drop the last row and the first column. Take a copy so the column
# assignments below operate on an independent frame, not on a slice.
df_lb_raw = df_lb_raw.iloc[:-1, 1:].copy()

# Every column after the first remaining one is converted to numbers.
df_lb_raw[df_lb_raw.columns[1:]] = df_lb_raw[df_lb_raw.columns[1:]].apply(pd.to_numeric)

# 'Date' holds labels such as "Jan 2020" (format '%b %Y').
df_lb_raw['dt'] = pd.to_datetime(df_lb_raw['Date'], format='%b %Y')

In [10]:
doYoyChart(df_lb_raw, location='Long Beach', metric='Total Loaded')

In [11]:
doYoyChart(df_lb_raw, location='Long Beach', metric='Total')

In [12]:
doYoyChart(df_lb_raw, location='Long Beach', metric='Empties')

In [13]:
doYoyChart(df_lb_raw, location='Long Beach', metric='Loaded Inbound')

## Port of Los Angeles

In [23]:
# Port of Los Angeles
# Statistics landing page:
# https://www.portoflosangeles.org/business/statistics/container-statistics

# Per-year pages are fetched below for 1995 onward; example URL (2019):
# https://www.portoflosangeles.org/business/statistics/container-statistics/historical-teu-statistics-2019

# Browser-like User-Agent header sent with the scraping requests.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'

# joblib cache rooted at 'data/'; functions decorated with @memory.cache reuse stored results.
memory = Memory('data/', verbose=0)

@memory.cache
def fetchLaData(u, year, cols, args=None):
    """Download one yearly statistics page and return its first HTML table.

    Parameters
    ----------
    u : URL of the page to fetch.
    year : year appended to each 'Month' value to build the ``dt`` column.
    cols : column names to assign after the leading 'Month' column.
    args : keyword arguments forwarded to ``pd.read_html``; when omitted,
        ``{'skiprows': 1}`` is used.

    Returns
    -------
    DataFrame with the renamed columns plus a ``dt`` column; 'Month' values
    that do not parse as "<month name>-<year>" yield NaT.
    """
    # A dict literal as default would be one object shared across calls;
    # build it per call instead.
    if args is None:
        args = {'skiprows': 1}

    req = Request(u, data=None, headers={ 'User-Agent': uaString })

    # Close the HTTP response as soon as the body has been read.
    with urlopen(req) as resp:
        html = resp.read()

    tmp = pd.read_html(BytesIO(html), **args)[0]
    try:
        tmp.columns = ['Month'] + cols
    except ValueError as e:
        # The rename failed: report it and carry on with the table's own
        # headers. The 'Month' lookup below then requires that the table
        # already has a column of that name.
        print(e)

    tmp['dt'] = tmp['Month'].apply(lambda v: pd.to_datetime("{}-{}".format(v, year), format="%B-%Y", errors='coerce'))

    return tmp

# Column names used for years before 1996 (no 'Total Exports' entry).
colsPre96 = ['Loaded Imports', 'Empty Imports', 'Total Imports', 
        'Loaded Export', 'Empty Exports', 'Total TEUs', 'Prior Year Change']

# Column names used from 1996 onward.
cols = ['Loaded Imports', 'Empty Imports', 'Total Imports', 
        'Loaded Export', 'Empty Exports', 'Total Exports', 'Total TEUs', 
        'Prior Year Change']

# read_html options passed only for the pre-1996 pages (see the loop below).
args = {
    'skiprows': 1
}

# Build the per-year page URL.
urlFor = lambda y: "https://www.portoflosangeles.org/business/statistics/container-statistics/historical-teu-statistics-{}".format(y)

# Fetch one frame per year, 1995 through 2020.
dfs_la = []
for y in range(1995, 2021):
    url = urlFor(y)
    #print(url)
    if y < 1996:
        dfs_la.append(fetchLaData(url, y, colsPre96, args))
    else:
        dfs_la.append(fetchLaData(url, y, cols, {}))

# Stack all years and drop every row containing any missing value
# (this includes rows whose 'dt' could not be parsed).
# NOTE(review): frames built with colsPre96 have no 'Total Exports' column, so
# concat fills it with NaN and dropna() then removes those rows entirely --
# confirm that losing the pre-1996 data is intended.
df_la = pd.concat(dfs_la).dropna() #.tail()

# NOTE(review): cols[:1] selects only 'Loaded Imports'; the loop below converts
# every data column anyway -- possibly cols[:-1] was meant.
df_la[cols[:1]] = df_la[cols[:1]].apply(pd.to_numeric)

# Convert every column except the labels, dates and 'Prior Year Change' to float.
for c in df_la.columns:
    if c in ['Month', 'dt', 'Prior Year Change']:
        continue
    df_la[c] = df_la[c].apply(float)

#df_la.head()

In [15]:
doYoyChart(df_la, location='Los Angeles', metric='Loaded Imports')

In [25]:
doYoyChart(df_la, location='Los Angeles', metric='Loaded Export')

In [26]:
doYoyChart(df_la, location='Los Angeles', metric='Total Imports')

In [27]:
doYoyChart(df_la, location='Los Angeles', metric='Total TEUs')

## Port of New York / New Jersey

In [None]:
dataUrl = "https://www.panynj.gov/content/port/en.model.json"

# jq-style path to the same items that doNynjFetch extracts:
#jqPath = '\'.":children"."/port/en/our-port/facts-and-figures".":items"."root".":items"."responsivegrid".":items"."accordionlist_435572874".":items"\''

def doNynjFetch(retries = 5):
    """Fetch the PANYNJ page model and return the facts-and-figures accordion items.

    The request is attempted ``retries + 1`` times. An attempt whose body is
    not valid JSON is followed by a one-second pause and another attempt.

    Parameters
    ----------
    retries : number of additional attempts after the first one; a negative
        value performs no request at all.

    Returns
    -------
    The dict stored under the accordion's ":items" key, or an empty dict when
    every attempt returned undecodable JSON.

    Raises
    ------
    KeyError if the decoded document lacks one of the expected keys. Errors
    raised by ``urlopen`` are not caught.
    """
    for _ in range(retries + 1):
        try:
            req = Request(dataUrl, data=None, headers={ 'User-Agent': uaString })

            # Close the HTTP response as soon as the body has been decoded.
            with urlopen(req) as resp:
                dict_data = loads(resp.read())
        except JSONDecodeError:
            sleep(1)
            continue

        return dict_data[":children"]\
                        ["/port/en/our-port/facts-and-figures"]\
                        [":items"]\
                        ["root"]\
                        [":items"]\
                        ["responsivegrid"]\
                        [":items"]\
                        ["accordionlist_435572874"]\
                        [":items"]

    return {}

nynj_dict_content = doNynjFetch()

# Two table layouts are handled below: an 8-value layout (tried first) and a
# 4-value layout used as fallback when the first set of names does not fit.
post2011Cols = ['Import Loads', 'Import Empties', 'Export Loads', 'Export Empties', 'Total Loads', 'Total Empties', 'Total TEUs', 'Total Rail Lifts']
cols = ["Import TEUs", "Export TEUs", "Total TEUs", "Total Rail Lifts"]

dfs_nynj = []
for v in nynj_dict_content.values():
    # The entry's "linkAriaLabel" is used as the year (converted with int() below).
    year = v["linkAriaLabel"]
    #print(year)
    # The entry's HTML lives under "text", or under "text_copy" when "text" is absent.
    try:
        tmp = pd.read_html(v[":items"]["text"]["text"])
    except KeyError:
        # text_copy
        tmp = pd.read_html(v[":items"]["text_copy"]["text"])
    # One DataFrame per HTML table found in the entry; n is the table's position.
    for n, monthData in enumerate(tmp):
        # skip the YTD section
        #if n < 2:
        #    continue

        mtmp = monthData.copy()
        # Any ValueError raised in this block (e.g. a column-count mismatch on
        # the assignment, or int(year) failing) switches to the fallback layout.
        try:
            mtmp.columns = ['Period'] + post2011Cols

            # cleanup types before doing math
            # (the lambda parameter v shadows the loop variable v only inside the lambda)
            mtmp[post2011Cols] = mtmp[post2011Cols].applymap(lambda v: re.sub("[^0-9.]", "", str(v)))
            mtmp[post2011Cols] = mtmp[post2011Cols].apply(lambda v: pd.to_numeric(v, errors='coerce'))

            mtmp['Import TEUs'] = mtmp['Import Loads'] + mtmp['Import Empties']
            mtmp['Export TEUs'] = mtmp['Export Loads'] + mtmp['Export Empties']

            # Monthly tables are shown Dec->Jan after 2014, opposite before
            # The table position is used as the month number. n == 0 yields
            # month 13 or 0, which does not parse, so those rows get NaT and
            # are removed by the dropna() further down.
            # NOTE(review): presumably table 0 is the YTD section -- confirm.
            index = 13-n if int(year) > 2014 else n
            mtmp['dt'] = mtmp.iloc[:, 0].map(lambda v: pd.to_datetime("{}-{}".format(index, v), format="%m-%Y", errors='coerce'))
        except ValueError:
            # Fallback layout: the first column is parsed as "<month name> <2-digit year>".
            mtmp.columns = ['Period'] + cols
            mtmp['dt'] = mtmp.iloc[:, 0].map(lambda v: pd.to_datetime(v, format="%B %y", errors='coerce'))

            mtmp[cols] = mtmp[cols].applymap(lambda v: re.sub("[^0-9.]", "", str(v)))
            mtmp[cols] = mtmp[cols].apply(lambda v: pd.to_numeric(v, errors='coerce'))

        dfs_nynj.append(mtmp)

# Columns available in both layouts.
commonFields = ['Period', 'dt', 'Import TEUs', 'Export TEUs', 'Total TEUs']

# Keep the shared columns and drop rows with any missing value (including NaT dates).
df_nynj = pd.concat(dfs_nynj)[commonFields].dropna() #.tail(30)

# Average rows that share the same date; the result is indexed by dt.
df_nynj = df_nynj.groupby('dt').agg('mean') #.head()

In [None]:
doChart(df_nynj.reset_index(), location="New York/New Jersey", metric="Import TEUs")

In [None]:
doYoyChart(df_nynj.reset_index(), location="New York/New Jersey", metric="Import TEUs")

In [None]:
doYoyChart(df_nynj.reset_index(), location="New York/New Jersey", metric="Export TEUs")

In [None]:
doYoyChart(df_nynj[12:].reset_index(), location="New York/New Jersey", metric="Total TEUs")

## Georgia Ports

In [None]:
# Column names imposed on every table extracted from the Georgia Ports PDF.
cols = ['Year', 'Key', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Total']

@memory.cache
def doGaFetch(url, retries=5):
    """Download a PDF report and return the tables tabula extracts from it.

    The download and extraction are attempted ``retries + 1`` times, pausing
    one second after an attempt that yields no usable first table.

    Parameters
    ----------
    url : address of the PDF.
    retries : number of additional attempts after the first one; a negative
        value performs no request at all.

    Returns
    -------
    The object returned by ``read_pdf`` (indexed with ``[0]`` by the caller),
    or None when no attempt produced a first table.
    NOTE(review): the return value, including None, is stored by
    ``memory.cache`` -- confirm that caching a failed fetch is acceptable.
    """
    for _ in range(retries + 1):
        try:
            req = Request(url, data=None, headers={ 'User-Agent': uaString })

            # Close the HTTP response as soon as the body has been read.
            with urlopen(req) as resp:
                pdf_bytes = resp.read()

            tabs = read_pdf(BytesIO(pdf_bytes), pages='all', pandas_options={ 'names': cols })

            # Probe for the first table. An empty list raises IndexError
            # (not KeyError), so both are treated as "table missing".
            tabs[0]
        except (KeyError, IndexError):
            sleep(1)
            continue

        return tabs

    return None
    
tabs = doGaFetch("http://gaports.com/wp-content/uploads/2020/02/Monthly-TEU-Throughput-January-2020.pdf")

# First table of the report: rows at positions 1-18, without the leading column.
df_ga = tabs[0].iloc[1:19, 1:]

# Long format: one row per (Key, column) pair; the 'Total' column is left out.
df_ga_melted = df_ga.melt(id_vars='Key')
df_ga_melted = df_ga_melted.loc[df_ga_melted['variable'] != 'Total']

# 'Key' and the former column name together form "<year>-<month abbreviation>".
df_ga_melted['dt'] = pd.to_datetime(
    df_ga_melted['Key'] + '-' + df_ga_melted['variable'],
    format='%Y-%b',
)

# Strip thousands separators; anything that is still not a number becomes NaN.
df_ga_melted['value'] = df_ga_melted['value'].map(
    lambda raw: pd.to_numeric(str(raw).replace(",", ""), errors='coerce')
)

# Monthly mean series, exposed as columns 'dt' and 'TEU Total'.
df_ga_tst = (
    df_ga_melted[['value', 'dt']]
    .set_index('dt')
    .resample('1M')
    .mean()
    .rename(columns={'value': 'TEU Total'})
    .reset_index()
)

In [None]:
doYoyChart(df_ga_tst, location='Georgia', metric='TEU Total')

In [None]:
# Read the detail table in the rest of the report

# Rows of the first table from position 19 onward, followed by all later tables.
df_ga_detail = pd.concat([tabs[0].iloc[19:, :].copy()] + tabs[1:])
# Carry each 'Year' value down over at most the next 11 rows.
# NOTE(review): presumably the year is present only on the first row of each
# group of rows -- confirm against the extracted table.
df_ga_detail['Year'] = df_ga_detail['Year'].ffill(limit=11)

#df_ga_detail.head(20)

# Long format: one row per (Key, Year, column) combination.
df_ga_detail_melted = df_ga_detail.melt(id_vars=["Key", "Year"])

# The 'Total' column is not a month; leave it out.
df_ga_detail_melted = df_ga_detail_melted[df_ga_detail_melted["variable"] != "Total"]

# The "%Y.0-%b" format expects the stringified Year to end in ".0"
# (e.g. "2019.0-Jan"); anything else becomes NaT.
df_ga_detail_melted['dt'] = pd.to_datetime(df_ga_detail_melted['Year'].apply(str) +\
                                           '-' + df_ga_detail_melted['variable'], format="%Y.0-%b", errors='coerce')

# Strip thousands separators, then convert; non-numeric text becomes NaN.
df_ga_detail_melted['value'] = df_ga_detail_melted['value']\
                                    .apply(lambda v: str(v).replace(",", ""))\
                                    .apply(lambda v: pd.to_numeric(v, errors='coerce'))

#df_ga_detail_melted.tail()

# Wide format: one row per dt, one column per distinct Key.
df_ga_ts = df_ga_detail_melted[['dt', 'Key', 'value']].dropna().pivot(index='dt', columns='Key').reset_index()
# Names are assigned by position, so exactly nine Key columns are required.
# NOTE(review): presumably relies on the order in which pivot emits the Key
# columns matching this list -- verify against the actual Key labels.
df_ga_ts.columns = ['dt', 'Export Empty', 'Export Full', 'Export Total',
                    'Import Empty', 'Import Full', 'Import Total', 'Total All', 'Total Empty', 'Total Full']

# Keep only dates for which every series has a value.
df_ga_ts = df_ga_ts.dropna()

In [None]:
doYoyChart(df_ga_ts, location='Georgia', metric='Import Empty')

In [None]:
doYoyChart(df_ga_ts, location='Georgia', metric='Export Total')

## Northwest Seaport Alliance (Ports of Seattle and Tacoma)

In [None]:
pageCount = 7

# Collect the search hits of result pages 1 through pageCount.
res = []
for page in range(1, pageCount + 1):
    res.extend(googleSearch("site:nwseaportalliance.com volume history", page))

# Keep the links of hits whose name mentions both "volume" and "history".
links = [
    hit.link
    for hit in res
    if all(word in hit.name.lower() for word in ('volume', 'history'))
]

def _nwHeaderLabels(tmp, row, replaceCr=False):
    """Return ['Key', ...] labels built from the cells of ``row``, skipping the first cell."""
    if replaceCr:
        clean = lambda v: str(v).replace("\r", " ").strip()
    else:
        clean = lambda v: str(v).strip()
    return ['Key'] + tmp.iloc[row, 1:].apply(clean).to_list()


def _normalizeNwSeaportTable(tmp, url):
    """Turn one raw extracted table into a transposed frame with uniform columns."""
    if tmp.iloc[0, 1] == "Int'l Import full TEUs":
        # Header is in the first row; its cells may contain carriage returns.
        tmp.columns = _nwHeaderLabels(tmp, 0, replaceCr=True)

        tmp = tmp.iloc[1:].set_index("Key").T

    elif "Break Bulk" in tmp.iloc[:, 0].map(lambda v: re.sub(r"[\d,]", "", str(v)).strip()).values:
        # When the first two cells of the first row are both empty, that row
        # is skipped; the header is then taken from the next row.
        if pd.isnull(tmp.iloc[0, 0]) and pd.isnull(tmp.iloc[0, 1]):
            tmp = tmp.iloc[1:, :]

        tmp.columns = _nwHeaderLabels(tmp, 0)

        tmp = tmp.set_index("Key").iloc[1:].T

    else:
        # Header is in the second row; the first two rows and the first
        # remaining column are dropped.
        tmp.columns = _nwHeaderLabels(tmp, 1)

        tmp = tmp.set_index("Key").iloc[2:, 1:].T

    # Remove digits and commas from the column labels.
    tmp.columns = tmp.columns.map(lambda v: re.sub(r"[\d,]", "", str(v)).strip())

    tmp = tmp.rename({ "Break Bulk": "Breakbulk" }, axis=1)

    # Make sure every report exposes the same optional columns.
    for optional in ('Grain', 'Gypsum', 'Vessel Calls'):
        if optional not in tmp.columns:
            tmp[optional] = [''] * tmp.shape[0]

    tmp['src'] = [url] * tmp.shape[0]
    return tmp


@memory.cache
def fetchAndParseNwSeaportReport(url, retries=5):
    """Extract and normalise the first table of a Northwest Seaport Alliance PDF.

    The extraction is attempted ``retries + 1`` times; an attempt that raises
    ``HTTPError`` (reported with a "Failed: <url>" message) or ``KeyError``
    is followed by the next attempt.

    Parameters
    ----------
    url : address of the PDF, passed straight to ``read_pdf``.
    retries : number of additional attempts after the first one; a negative
        value performs no attempt at all.

    Returns
    -------
    The transposed DataFrame with cleaned column labels, the 'Grain',
    'Gypsum' and 'Vessel Calls' columns present, and a 'src' column holding
    ``url``; or None when every attempt failed.
    """
    for _ in range(retries + 1):
        try:
            return _normalizeNwSeaportTable(read_pdf(url, pages='all')[0], url)
        except HTTPError:
            print("Failed: " + url)
        except KeyError:
            # Retried without a message.
            continue

    return None

In [None]:
%%time
%%capture

# Parse every report found by the search; a failed report yields None.
sea_dfs = []
for u in links:
    sea_dfs.append(fetchAndParseNwSeaportReport(u))

# pandas.concat silently drops None entries (it raises only if all are None).
sea_df = pd.concat(sea_dfs)

# Each index label is treated as a year and mapped to December 31 of that year;
# labels that do not parse become NaT.
sea_df['dt'] = sea_df.index.map(lambda v: pd.to_datetime("12-31-" + str(v), format='%m-%d-%Y', errors='coerce'))

# Drop apostrophes from column names (e.g. "Int'l ..." becomes "Intl ...").
sea_df.columns = sea_df.columns.map(lambda s: s.replace("'", ""))
# Convert every column except the last three to numbers, removing thousands separators.
# NOTE(review): presumably the last three columns are the non-numeric ones
# (such as 'src' and 'dt') -- confirm the column order after concat.
sea_df[sea_df.columns.to_list()[:-3]] = sea_df[sea_df.columns.to_list()[:-3]]\
        .applymap(lambda v: pd.to_numeric(str(v).replace(",", ""), errors='coerce'))

# Keep rows with a valid date and average rows that share the same date.
sea_df = sea_df[sea_df['dt'].notna()]
sea_df = sea_df.groupby('dt').agg('mean').reset_index()

# Expand to month-end rows; each value is copied backwards over at most the
# 11 preceding empty months.
sea_df = sea_df.set_index('dt').resample('1M').mean().bfill(limit=11).reset_index()

In [None]:
doChart(sea_df, location="Seattle/Tacoma", metric="Intl Export full TEUs")

In [None]:
doChart(sea_df, location="Seattle/Tacoma", metric="Intl Empty TEUs")

## Port of Houston

In [None]:

req = Request("https://porthouston.com/wp-content/uploads/Container-Volume-TEU-stats-in-depth-January-2020.pdf",
              data=None, headers={ 'User-Agent': uaString })

# Column names imposed on every table extracted from the PDF.
cols = ["Date", "Loaded Imports", "Loaded Exports", "Loaded Total",
        "Empty Imports", "Empty Exports", "Empty Total", "Loaded and Empty Total"]

# Read the PDF inside a context manager so the HTTP response is closed
# once tabula has consumed the bytes.
with urlopen(req) as data:
    dfs_hs = read_pdf(BytesIO(data.read()), pages='all', pandas_options={ 'names': cols })

# The first five rows of the first table are skipped; later tables are used whole.
df_hs = pd.concat([dfs_hs[0].iloc[5:]] + dfs_hs[1:])

# Convert every column after 'Date' to numbers, removing thousands separators
# from text cells; anything non-numeric becomes NaN.
df_hs[df_hs.columns.to_list()[1:]] = df_hs[df_hs.columns.to_list()[1:]]\
        .applymap(lambda v: pd.to_numeric(v.replace(",", "") if isinstance(v, str) else v, errors='coerce'))

# 'Date' labels such as "Jan-20" (format '%b-%y'); other values become NaT.
df_hs['dt'] = df_hs['Date'].map(lambda v: pd.to_datetime(str(v), format='%b-%y', errors='coerce'))

#df_hs.tail()

In [None]:
doChart(df_hs, metric="Empty Imports", location="Houston")

In [None]:
doChart(df_hs, metric="Empty Exports", location="Houston")

In [None]:
doYoyChart(df_hs, metric="Empty Imports", location="Houston")

In [None]:
doYoyChart(df_hs, metric="Loaded Total", location="Houston")

## Port of Charleston

In [None]:
req = Request("http://scspa.com/wp-content/uploads/gl078-pc-teu-history.pdf",
              data=None, headers={ 'User-Agent': uaString })

# Read the PDF inside a context manager so the HTTP response is closed
# once tabula has consumed the bytes.
with urlopen(req) as data:
    dfs_ch = read_pdf(BytesIO(data.read()), pages='all')

def fixColumns(df):
    """Reshape one raw extracted table into a frame of named numeric columns.

    The first row of ``df`` is dropped. Four columns (at positions 1, 2, 3
    and 7) whose cells hold space-separated values are split into separately
    named columns, the result is rearranged, every column after 'DATE' is
    converted to numbers, and a ``dt`` column is parsed from 'DATE'.

    All column positions are hard-coded.
    NOTE(review): presumably they match the layout tabula extracts from the
    Charleston PDF -- confirm against a sample table.

    Returns the reshaped DataFrame.
    """
    res = df.iloc[1:].copy()

    # split the column at position 1 into the two RAIL values
    # (if no cell contains a space the split result has no column 1 and the
    # lookup below fails; the same applies to the other splits)
    _ = df.iloc[1:, 1].str.split(" ", n = 1, expand = True) 

    res["RAIL-DRAYS"]= _[0] 

    res["RAIL-RATIO"]= _[1] 

    # keep the first raw column plus everything in res from position 2 onward
    # (i.e. drop the column currently at position 1)
    res = pd.concat([df.iloc[1:, 0], res.iloc[:, 2:]], axis=1)

    # split the column at position 2 into the two PIER-LOADED values
    _ = df.iloc[1:, 2].str.split(" ", n = 1, expand = True) 

    res["PIER-LOADED-EXPORT"]= _[0] 

    res["PIER-LOADED-IMPORT"]= _[1] 

    # again drop the column currently at position 1 of res
    res = pd.concat([df.iloc[1:, 0], res.iloc[:, 2:]], axis=1)

    # split the column at position 3 into the two PIER-EMPTY values and PIER-TOTAL
    _ = df.iloc[1:, 3].str.split(" ", n = 2, expand = True) 

    res["PIER-EMPTY-EXPORT"]= _[0]

    res["PIER-EMPTY-IMPORT"]= _[1]

    res["PIER-TOTAL"]= _[2] 

    # again drop the column currently at position 1 of res
    res = pd.concat([df.iloc[1:, 0], res.iloc[:, 2:]], axis=1)

    # split the column at position 7 into the two TEU-EMPTY values
    _ = df.iloc[1:, 7].str.split(" ", n = 1, expand = True) 

    res["TEU-EMPTY-EXPORT"]= _[0]

    res["TEU-EMPTY-IMPORT"]= _[1]

    # final layout: first raw column, raw columns 4-5, res from position 8
    # onward, then the last raw column
    res = pd.concat([df.iloc[1:, 0], df.iloc[1:, 4:6], res.iloc[:, 8:], df.iloc[1:, -1]], axis=1)

    # fix remaining column labels
    # (first three and last are renamed; the names in between are kept as they are)
    res.columns = ['DATE', 'TEU-LOADED-EXPORT', 'TEU-LOADED-IMPORT'] + res.columns.to_list()[3:-1] + ["TEU-TOTAL"]

    # convert all columns to numeric
    # (thousands separators are removed from text cells; non-numeric values become NaN)
    res[res.columns.to_list()[1:]] = res[res.columns.to_list()[1:]]\
            .applymap(lambda v: pd.to_numeric(v.replace(",", "") if isinstance(v, str) else v, errors='coerce'))

    # 'DATE' labels such as "January, 2020"; other values become NaT
    res['dt'] = res['DATE'].apply(lambda v: pd.to_datetime(v, format="%B, %Y", errors='coerce'))

    return res

# Reshape every extracted table and stack the results.
df_ch = pd.concat([fixColumns(d) for d in dfs_ch])

In [None]:
doChart(df_ch, metric='TEU-EMPTY-EXPORT', location='Charleston')

In [None]:
doChart(df_ch, metric='TEU-LOADED-IMPORT', location='Charleston')

In [None]:
doChart(df_ch, metric='TEU-TOTAL', location='Charleston')

In [None]:
doYoyChart(df_ch, metric='TEU-LOADED-EXPORT', location='Charleston')

In [None]:
doYoyChart(df_ch, metric='TEU-LOADED-IMPORT', location='Charleston')

In [None]:
doYoyChart(df_ch, metric='TEU-TOTAL', location='Charleston')

## Port of Virginia

In [None]:
# Load the workbook, skipping its first row.
df_va = pd.read_excel("http://www.portofvirginia.com/excel/Port_of_Virginia_Statistics.xlsx", skiprows=1)

# Name the two leading columns; the remaining headers are kept as they are.
df_va.columns = ['Month', 'Key', *df_va.columns.to_list()[2:]]

# Use the first 71 rows only and discard rows that are completely empty.
df_va = df_va.iloc[:71].dropna(how='all')

# Carry each 'Month' value down over at most the next four rows.
df_va['Month'] = df_va['Month'].ffill(limit=4)

# Long format: one row per (Month, Key, remaining column).
df_va_melted = df_va.melt(id_vars=['Month', 'Key'])

# 'Month' plus the former column header form "<month abbreviation> <year>".
df_va_melted['dt'] = pd.to_datetime(
    df_va_melted['Month'] + ' ' + df_va_melted['variable'].map(str),
    format="%b %Y",
)

# Wide format: one row per date, one column per Key.
df_va_clean = df_va_melted.pivot(index='dt', columns='Key', values='value')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doChart(df_va_clean, location='Virginia', metric='Import Empties')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doChart(df_va_clean, location='Virginia', metric='Export Empties')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doChart(df_va_clean, location='Virginia', metric='Import Loads')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doChart(df_va_clean, location='Virginia', metric='Export Loads')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doChart(df_va_clean, location='Virginia', metric='Total TEUs')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doYoyChart(df_va_clean, location='Virginia', metric='Import Loads')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doYoyChart(df_va_clean, location='Virginia', metric='Export Loads')

In [None]:
# Location label spelled "Virginia" so the chart title reads correctly.
doYoyChart(df_va_clean, location='Virginia', metric='Total TEUs')