## CSX Weekly Freight Reports

---

In [1]:
%%time
%%capture

from tabula import read_pdf
import pandas as pd
import altair as alt
import numpy as np

import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
from urllib.request import urlopen, Request
from urllib.parse import urlencode
from json import dumps, loads
from joblib import Memory

# Chrome launch options used when fetchVolumeReportUrls starts a webdriver:
# no extensions, no GPU, and no visible browser window.
chrome_options = Options()
for chromeFlag in ("--disable-extensions", "--disable-gpu", "--headless"):
    chrome_options.add_argument(chromeFlag)
    
# Note: deprecated
def fetchVolumeReportUrls(page=1):
    """Scrape one page of the CSX volume-trends listing for report links.

    Starts Chrome with the module-level ``chrome_options`` (trying the
    chromedriver at ``/usr/local/bin`` first and falling back to ``/usr/bin``),
    loads the listing page and extracts every anchor of the form
    ``<a href="..." target="_blank" class="new_window">label</a>``.

    Args:
        page: Page number substituted into the ``page`` query parameter.

    Returns:
        A list of ``(href, label)`` tuples, e.g.
        ``('http://csx.gcs-web.com/static-files/...', '2019 Week 27 AAR')``.

    Raises:
        WebDriverException: if neither chromedriver location can be started,
            or if loading the page fails.
    """
    baseUrl = 'https://www.csx.com/index.cfm/investors/volume-trends/?page={}'

    try:
        driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver',
                                  chrome_options=chrome_options)
    except WebDriverException:
        driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver',
                                  chrome_options=chrome_options)

    try:
        driver.get(baseUrl.format(page))
        pageString = driver.page_source
    finally:
        # Always release the browser, even when the page load fails; quit()
        # also shuts down the chromedriver process, which close() leaves running.
        driver.quit()

    # <a href="http://csx.gcs-web.com/static-files/02ae9432-408a-4537-97c2-d0e0d9716e1b" target="_blank" class="new_window">2019 Week 27 AAR</a>
    return re.findall(r'<a href="([^"]*)" target="_blank" class="new_window">([^<]*)', pageString)

# joblib cache rooted at 'data/' (created silently, verbose=0); parseCsxReport
# below is wrapped with it so its results can be reused instead of recomputed.
memory = Memory('data/', verbose=0)

@memory.cache
def parseCsxReport(urlAndWeek):
    """Download one weekly report PDF and return its table as floats.

    Args:
        urlAndWeek: ``(url, label)`` tuple. ``label`` is either
            ``'<year> Week <week>'`` (e.g. ``'2013 Week 29'``; a trailing
            suffix such as ``' AAR'`` after the week number is ignored) or
            ``'<year>-WK<week>'``.

    Returns:
        A DataFrame indexed by ``'Cargo'`` whose columns are named after the
        week and the two years being compared (weekly, QTD and YTD figures plus
        their percent changes), with every cell converted to ``float``.
        Returns ``None`` after printing ``urlAndWeek`` when the label cannot be
        parsed or the PDF table cannot be read/converted.
    """
    def cellToFloat(cell):
        # Strip thousands separators, '%' and other decoration, but keep the
        # sign: a leading '-' (or accounting-style '(') marks a negative value,
        # which a plain "digits and dots only" cleanup would silently flip.
        magnitude = float(re.sub(r'[^0-9.]', '', cell))
        isNegative = cell.lstrip().startswith(('-', '\u2212', '('))
        return -magnitude if isNegative else magnitude

    try:
        url, weekAndYear = urlAndWeek
    except ValueError:
        print(urlAndWeek)
        return None

    # Accept both label layouts: '2013 Week 29' and '2014-WK20'.
    for separator in (' Week ', '-WK'):
        try:
            year, week = weekAndYear.split(separator)
        except ValueError:
            continue
        break
    else:
        print(urlAndWeek)
        return None

    #            WEEK   28                QUARTER TO DATE           YEAR TO DATE
    baseCols = ['Cargo', 'Week {0} {1}', 'Week {0} {2}', 'Week {0} pct-change',
                'Week {0} {1} QTD', 'Week {0} {2} QTD', 'Week {0} QTD pct-change',
                'Week {0} {1} YTD', 'Week {0} {2} YTD', 'Week {0} YTD pct-change']

    try:
        # Only the first token of the week part is used ('49 AAR' -> '49').
        weekNumber = week.split(' ')[0]
        cols = [c.format(weekNumber, int(year), int(year) - 1) for c in baseCols]
    except ValueError:
        print(urlAndWeek)
        return None

    try:
        tmp = read_pdf(url, pages='all', pandas_options={'names': cols})
        df = pd.concat(tmp) if isinstance(tmp, list) else tmp
        # The first row is skipped — presumably the table's own header row
        # (TODO confirm against a sample PDF).
        return df.iloc[1:, :].set_index('Cargo').applymap(cellToFloat)
    except (AttributeError, ValueError):
        print(urlAndWeek)
        return None
        
#parseCsxReport(('http://csx.gcs-web.com/static-files/f2baf981-9658-41f8-9ec0-400e9fdbde11', '2013 Week 29')).head(30)

#parseCsxReport(('http://csx.gcs-web.com/static-files/2d4f737e-1314-46aa-9e8c-a29b40be2a2b', '2019 Week 49 AAR')).head(30)

def getWeekFrom(u):
    """Extract the week number from a report URL or file name.

    Looks for the first occurrence of one of the markers ``Week-``, ``WK``,
    ``Week_``, ``w`` or ``week`` immediately followed by one or two digits.

    Args:
        u: URL or file name, e.g. ``'.../CSX_AAR-2014-WK20_csx.pdf'``.

    Returns:
        The digits as a string, exactly as written (``'20'``, ``'05'``).

    Raises:
        IndexError: if no marker followed by digits is present.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    return re.findall(r"(?:Week-|WK|Week_|w|week)(\d\d?)", u)[0]

#getWeekFrom('https://s2.q4cdn.com/859568992/files/doc_downloads/volume_trends/2014/CSX_AAR-2014-WK20_csx.pdf')

def getUrlsAndWeeks(res):
    """Turn an asset-list response into ``(url, '<year> Week <week>')`` pairs.

    Args:
        res: Decoded response dict; each entry of
            ``res['GetContentAssetListResult']`` must provide a ``'FilePath'``.

    Returns:
        A list of ``(file path, label)`` tuples. The year is 2020 when the
        path contains ``"2020"``, otherwise the second-to-last ``/``-separated
        path component. Entries for which no week number can be extracted are
        skipped.
    """
    pairs = []
    for asset in res['GetContentAssetListResult']:
        path = asset['FilePath']

        if "2020" in path:
            year = 2020
        else:
            year = path.split("/")[-2]

        try:
            week = getWeekFrom(path)
        except IndexError:
            # No recognizable week marker in this path: ignore the asset.
            continue

        pairs.append((path, f"{year} Week {week}"))

    return pairs

# Browser-like User-Agent sent with every asset-list request.
uaString = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/35.0.1916.47 Safari/537.36')

# JSON endpoint that lists the downloadable report assets.
reportListUrl = "https://investors.csx.com/Services/ContentAssetService.svc/GetContentAssetList"


def params(y):
    """Build the request payload asking for the "Volume Trends" assets of year *y*.

    A fresh dict (with a fresh ``TagList``) is returned on every call.
    """
    serviceDto = dict(
        ViewType="2",
        ViewDate="",
        RevisionNumber="1",
        LanguageId="1",
        Signature="",
        ItemCount=-1,
        StartIndex=0,
        TagList=[],
        IncludeTags=True,
    )
    return dict(
        serviceDto=serviceDto,
        assetType="Volume Trends",
        excludeSelection=1,
        year=y,
    )


# HTTP headers for the JSON POST request.
headers = dict([
    ('User-Agent', uaString),
    ('Content-Type', 'application/json; charset=UTF-8'),
])

# Ask the asset-list service for each year's reports and collect
# (url, '<year> Week <week>') pairs across all years.
urls = []
for y in range(2014, 2021):

    req = Request(reportListUrl, data=dumps(params(y)).encode('ascii'), headers=headers)

    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read, instead of leaking one per year.
    with urlopen(req) as data:
        res_dict = loads(data.read())

    urls.extend(getUrlsAndWeeks(res_dict))

# parseCsxReport returns None for reports it cannot parse; pd.concat silently
# drops None entries (and raises ValueError only if every entry is None).
dfs = map(parseCsxReport, urls)

# After the transpose each row is one report column (e.g. 'Week 28 2019') and
# each column is one cargo category.
merged = pd.concat(dfs, axis=1, sort=False).T

#merged.head()

# Keep only rows whose label ends in a digit, i.e. the plain weekly figures
# 'Week <n> <year>'; the pct-change, QTD and YTD rows are excluded.
subset = merged.filter(axis='index', regex='[0-9]$').copy()

# Turn 'Week <n> <year>' into a date: appending ' 6' selects weekday 6 (%w,
# Saturday) of week <n> (%W, weeks starting on Monday).
subset['dt'] = subset.index.map(lambda v: pd.to_datetime(v.replace(' AAR', '') + ' 6', format='Week %W %Y %w'))

CPU times: user 1.45 s, sys: 139 ms, total: 1.59 s
Wall time: 5.05 s


In [2]:
def doChart(metric, df, color='grey'):
    """Display a bar chart of the year-over-year change of one cargo series.

    The weekly values are averaged per calendar month, and each month is
    compared with the same month one year earlier.

    Args:
        metric: Name of the column of ``df`` to chart.
        df: DataFrame with a datetime column ``'dt'`` and a ``metric`` column.
        color: Bar color passed to Altair.

    Returns:
        Whatever ``alt.Chart.display()`` returns; the chart is shown as a side
        effect.
    """
    # Monthly means; months without any report stay in the series as NaN.
    # NOTE: pandas >= 2.2 prefers the alias 'ME' over 'M' for month-end.
    monthly = (
        df[['dt', metric]]
        .sort_values('dt')
        .set_index('dt')[metric]
        .resample('M')
        .mean()
    )

    pmp2 = monthly.reset_index()
    # Compute the 12-month change BEFORE dropping empty months: once rows are
    # removed, a lag of 12 rows no longer corresponds to 12 calendar months.
    # fill_method=None keeps gaps as NaN instead of forward-filling them.
    pmp2['Change'] = pmp2[metric].pct_change(12, fill_method=None) * 100
    pmp2 = pmp2.dropna(subset=[metric])

    c = alt.Chart(pmp2).mark_bar(color=color).encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y('Change:Q', axis=alt.Axis(title='52-Week Growth [%]')),
        tooltip=[alt.Tooltip('dt:T', format='%B %Y', title='Period'), alt.Tooltip('Change:Q')]
    ).properties(
        title='CSX Freight Reports: {}'.format(metric),
        height=450,
        width=750,
        background='white'
    )

    return c.display()
    
# Chart the 'Primary Metal Products' series using doChart's default grey bars.
doChart('Primary Metal Products', subset)

In [3]:
# Chart the 'Lumber & Wood Products' series with tan bars.
metric = 'Lumber & Wood Products'
doChart(metric, subset, color='tan')

In [4]:
# Chart the 'Total Traffic' series with navy bars.
metric = 'Total Traffic'
doChart(metric, subset, color='navy')

In [5]:
# Chart the 'Total Carloads' series with navy bars.
metric = 'Total Carloads'
doChart(metric, subset, color='navy')

In [6]:
# Chart the 'Total Intermodal' series with navy bars.
metric = 'Total Intermodal'
doChart(metric, subset, color='navy')

In [7]:
# Chart the 'Motor Vehicles and Parts' series with purple bars.
# NOTE(review): 'Automotive (Motor Vehicles & Equip.)' used to be assigned to
# `metric` here and was immediately overwritten (a dead store); presumably it
# is an alternate label for the same category in some reports — confirm.
metric = 'Motor Vehicles and Parts'
doChart(metric, subset, color='purple')

In [8]:
# Chart the 'Containers' series with navy bars.
metric = 'Containers'
doChart(metric, subset, color='navy')

In [9]:
# Chart the 'Coal' series with black bars.
metric = 'Coal'
doChart(metric, subset, color='black')

In [10]:
# Chart the 'Chemicals' series with orange bars.
metric = 'Chemicals'
doChart(metric, subset, color='orange')

In [11]:
# Chart the 'Food Products' series with green bars.
metric = 'Food Products'
doChart(metric, subset, color='green')

In [12]:
# Chart the 'Farm Products (excl. Grain)' series with brown bars.
metric = 'Farm Products (excl. Grain)'
doChart(metric, subset, color='brown')

In [13]:
# Chart the 'Grain' series with tan bars.
metric = 'Grain'
doChart(metric, subset, color='tan')

In [14]:
# Chart the 'Grain Mill Products' series with tan bars.
metric = 'Grain Mill Products'
doChart(metric, subset, color='tan')

In [15]:
# Chart the 'Grain Mill Products' series with black bars.
# NOTE(review): this repeats the metric of the preceding cell with only the
# color changed — possibly a different metric was intended; confirm.
metric = 'Grain Mill Products'
doChart(metric, subset, color='black')