# Analyzing economic indicators in California after COVID-19

### Load Python tools

In [1]:
import pandas as pd
from urllib.request import urlopen 
import os
import glob
import requests
import matplotlib
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
from sodapy import Socrata
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 34000

## Worker Adjustment and Retraining Notification Act (WARN) notices in CA

### Read historic WARN filings pulled from PDF posted online

In [2]:
#https://edd.ca.gov/jobs_and_Training/Layoff_Services_WARN.htm
path = 'input/'
files = glob.glob(os.path.join(path, "tabula*.csv"))

In [3]:
file_df = (pd.read_csv(f, encoding = "ISO-8859-1", low_memory=False)\
           .assign(date=os.path.basename(f)) for f in files)

### Concatenate the CSVs into one frame and clean up fields

In [4]:
warn_past = pd.concat(file_df, ignore_index=True).drop(['date'], axis=1)

In [5]:
warn_past = warn_past.rename(columns={'no._of_employees': 'employees'})
warn_past['county'] = ''
warn_past = warn_past[['notice_date', 'effective_date', 'received_date', 'company', 'city',
       'county', 'employees', 'layoff_closure']]
warn_past['effective_date'] = warn_past['effective_date'].str.replace(' ', '')

### Read current WARN filings from Excel sheet posted online and clean up fields

In [6]:
#https://edd.ca.gov/jobs_and_Training/Layoff_Services_WARN.htm
warnurl = 'https://edd.ca.gov/jobs_and_Training/warn/WARN_Report.xlsx'

In [8]:
warn_current = pd.read_excel(warnurl, skiprows=3)

ValueError: Your version of xlrd is 2.0.1. In xlrd >= 2.0, only the xls format is supported. Install openpyxl instead.

In [None]:
warn_current = warn_current[warn_current.columns.drop(list(warn_current.filter(regex='Unnamed:')))]

In [None]:
warn_current = warn_current.drop(warn_current.index[4255:4270])
warn_current.columns = warn_current.columns.str.strip().str.lower().str.replace(' ','_')\
    .str.replace('(', '').str.replace(')', '').str.replace('/','_').str.replace('no._of_','')

### Concatenate old and new tables into single dataframe

In [None]:
warnall = pd.concat([warn_current, warn_past])

### Fix dates and remove stray characters and rows

In [None]:
warnall['county'] = warnall['county'].str.strip()
warnall['city'] = warnall['city'].str.strip()
warnall = warnall[warnall['notice_date'] != 'Summary by Month']
warnall = warnall[warnall['notice_date'] != 'Total']
warnall['notice_date'] = pd.to_datetime(warnall['notice_date'])
warnall['effective_date'] = pd.to_datetime(warnall['effective_date'])
warnall['effective_year'] = (warnall['effective_date'].dt.year).astype(str)

### Export to CSV

In [None]:
warnall.to_csv('output/warnall.csv')

### Group WARN notices by year

In [None]:
warngroup = warnall.groupby('effective_year').agg({'employees':'sum'}).reset_index()\
.rename(columns={'effective_year':'notices', 'employees':'employees_effected', 'effective_year':'year'})

In [None]:
warnall.head()

### Group WARN notices by day

In [None]:
warngroupday = warnall.groupby('notice_date').agg({'employees':'sum'}).reset_index()\
.rename(columns={'notice_date':'date', 'employees':'employees_effected', 'effective_year':'year'})

In [None]:
warngroupday.tail(10)

### How has the number of employees affected by WARN noticed changed?

In [None]:
warngroup = warngroup.drop(7)
warngroup.year = warngroup.year.str.replace('.0', '', regex=False)

In [None]:
warngroup

In [None]:
warn_chart = alt.Chart((warngroup))\
    .mark_bar(size=30)\
    .encode(
    x=alt.X('year:T', title=' ', axis=alt.Axis(grid=False, tickCount=6, format='%Y'),\
            scale=alt.Scale(domain=('2014-01-01', '2020-05-01'))),
    y=alt.Y('employees_effected:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=4, format=''),\
           scale=alt.Scale(domain=(1, 500000)))
).properties(width=300, height=300,
     title='Employees affected by mass layoffs'
 )

warn_chart_text_today = (
    alt.Chart((warngroup.query("employees_effected == employees_effected.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("year:T"), y=alt.Y("employees_effected:Q"), text=alt.Text("employees_effected:Q",format=','))
)

(warn_chart + warn_chart_text_today).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

### How have WARN noticed changed in recent days? 

In [None]:
warngroupday.head()

In [None]:
warn_chart = alt.Chart((warngroupday.query("date > '01/01/2020'")))\
    .mark_bar(size=5)\
    .encode(
    x=alt.X('date:T', title=' ', axis=alt.Axis(grid=False, tickCount=6, format='%b. %-d, %Y'),\
            scale=alt.Scale(domain=('2020-01-01', '2020-05-01'))),
    y=alt.Y('employees_effected:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=4, format=''),\
           scale=alt.Scale(domain=(0, 40000)))
).properties(width=700, height=300,
     title='Employees affected by mass layoffs'
 )

warn_chart_text_today = (
    alt.Chart((warngroupday.query("employees_effected == employees_effected.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("employees_effected:Q"), text=alt.Text("employees_effected:Q",format=','))
)

(warn_chart + warn_chart_text_today).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

### Share of recent WARN notices by county

In [None]:
warn_covid = warnall[warnall['effective_date'] > '2020/03/01']

In [None]:
((warn_covid['county'].value_counts('normalize')*100).round(2).astype(str) + '%').head(10)

### Top companies issuing WARN notices statewide since March 1

In [None]:
warn_covid_companies = warn_covid.groupby(['company']).agg('sum').reset_index()
warn_covid_companies.sort_values(by='employees', ascending=False).head(10)

### Top companies issuing WARN notices in LA County since March 1

In [None]:
warn_covid_companies_la = warn_covid[warn_covid['county'] == 'Los Angeles County']\
    .groupby(['company']).agg('sum').reset_index()
warn_covid_companies_la.sort_values(by='employees', ascending=False).head(10)

In [None]:
warn_covid.head()

---

### County-by-county unemployment for use later

In [None]:
### Local Area Unemployment Statistics (LAUS)

In [None]:
# https://data.edd.ca.gov/Labor-Force-and-Unemployment-Rates/Local-Area-Unemployment-Statistics-LAUS-/e6gw-gvii

In [None]:
laus_token = 'E1ZFEgt5pMsnqQvHOr7wcf1Da'

client = Socrata('data.edd.ca.gov', laus_token, username="matt.stiles@latimes.com", password="datadesk!1")

results = client.get("e6gw-gvii", limit=200000)
laus = pd.DataFrame.from_records(results)

In [None]:
laus.columns = laus.columns.str.strip().str.lower().str.replace(' ', '_')\
    .str.replace('(', '').str.replace(')', '').str.replace('-','_').str.replace('_&_','_')

In [None]:
laus['date'] = pd.to_datetime(laus['date'])
laus['unemployment_rate'] = laus['unemployment_rate'].astype(float)

In [None]:
laus_state = laus[(laus['seasonally_adjusted_y_n'] == 'Y') &\
               (laus['area_type'] == 'State')]

In [None]:
laus_state.head()

In [None]:
laus_sa = laus[(laus['seasonally_adjusted_y_n'] == 'Y')]

In [None]:
laus_sa.head()

---

### Pull historic unemployment rates by county from BLS

In [None]:
formaturl = lambda x: 'https://www.bls.gov/lau/laucnty' + f'{x[0]}' + f'{x[1]}' + '.xlsx'

In [None]:
metadata = []
for d in pd.date_range(start='01/01/1990', end='12/31/2019', freq='Y'):
    metadata.append(dict(year = d.strftime('%y'), \
                         url = formaturl((d.strftime('%y')))))

In [None]:
df_list = []

for m in metadata:
    df_list.append((pd.read_excel(m['url'],skiprows=4,\
    dtype={'Code': str, 'Code.1': str, 'Code.2': str, 'Year': str,\
          'Unnamed: 5': str})).assign(date=m['year']))
    
df = pd.concat(df_list, sort=False)

In [None]:
df.rename(columns={"Code": "laus", "Code.1": "stfips", "Code.2": "ctyfips", "Year": "year", "(%)":"rate",\
                   "County Name/State Abbreviation": "county", "Force": "labor_force", "Employed": "employed",\
                    "Unemployed": "unemployed"}, inplace=True)

In [None]:
df = df.drop(['Unnamed: 5', 'date'], axis=1)

In [None]:
df = df.drop([0], axis=0)
df = df.dropna()

In [None]:
counties = pd.DataFrame(df)

In [None]:
ca_cty_annual = pd.DataFrame(counties[counties['county'].str.contains(', CA')]).reset_index()

In [None]:
ca_cty_annual['county'] = ca_cty_annual['county'].str.replace(' County, CA','')
ca_cty_annual['county'] = ca_cty_annual['county'].str.replace(' County/city, CA','')
ca_cty_annual['rate'] = ca_cty_annual['rate'] / 100

In [None]:
ca_cty_annual.tail()

### Annual unemployment rates, by California county: 1990-2019

In [None]:
ca_cty_annual_chart = alt.Chart(ca_cty_annual)\
    .mark_bar(size=3)\
    .encode(
    x=alt.X('year:T', title=' ', axis=alt.Axis(grid=False, tickCount=2, format='%Y'),\
           scale=alt.Scale(domain=('1990-01-01', '2020-01-01'))),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
           scale=alt.Scale(domain=(0, .35)))
).properties(width=80, height=70,
    title='Annual unemployment rates, by California county: 1990-2019'
 ).facet(
    facet=alt.Facet('county:N'),
    columns=8,
    padding={"left": -15, "top": 0, "right": -0, "bottom": 0}
)

(ca_cty_annual_chart).configure_view(strokeOpacity=0)

---

### Time series data from BLS for state unemployment rate

In [None]:
# https://data.edd.ca.gov/Labor-Force-and-\
#     Unemployment-Rates/Civilian-Unemployment-Rate-for-US-and-California/x7g9-zu4h

In [None]:
rate_token = 'E1ZFEgt5pMsnqQvHOr7wcf1Da'

client = Socrata('data.edd.ca.gov', rate_token, username="matt.stiles@latimes.com", password="datadesk!1")

results = client.get("x7g9-zu4h", limit=2000000)
unemp_rate_state = pd.DataFrame.from_records(results)

In [None]:
unemp_rate_state.columns = unemp_rate_state.columns.str.strip().str.lower().str.replace(' ', '_')\
    .str.replace('(', '').str.replace(')', '').str.replace('-','_').str.replace('_&_','_')

In [None]:
unemp_rate_state.date = pd.to_datetime(unemp_rate_state.date)

In [None]:
unemp_rate_state.head()

In [None]:
unemp_rate_state.rename(columns={'geographic_area':'place','seasonally_adjusted':'rate'},inplace=True)

In [None]:
unemp_rate_state = unemp_rate_state.drop(['not_seasonally_adjusted', 'area_type'], axis=1)

In [None]:
unemp_rate_state.head()

In [None]:
ca_rate_chart = alt.Chart((unemp_rate_state.query("date > '01/01/1990'")))\
    .mark_line(size=3)\
    .encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
           scale=alt.Scale(domain=(0, .15))),
    color=alt.Color('place', title=' ')
).properties(width=800, height=200,
     title='Unemployment rate in California, 1990 - March'
 )

ca_rate_chart_text_today = (
    alt.Chart((unemp_rate_state.query("rate == .122 & date == '02/01/2010'")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("rate:Q"), text=alt.Text("rate:Q",format=',.1%'))
)

ca_rate_chart_text_max = (
    alt.Chart((unemp_rate_state.query("date == '2020-03-01'")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("rate:Q"), text=alt.Text("rate:Q",format=',.1%'))
)

(ca_rate_chart + ca_rate_chart_text_max + ca_rate_chart_text_today).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

In [None]:
ca_rate_chart = alt.Chart((unemp_rate_state.query("date > '01/01/2019'"))).mark_line(size=4)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%b. %-d, %Y')),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
           scale=alt.Scale(domain=(.025, .055))),
    color=alt.Color('place', title=' ')
).properties(width=800, height=200,
     title='Unemployment rate in California'
)


ca_rate_chart_text = (
    alt.Chart((unemp_rate_state.query("date == date.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("rate:Q"), text=alt.Text("rate:Q",format=',.1%'))
)

(ca_rate_chart + ca_rate_chart_text).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

---

### Unemployment claims

In [None]:
url = 'https://oui.doleta.gov/unemploy/csv/ar539.csv'
#https://oui.doleta.gov/dmstree/handbooks/402/402_4/4024c6/4024c6ar539.pdf
#https://oui.doleta.gov/dmstree/handbooks/401/i_1.pdf
raw = pd.read_csv(url,low_memory=False)

In [None]:
raw.rename(columns={'st':'state',
'rptdate':'date',
'c1':'week',
'c2':'week_ending',
'c3':'initial_claims',
'c4':'fic',
'c5':'xic',
'c6':'wsic',
'c7':'wseic',
'c8':'continued_claims',
'c9':'fcw',
'c10':'xcw',
'c11':'wscw',
'c12':'wsecw',
'c13':'ebt',
'c14':'ebui',
'c15':'abt',
'c16':'abui',
'c17':'at',
'c18':'covered_unemployment',
'c19':'rate_insured_unemployment',
'c20':'ar',
'c21':'p',
'c22':'status',
'c23':'status_change_date'
}, inplace=True)

In [None]:
raw['date'] = pd.to_datetime(raw['date'])

In [None]:
rawca = raw[raw['state'] == 'CA'].reset_index()

### New unemployment claims

In [None]:
ca_claims_chart = alt.Chart((rawca.query("date > '01/01/1990'"))).mark_bar(size=1)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('initial_claims:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format=''))
).properties(width=800, height=200,
     title='New unemployment claims in California, by week'
 )

ca_claims_chart_text = (
    alt.Chart((rawca.query("date == date.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("initial_claims:Q"), text=alt.Text("initial_claims:Q",format=','))
)

(ca_claims_chart + ca_claims_chart_text).configure_view(strokeOpacity=0)

In [None]:
ca_claims_chart = alt.Chart((rawca.query("date > '01/01/2019'"))).mark_bar(size=10)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%b. %-d, %Y')),
    y=alt.Y('initial_claims:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format=''))
).properties(width=800, height=200,
     title='New unemployment claims in California, by week'
 )

ca_claims_chart_text = (
    alt.Chart((rawca.query("initial_claims == initial_claims.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("initial_claims:Q"), text=alt.Text("initial_claims:Q",format=','))
)

(ca_claims_chart + ca_claims_chart_text).configure_view(strokeOpacity=0)

In [None]:
continued_claims_chart = alt.Chart((rawca.query("date > '01/01/1990'"))).mark_bar(size=1)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('continued_claims:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=5, format=''))
).properties(width=800, height=400,
     title='Continued unemployment claims in California, by week'
 )

continued_claims_chart_text = (
    alt.Chart((rawca.query("continued_claims == continued_claims.max()")))\
    .mark_text(dy=-14, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("continued_claims:Q"), text=alt.Text("continued_claims:Q",format=','))
)

(continued_claims_chart + continued_claims_chart_text).configure_view(strokeOpacity=0)

In [None]:
continued_claims_chart = alt.Chart((rawca.query("date > '01/01/2019'"))).mark_bar(size=10)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('continued_claims:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=5, format=''))
).properties(width=800, height=400,
     title='Continued unemployment claims in California, by week'
 )

continued_claims_chart_text = (
    alt.Chart((rawca.query("continued_claims == continued_claims.max()")))\
    .mark_text(dy=-14, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("continued_claims:Q"), text=alt.Text("continued_claims:Q",format=','))
)

(continued_claims_chart + continued_claims_chart_text).configure_view(strokeOpacity=0)

In [None]:
rawca.sort_values(by='date', ascending=False).head()

---

### Characteristics of the unemployment insurance claimants

In [None]:
#https://oui.doleta.gov/unemploy/chariu.asp
claimants_url = 'https://oui.doleta.gov/unemploy/csv/ar203.csv'
claimants = pd.read_csv(claimants_url)

In [None]:
claimants.rename(columns={'st':'state',
'rptdate':'date',
'c1':'population',
'c2':'male',
'c3':'female',
'c4':'mf_ina',
'c12':'<22',
'c13':'22-24',
'c14':'25-34',
'c15':'35-44',
'c16':'45-54',
'c17':'55-59',
'c18':'60-64',
'c19':'>=65',
'c20':'age_ina',
'c40':'hisp',
'c41':'nothisp',
'c42':'hisp_ina',
'c43':'ai_an',
'c44':'asian',
'c45':'black',
'c46':'nh_pi',
'c47':'white',
'c48':'other',
'c49':'Ag/Forestry/Fishing/Hunting',
'c50':'Mining',
'c51':'Utilities',
'c52':'Construction',
'c53':'Manufacturing',
'c54':'Wholesale Trade',
'c55':'Retail Trade',
'c56':'Transportation & Warehouse',
'c57':'Information',
'c58':'Real Estate Rental & Leasing',
'c59':'Professional/Scientific/ Tech.Services',             
'c60':'Management of Companies & Enterprises',
'c61':'Admin.&Support/Waste Mgmt./Remedia. Serv.',
'c62':'Other Services',
'c63':'Educational Services',
'c64':'Health Care & Social Assistance',
'c65':'Arts, Entertainment & Recreation',
'c66':'Accommodation and Food Services',
'c67':'Other',
'c68':'Public Administration',
'c69':'naics_ina',
'c70':'Management',
'c71':'Business & Financial Ops.',
'c72':'Computer & Math',
'c73':'Architecture & Engineering',
'c74':'Life, Physical & Social Sciences',
'c75':'Community & Social Services',
'c76':'Legal',
'c77':'Education, Training & Library',
'c78':'Arts, Design, Entertainment Sports & Media',
'c79':'Healthcare Practitioner & Technical',
'c80':'Healthcare Support',
'c81':'Protective Services ',
'c82':'Food Prep. & Serving Related',
'c83':'Build. & Grounds Cleaning & Maintenance',
'c84':'Personal Care & Services',
'c85':'Sales & Related',
'c86':'Office & Admin. Support',
'c87':'Farming, Fishing & Forestry',
'c88':'Construction & Extraction',
'c89':'Installation, Maintenance & Repair',
'c90':'Production',
'c91':'Transportation & Material Moving',
'c92':'Military Specific',
'c93':'sector_ina',
}, inplace=True)

In [None]:
claimants.columns = claimants.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('<', '')\
    .str.replace('(', '').str.replace(')', '').str.replace('-','_').str.replace('_&_','_').str.replace('>=','')

In [None]:
claimants['allpop'] = claimants.apply(lambda x: x['male'] + x['female'] + x['mf_ina'], axis=1)

In [None]:
claimants['date'] = pd.to_datetime(claimants['date'])

In [None]:
claimants['under35'] = claimants.apply(lambda x: x['22'] + x['22_24'] + x['25_34'], axis=1)

In [None]:
claimants.head()

In [None]:
ca_claimants = claimants[claimants['state'] == 'CA']

In [None]:
ca_claimants.tail()

### What share of CA claimants are older?

In [None]:
ca_claimants_melt = pd.melt(ca_claimants, id_vars=['date'], \
                            value_vars=['22','22_24','25_34','35_44','45_54','55_59','60_64','65'],
        var_name='age', value_name='count')

In [None]:
ca_claimants_melt_select = ca_claimants_melt[((ca_claimants_melt['date'] > '1999-12-31') \
                                             & (ca_claimants_melt['date']< '2001-01-31')) |\
                                            (ca_claimants_melt['date'] > '2018-12-31') \
                                             & (ca_claimants_melt['date']< '2020-01-31')]

In [None]:
ca_claim_year_mean = ca_claimants_melt.groupby(['age', (pd.DatetimeIndex(ca_claimants_melt['date']).year)])\
    .agg('mean').reset_index()

In [None]:
alt.Chart(ca_claimants_melt.query("date > '1/31/2000'")).mark_bar(size=3).encode(
    x=alt.X('count:Q', title=' ', stack="normalize", axis=alt.Axis(tickCount=4,format='%',offset=1, grid=False)),
    y=alt.Y('date:T', title=' ', axis=alt.Axis(tickSize=0,domainOpacity=0,format='%Y',\
                                               tickCount=8,offset=3, gridWidth=.6, gridColor='#dddddd',)),
    color=alt.Color('age', title=' ', scale=alt.Scale(scheme='tableau20')),
    order=alt.Order(
      'age',
      sort='ascending'
    )
).configure_view(strokeOpacity=0).properties(width=700, height=600,
    title='Share of unemployment benefits recipients in California, by age'
).configure_legend(
    orient='top',
    symbolType='square'
).configure_axis(
    labelFontSize=12)

In [None]:
ca_claimants

In [None]:
ca_claim_year_mean['date'] = ca_claim_year_mean['date'].astype(str)

In [None]:
alt.Chart(ca_claim_year_mean).mark_bar(size=20).encode(
    x=alt.X('count:Q', title=' ', stack="normalize", axis=alt.Axis(tickCount=4,format='%',offset=1, grid=False)),
    y=alt.Y('date:O', title=' ', axis=alt.Axis(tickSize=0,domainOpacity=0,format='',\
                                               tickCount=8,offset=3, gridWidth=.6, gridColor='#dddddd',)),
    color=alt.Color('age', title=' ', scale=alt.Scale(scheme='tableau20')),
    order=alt.Order(
      'age',
      sort='ascending'
    )
).configure_view(strokeOpacity=0).properties(width=700, height=500,
    title='Share of unemployment benefits recipients in California, by age'
).configure_legend(
    orient='top',
    symbolType='square'
).configure_axis(
    labelFontSize=10)

---

### Imports data from U.S. Trade Online

#### All state-level imports

In [None]:
#https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132784
caimports = pd.read_csv('input/trade/CA Imports by HS Commodities.csv', skiprows=4)

In [None]:
caimports = caimports.drop(['Unnamed: 3'], axis=1)\
    .rename(columns={'Time':'month', 'State':'state', \
                     'Total Value ($US)':'value'})

In [None]:
caimports.head(30)

#### Imports to CA ports

In [None]:
#https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132782
ca_port_imports = pd.read_csv('input/trade/CA port imports.csv', skiprows=4)

In [None]:
ca_port_imports = ca_port_imports.drop(['Unnamed: 3'], axis=1)\
    .rename(columns={'Time':'month', 'Port':'port', \
                     'Customs Value (Gen) ($US)':'value'})

In [None]:
ca_port_imports.value = ca_port_imports.value.str.replace(',', '', regex=False)

In [None]:
ca_port_imports['port'] = ca_port_imports['port'].str.replace(', CA (District)', '', regex=False)

In [None]:
ca_port_imports['date'] = pd.to_datetime(ca_port_imports['month'])

In [None]:
ca_port_imports['year'] = ca_port_imports['date'].dt.year

In [None]:
ca_port_imports['value'] = ca_port_imports['value'].astype(int)

In [None]:
ca_port_imports_group = ca_port_imports.groupby(['year']).agg('sum').reset_index()

In [None]:
ca_port_imports_pivot = ca_port_imports.pivot(index='date', columns='port', values='value').reset_index()

In [None]:
ca_port_imports_pivot.tail(20)

In [None]:
ca_port_imports_pivot['total'] =\
    ca_port_imports_pivot.apply(lambda x: x['Los Angeles'] + x['San Diego'] + x['San Francisco'], axis=1)

In [None]:
ca_port_imports_pivot

In [None]:
ca_port_imports_group

In [None]:
alt.Chart(ca_port_imports_group).mark_bar().encode(
    x=alt.X('year:N', title=' ', axis=alt.Axis(format='')),
    y=alt.Y('value:Q', title=' ', axis=alt.Axis(tickSize=0,domainOpacity=0,format='',\
                                               tickCount=5,offset=3, gridWidth=.6, gridColor='#dddddd'))
).configure_view(strokeOpacity=0).properties(width=320, height=400,
    title='Imports to California ports, January-March, by year'
).configure_legend(
    orient='top',
    symbolType='square'
).configure_axis(
    labelFontSize=12)

### Exports data from U.S. Trade Online

#### All state-level exports

In [None]:
#https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132783
caexports = pd.read_csv('input/trade/CA Exports by HS Commodities.csv', skiprows=4)

In [None]:
caexports = caexports.drop(['Unnamed: 3'], axis=1)\
    .rename(columns={'Time':'month', 'State':'state', \
                     'Total Value ($US)':'value'})

In [None]:
caexports.head(30)

#### Exports from CA ports

In [None]:
#https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132781
ca_port_exports = pd.read_csv('input/trade/CA port exports.csv', skiprows=4)

In [None]:
ca_port_exports = ca_port_exports.drop(['Unnamed: 3'], axis=1)\
    .rename(columns={'Time':'month', 'Port':'port', \
                     'Total Exports Value ($US)':'value'})

In [None]:
ca_port_exports.value = ca_port_exports.value.str.replace(',', '', regex=False)

In [None]:
ca_port_exports['port'] = ca_port_exports['port'].str.replace(', CA (District)', '', regex=False)

In [None]:
ca_port_exports['date'] = pd.to_datetime(ca_port_exports['month'])

In [None]:
ca_port_exports_pivot = ca_port_exports.pivot(index='date', columns='port', values='value').reset_index()

In [None]:
ca_port_exports_pivot.tail(20)

In [None]:
ca_port_exports.tail(20)

In [None]:
# alt.Chart(ca_port_exports).mark_bar().encode(
#     x=alt.X('month:N'),
#     y=alt.Y('value:Q'),
#     color='port:N'
# )

---

### Labor force participation rate

In [None]:
# https://data.edd.ca.gov/Labor-Force-and-Unemployment-Rates/Labor-Force-Participation-Rate-US-and-California/ww59-3giz

In [None]:
labor_part_rate = pd.read_json('https://data.edd.ca.gov/resource/ww59-3giz.json')

In [None]:
labor_part_rate.columns = labor_part_rate.columns.str.strip().str.lower().str.replace(' ', '_')\
    .str.replace('(', '').str.replace(')', '').str.replace('-','_').str.replace('_&_','_')

In [None]:
labor_part_rate.rename(columns={'california_labor_force_participation':'California', \
                                'us_labor_force_participation':'United States'}, inplace=True)

In [None]:
labor_part_rate_melt = pd.melt(labor_part_rate, id_vars=['date'], \
                               value_vars=['California', 'United States'], var_name='place', value_name='rate')

In [None]:
ca_claims_chart = alt.Chart((labor_part_rate_melt.query("date > '01/01/1976'")))\
.mark_line(size=2)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
            gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
            scale=alt.Scale(domain=(.6, .7))),
    color=alt.Color('place', title=' ', scale=alt.Scale(
            domain=['California', 'United States'],
            range=['#B32F2E', '#ccc'])),
).properties(width=800, height=200,
     title='Labor force participation rate, by month'
 )

(ca_claims_chart).configure_view(strokeOpacity=0).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

In [None]:
ca_claims_chart_zoom = alt.Chart((labor_part_rate_melt.query("date > '01/01/2010'")))\
    .mark_line(size=3)\
    .encode(
    x=alt.X('date:T', title=' ', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
           scale=alt.Scale(domain=(.6, .65))),
     color=alt.Color('place', title=' ', scale=alt.Scale(
            domain=['California', 'United States'],
            range=['#B32F2E', '#ccc'])),
).properties(width=800, height=200,
     title='Labor force participation rate, by month'
 )

(ca_claims_chart_zoom).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

In [None]:
### Current employment statistics

In [None]:
#https://data.edd.ca.gov/Industry-Information-/Current-Employment-Statistics-CES-/r4zm-kdcg

In [None]:
ces_token = 'E1ZFEgt5pMsnqQvHOr7wcf1Da'

client = Socrata('data.edd.ca.gov', ces_token, username="matt.stiles@latimes.com", password="datadesk!1")

results = client.get("r4zm-kdcg", limit=1600000)
ces = pd.DataFrame.from_records(results)

In [None]:
ces.columns = ces.columns.str.strip().str.lower().str.replace(' ', '_')\
    .str.replace('(', '').str.replace(')', '').str.replace('-','_').str.replace('_&_','_')

In [None]:
ces['date'] = pd.to_datetime(ces['date'])
ces['current_employment'] = ces['current_employment'].astype(int)

In [None]:
ces_state = ces[(ces['seasonally_adjusted'] == 'Y') &\
               (ces['area_type'] == 'State')]

In [None]:
ces_sa = ces[(ces['seasonally_adjusted'] == 'Y')]

In [None]:
ces_state.head()

In [None]:
ces_retail = pd.DataFrame(ces[(ces['industry_title'] == 'Retail Trade') &\
               (ces['seasonally_adjusted'] == 'Y')])

In [None]:
ces_totalnonfarm_la_metro = pd.DataFrame(ces[(ces['industry_title'] == 'Total Nonfarm') &\
               (ces['seasonally_adjusted'] == 'Y') &\
                    (ces['area_name'] == 'Los Angeles-Long Beach-Glendale MD')])

In [None]:
ca_claims_chart_zoom = alt.Chart((ces_totalnonfarm_la_metro.query("date > '2013-01-01'")))\
    .mark_line(size=4)\
    .encode(
    x=alt.X('date:T', title=' ', axis=alt.Axis(offset=20, grid=False, tickCount=8, format='%b. %-d, %Y')),
    y=alt.Y('current_employment:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=3, format=''),\
           scale=alt.Scale(domain=(4000000, 4700000)))
).properties(width=800, height=200,
     title='Total employment in California, Los Angeles metro area'
 )

(ca_claims_chart_zoom).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

In [None]:
ces.area_name.value_counts().head()

In [None]:
ces.industry_title.value_counts().head()