# CA economic indicators data

In [1]:
import pandas as pd
import zipfile
from urllib.request import urlopen 
import pyarrow
import os
import cpi
import glob
import requests
import matplotlib
import json
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
# Register the theme object exposed by altair_latimes under the name
# 'latimes' and make it the active Altair theme.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

# Show floats with thousands separators and no decimal places.
pd.set_option('display.float_format', '{:,.0f}'.format)

## Labor market data

---

### Unemployment claims

In [2]:
def formaturl(x):
    """Build the URL of one weekly claims page under oui.doleta.gov/unemploy/page8.

    Parameters
    ----------
    x : sequence of two str
        ``x[0]`` is the page's file stem and ``x[1]`` is the directory
        segment. The caller in this notebook passes ``(MMDDYY, YYYY)``.

    Returns
    -------
    str
        ``https://oui.doleta.gov/unemploy/page8/<x[1]>/<x[0]>.html``
    """
    return f'https://oui.doleta.gov/unemploy/page8/{x[1]}/{x[0]}.html'

In [3]:
# One record per Saturday between 1 Jan and 31 Mar 2020: the MMDDYY code used
# in the page file name, an ISO display date, the year, and the page URL.
metadata = []
for d in pd.date_range(start='01/01/2020', end='3/31/2020', freq='W-SAT'):
    week_code = d.strftime('%m%d%y')
    week_year = d.strftime('%Y')
    metadata.append({
        'date': week_code,
        'display_date': d.strftime('%Y-%m-%d'),
        'year': week_year,
        'url': formaturl((week_code, week_year)),
    })

In [4]:
# Download each weekly page, keep the first table pandas parses from it, and
# stamp every row with that week's display date.
df_list = []
for m in metadata:
    page_tables = pd.read_html(m['url'], header=1, skiprows=2)
    df_list.append(page_tables[0].assign(date=m['display_date']))

# Stack the weekly tables into a single frame, leaving column order unsorted.
df = pd.concat(df_list, sort=False)

In [5]:
# List the column labels of the concatenated weekly frame.
df.columns

Index(['STATENAME', 'STATE', 'LAST WEEK', 'YEAR AGO', 'UCFE(1)', 'UCX(1)',
       '&nbsp&nbsp', 'STATE.1', '(%)(2)', 'LAST WEEK.1', 'YEAR AGO.1',
       'UCFE(1).1', 'UCX(1).1', 'ALL PROGRAMS EXCLUDING RAILROAD RETIREMENT',
       'Unnamed: 14', 'date'],
      dtype='object')

In [6]:
# Keep the four columns used below and give them short names.
# The rename is chained onto the column selection rather than applied with
# inplace=True to the freshly sliced frame, which can raise pandas'
# SettingWithCopyWarning.
# NOTE(review): the parsed "STATE" label ends up on the column that is renamed
# to new_claims, so the header labels look shifted relative to the data —
# confirm against the source page.
df = df[
    ['STATENAME', 'STATE', 'ALL PROGRAMS EXCLUDING RAILROAD RETIREMENT', 'date']
].rename(
    columns={
        "STATENAME": "state",
        "STATE": "new_claims",
        "ALL PROGRAMS EXCLUDING RAILROAD RETIREMENT": "all_claims",
    }
)

In [7]:
# Spot-check the rows whose date is 2020-03-07.
df[df['date'] == '2020-03-07']

Unnamed: 0,state,new_claims,all_claims,date
0,Alabama,2160,15918,2020-03-07
1,Alaska,815,8434,2020-03-07
2,Arizona,3357,17827,2020-03-07
3,Arkansas,1843,11700,2020-03-07
4,California,43385,382363,2020-03-07
5,Colorado,1865,23355,2020-03-07
6,Connecticut,2527,40750,2020-03-07
7,Delaware,518,5508,2020-03-07
8,District of Columbia,471,6985,2020-03-07
9,Florida,5325,32489,2020-03-07


---

In [8]:
# Parse every table on the 041820 weekly claims page; the first three rows
# are skipped and the next row supplies the column labels.
claims_page_url = 'https://oui.doleta.gov/unemploy/page8/2020/041820.html'
src = pd.read_html(claims_page_url, header=0, skiprows=3)

In [10]:
# Load the ar5159 extract from the local input/ folder.
# low_memory=False turns off pandas' chunked parsing, so each column's dtype
# is inferred from the whole file.
raw = pd.read_csv('input/ar5159.csv',low_memory=False)

In [11]:
# Keep only the rows whose state code is CA.
rawca = raw.loc[raw['st'].eq('CA')]

In [12]:
# Show the last five California rows.
rawca.tail()

Unnamed: 0,st,rptdate,c1,c2,c3,c4,c5,c6,c7,c8,...,c55,c56,c57,c58,c94,c95,c96,c97,c98,c99
2950,CA,11/30/2019,178216,95667,80840,1709,4532,3421,1122,409,...,184,29152,170,130,0,0,0,1,0,0
2951,CA,12/31/2019,198750,100721,96428,1601,6375,3103,967,385,...,206,37461,243,176,0,0,0,0,0,0
2952,CA,1/31/2020,222158,148123,72559,1476,11064,3580,1038,728,...,191,34854,275,155,0,0,0,0,0,0
2953,CA,2/29/2020,161349,93946,66248,1155,4405,2740,573,294,...,183,30937,173,156,0,0,0,0,0,0
2954,CA,3/31/2020,1654010,1514993,135352,3665,3576,13904,638,277,...,229,35608,146,146,0,0,0,0,0,0


In [13]:
# Same URL and parser settings as the earlier `src` cell: this downloads the
# page again and rebinds `src` before `claims` is built from it.
src = pd.read_html(
    'https://oui.doleta.gov/unemploy/page8/2020/041820.html',
    header=0,
    skiprows=3,
)

In [14]:
# Take the first table parsed from the page and drop its last row.
# NOTE(review): presumably the last row is a totals/footnote line — confirm.
# src[0] is already a DataFrame and .copy() already yields an independent
# frame, so no extra pd.DataFrame(...) wrapper is needed.
claims = src[0].iloc[:-1].copy()

In [16]:
# Normalise the column labels: trim, lower-case, turn spaces and hyphens into
# underscores, and drop parentheses.
# regex=False makes every pattern a literal string. "(" and ")" are regex
# metacharacters, and the default value of `regex` differs between pandas
# versions, so being explicit keeps this cell portable and warning-free.
claims.columns = (
    claims.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

In [17]:
claims.columns

Index(['statename', 'state', 'last_week', 'year_ago', 'ucfe1', 'ucx1',
       '&nbsp&nbsp', 'state.1', '%2', 'last_week.1', 'year_ago.1', 'ucfe1.1',
       'ucx1.1', 'all_programs_excluding_railroad_retirement', 'unnamed:_14'],
      dtype='object')

In [18]:
# Keep only the row(s) whose statename is California.
ca_claims = claims.loc[claims['statename'].eq('California')]

In [19]:
# Show the first rows of the California subset.
ca_claims.head()

Unnamed: 0,statename,state,last_week,year_ago,ucfe1,ucx1,&nbsp&nbsp,state.1,%2,last_week.1,year_ago.1,ucfe1.1,ucx1.1,all_programs_excluding_railroad_retirement,unnamed:_14
4,California,528360,-127112,484460,267,245,&nbsp &nbsp &nbsp &nbsp &nbsp &nbsp,2422458,14,992504,2060696,2164,1164,2425786,


### Characteristics of the unemployment insurance claimants

In [None]:
# Reference page: https://oui.doleta.gov/unemploy/chariu.asp
# NOTE(review): presumably input/ar203.csv was downloaded from that page — confirm.
claimants = pd.read_csv('input/ar203.csv')

In [None]:
# Readable labels for the coded ar203 columns.
#
# The original mapping produced duplicate column labels: 'INA' was used four
# times, and 'ina' plus 'other'/'Other' collide once the next cell lower-cases
# the labels. With duplicate labels, claimants['ina'] returns several columns
# instead of one. The colliding labels now carry a group prefix.
# NOTE(review): the group prefixes (sex / age / ethnicity / race / industry /
# occupation) are inferred from the order of the codes in this mapping —
# confirm against the ETA 203 data map before publishing.
claimant_labels = {
    'st': 'state',
    'rptdate': 'date',
    'c1': 'population',
    # presumably sex
    'c2': 'male',
    'c3': 'female',
    'c4': 'sex_INA',  # was 'INA'
    # age bands
    'c12': '<22',
    'c13': '22-24',
    'c14': '25-34',
    'c15': '35-44',
    'c16': '45-54',
    'c17': '55-59',
    'c18': '60-64',
    'c19': '>=65',
    'c20': 'age_INA',  # was 'INA'
    # presumably ethnicity
    'c40': 'hisp',
    'c41': 'nothisp',
    'c42': 'ethnicity_ina',  # was 'ina'
    # presumably race
    'c43': 'ai_an',
    'c44': 'asian',
    'c45': 'black',
    'c46': 'nh_pi',
    'c47': 'white',
    'c48': 'race_other',  # was 'other'
    # presumably industry
    'c49': 'Ag/Forestry/Fishing/Hunting',
    'c50': 'Mining',
    'c51': 'Utilities',
    'c52': 'Construction',
    'c53': 'Manufacturing',
    'c54': 'Wholesale Trade',
    'c55': 'Retail Trade',
    'c56': 'Transportation & Warehouse',
    'c57': 'Information',
    'c58': 'Real Estate Rental & Leasing',
    'c59': 'Professional/Scientific/ Tech.Services',
    'c60': 'Management of Companies & Enterprises',
    'c61': 'Admin.&Support/Waste Mgmt./Remedia. Serv.',
    'c62': 'Other Services',
    'c63': 'Educational Services',
    'c64': 'Health Care & Social Assistance',
    'c65': 'Arts, Entertainment & Recreation',
    'c66': 'Accommodation and Food Services',
    'c67': 'industry_other',  # was 'Other'
    'c68': 'Public Administration',
    'c69': 'industry_INA',  # was 'INA'
    # presumably occupation
    'c70': 'Management',
    'c71': 'Business & Financial Ops.',
    'c72': 'Computer & Math',
    'c73': 'Architecture & Engineering',
    'c74': 'Life, Physical & Social Sciences',
    'c75': 'Community & Social Services',
    'c76': 'Legal',
    'c77': 'Education, Training & Library',
    'c78': 'Arts, Design, Entertainment Sports & Media',
    'c79': 'Healthcare Practitioner & Technical',
    'c80': 'Healthcare Support',
    'c81': 'Protective Services ',
    'c82': 'Food Prep. & Serving Related',
    'c83': 'Build. & Grounds Cleaning & Maintenance',
    'c84': 'Personal Care & Services',
    'c85': 'Sales & Related',
    'c86': 'Office & Admin. Support',
    'c87': 'Farming, Fishing & Forestry',
    'c88': 'Construction & Extraction',
    'c89': 'Installation, Maintenance & Repair',
    'c90': 'Production',
    'c91': 'Transportation & Material Moving',
    'c92': 'Military Specific',
    'c93': 'occupation_INA',  # was 'INA'
}

# Rebind to the renamed frame rather than mutating in place; codes missing
# from the CSV are ignored by rename().
claimants = claimants.rename(columns=claimant_labels)

In [None]:
# Normalise the column labels: trim, lower-case, turn spaces and hyphens into
# underscores, drop parentheses, then collapse "_&_" (left over from " & ")
# into a single underscore.
# regex=False makes every pattern a literal string. "(" and ")" are regex
# metacharacters, and the default value of `regex` differs between pandas
# versions, so being explicit keeps this cell portable and warning-free.
claimants.columns = (
    claimants.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
    .str.replace('_&_', '_', regex=False)
)

In [None]:
# Keep only the rows whose state code is CA.
ca_claimants = claimants.loc[claimants['state'].eq('CA')]

In [None]:
# Show the last five California claimant rows.
ca_claimants.tail()

## GDP

## Trade 