# getting retail data from BLS (QCEW)
data [layout](https://www.bls.gov/cew/about-data/downloadable-file-layouts/quarterly/naics-based-quarterly-layout.htm)

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import numpy as np
import altair as alt
import requests
import json
from bs4 import BeautifulSoup
import os

os.environ["USE_PYGEOS"] = "0"
import geopandas as gpd

In [3]:
# Let pandas show up to 1,000 columns and 1,000 rows before truncating output.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [4]:
from datawrapper import Datawrapper

In [5]:
# The Datawrapper access token is read from the `dw_api` environment variable
# (None when the variable is not set) and handed to the API client.
dw_token = os.getenv("dw_api")
dw = Datawrapper(access_token=dw_token)

In [6]:
# Year-month stamp for the current date, formatted as "YYYY-MM".
month = pd.Timestamp.today().strftime("%Y-%m")

In [7]:
# BLS API key, read from the `BLS_Key` environment variable (None if unset).
bls_api = os.getenv("BLS_Key")

---

## Read data

#### Import table from the bureau's API
The data originally came from the bureau's API, but reading it from there is really slow, so I saved a copy to AWS and read it in from there instead.

In [8]:
# Load the saved copy of the QCEW retail-trade extract.
# `area_fips` is read explicitly as a string: the column mixes zero-padded
# numeric codes (e.g. "06075") with alphanumeric ones ("US000"). If pandas is
# left to infer the dtype, numeric-looking codes can be parsed as integers,
# which drops the leading zero and breaks the string comparisons used to
# filter by area later in the notebook.
df = pd.read_csv(
    "https://ix.cnn.io/data/bls-retail/qcew_retail_trade.csv",
    dtype={"area_fips": str},
)

In [9]:
# Area codes for the places of interest, mapped to a human-readable label.
# Insertion order is preserved, so `fips` lists the codes in the order below.
area_labels = {
    "06075": "San Francisco",
    "06001": "Oakland / Alameda County",
    "06013": "Pittsburg / Contra Costa",
    "36061": "New York",
    "41051": "Portland / Multnomah County",
    "53033": "Seattle / King County, WA",
    "US000": "National",
}

# Plain list of the codes, used for filtering.
fips = list(area_labels)

In [10]:
# Narrow down to the relevant areas and to privately owned retailers only
# (own_code 5), then renumber the rows from zero.
in_relevant_area = df["area_fips"].isin(fips)
is_privately_owned = df["own_code"] == 5
df_slim = df.loc[in_relevant_area & is_privately_owned].reset_index(drop=True)

In [11]:
# Quarter label built from the year and quarter columns, e.g. "2019-Q1".
df_slim["q_year"] = (
    df_slim["year"].astype(str).str.cat(df_slim["qtr"].astype(str), sep="-Q")
)

In [12]:
# Parse the "YYYY-Qn" labels as quarterly periods, then convert each period to
# a timestamp so the quarters can be plotted on a date axis.
quarter_periods = pd.PeriodIndex(df_slim["q_year"], freq="Q")
df_slim["quarter_date"] = quarter_periods.to_timestamp()

In [13]:
# Quarterly establishment counts per area, excluding the national row.
# Note: pct_change from Q1 2019 will be more interesting than raw counts.
local_areas = df_slim[df_slim["area_fips"] != "US000"]
alt.Chart(local_areas).mark_line().encode(
    x=alt.X("quarter_date"),
    y=alt.Y("qtrly_estabs"),
    color="area_fips",
)

In [15]:
# Display the national, privately owned rows.
# Notes: doesn't seem like we're going to get quarterly establishments
# nationwide -- too few? The number of establishments has grown; the number of
# employees has not.
is_national_private = (df["area_fips"] == "US000") & (df["own_code"] == 5)
df.loc[is_national_private]

Unnamed: 0,area_fips,own_code,industry_code,agglvl_code,size_code,year,qtr,disclosure_code,qtrly_estabs,month1_emplvl,month2_emplvl,month3_emplvl,total_qtrly_wages,taxable_qtrly_wages,qtrly_contributions,avg_wkly_wage,lq_disclosure_code,lq_qtrly_estabs,lq_month1_emplvl,lq_month2_emplvl,lq_month3_emplvl,lq_total_qtrly_wages,lq_taxable_qtrly_wages,lq_qtrly_contributions,lq_avg_wkly_wage,oty_disclosure_code,oty_qtrly_estabs_chg,oty_qtrly_estabs_pct_chg,oty_month1_emplvl_chg,oty_month1_emplvl_pct_chg,oty_month2_emplvl_chg,oty_month2_emplvl_pct_chg,oty_month3_emplvl_chg,oty_month3_emplvl_pct_chg,oty_total_qtrly_wages_chg,oty_total_qtrly_wages_pct_chg,oty_taxable_qtrly_wages_chg,oty_taxable_qtrly_wages_pct_chg,oty_qtrly_contributions_chg,oty_qtrly_contributions_pct_chg,oty_avg_wkly_wage_chg,oty_avg_wkly_wage_pct_chg
4480,US000,5,44-45,14,0,2019,1,,1040988,15630253,15396927,15400404,127951801731,92848421903,1337255565,636,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,-1572,-0.2,-114433,-0.7,-168875,-1.1,-178209,-1.1,2656676476,2.1,1507523439,1.7,-77655537,-5.5,19,3.1
8965,US000,5,44-45,14,0,2020,1,,1042124,15464678,15306576,15257954,132159234619,95015758823,1339816675,663,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1136,0.1,-165575,-1.1,-90351,-0.6,-142450,-0.9,4207432888,3.3,2167336920,2.3,2561110,0.2,27,4.2
13455,US000,5,44-45,14,0,2021,1,,1043216,15132466,15013474,15065463,136317268042,94509237508,1419401435,696,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1092,0.1,-332212,-2.1,-293102,-1.9,-192491,-1.3,4158033423,3.1,-506521315,-0.5,79584760,5.9,33,5.0
17946,US000,5,44-45,14,0,2022,1,,1049355,15293665,15326857,15316443,151651896851,101972277168,1421911749,762,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,6139,0.6,161199,1.1,313383,2.1,250980,1.7,15334628809,11.2,7463039660,7.9,2510314,0.2,66,9.5
22427,US000,5,44-45,14,0,2019,2,,1043323,15425203,15505375,15556564,129699119593,47298847673,585004703,644,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,373,0.0,-186128,-1.2,-234135,-1.5,-228089,-1.4,3348883624,2.7,232166127,0.5,-45201796,-7.2,25,4.0
26912,US000,5,44-45,14,0,2020,2,,1037726,12807741,13380618,14306824,123106200520,43825759013,463606692,702,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,-5597,-0.5,-2617462,-17.0,-2124757,-13.7,-1249740,-8.0,-6592919073,-5.1,-3473088660,-7.3,-121398011,-20.8,58,9.0
31402,US000,5,44-45,14,0,2021,2,,1047229,15062074,15214416,15299282,150372221616,50729826902,670944756,761,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,9503,0.9,2254333,17.6,1833798,13.7,992458,6.9,27266021096,22.1,6904067889,15.8,207338064,44.7,59,8.4
35893,US000,5,44-45,14,0,2022,2,,1055217,15377855,15391274,15419466,152143512623,49438393684,620922403,760,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7988,0.8,315781,2.1,176858,1.2,120184,0.8,1771291007,1.2,-1291433218,-2.5,-50022353,-7.5,-1,-0.1
40374,US000,5,44-45,14,0,2019,3,,1043974,15556752,15542465,15409511,129037309072,31633342878,385883124,640,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,-702,-0.1,-231863,-1.5,-238232,-1.5,-228519,-1.5,3255902722,2.6,263559493,0.8,-24303100,-5.9,25,4.1
44859,US000,5,44-45,14,0,2020,3,,1043148,14650582,14778923,14815310,133504267981,32036198671,353919950,696,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,-826,-0.1,-906170,-5.8,-763542,-4.9,-594201,-3.9,4466958909,3.5,402855793,1.3,-31963174,-8.3,56,8.8


In [16]:
# Average of the three monthly employment levels reported for each quarter.
df_slim["avg_monthly_employees"] = (
    df_slim["month1_emplvl"]
    .add(df_slim["month2_emplvl"])
    .add(df_slim["month3_emplvl"])
    .div(3)
)

In [17]:
# Average employees per establishment in each quarter.
df_slim["employees_per_estab"] = df_slim["avg_monthly_employees"].div(
    df_slim["qtrly_estabs"]
)

In [18]:
# National trend in employees per establishment, by quarter.
national_rows = df_slim[df_slim["area_fips"] == "US000"]
alt.Chart(national_rows).mark_line().encode(
    x=alt.X("quarter_date"),
    y=alt.Y("employees_per_estab"),
)

### Need to reshape the data so each month's employment level gets its own row

In [19]:
# Split the slimmed-down frame into one frame per calendar quarter.
q1, q2, q3, q4 = (
    df_slim[df_slim["qtr"] == quarter_number] for quarter_number in (1, 2, 3, 4)
)

In [20]:
# Identifier columns carried through unchanged, and the three per-quarter
# monthly employment columns that get renamed to calendar month names.
_EMP_ID_COLUMNS = ["area_fips", "year", "qtr", "quarter_date"]
_EMP_LEVEL_COLUMNS = ["month1_emplvl", "month2_emplvl", "month3_emplvl"]


def _label_quarter_months(quarter_df, month_names):
    """Select the employment columns of one quarter and name them by month.

    Parameters
    ----------
    quarter_df : pandas.DataFrame
        Rows for a single quarter; must contain the identifier columns and the
        three `monthN_emplvl` columns.
    month_names : list of str
        The three calendar month names for that quarter, in order; they replace
        `month1_emplvl`, `month2_emplvl` and `month3_emplvl` respectively.

    Returns
    -------
    pandas.DataFrame
        A new frame with the identifier columns followed by the three renamed
        month columns. `quarter_df` itself is not modified.
    """
    renames = dict(zip(_EMP_LEVEL_COLUMNS, month_names))
    return quarter_df[_EMP_ID_COLUMNS + _EMP_LEVEL_COLUMNS].rename(columns=renames)


# One frame per quarter, with month1/2/3 mapped to that quarter's months.
q1_emp = _label_quarter_months(q1, ["January", "February", "March"])
q2_emp = _label_quarter_months(q2, ["April", "May", "June"])
q3_emp = _label_quarter_months(q3, ["July", "August", "September"])
q4_emp = _label_quarter_months(q4, ["October", "November", "December"])

In [21]:
# Reshape each quarter from wide (one column per month) to long (one row per
# area and month). The month name lands in `variable`, the level in `value`.
melt_id_columns = ["area_fips", "year", "qtr", "quarter_date"]

q1_emp_long = q1_emp.melt(
    id_vars=melt_id_columns, value_vars=["January", "February", "March"]
)
q2_emp_long = q2_emp.melt(
    id_vars=melt_id_columns, value_vars=["April", "May", "June"]
)
q3_emp_long = q3_emp.melt(
    id_vars=melt_id_columns, value_vars=["July", "August", "September"]
)
q4_emp_long = q4_emp.melt(
    id_vars=melt_id_columns, value_vars=["October", "November", "December"]
)

In [22]:
# Stack the four long-format quarters into one frame with a fresh 0..n-1 index.
quarterly_long_frames = [q1_emp_long, q2_emp_long, q3_emp_long, q4_emp_long]
monthly_employees = pd.concat(quarterly_long_frames, ignore_index=True)

In [23]:
# Build a real date for each row from its month name and year, e.g.
# "January 2019". The format is given explicitly ("%B %Y" = full month name
# followed by a four-digit year) so parsing does not depend on pandas' format
# inference and a malformed label fails loudly instead of being guessed at.
month_year_labels = (
    monthly_employees["variable"] + " " + monthly_employees["year"].astype(str)
)
monthly_employees["date"] = pd.to_datetime(month_year_labels, format="%B %Y")

In [24]:
# Monthly employment level per area, excluding the national row.
local_monthly = monthly_employees[monthly_employees["area_fips"] != "US000"]
alt.Chart(local_monthly).mark_line().encode(
    x=alt.X("date"),
    y=alt.Y("value"),
    color=alt.Color("area_fips"),
)

In [25]:
# Wide table for Datawrapper: one row per date, one column per area code.
# The national row is left out.
non_national_monthly = monthly_employees[monthly_employees["area_fips"] != "US000"]
dw_employees = non_national_monthly.pivot(
    index="date", columns="area_fips", values="value"
).reset_index()

In [26]:
# Push the monthly employment table to Datawrapper chart "EJRs4".
dw.add_data(
    chart_id="EJRs4",
    data=dw_employees,
)

<Response [204]>

### Comparing Q1 2019 vs. Q1 2023 for dot plots

In [33]:
# Keep only the quarters starting 2019-01-01 and 2023-01-01, and only the
# columns needed for the comparison charts.
comparison_columns = [
    "area_fips",
    "q_year",
    "quarter_date",
    "qtrly_estabs",
    "avg_monthly_employees",
    "employees_per_estab",
]
is_q1_2019 = df_slim["quarter_date"] == "2019-01-01"
is_q1_2023 = df_slim["quarter_date"] == "2023-01-01"
comparison_2019_23 = df_slim.loc[is_q1_2019 | is_q1_2023, comparison_columns]

In [44]:
# Average monthly employees: one row per area, one column per quarter label.
employee_wide = comparison_2019_23.pivot(
    index="area_fips", columns="q_year", values="avg_monthly_employees"
)
employee_chg = employee_wide.reset_index()

In [39]:
# Employees per establishment: one row per area, one column per quarter label.
# The result is pushed to Datawrapper chart "EMpPm".
employees_per_estab_wide = comparison_2019_23.pivot(
    index="area_fips", columns="q_year", values="employees_per_estab"
).reset_index()
dw.add_data(chart_id="EMpPm", data=employees_per_estab_wide)

<Response [201]>

In [40]:
# Establishment counts: one row per area, one column per quarter label.
estab_wide = comparison_2019_23.pivot(
    index="area_fips", columns="q_year", values="qtrly_estabs"
)
estab_chg = estab_wide.reset_index()

In [42]:
# Change in establishments from 2019-Q1 to 2023-Q1, as a fraction of the
# 2019-Q1 count (not multiplied by 100).
estab_chg["pct_change"] = (
    estab_chg["2023-Q1"].sub(estab_chg["2019-Q1"]).div(estab_chg["2019-Q1"])
)

In [45]:
# Push the establishment-change table to Datawrapper chart "YK4ES".
dw.add_data(
    chart_id="YK4ES",
    data=estab_chg,
)

<Response [201]>

In [49]:
# Monthly employment for area 53033 (Seattle / King County), in date order.
is_king_county = monthly_employees["area_fips"] == "53033"
monthly_employees.loc[is_king_county].sort_values("date")

Unnamed: 0,area_fips,year,qtr,quarter_date,variable,value,date
5,53033,2019,1,2019-01-01,January,157978,2019-01-01
40,53033,2019,1,2019-01-01,February,154992,2019-02-01
75,53033,2019,1,2019-01-01,March,155586,2019-03-01
110,53033,2019,2,2019-04-01,April,155419,2019-04-01
138,53033,2019,2,2019-04-01,May,157884,2019-05-01
166,53033,2019,2,2019-04-01,June,160157,2019-06-01
194,53033,2019,3,2019-07-01,July,163184,2019-07-01
222,53033,2019,3,2019-07-01,August,163790,2019-08-01
250,53033,2019,3,2019-07-01,September,162188,2019-09-01
278,53033,2019,4,2019-10-01,October,162492,2019-10-01
