# LACoFD EMS vaccination data

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import datetime as dt
import matplotlib.pyplot as plt
import altair as alt

In [3]:
# Widen pandas' notebook display limits and lift Altair's row cap.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Read the raw data

In [4]:
# Load the raw employee list: the two date columns are parsed to datetimes
# and employee numbers are kept as text instead of an inferred numeric dtype.
src = pd.read_csv(
    filepath_or_buffer="input/lacofd_vax/employees_list.csv",
    parse_dates=["Birth Date", "First Dose Date"],
    dtype={"Employee Number": str},
)

In [5]:
# Normalise headers: lower-case with spaces removed ("First Dose Date" -> "firstdosedate").
src.columns = [header.lower().replace(" ", "") for header in src.columns]

In [6]:
# Upper-case lot numbers so later exact-match comparisons are case-insensitive.
src = src.assign(lotnumber=src["lotnumber"].str.upper())

In [7]:
# Reference timestamp (local time at execution) used for age calculations.
today = pd.Timestamp.today()

In [8]:
# Age in completed years as of `today`: the year difference, minus one when
# this year's birthday has not happened yet.
# NOTE(review): `today` is the run date, so ages shift whenever the notebook is re-run.
src["age"] = src["birthdate"].apply(
    lambda born: today.year
    - born.year
    - ((born.month, born.day) > (today.month, today.day))
)

### Dates

In [9]:
# Break the first-dose date into calendar parts.
src["firstdose_year"] = src["firstdosedate"].dt.year
src["firstdose_quarter"] = src["firstdosedate"].dt.quarter
src["firstdose_month"] = src["firstdosedate"].dt.month
# pandas numbers weekdays Monday=0 ... Sunday=6.
src["firstdose_weekday"] = src["firstdosedate"].dt.weekday
src["firstdose_monthname"] = src["firstdosedate"].dt.month_name()
# Timestamp of the first day of the dose month (e.g. 2020-12-17 -> 2020-12-01).
# Truncating through a monthly period avoids formatting each date as
# "%B-%Y" text and re-parsing it: that round trip raises on missing dates
# (NaT has no strftime), depends on the locale's month names, and runs a
# Python-level function per row. Missing dates stay NaT here.
src["firstdose_month_year_full"] = (
    src["firstdosedate"].dt.to_period("M").dt.to_timestamp()
)

In [10]:
# Number of distinct employee numbers (a missing value, if any, counts once).
src["employeenumber"].nunique(dropna=False)

4975

### Keep only the most recent entry per employee (some declined initially but have since been vaccinated) and make a copy

In [11]:
# Keep one row per employee: the entry with the latest first-dose date.
# - A stable sort keeps the file's row order among entries sharing a date,
#   so the surviving row is deterministic (the default quicksort gives no
#   such guarantee). NOTE(review): this assumes later rows in the file are
#   the more recent entries - confirm against the source export.
# - Undated rows are sorted first so they can never displace a dated entry
#   when `keep="last"` picks the final row per employee.
df = (
    src.sort_values("firstdosedate", kind="mergesort", na_position="first")
    .drop_duplicates("employeenumber", keep="last")
    .copy()
)

---

### Declined boolean

In [12]:
# Flag employees whose lot number is recorded as "DECLINED"; missing lot numbers yield False.
df["declined"] = df["lotnumber"].eq("DECLINED")

### What percentage declined? 

In [13]:
# Share of employees by declined status. `normalize` is a boolean flag; the
# previous positional string only worked because any non-empty string is truthy.
df["declined"].value_counts(normalize=True).round(2)

False    0.75
True     0.25
Name: declined, dtype: float64

### Count and mean age of "declined" employees? 

In [14]:
# Number of employees flagged as declined.
len(df.loc[df["declined"]])

1224

In [15]:
# Mean age of employees flagged as declined, rounded to a whole year.
df.loc[df["declined"], "age"].mean().round()

40.0

### Count and mean age of employees who didn't "decline"?

In [16]:
# Number of employees not flagged as declined.
len(df.loc[~df["declined"]])

3751

In [17]:
# Mean age of employees not flagged as declined, rounded to a whole year.
df.loc[~df["declined"], "age"].mean().round()

45.0

### Among those who didn't "decline" the first shot, in which month did they get their first dose?

In [18]:
# First-dose month counts among employees not flagged as declined.
df.loc[~df["declined"], "firstdose_monthname"].value_counts()

December    2637
January      765
February     246
March         60
April         33
May            9
June           1
Name: firstdose_monthname, dtype: int64

In [19]:
# First-dose month shares among employees not flagged as declined.
# `normalize` is a boolean flag; the previous positional string only worked
# because any non-empty string is truthy.
df.loc[~df["declined"], "firstdose_monthname"].value_counts(normalize=True).round(2)

December    0.70
January     0.20
February    0.07
March       0.02
April       0.01
May         0.00
June        0.00
Name: firstdose_monthname, dtype: float64

---

In [20]:
def get_num_people_by_age_category(
    df,
    bins=(0, 30, 40, 50, 60, 70, 86),
    labels=("< 30", "30-40", "40-50", "50-60", "60-70", "> 70"),
):
    """Add an ``age_group`` column that buckets ``df["age"]`` into labelled bins.

    Despite its name, this function does not count anything: it only assigns
    each row to an age bucket.

    Args:
        df: DataFrame with a numeric ``age`` column. It is modified in place.
        bins: Ascending bin edges. Intervals are right-closed, so with the
            defaults an age of exactly 30 lands in "< 30" and 40 lands in
            "30-40".
        labels: One label per interval (``len(bins) - 1`` entries).

    Returns:
        The same ``df`` object, with the ``age_group`` categorical column
        added or overwritten. Ages outside ``(bins[0], bins[-1]]`` (with the
        defaults: 0 or less, or above 86) and missing ages get NaN.
    """
    df["age_group"] = pd.cut(
        x=df["age"],
        bins=list(bins),
        labels=list(labels),
    )
    return df


# Add the `age_group` column to the deduplicated employee frame.
df = get_num_people_by_age_category(df)

In [21]:
# Employees per (age group, declined) pair, skipping rows without an age group.
# `age_group` is categorical, so `observed=False` is passed explicitly: it
# keeps zero-count combinations in the result and pins the behaviour now that
# pandas has deprecated relying on the default.
age_bars = (
    df[df["age_group"].notna()]
    .groupby(["age_group", "declined"], observed=False)
    .size()
    .reset_index(name="count")
)

In [22]:
# Reshape to one row per age group with one column per `declined` value.
# No `values`/`aggfunc` are given, so pandas aggregates the remaining column
# (`count`) with its default aggregation.
# NOTE(review): `age_bars` comes from a groupby-size, so each
# (age_group, declined) pair should occur once and the aggregate should equal
# the count itself - confirm.
age_bars_pivot = age_bars.pivot_table(
    columns="declined", index="age_group"
).reset_index()

In [23]:
# Display the pivoted counts.
age_bars_pivot

Unnamed: 0_level_0,age_group,count,count
declined,Unnamed: 1_level_1,False,True
0,< 30,446,207
1,30-40,946,464
2,40-50,960,287
3,50-60,1047,217
4,60-70,229,25
5,> 70,41,1


In [28]:
# Horizontal bars per age group, stacked by declined status and normalised
# so each bar spans 0-100%.
alt.Chart(age_bars, height=230, width=600).mark_bar().encode(
    x=alt.X("count", stack="normalize", axis=alt.Axis(tickCount=4, format="%")),
    y=alt.Y("age_group", sort=["< 30", "30-40", "40-50", "50-60", "60-70", "> 70"]),
    color=alt.Color("declined"),
)

In [25]:
# Age histogram (up to 20 bins) for employees not flagged as declined.
alt.Chart(df.loc[~df["declined"]]).mark_bar().encode(
    x=alt.X("age:Q", bin=alt.Bin(maxbins=20)),
    y=alt.Y("count()"),
)

In [26]:
# Age histogram (up to 20 bins) for employees flagged as declined.
alt.Chart(df.loc[df["declined"]]).mark_bar().encode(
    x=alt.X("age:Q", bin=alt.Bin(maxbins=20)),
    y=alt.Y("count()"),
)

### Exports

In [27]:
# Write the per-age-group declined/accepted counts to CSV, without the index column.
age_bars.to_csv(path_or_buf="output/lacofd_age_vax_choices.csv", index=False)