# COVID-19 at CA prisons

### Import Python tools and Jupyter configuration

In [2]:
%load_ext lab_black

In [3]:
import pandas as pd
import geopandas as gpd
import datetime as dt
import matplotlib.pyplot as plt
import altair as alt

In [25]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Read the raw data from a URL

In [43]:
url = "https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/cdcr-prison-totals.csv"

In [44]:
df = pd.read_csv(url)

In [45]:
src.columns = (
    src.columns.str.lower()
    .str.replace(":", "", regex=False)
    .str.replace(" ", "_", regex=False)
    .str.replace("#", "_no", regex=False)
)

In [46]:
src.dtypes

date                    object
code                    object
name                    object
city                    object
county                  object
fips                     int64
zipcode                  int64
x                      float64
y                      float64
confirmed_cases          int64
new_confirmed_cases    float64
active_cases           float64
released_cases           int64
resolved_cases         float64
deaths                   int64
new_deaths             float64
dtype: object

### Dates

In [47]:
src["date"] = pd.to_datetime(src["date"], format="%Y-%m-%d")
src["date"] = pd.to_datetime(src["date"].dt.strftime("%Y-%m-%d"))
src["year"] = src["date"].dt.year
src["quarter"] = src["date"].dt.quarter
src["day"] = src["date"].dt.day
src["month"] = src["date"].dt.month
src["weekday"] = src["date"].dt.weekday
src["monthname"] = src["date"].dt.month_name()
src["month_year_full"] = src["date"].apply(lambda x: x.strftime("%B-%Y"))
src["month_year_full"] = pd.to_datetime(src["month_year_full"])

---

In [48]:
df = src.copy()

In [51]:
df[df["name"] == "Folsom State Prison"].sort_values("date", ascending=False).head()

Unnamed: 0,date,code,name,city,county,fips,zipcode,x,y,confirmed_cases,new_confirmed_cases,active_cases,released_cases,resolved_cases,deaths,new_deaths,year,quarter,day,month,weekday,monthname,month_year_full
10185,2021-06-15,FSP,Folsom State Prison,Represa,Sacramento,67,95671,-121.162429,38.694558,1369,0.0,0.0,12,1355.0,2,0.0,2021,2,15,6,1,June,2021-06-01
10184,2021-06-14,FSP,Folsom State Prison,Represa,Sacramento,67,95671,-121.162429,38.694558,1369,0.0,0.0,12,1355.0,2,0.0,2021,2,14,6,0,June,2021-06-01
10183,2021-06-13,FSP,Folsom State Prison,Represa,Sacramento,67,95671,-121.162429,38.694558,1369,0.0,,12,,2,0.0,2021,2,13,6,6,June,2021-06-01
10182,2021-06-12,FSP,Folsom State Prison,Represa,Sacramento,67,95671,-121.162429,38.694558,1369,0.0,,12,,2,0.0,2021,2,12,6,5,June,2021-06-01
10181,2021-06-11,FSP,Folsom State Prison,Represa,Sacramento,67,95671,-121.162429,38.694558,1369,0.0,,12,,2,0.0,2021,2,11,6,4,June,2021-06-01


In [52]:
alt.Chart(df).mark_line().encode(
    x="date",
    y="confirmed_cases",
    color="name",
    #     facet=alt.Facet("institutionname", columns=5)
).properties(width=1000, height=600)

---

### Institution reports

In [117]:
from bs4 import BeautifulSoup
import requests

In [118]:
prison_url = (
    "https://www.cdcr.ca.gov/research/monthly-total-population-report-archive-2019/"
)

In [126]:
from bs4 import BeautifulSoup
import urllib.request

parser = "html.parser"
resp = urllib.request.urlopen(
    "https://www.cdcr.ca.gov/research/monthly-total-population-report-archive-2019/"
)
soup = BeautifulSoup(resp, parser, from_encoding=resp.info().get_param("charset"))

for link in soup.find_all(
    "a", href=lambda href: href and "/research/wp-content/uploads/" in href
):
    print(link["href"])

https://www.cdcr.ca.gov/research/wp-content/uploads/sites/174/2019/06/Tpop1d1901.pdf
https://www.cdcr.ca.gov/research/wp-content/uploads/sites/174/2019/06/Tpop1d1902.pdf
https://www.cdcr.ca.gov/research/wp-content/uploads/sites/174/2019/06/Tpop1d1903.pdf
https://www.cdcr.ca.gov/research/wp-content/uploads/sites/174/2019/06/Tpop1d1904.pdf
https://www.cdcr.ca.gov/research/wp-content/uploads/sites/174/2019/06/Tpop1d1905.pdf
/research/wp-content/uploads/sites/174/2019/07/Tpop1d1906.pdf
/research/wp-content/uploads/sites/174/2019/08/Tpop1d1907.pdf
/research/wp-content/uploads/sites/174/2019/09/Tpop1d1908.pdf
/research/wp-content/uploads/sites/174/2019/10/Tpop1d1909.pdf
/research/wp-content/uploads/sites/174/2019/11/Tpop1d1910.pdf
/research/wp-content/uploads/sites/174/2019/12/Tpop1d1911.pdf
/research/wp-content/uploads/sites/174/2020/01/Tpop1d1912.pdf
/research/wp-content/uploads/sites/174/2020/02/Tpop1d2001.pdf
/research/wp-content/uploads/sites/174/2020/05/Tpop1d2002.pdf
https://www.cdcr.

In [107]:
pop_df = pd.read_fwf(
    "input/prisonpop/Tpop1d2003.txt",
    skiprows=12,
    header=0,
    skipfooter=5,
    names=["name", "inmates", "designed_capacity", "pct_occupied", "staffed_capacity"],
)

In [108]:
pop_df["inmates"] = pop_df["inmates"].str.replace(",", "", regex=False)
pop_df["designed_capacity"] = pop_df["designed_capacity"].str.replace(
    ",", "", regex=False
)
pop_df["staffed_capacity"] = pop_df["staffed_capacity"].str.replace(
    ",", "", regex=False
)

In [109]:
pop_df[["name", "abbr"]] = (
    pop_df["name"].str.split("(", expand=True).replace(")", "", regex=False)
)

In [110]:
pop_df["abbr"] = pop_df["abbr"].str.replace(")", "", regex=False)

In [111]:
pop_df.tail(10)

Unnamed: 0,name,inmates,designed_capacity,pct_occupied,staffed_capacity,abbr
29,Salinas Valley State Prison,2930.0,2452.0,119.5,3509.0,SVSP
30,Valley State Prison,3024.0,1980.0,152.7,2954.0,VSP
31,Wasco State Prison,4359.0,2984.0,146.1,4447.0,WSP
32,Male Total,112052.0,85858.0,130.5,118500.0,
33,Female Institutions,,,,,
34,Central California Women's Facility,2795.0,2004.0,139.5,2988.0,CCWF
35,California Institution for Women,1650.0,1398.0,118.0,1877.0,CIW
36,Folsom State Prison,389.0,403.0,96.5,530.0,FOL
37,Female Total,4834.0,3805.0,127.0,5395.0,
38,Institution Total,116886.0,89663.0,130.4,123895.0,


In [112]:
to_drop = ["Male Total", "Female Institutions", "Female Total", "Institution Total"]

In [113]:
for t in to_drop:
    pop_df = pop_df[pop_df["name"] != t]

In [114]:
pop_df

Unnamed: 0,name,inmates,designed_capacity,pct_occupied,staffed_capacity,abbr
0,Avenal State Prison,4294,2920,147.1,4387,ASP
1,Calipatria State Prison,3055,2308,132.4,3451,CAL
2,California Correctional Center,4309,3883,111.0,4752,CCC
3,California Correctional Institution,3666,2783,131.7,4085,CCI
4,Centinela State Prison,3385,2308,146.7,3446,CEN
5,California Health Care Facility - Stockton,2834,2951,96.0,2951,CHCF
6,California Institution for Men,3515,2976,118.1,4226,CIM
7,California Men's Colony,3823,3838,99.6,4407,CMC
8,California Medical Facility,2475,2361,104.8,2861,CMF
9,"California State Prison, Corcoran",2819,3116,90.5,4476,COR
