# LA County probation juvenile statistics 2020

In [2]:
%load_ext lab_black

In [4]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [5]:
# Charts: register the "latimes" theme from altair_latimes and make it active.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Tables: raise pandas' display limits so wide/long frames render in full.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Kept as the final expression so the cell still echoes the registry state.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the 2020 institutions report

In [11]:
# The PDF is too messy to parse programmatically, so Tabula was used to export CSVs of the 2020 monthly figures (violence, population, race, gender, etc.).

In [12]:
# Location of the county's 2020 institutions statistics PDF. Not referenced by
# any later cell in this section; the analysis reads the exported CSVs instead.
url = "http://file.lacounty.gov/SDSInter/probation/1104983_LAPROB.INSTITUTIONS.STATISTICS.2020_MASTER.pdf"

---

## Population

### Read the data and clean up

In [195]:
# Monthly 2020 population table: one row per category, one column per month.
population_csv = "input/population_2020.csv"
pop = pd.read_csv(population_csv)

In [196]:
# Turn the row labels into snake_case identifiers: lower-case them and swap
# literal spaces for underscores.
pop["population"] = (
    pop["population"].str.lower().str.replace(" ", "_", regex=False)
)

In [197]:
# Lower-case every column header so months and labels are addressed uniformly.
pop.columns = pop.columns.map(str.lower)

In [198]:
# Name the label column "category" so it lines up with the violence table when
# the two frames are concatenated. Reassign rather than mutate in place: the
# result is the same, but the cell reads as "new frame from old frame".
pop = pop.rename(columns={"population": "category"})

In [199]:
pop

Unnamed: 0,category,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
0,average_daily_population,545,540,507,382,362,305,304,304,311,327,330,314
1,black,197,205,193,134,118,90,96,99,110,103,116,123
2,caucasian,30,25,20,13,10,11,19,19,12,8,7,5
3,hispanic,310,302,288,228,228,200,186,182,188,213,207,185
4,other,8,8,6,7,5,4,3,4,1,3,0,1


---

## Violence

### Read the data and clean up

In [34]:
# Monthly 2020 violence table: one row per incident category, one column per month.
violence_csv = "input/violence_2020.csv"
violence = pd.read_csv(violence_csv)

In [35]:
# Lower-case every column header, matching the treatment of the population table.
violence.columns = violence.columns.map(str.lower)

In [36]:
# Snake-case the incident labels: lower-case, then literal spaces -> underscores.
violence["category"] = (
    violence["category"].str.lower().str.replace(" ", "_", regex=False)
)

In [200]:
violence

Unnamed: 0,category,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
0,youth_on_youth_violence,104,103,97,48,52,43,41,40,41,49,49,55
1,youth_on_staff_assaults,39,24,44,21,18,16,14,12,14,18,26,14
2,direct_assaults_on_staff,34,22,38,19,16,16,14,10,11,16,25,13
3,incidental_assaults_on_staff,5,2,6,2,2,0,0,2,3,2,1,1


---

In [182]:
# Stack the violence rows on top of the population rows and renumber the
# combined frame 0..n-1 in a single step.
pop_vio = pd.concat([violence, pop], ignore_index=True)

In [201]:
pop_vio

Unnamed: 0,category,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
0,youth_on_youth_violence,104,103,97,48,52,43,41,40,41,49,49,55
1,youth_on_staff_assaults,39,24,44,21,18,16,14,12,14,18,26,14
2,direct_assaults_on_staff,34,22,38,19,16,16,14,10,11,16,25,13
3,incidental_assaults_on_staff,5,2,6,2,2,0,0,2,3,2,1,1
4,average_daily_population,545,540,507,382,362,305,304,304,311,327,330,314
5,black,197,205,193,134,118,90,96,99,110,103,116,123
6,caucasian,30,25,20,13,10,11,19,19,12,8,7,5
7,hispanic,310,302,288,228,228,200,186,182,188,213,207,185
8,other,8,8,6,7,5,4,3,4,1,3,0,1


In [211]:
# Pick the three rows needed for the rate calculations by category label, not
# by row position: a reordered or extended CSV then raises a KeyError instead
# of silently feeding the wrong rows into the columns named downstream.
rate_rows = [
    "youth_on_youth_violence",
    "youth_on_staff_assaults",
    "average_daily_population",
]
# drop=False keeps the "category" column, so the transposed frame still has a
# "category" row, which a later cell filters out.
pop_vio_df = (
    pop_vio.set_index("category", drop=False)
    .loc[rate_rows]
    .reset_index(drop=True)
    .T
)

In [212]:
# Move the transposed row labels (the original column headers) out of the
# index into a regular column. Reassign instead of using inplace=True.
pop_vio_df = pop_vio_df.reset_index()

In [213]:
# First column holds the former column headers (months); the remaining three
# follow the order in which the rows were selected.
metric_names = [
    "youth_on_youth_violence",
    "youth_on_staff_assaults",
    "average_daily_population",
]
pop_vio_df.columns = ["month", *metric_names]

In [214]:
# The transpose left a row carrying the category labels; keep only month rows.
is_month_row = pop_vio_df["month"].ne("category")
pop_vio_df = pop_vio_df.loc[is_month_row].copy()

In [215]:
# Cast the three measure columns to integers so the rate arithmetic in the
# following cells operates on numbers.
count_columns = [
    "youth_on_youth_violence",
    "youth_on_staff_assaults",
    "average_daily_population",
]
pop_vio_df[count_columns] = pop_vio_df[count_columns].astype(int)

In [216]:
# Youth-on-youth incidents per 100 of average daily population, to one decimal.
pop_vio_df["youth_assault_rate_per_100"] = (
    pop_vio_df["youth_on_youth_violence"]
    .div(pop_vio_df["average_daily_population"])
    .mul(100)
    .round(1)
)

In [217]:
# Youth-on-staff assaults per 100 of average daily population, to one decimal.
pop_vio_df["staff_assault_rate_per_100"] = (
    pop_vio_df["youth_on_staff_assaults"]
    .div(pop_vio_df["average_daily_population"])
    .mul(100)
    .round(1)
)

In [218]:
pop_vio_df

Unnamed: 0,month,youth_on_youth_violence,youth_on_staff_assaults,average_daily_population,youth_assault_rate_per_100,staff_assault_rate_per_100
1,jan,104,39,545,19.1,7.2
2,feb,103,24,540,19.1,4.4
3,mar,97,44,507,19.1,8.7
4,apr,48,21,382,12.6,5.5
5,may,52,18,362,14.4,5.0
6,jun,43,16,305,14.1,5.2
7,jul,41,14,304,13.5,4.6
8,aug,40,12,304,13.2,3.9
9,sep,41,14,311,13.2,4.5
10,oct,49,18,327,15.0,5.5


In [225]:
# Explicit calendar order for the ordinal month axis, defined once and shared
# by both layers so they cannot drift apart.
month_order = [
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec",
]
month_axis = alt.X("month:O", sort=month_order, title="Month in 2020")

# Shaded area under the staff-assault rate; this layer carries the y-axis title
# (spelling corrected from "assualt").
area = (
    alt.Chart(pop_vio_df)
    .mark_area(opacity=0.2)
    .encode(
        x=month_axis,
        y=alt.Y("staff_assault_rate_per_100", title="Youth on staff assault rate"),
    )
)

# Line over the same series; its y title is left empty, as in the original.
line = (
    alt.Chart(pop_vio_df)
    .mark_line()
    .encode(
        x=month_axis,
        y=alt.Y("staff_assault_rate_per_100", title=""),
    )
)

In [226]:
(line + area)

---

In [232]:
# Select the race/ethnicity rows by category label, not by row position, so a
# change in the source CSVs raises a KeyError instead of silently mislabeling
# the columns that are renamed positionally further down.
race_rows = ["black", "caucasian", "hispanic", "other"]
# drop=False keeps the "category" column, so the transposed frame still has a
# "category" row, which a later cell filters out.
race_df = (
    pop_vio.set_index("category", drop=False)
    .loc[race_rows]
    .reset_index(drop=True)
    .T
)

In [235]:
race_df

Unnamed: 0,index,5,6,7,8
0,category,black,caucasian,hispanic,other
1,jan,197,30,310,8
2,feb,205,25,302,8
3,mar,193,20,288,6
4,apr,134,13,228,7
5,may,118,10,228,5
6,jun,90,11,200,4
7,jul,96,19,186,3
8,aug,99,19,182,4
9,sep,110,12,188,1


In [234]:
# Move the transposed row labels (the original column headers) out of the
# index into a regular column. Reassign instead of using inplace=True.
race_df = race_df.reset_index()

In [236]:
# Rename positionally: the former headers become "month", followed by the four
# race/ethnicity series under the labels used in the charts.
race_labels = ["black", "white", "latino", "other"]
race_df.columns = ["month"] + race_labels

In [239]:
# The transpose left a row carrying the category labels; keep only month rows.
is_month_row = race_df["month"].ne("category")
race_df = race_df.loc[is_month_row].copy()

In [240]:
# Cast the four race/ethnicity count columns to integers.
race_count_columns = ["black", "white", "latino", "other"]
race_df[race_count_columns] = race_df[race_count_columns].astype(int)

In [249]:
# Names of the four race/ethnicity count columns in race_df.
cols = ["black", "white", "latino", "other"]

In [254]:
# Sum only the four race/ethnicity counts. Summing the whole frame would also
# pull in the string "month" column (a TypeError on pandas >= 2.0) and, if the
# cell is re-run, the previously computed "total" itself.
race_df["total"] = race_df[cols].sum(axis=1)

In [318]:
# Reshape wide -> long: one row per (month, race/ethnicity) pair, with the
# group name in "variable" and the count in "value".
race_melt_df = race_df.melt(
    id_vars="month",
    value_vars=["black", "white", "latino", "other"],
)

In [319]:
# Explicit calendar order for the ordinal month axis.
calendar_order = [
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec",
]

month_x = alt.X("month:O", sort=calendar_order, title="Month in 2020")
count_y = alt.Y(
    "value:Q",
    title="Youths in L.A. County juvenile halls, 2020",
    axis=alt.Axis(tickCount=5),
)
group_color = alt.Color("variable:N", title="Race/ethnicity")

# Area chart of monthly counts, colored by race/ethnicity; left as the final
# expression so the cell renders it.
alt.Chart(race_melt_df).mark_area().encode(x=month_x, y=count_y, color=group_color)

In [320]:
# First and last months of the year, used below to filter rows to January and
# December only.
months = ["jan", "dec"]

In [321]:
# Keep only the January and December rows of the long-format race table.
is_endpoint_month = race_melt_df["month"].isin(months)
jan_dec_race = race_melt_df.loc[is_endpoint_month]

In [332]:
# Explicit calendar order for the ordinal month axis.
facet_month_order = [
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec",
]

# Small multiples: one 100x100 line panel per race/ethnicity group. Left as the
# final expression so the cell renders it.
(
    alt.Chart(race_melt_df)
    .mark_line()
    .encode(
        x=alt.X("month:O", sort=facet_month_order),
        y="value",
        color="variable",
        facet="variable",
    )
    .properties(width=100, height=100)
)

### Percent change by race, January to December 2020

In [333]:
race_melt_df.head()

Unnamed: 0,month,variable,value,pct_ch
0,jan,black,197,
1,feb,black,205,
2,mar,black,193,
3,apr,black,134,
4,may,black,118,


In [334]:
# Change from the first to the last month within each race/ethnicity group.
# The look-back distance is derived from the number of distinct months instead
# of a hard-coded 11, so it stays correct if the table holds fewer months.
# Assumes rows within each group are in calendar order -- TODO confirm if the
# upstream reshape changes.
n_periods = race_melt_df["month"].nunique() - 1
race_melt_df["pct_ch"] = race_melt_df.groupby("variable")["value"].pct_change(
    periods=n_periods
)

In [335]:
race_melt_df[race_melt_df["month"].isin(months)]

Unnamed: 0,month,variable,value,pct_ch
0,jan,black,197,
11,dec,black,123,-0.375635
12,jan,white,30,
23,dec,white,5,-0.833333
24,jan,latino,310,
35,dec,latino,185,-0.403226
36,jan,other,8,
47,dec,other,1,-0.875
