# Processing Pasadena (Texas) police arrests: 2016-2021

### Import Python tools and Jupyter configuration

In [1]:
# Load the lab_black IPython extension (presumably auto-formats code cells
# with Black; the extension must be installed in the kernel's environment).
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import datetime as dt
import matplotlib.pyplot as plt
import altair as alt

In [3]:
# Widen pandas' display limits so wide frames and long text values are
# rendered without truncation in cell outputs.
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.max_rows", 1000),
    ("display.max_colwidth", None),
):
    pd.set_option(option_name, option_value)

### Read the raw data

In [4]:
# Read the raw arrest workbook. skiprows=1 drops the first sheet row so the
# column header is taken from the second row (presumably a title/banner row
# sits above the header -- confirm against the workbook).
src = pd.read_excel(
    io="data/raw/Adult_Arrest_Data_-_01012016-05072021.xlsx",
    skiprows=1,
)

In [5]:
# Normalise column labels to snake_case: lower-case, drop colons, turn spaces
# into underscores and spell "#" as "_no". Because the space is replaced
# first, a label ending in " #" ends up with a double underscore (this is
# where the `case__no` column used later comes from).
normalized_columns = src.columns.str.lower()
for old_text, new_text in ((":", ""), (" ", "_"), ("#", "_no")):
    normalized_columns = normalized_columns.str.replace(
        old_text, new_text, regex=False
    )
src.columns = normalized_columns

### Dates

In [6]:
# Parse the arrest date once and normalise it to midnight. This replaces the
# previous strftime -> to_datetime round trip, which produced the same
# date-only timestamps at the cost of formatting and re-parsing every row.
src["arrest_date"] = pd.to_datetime(
    src["arrest_date"], format="%m/%d/%Y"
).dt.normalize()

# Calendar components derived from the parsed date.
src["year"] = src["arrest_date"].dt.year
src["quarter"] = src["arrest_date"].dt.quarter
src["day"] = src["arrest_date"].dt.day
src["month"] = src["arrest_date"].dt.month
src["weekday"] = src["arrest_date"].dt.weekday  # Monday=0 ... Sunday=6
src["monthname"] = src["arrest_date"].dt.month_name()

# Monthly period (e.g. 2021-05) and the timestamp of that month's first day.
# Staying vectorised avoids the row-wise strftime (which raises on missing
# dates) and re-parsing "Month-YYYY" strings through format inference.
src["month_year"] = src["arrest_date"].dt.to_period("M")
src["month_year_full"] = src["month_year"].dt.to_timestamp()

### Times

In [7]:
# Left-pad the raw clock value to four characters ("HHMM"), e.g. 50 -> "0050",
# then split it into its hour and minute halves.
padded_time = src["arrest_time"].astype(str).str.zfill(4)
hour_text = padded_time.str[:2]
minute_text = padded_time.str[-2:]

# Build datetime.time values from the "HH:MM" text.
clock_times = pd.to_datetime(hour_text + ":" + minute_text, format="%H:%M").dt.time

# Assign in this order so the new columns are appended as
# arrest_hour, arrest_minute, arrest_time_full.
src["arrest_time"] = padded_time
src["arrest_hour"] = hour_text.astype(int)
src["arrest_minute"] = minute_text.astype(int)
src["arrest_time_full"] = clock_times

### Categorize the time of arrest

In [8]:
def categorize_hours(h):
    """Map an hour-of-day number to a named part of the day.

    Buckets are closed on the right: h <= 4 is "Late Night", 5-8 is
    "Early Morning", 9-12 is "Morning", 13-16 is "Noon", 17-20 is "Evening"
    and 21-24 is "Night". Values above 24 (and NaN, which fails every
    comparison) fall through and yield None.
    """
    # Ordered inclusive upper bounds; the first bound that h does not exceed
    # decides the label.
    upper_bounds = (
        (4, "Late Night"),
        (8, "Early Morning"),
        (12, "Morning"),
        (16, "Noon"),
        (20, "Evening"),
        (24, "Night"),
    )
    for upper_bound, label in upper_bounds:
        if h <= upper_bound:
            return label
    return None

In [9]:
# Label each arrest with its part of the day, derived from the integer hour.
src["arrest_time_period"] = src["arrest_hour"].map(categorize_hours)

In [10]:
# Upper-case the charge text so differently-cased spellings of the same charge
# compare equal (the charges are grouped and counted further down).
src["arrest_charge"] = src["arrest_charge"].str.upper()

---

In [11]:
# Work on an independent deep copy so the analysis below cannot modify `src`.
df = src.copy(deep=True)

In [12]:
# Preview the first five processed rows.
df.head(n=5)

Unnamed: 0,case__no,arrest_date,arrest_time,arresting_agency,arresting_officer,arrestee_age,arrestee_sex,arrestee_race,arrestee_ethnicity,arrest_charge,arrest_location,city,state,zip_code,geox,geoy,year,quarter,day,month,weekday,monthname,month_year,month_year_full,arrest_hour,arrest_minute,arrest_time_full,arrest_time_period
0,21006943,2021-05-07,50,PPD,"SLIGHT, N",35,M,B,N,DRIVING WHILE INTOXICATED - MA & MB,3100 E NASA PKWY/SPACE CENTER BLVD,PASADENA,TX,77058,3216028.75,13771449,2021,2,7,5,4,May,2021-05,2021-05-01,0,50,00:50:00,Late Night
1,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,N,"HOLD PPD - FOR FURTHER INVESTIGATION, M",7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154,2021,2,7,5,4,May,2021-05,2021-05-01,4,38,04:38:00,Late Night
2,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,,"93OC - OUTSIDE WARRANT (CLASS C), M",7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154,2021,2,7,5,4,May,2021-05,2021-05-01,4,38,04:38:00,Late Night
3,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,,NO CHARGES HAVE BEEN ADDED,7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154,2021,2,7,5,4,May,2021-05,2021-05-01,4,38,04:38:00,Late Night
4,21006964,2021-05-07,1130,PPD,"ROHM, A",41,M,W,H,PAROLE VIOLATOR,2706 CHERRYBROOK LN,PASADENA,TX,77502,3182997.0,13813571,2021,2,7,5,4,May,2021-05,2021-05-01,11,30,11:30:00,Morning


### Jose Manuel Garcia case

In [13]:
# Rows whose case number contains "19016065"; missing case numbers are treated
# as empty strings so they never match. Only the first matching row is shown.
garcia_case_mask = df["case__no"].fillna("").str.contains("19016065")
df[garcia_case_mask].iloc[0]

case__no                         19016065
arrest_date           2019-09-24 00:00:00
arrest_time                          1804
arresting_agency                      PPD
arresting_officer               CARTER, J
arrestee_age                           41
arrestee_sex                            M
arrestee_race                           W
arrestee_ethnicity                      H
arrest_charge               ASSAULT - FEL
arrest_location          2801 SPENCER HWY
city                             PASADENA
state                                  TX
zip_code                            77504
geox                           3174699.75
geoy                             13808897
year                                 2019
quarter                                 3
day                                    24
month                                   9
weekday                                 1
monthname                       September
month_year                        2019-09
month_year_full       2019-09-01 0

### Arrests by Officer Carter

In [14]:
# Keep every arrest whose arresting officer is recorded exactly as "CARTER, J".
# NOTE(review): the match is on surname + initial only -- confirm no other
# officer shares this label.
df_carter = df.loc[df["arresting_officer"].eq("CARTER, J")]

In [15]:
# Number of arrest rows attributed to the officer.
df_carter.shape[0]

473

### Group Officer Carter's arrests by month

In [16]:
# Count arrest rows per calendar month (keyed by the month's first day), then
# order the months from busiest to quietest.
carter_monthly_counts = df_carter.groupby(["month_year_full"])["case__no"].size()
df_carter_dates = carter_monthly_counts.reset_index(name="cases_count").sort_values(
    by="cases_count", ascending=False
)

### What were his most productive months?

In [17]:
# Show the five months with the highest arrest counts.
df_carter_dates.sort_values(by="cases_count", ascending=False).head(n=5)

Unnamed: 0,month_year_full,cases_count
18,2019-06-01,23
25,2020-01-01,22
13,2018-08-01,21
12,2018-07-01,18
20,2019-08-01,17


### No arrests between October 2018 and February 2019, and again none in March and April 2020

In [18]:
# Bar chart of the officer's arrest counts per month: one bar per month on a
# temporal x axis, arrest count on the y axis.
month_year_chart = (
    alt.Chart(df_carter_dates)
    .mark_bar(size=15)
    .encode(
        x=alt.X(
            "month_year_full:T",
            axis=alt.Axis(format="%b %y", grid=False),
            title="Month/Year",
        ),
        y=alt.Y(
            "cases_count:Q",
            axis=alt.Axis(
                tickCount=6,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                title="Arrests per month",
            ),
        ),
    )
    .properties(
        width=1000,
        height=500,
        # The plotted frame holds every arrest by the officer (no charge
        # filter is applied), so the title must not claim an assault-only
        # subset.
        title="Officer Carter arrests by month and year",
    )
    .configure_view(strokeOpacity=0)
)
month_year_chart

In [19]:
# Count arrest rows per part of the day, most common period first.
carter_period_counts = df_carter.groupby(["arrest_time_period"])["case__no"].size()
df_carter_times = carter_period_counts.reset_index(name="cases_count").sort_values(
    by="cases_count", ascending=False
)

In [20]:
# Display the per-period arrest counts (already sorted in descending order).
df_carter_times

Unnamed: 0,arrest_time_period,cases_count
1,Evening,168
5,Noon,117
4,Night,98
2,Late Night,42
3,Morning,33
0,Early Morning,15


In [21]:
# Horizontal bar chart of the officer's arrests per part of the day, with the
# bars ordered from the most to the least common period.
time_chart = (
    alt.Chart(df_carter_times)
    .mark_bar()
    .encode(
        x=alt.X(
            "cases_count:Q",
            axis=alt.Axis(grid=False),
            title="Arrests",
        ),
        y=alt.Y(
            "arrest_time_period:O",
            axis=alt.Axis(
                tickCount=6,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                # This axis lists the time-of-day categories; the previous
                # "Arrests per month" label was carried over from the monthly
                # chart and described the wrong quantity.
                title="Time of day",
            ),
            sort="-x",
        ),
    )
    .properties(width=1000, height=200, title="Officer Carter arrests by time period")
    .configure_view(strokeOpacity=0)
)
time_chart

### Group Officer Carter's arrests by charge

In [22]:
# Count arrest rows per charge text, most frequent charge first.
# NOTE(review): the counts treat e.g. "PUBLIC INTOXICATION - MC" and
# "PUBLIC INTOXICATION - MC, M" as different charges -- confirm whether the
# trailing ", M" variants should be merged before counting.
carter_charge_counts = df_carter.groupby(["arrest_charge"]).size()
df_carter_charges = carter_charge_counts.reset_index(name="cases_count").sort_values(
    by="cases_count", ascending=False
)

In [23]:
# Show the ten most frequent charges.
df_carter_charges.head(n=10)

Unnamed: 0,arrest_charge,cases_count
85,PUBLIC INTOXICATION - MC,28
2,93OC - OUTSIDE WARRANT (FELONY),24
4,93OC - OUTSIDE WARRANT (MA & MB),23
74,POSSESSION OF COCAINE/METH/OPIUM (PG1 SUBSTANCE),16
8,AGGRAVATED ASSAULT,11
57,HOLD PPD - FOR FURTHER INVESTIGATION,11
86,"PUBLIC INTOXICATION - MC, M",10
103,TERRORISTIC THREAT - MA & MB,8
87,PUBLIC INTOXICATION - OTHER THAN ALCOHOL - MC,8
71,POSS DRUG PARAPHERNALIA,8


--- 

In [24]:
# Export every arrest row for the officer; the DataFrame index is not written.
df_carter.to_csv(
    path_or_buf="data/processed/jason_carter_arrests.csv",
    index=False,
)

In [25]:
# Export the per-charge arrest counts; the DataFrame index is not written.
df_carter_charges.to_csv(
    path_or_buf="data/processed/jason_carter_arrests_counts.csv",
    index=False,
)

In [26]:
# Write the monthly arrests chart to a PNG file.
# NOTE(review): PNG export from Altair relies on an additional rendering
# package being installed, and the visuals/ directory must already exist --
# confirm both in the target environment.
month_year_chart.save("visuals/carter_arrests_monthyear.png")

In [27]:
# Write the time-of-day arrests chart to a PNG file.
# NOTE(review): PNG export from Altair relies on an additional rendering
# package being installed, and the visuals/ directory must already exist --
# confirm both in the target environment.
time_chart.save("visuals/carter_arrests_time_periods.png")