# Processing Pasadena (Texas) police arrests: 2016-2021

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import datetime as dt
import matplotlib.pyplot as plt
import altair as alt

In [3]:
# Widen pandas' display limits so wide/long frames and long text cells are shown in full.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)  # None = never truncate cell text

### Read the raw data

In [4]:
# Load the raw arrest spreadsheet. The first row of the sheet is skipped
# (presumably a title/banner line above the real header -- TODO confirm).
raw_arrests_path = "data/raw/Adult_Arrest_Data_-_01012016-05072021.xlsx"
src = pd.read_excel(raw_arrests_path, skiprows=1)

In [5]:
# Normalize the header names: lowercase, then apply literal (non-regex)
# replacements in order -- drop ":", turn spaces into "_", and spell "#" as "_no".
normalized_columns = src.columns.str.lower()
for old_text, new_text in ((":", ""), (" ", "_"), ("#", "_no")):
    normalized_columns = normalized_columns.str.replace(old_text, new_text, regex=False)
src.columns = normalized_columns

In [6]:
# Peek at the first five raw rows after the header cleanup.
src.head(n=5)

Unnamed: 0,case__no,arrest_date,arrest_time,arresting_agency,arresting_officer,arrestee_age,arrestee_sex,arrestee_race,arrestee_ethnicity,arrest_charge,arrest_location,city,state,zip_code,geox,geoy
0,21006943,2021-05-07,50,PPD,"SLIGHT, N",35,M,B,N,DRIVING WHILE INTOXICATED - MA & MB,3100 E NASA PKWY/SPACE CENTER BLVD,PASADENA,TX,77058,3216028.75,13771449
1,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,N,"HOLD PPD - FOR FURTHER INVESTIGATION, M",7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154
2,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,,"93OC - OUTSIDE WARRANT (CLASS C), M",7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154
3,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,,No charges have been added,7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154
4,21006964,2021-05-07,1130,PPD,"ROHM, A",41,M,W,H,PAROLE VIOLATOR,2706 CHERRYBROOK LN,PASADENA,TX,77502,3182997.0,13813571


### Dates

In [7]:
# Parse the arrest date and drop any time-of-day component so every value sits
# at midnight (replaces the former strftime -> to_datetime round trip).
src["arrest_date"] = pd.to_datetime(
    src["arrest_date"], format="%m/%d/%Y"
).dt.normalize()

# Calendar parts used for grouping and charting.
src["year"] = src["arrest_date"].dt.year
src["quarter"] = src["arrest_date"].dt.quarter
src["day"] = src["arrest_date"].dt.day
src["month"] = src["arrest_date"].dt.month
src["weekday"] = src["arrest_date"].dt.weekday  # Monday=0 ... Sunday=6
src["monthname"] = src["arrest_date"].dt.month_name()

# Monthly period (e.g. 2021-05) for each arrest.
src["month_year"] = src["arrest_date"].dt.to_period("M")

# First day of the arrest's month as a real timestamp (used as the chart's time axis).
# Derived from the period instead of formatting each row as "%B-%Y" text and
# re-parsing it: that route depended on the locale's month names, ran row by row,
# and raised on missing dates (NaT has no strftime).
src["month_year_full"] = src["month_year"].dt.to_timestamp()

### Times

In [8]:
# Treat arrest_time as HHMM text, left-padded with zeros to four characters
# (e.g. 50 -> "0050"). Assumes the raw values are integer-like -- TODO confirm.
padded_time = src["arrest_time"].astype(str).str.zfill(4)
hour_text = padded_time.str[:2]
minute_text = padded_time.str[-2:]

# Parse "HH:MM" and keep only the time-of-day part.
clock_time = pd.to_datetime(hour_text + ":" + minute_text, format="%H:%M").dt.time

src["arrest_time"] = padded_time
src["arrest_hour"] = hour_text.astype(int)
src["arrest_minute"] = minute_text.astype(int)
src["arrest_time_full"] = clock_time

### Categorize the time of arrest

In [9]:
def categorize_hours(h):
    """Map an hour-of-day number to a coarse time-of-day label.

    Each bucket is closed on its upper bound: values up to 4 (including hour 0)
    are "1-4am", then "5-8am", "9-noon", "1-4pm", "5-8pm", and values above 20
    up to 24 are "9-midnight". Anything that fits no bucket (above 24, or NaN)
    yields None.
    """
    upper_bounds = (
        (4, "1-4am"),
        (8, "5-8am"),
        (12, "9-noon"),
        (16, "1-4pm"),
        (20, "5-8pm"),
        (24, "9-midnight"),
    )
    # Bounds are ascending, so the first one that h does not exceed is its bucket.
    for limit, label in upper_bounds:
        if h <= limit:
            return label
    return None

In [10]:
# Label every arrest with its time-of-day bucket, derived from the integer hour.
src["arrest_time_period"] = src["arrest_hour"].map(categorize_hours)

In [11]:
# Uppercase the charge text so later string matching is case-consistent.
charge_text = src["arrest_charge"]
src["arrest_charge"] = charge_text.str.upper()

---

In [12]:
# Work on an independent (deep) copy so the cleaned source frame stays untouched.
df = src.copy(deep=True)

In [13]:
# Peek at the first five rows of the working copy, including the derived columns.
df.head(n=5)

Unnamed: 0,case__no,arrest_date,arrest_time,arresting_agency,arresting_officer,arrestee_age,arrestee_sex,arrestee_race,arrestee_ethnicity,arrest_charge,arrest_location,city,state,zip_code,geox,geoy,year,quarter,day,month,weekday,monthname,month_year,month_year_full,arrest_hour,arrest_minute,arrest_time_full,arrest_time_period
0,21006943,2021-05-07,50,PPD,"SLIGHT, N",35,M,B,N,DRIVING WHILE INTOXICATED - MA & MB,3100 E NASA PKWY/SPACE CENTER BLVD,PASADENA,TX,77058,3216028.75,13771449,2021,2,7,5,4,May,2021-05,2021-05-01,0,50,00:50:00,1-4am
1,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,N,"HOLD PPD - FOR FURTHER INVESTIGATION, M",7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154,2021,2,7,5,4,May,2021-05,2021-05-01,4,38,04:38:00,1-4am
2,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,,"93OC - OUTSIDE WARRANT (CLASS C), M",7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154,2021,2,7,5,4,May,2021-05,2021-05-01,4,38,04:38:00,1-4am
3,21006951,2021-05-07,438,PPD,"SMITH, K",50,M,W,,NO CHARGES HAVE BEEN ADDED,7015 SPENCER HWY,PASADENA,TX,77505,3197134.5,13809154,2021,2,7,5,4,May,2021-05,2021-05-01,4,38,04:38:00,1-4am
4,21006964,2021-05-07,1130,PPD,"ROHM, A",41,M,W,H,PAROLE VIOLATOR,2706 CHERRYBROOK LN,PASADENA,TX,77502,3182997.0,13813571,2021,2,7,5,4,May,2021-05,2021-05-01,11,30,11:30:00,9-noon


### Jose Manuel Garcia case

In [16]:
# Show the first row whose case number contains 19016065; missing case numbers
# are treated as empty text so they never match.
case_hits = df["case__no"].fillna("").str.contains("19016065")
df.loc[case_hits].iloc[0]

case__no                         19016065
arrest_date           2019-09-24 00:00:00
arrest_time                          1804
arresting_agency                      PPD
arresting_officer               CARTER, J
arrestee_age                           41
arrestee_sex                            M
arrestee_race                           W
arrestee_ethnicity                      H
arrest_charge               ASSAULT - FEL
arrest_location          2801 SPENCER HWY
city                             PASADENA
state                                  TX
zip_code                            77504
geox                           3174699.75
geoy                             13808897
year                                 2019
quarter                                 3
day                                    24
month                                   9
weekday                                 1
monthname                       September
month_year                        2019-09
month_year_full       2019-09-01 0

### Arrests by Officer Carter

In [17]:
# Keep every arrest row whose officer name contains "CARTER".
# Missing officer names are filled with "" first -- the same guard the other
# str.contains filters in this notebook use -- so a NaN cannot produce a NaN
# mask and make the boolean indexing raise.
# NOTE(review): this is a substring match, so it would also pick up any other
# officer whose name contains "CARTER" -- confirm only one such officer exists.
# .copy() makes the subset an independent frame rather than a view-like slice of df.
df_carter = df[df["arresting_officer"].fillna("").str.contains("CARTER")].copy()

In [18]:
# Number of rows in the Officer Carter subset.
df_carter.shape[0]

475

In [20]:
# Five most recent Officer Carter rows, newest arrest date first.
carter_newest_first = df_carter.sort_values(by="arrest_date", ascending=False)
carter_newest_first.head(n=5)

Unnamed: 0,case__no,arrest_date,arrest_time,arresting_agency,arresting_officer,arrestee_age,arrestee_sex,arrestee_race,arrestee_ethnicity,arrest_charge,arrest_location,city,state,zip_code,geox,geoy,year,quarter,day,month,weekday,monthname,month_year,month_year_full,arrest_hour,arrest_minute,arrest_time_full,arrest_time_period
35,21006770,2021-05-03,1829,PPD,"CARTER, J",19,M,W,H,"THEFT - ALL OTHER - FEL, F",3002 STRAWBERRY RD SB,PASADENA,TX,77502,3177296.0,13809485,2021,2,3,5,0,May,2021-05,2021-05-01,18,29,18:29:00,5-8pm
63,21006602,2021-05-01,124,PPD,"CARTER, J",19,M,W,H,FRAUDULENT USE/POSSESSION OF IDENTIFYING INFORMATION,3101 SPENCER HWY,PASADENA,TX,77504,3176456.75,13808986,2021,2,1,5,5,May,2021-05,2021-05-01,1,24,01:24:00,1-4am
99,21006463,2021-04-28,1720,PPD,"CARTER, J",32,M,W,H,CRIMINAL MISCHIEF - MA & MB,409 RICHEY ST,PASADENA,TX,77506,3168285.5,13823722,2021,2,28,4,2,April,2021-04,2021-04-01,17,20,17:20:00,5-8pm
113,21006402,2021-04-27,1430,PPD,"CARTER, J",19,M,W,,PUBLIC INTOXICATION - MC,2216 HARRIS AVE,PASADENA,TX,77506,3180415.75,13821224,2021,2,27,4,1,April,2021-04,2021-04-01,14,30,14:30:00,1-4pm
112,21006402,2021-04-27,1432,PPD,"CARTER, J",19,M,W,H,PUBLIC INTOXICATION - MC,2216 HARRIS AVE,PASADENA,TX,77506,3180415.75,13821224,2021,2,27,4,1,April,2021-04,2021-04-01,14,32,14:32:00,1-4pm


### Group Officer Carter arrests by dates

In [21]:
# Count Officer Carter's rows per calendar month, busiest month first.
# NOTE(review): size() counts rows, and the raw data shows one case number
# repeated across several charge rows, so "cases_count" may exceed the number
# of distinct cases -- confirm this is the intended measure.
carter_rows_per_month = df_carter.groupby(["month_year_full"])["case__no"].size()
df_carter_dates = carter_rows_per_month.reset_index(name="cases_count").sort_values(
    by="cases_count", ascending=False
)

### What were his most productive months?

In [22]:
# Five months with the highest row counts.
carter_months_ranked = df_carter_dates.sort_values(by="cases_count", ascending=False)
carter_months_ranked.head(n=5)

Unnamed: 0,month_year_full,cases_count
18,2019-06-01,23
25,2020-01-01,22
13,2018-08-01,22
12,2018-07-01,18
20,2019-08-01,17


### No arrests between October 2018 and February 2019. Same in March and April 2020

In [23]:
# Bar chart of Officer Carter's monthly arrest-row counts over time.
# The title previously said 'assault' arrests, but df_carter_dates is built from
# all of his arrest rows (no charge filter), so the title now matches the data.
month_year_chart = (
    alt.Chart(df_carter_dates)
    .mark_bar(size=15)
    .encode(
        # Temporal x axis: one bar per month, labelled like "Jun 19".
        x=alt.X(
            "month_year_full:T",
            axis=alt.Axis(format="%b %y", grid=False),
            title="Month/Year",
        ),
        # Quantitative y axis: number of rows recorded in that month.
        y=alt.Y(
            "cases_count:Q",
            axis=alt.Axis(
                tickCount=6,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                title="Arrests per month",
            ),
        ),
    )
    .properties(
        width=1000,
        height=500,
        title="Officer Carter arrests by month and year",
    )
    .configure_view(strokeOpacity=0)  # hide the border around the plot area
)
month_year_chart

In [24]:
# Count Officer Carter's rows per time-of-day bucket, busiest bucket first.
carter_rows_per_period = df_carter.groupby(["arrest_time_period"])["case__no"].size()
df_carter_times = carter_rows_per_period.reset_index(name="cases_count").sort_values(
    by="cases_count", ascending=False
)

In [25]:
# Display the per-bucket counts (sorted by cases_count, largest first).
df_carter_times

Unnamed: 0,arrest_time_period,cases_count
3,5-8pm,168
1,1-4pm,117
4,9-midnight,99
0,1-4am,42
5,9-noon,34
2,5-8am,15


In [26]:
# Horizontal bar chart of Officer Carter's arrest rows per time-of-day bucket.
# The y axis shows the time buckets, so its title is "Time of day"; it was
# previously "Arrests per month", copied from the monthly chart.
time_chart = (
    alt.Chart(df_carter_times)
    .mark_bar()
    .encode(
        # Quantitative x axis: number of rows in each bucket (type made explicit
        # to match the monthly chart's "cases_count:Q").
        x=alt.X(
            "cases_count:Q",
            axis=alt.Axis(grid=False),
            title="Arrests",
        ),
        # Ordinal y axis: one bar per bucket, ordered by descending x value.
        y=alt.Y(
            "arrest_time_period:O",
            axis=alt.Axis(
                tickCount=6,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                title="Time of day",
            ),
            sort="-x",
        ),
    )
    .properties(width=1000, height=200, title="Officer Carter arrests by time period")
    .configure_view(strokeOpacity=0)  # hide the border around the plot area
)
time_chart

### Group Officer Carter by arrest charges

In [27]:
# Count Officer Carter's rows per exact charge text, most frequent charge first.
# NOTE(review): charges are grouped by their literal text, so variants such as
# "PUBLIC INTOXICATION - MC" and "PUBLIC INTOXICATION - MC, M" are tallied
# separately -- confirm whether they should be merged.
carter_rows_per_charge = df_carter.groupby(["arrest_charge"]).size()
df_carter_charges = carter_rows_per_charge.reset_index(name="cases_count").sort_values(
    by="cases_count", ascending=False
)

In [28]:
# Ten most frequent charge texts among Officer Carter's rows.
df_carter_charges.head(n=10)

Unnamed: 0,arrest_charge,cases_count
85,PUBLIC INTOXICATION - MC,28
2,93OC - OUTSIDE WARRANT (FELONY),24
4,93OC - OUTSIDE WARRANT (MA & MB),23
74,POSSESSION OF COCAINE/METH/OPIUM (PG1 SUBSTANCE),16
57,HOLD PPD - FOR FURTHER INVESTIGATION,12
8,AGGRAVATED ASSAULT,11
86,"PUBLIC INTOXICATION - MC, M",10
103,TERRORISTIC THREAT - MA & MB,8
87,PUBLIC INTOXICATION - OTHER THAN ALCOHOL - MC,8
71,POSS DRUG PARAPHERNALIA,8


---

### Different types of "assault" charges

In [29]:
# All rows (any officer) whose charge text contains "ASSAULT"; missing charges
# are treated as empty text so they never match.
assault_rows = df.loc[df["arrest_charge"].fillna("").str.contains("ASSAULT")]

# Row count per exact charge text, most frequent first.
assaults_grouped = (
    assault_rows.groupby(["arrest_charge"])
    .size()
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
)

In [30]:
# Display the assault charge variants with their row counts (largest first).
assaults_grouped

Unnamed: 0,arrest_charge,count
21,ASSAULT - MA (FAMILY VIOLENCE) (MINOR INJ),320
9,ASSAULT - FEL (FAMILY VIOLENCE),194
15,ASSAULT - MA & MB (MINOR INJ),132
0,AGGRAVATED ASSAULT,112
24,ASSAULT - MC,86
25,ASSAULT - MC (FAMILY VIOLENCE),71
1,AGGRAVATED ASSAULT (FAMILY VIOLENCE),71
27,"ASSAULT - MC, M",68
8,ASSAULT - FEL,58
18,ASSAULT - MA (FAMILY VIOLENCE),54


--- 

### Exports

In [31]:
# Write the Officer Carter subset to CSV without the index column.
df_carter.to_csv(
    path_or_buf="data/processed/jason_carter_arrests.csv",
    index=False,
)

In [32]:
# Write the per-charge counts for Officer Carter to CSV without the index column.
df_carter_charges.to_csv(
    path_or_buf="data/processed/jason_carter_arrests_counts.csv",
    index=False,
)

In [33]:
# Save the monthly chart as a PNG image.
# NOTE(review): PNG export presumably needs an Altair image renderer installed -- confirm.
month_year_chart.save("visuals/arrests_carter_month_year.png")

In [34]:
# Save the time-of-day chart as a PNG image.
# NOTE(review): PNG export presumably needs an Altair image renderer installed -- confirm.
time_chart.save("visuals/arrests_carter_time_periods.png")

In [35]:
# Export rows (any officer) whose charge text is exactly "ASSAULT - FEL".
# NOTE(review): this is an exact match, so variants that merely contain that
# text (e.g. "ASSAULT - FEL (FAMILY VIOLENCE)") are excluded -- confirm intent.
assault_fel_rows = df.loc[df["arrest_charge"].eq("ASSAULT - FEL")]
assault_fel_rows.to_csv("data/processed/assault_fel_cases.csv", index=False)

In [36]:
# Number of rows whose charge text contains "ASSAULT - FEL" anywhere
# (missing charges are treated as empty text).
assault_fel_hits = df["arrest_charge"].fillna("").str.contains("ASSAULT - FEL")
df.loc[assault_fel_hits].shape[0]

304