# How often does LAPD make arrests in Part I crimes_major?

### Import data tools

In [16]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
from earthpy import clip as cl
from altair import datum
import weightedcalcs as wc
import altair as alt
alt.renderers.enable('notebook')
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000

### Read crimes_major CSV downloaded from LA City data portal

In [153]:
# https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-Present/63jg-8b9z
crimes = pd.read_csv('/Users/mhustiles/data/data/LA/Crime_Data_from_2010_to_Present.csv')

In [155]:
crimes.columns = crimes.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

### These data are super messy...

In [156]:
crimes.rename(columns = { 
'dr_no':'record_id',
'date_rptd':'date_reported',
'date_occ':'date_occurred',
'time_occ':'time_occurred',
'area':'division',
'area_name':'division_name',
'rpt_dist_no':'reporting_district',
'part_1_2':'part_type',
'crm_cd':'crime_code',
'crm_cd_desc':'crime_code_description',
'mocodes':'modus_operandi_code',
'vict_age':'victim_age',
'vict_sex':'victim_sex',
'vict_descent':'victim_descent',
'premis_cd':'premises_code',
'premis_desc':'premises_description',
'weapon_used_cd':'weapon_code',
'weapon_desc':'weapon_description',
'status':'status_code',
'status_desc':'status_code_description',
'crm_cd_1':'crm_cd_1',
'crm_cd_2':'crm_cd_2',
'crm_cd_3':'crm_cd_3',
'crm_cd_4':'crm_cd_4',
'location':'address',
'cross_street':'cross_street',
'lat':'latitude',
'lon':'longitide',
 }, inplace = True)

### ... contunued

In [157]:
crimes['date_reported'] = crimes['date_reported'].str.replace(' 12:00:00 AM','')
crimes['date_occurred'] = crimes['date_occurred'].str.replace(' 12:00:00 AM','')

In [158]:
crimes['date_occurred'] = pd.to_datetime(crimes['date_occurred'], format='%m/%d/%Y')

In [159]:
crimes['year'] = crimes['date_occurred'].dt.year
crimes['quarter'] = crimes['date_occurred'].dt.quarter
crimes['day'] = crimes['date_occurred'].dt.day
crimes['month'] = crimes['date_occurred'].dt.month
crimes['weekday'] = crimes['date_occurred'].dt.weekday_name
crimes['monthname'] = crimes['date_occurred'].dt.month_name()

In [163]:
crimes[[ 'record_id', 
    'time_occurred', 
    'division', 
    'reporting_district', 
    'part_type',
    'crm_cd_1',
    'crm_cd_2',
    'crm_cd_3',
    'crm_cd_4',
    'crime_code', 
    'premises_code', 
    'weapon_code',
    'year',
    'quarter',
    'day',
    'month', ]] = crimes[[
    'record_id', 
    'time_occurred', 
    'division', 
    'reporting_district', 
    'part_type',
    'crm_cd_1',
    'crm_cd_2',
    'crm_cd_3',
    'crm_cd_4',
    'crime_code', 
    'premises_code', 
    'weapon_code',
    'year',
    'quarter',
    'day',
    'month',]].astype(str)

In [164]:
descent_recode = { 'A':"asian",'B':"black",'C':"asian",'D':"asian",'F':"asian",'G':"asian",\
                 'H':"hispanic",'I':"ai_an",'J':"asian",\
                 'K':"asian",'L':"asian",'O':"other",'P':"asian",'S':"asian",\
                 'U':"asian",'V':"asian",'W':"white",'X':"other",'Z':"asian" }

In [166]:
crimes['descent_description'] = crimes['victim_descent'].map(descent_recode)

In [167]:
victim_sex_recode = { 'F':"female",'M':"male",'X':"unknown",'-':"unknown",'N':"unknown",'H':"unknown" }

In [168]:
crimes['victim_sex'] = crimes['victim_sex'].map(victim_sex_recode)

In [169]:
crimes['premises_code'] = crimes['premises_code'].str.replace('.0','', regex=False)
crimes['premises_code'] = crimes['premises_code'].str.replace('.0','', regex=False)
crimes['crm_cd_1'] = crimes['crm_cd_1'].str.replace('.0','', regex=False)
crimes['crm_cd_2'] = crimes['crm_cd_2'].str.replace('.0','', regex=False)

In [170]:
crimes['premises_description'] = crimes['premises_description'].str.capitalize()
crimes['status_code_description'] = crimes['status_code_description'].str.lower().str.replace(' ','_')
crimes['crime_code_description'] = crimes['crime_code_description'].str.capitalize()
crimes['weapon_description'] = crimes['weapon_description'].str.capitalize()

In [35]:
monthnames = ['January','February','March','April','May','June','July','August','September','October','November','December']

In [171]:
crimes['time_occurred'] = crimes['time_occurred'].str.replace('.0','', regex=False)
crimes['time_occurred'] = crimes['time_occurred'].str.zfill(4)
crimes['hour'] = crimes['time_occurred'].str[:2]
crimes['minute'] = crimes['time_occurred'].str[2:]
crimes = crimes[crimes['hour'] != '0n']
crimes = crimes[crimes['hour'] != '24']

In [172]:
homicides = crimes[crimes['crime_code_description'].str.contains('Criminal homicide', na=False)]

---

### How many homicides: 2010-19?

In [189]:
len(homicides)

2722

### Filter dataframe so it only inlcudes part I — or "major" — cases

In [190]:
crimes_major = crimes.loc[crimes.part_type == '1']

In [191]:
len(crimes_major)

1142556

### Crime codes

In [42]:
codes = crimes_major.groupby(['part_type', 'crime_code', 'crime_code_description']).agg('size').reset_index()

In [43]:
codes.head()

Unnamed: 0,part_type,crime_code,crime_code_description,0
0,1,110,Criminal homicide,2722
1,1,113,"Manslaughter, negligent",5
2,1,121,"Rape, forcible",9931
3,1,122,"Rape, attempted",1082
4,1,210,Robbery,81975


### Recode crime_codes to identify 'part one' violent/property crimes_major

In [44]:
part_one_violent = ['110','113','121','122','210','220','230','231',\
                    '235','236','250','251','761','815','820','821']

In [45]:
part_one_property = ['310','320','330','331','341','343','345','350',
                     '351','352','353','410','420','421','433','440','441',
                     '442','443','444','445','450','451','452','453','470','471'
                     ,'472','473','474','475','480','485','487','510','520','522']

In [46]:
categories = []

for row in crimes_major['crime_code']:
    if row in part_one_violent:
        categories.append('pt_one_violent')
    elif row in part_one_property:
        categories.append('pt_one_property')
    else:
        categories.append('other')
        
crimes_major['part_category'] = categories

In [47]:
crimes_major['part_category'].value_counts('normalize').round(4)*100

pt_one_property    78.77
pt_one_violent     20.93
other               0.30
Name: part_category, dtype: float64

---

## Now that it's clean, what can we learn from this data?

In [50]:
cases_grouped = crimes_major.groupby(['crime_code_description', 'victim_sex'])\
.agg('size').reset_index(name='total').sort_values(by='total', ascending=False)

In [51]:
cases_grouped.head()

Unnamed: 0,crime_code_description,victim_sex,total
26,Burglary from vehicle,male,87075
23,Burglary,male,81418
117,Theft plain - petty ($950 & under),male,76316
25,Burglary from vehicle,female,70473
7,"Assault with deadly weapon, aggravated assault",male,65377


### Cases involving suspects or victimes defined by LAPD as 'homeless'

In [52]:
mocrimes_major = crimes_major.dropna(subset=['modus_operandi_code'])

In [53]:
mocodes = pd.read_csv('mo_codes.csv')

### Isolating just cases involving homeless people

In [54]:
# MO Code 1218 is to be used when a victim is homeless
# MO Code 2004 is to be used when a suspect is homeless
mocodes_homeless = mocodes[mocodes['mo_code_description'].str.lower().str.contains('homeless')]
mocodes_homeless.head()

Unnamed: 0,mo_code,mo_code_description
361,1218,Victim was Homeless/Transient
521,2004,Suspect is homeless/transient


### Limit homeless crime data to recent years to account for coding issues

In [55]:
homeless_dates = ['2017', '2018', '2019']

### Various filters

In [56]:
crimes_major['homeless_victim'] = (crimes_major['modus_operandi_code']\
                            .str.contains('1218', na=False)) & (crimes_major['year'].isin(homeless_dates))

In [57]:
crimes_major['homeless_suspect'] = (crimes_major['modus_operandi_code']\
                                .str.contains('2004', na=False)) & (crimes_major['year'].isin(homeless_dates))

In [58]:
# My street
rubens = crimes_major[crimes_major['address'].str.contains('RUBENS', na=False)]
# crimes_major tagged by specific modus operandi code
bunco = crimes_major[crimes_major['modus_operandi_code'].str.contains('0800', na=False) ]

#Homeless people codes
homeless_victims = crimes_major[(crimes_major['modus_operandi_code'].str.contains('1218', na=False)) & (crimes_major['year'].isin(homeless_dates))]
homeless_suspects = crimes_major[(crimes_major['modus_operandi_code'].str.contains('2004', na=False)) & (crimes_major['year'].isin(homeless_dates))]
homeless_homicides = homeless_victims[(homeless_victims['crime_code_description']\
                    .str.contains('Criminal homicide', na=False))\
                    & (homeless_victims['year'].isin(homeless_dates))]

In [59]:
homeless_victims_women = homeless_victims[homeless_victims['victim_sex'] == 'female']

### How many cases do we have involving homeless people? 

In [60]:
len(homeless_victims_women)

2755

In [61]:
len(homeless_victims)

7781

In [62]:
len(homeless_suspects)

12800

In [63]:
homeless_victims.columns

Index(['record_id', 'date_reported', 'date_occurred', 'time_occurred',
       'division', 'division_name', 'reporting_district', 'part_type',
       'crime_code', 'crime_code_description', 'modus_operandi_code',
       'victim_age', 'victim_sex', 'victim_descent', 'premises_code',
       'premises_description', 'weapon_code', 'weapon_description',
       'status_code', 'status_code_description', 'crm_cd_1', 'crm_cd_2',
       'crm_cd_3', 'crm_cd_4', 'address', 'cross_street', 'latitude',
       'longitide', 'year', 'quarter', 'day', 'month', 'weekday', 'monthname',
       'descent_description', 'hour', 'minute', 'part_category',
       'homeless_victim', 'homeless_suspect'],
      dtype='object')

---

## What's the arrest rate for specific crimes_major?

In [64]:
status = crimes_major.groupby(['status_code_description', 'homeless_victim',\
                         'crime_code', 'crime_code_description', 'year']).agg('size').reset_index(name='count')

### Pivot and count disposition of each crime

In [65]:
status_pivot = pd.pivot_table(status, values ='count', index =['crime_code_description'], 
                         columns = ['status_code_description'], \
                              aggfunc = np.sum, fill_value=0).reset_index()

In [66]:
status_pivot['arrests_tot'] = status_pivot['adult_arrest'] + status_pivot['juv_arrest']
status_pivot['crimes_tot'] = status_pivot.sum(axis=1)
status_pivot['arrest_rate'] = (( status_pivot['arrests_tot'] / status_pivot['crimes_tot'] )*100).round(2)

In [67]:
arrest_rate_all = status_pivot[['crime_code_description','arrest_rate']]

### Pivot and count disposition of each crime involving a homeless victim

In [68]:
status_homeless_victim = homeless_victims.groupby(['status_code_description', 'homeless_victim',\
                         'crime_code', 'crime_code_description', 'year']).agg('size').reset_index(name='count')

In [69]:
status_pivot_homeless = pd.pivot_table(status_homeless_victim, values ='count', index =['crime_code_description'], 
                         columns = ['status_code_description'], \
                              aggfunc = np.sum, fill_value=0).reset_index()

In [70]:
status_pivot_homeless['arrests_tot'] = status_pivot_homeless['adult_arrest'] + status_pivot_homeless['juv_arrest']
status_pivot_homeless['crimes_tot'] = status_pivot_homeless.sum(axis=1)
status_pivot_homeless['arrest_rate'] = \
    (( status_pivot_homeless['arrests_tot'] / status_pivot_homeless['crimes_tot'] )*100).round(2)

In [71]:
arrest_rate_homeless_victims = status_pivot_homeless[['crime_code_description','arrest_rate']]

In [72]:
status_pivot_homeless.head()

status_code_description,crime_code_description,adult_arrest,adult_other,invest_cont,juv_arrest,arrests_tot,crimes_tot,arrest_rate
0,Arson,2,0,20,0,2,24,8.33
1,Assault with deadly weapon on police officer,1,0,0,0,1,2,50.0
2,"Assault with deadly weapon, aggravated assault",421,489,1937,3,424,3274,12.95
3,Attempted robbery,23,16,137,0,23,199,11.56
4,Bike - stolen,1,4,83,0,1,89,1.12


In [73]:
arrest_rates = pd.merge(arrest_rate_homeless_victims, arrest_rate_all, how='left', \
                        on='crime_code_description', suffixes=('_homeless_victim', '_all_victims'))

In [74]:
arrest_rates.head(100)

status_code_description,crime_code_description,arrest_rate_homeless_victim,arrest_rate_all_victims
0,Arson,8.33,3.04
1,Assault with deadly weapon on police officer,50.0,33.71
2,"Assault with deadly weapon, aggravated assault",12.95,19.26
3,Attempted robbery,11.56,17.67
4,Bike - stolen,1.12,1.7
5,Boat - stolen,0.0,0.72
6,Brandish weapon,8.95,15.55
7,Burglary,1.79,7.4
8,Burglary from vehicle,6.82,2.44
9,"Burglary, attempted",50.0,7.45


### Aggregated arrest rate for all crimes

In [75]:
arrests_total = crimes_major.groupby(['status_code_description', 'year']).agg('size').reset_index(name='count')

In [76]:
arrests_total_pivot = pd.pivot_table(arrests_total, values ='count', index =['year'], 
                         columns = ['status_code_description'], \
                              aggfunc = np.sum, fill_value=0).reset_index()

In [77]:
arrests_total_pivot['arrests_tot'] = arrests_total_pivot['adult_arrest'] \
    + arrests_total_pivot['juv_arrest']

arrests_total_pivot['crimes_tot'] = arrests_total_pivot.sum(axis=1)

arrests_total_pivot['arrest_rate'] = (( arrests_total_pivot['arrests_tot']\
                                       / arrests_total_pivot['crimes_tot'] )*100).round(2)

In [78]:
arrests_total_pivot.head(10)

status_code_description,year,adult_arrest,adult_other,invest_cont,juv_arrest,juv_other,unk,arrests_tot,crimes_tot,arrest_rate
0,2010,6786,4582,100837,446,70,1,7232,119954,6.03
1,2011,7262,5318,94327,572,64,2,7834,115379,6.79
2,2012,9197,5927,91538,870,101,0,10067,117700,8.55
3,2013,9255,5522,88033,959,135,4,10214,114122,8.95
4,2014,9798,6282,87021,974,134,6,10772,114987,9.37
5,2015,11126,7419,100003,1004,113,2,12130,131797,9.2
6,2016,11742,8019,107269,937,140,2,12679,140788,9.01
7,2017,12007,8058,111023,884,148,1,12891,145012,8.89
8,2018,11465,7950,110815,824,72,0,12289,143415,8.57
9,2019,7583,4525,82901,457,42,2,8040,103550,7.76


### Aggregated arrest rate for all crimes involving homeless victims

In [79]:
arrests_homeless_victims_total = \
homeless_victims.groupby(['status_code_description', 'year']).agg('size').reset_index(name='count')

In [80]:
arrests_total_homeless_victims_pivot = \
    pd.pivot_table(arrests_homeless_victims_total, values ='count', index =['year'], \
    columns = ['status_code_description'], \
    aggfunc = np.sum, fill_value=0).reset_index()

In [81]:
arrests_total_homeless_victims_pivot['arrests_tot'] = arrests_total_homeless_victims_pivot['adult_arrest'] \
    + arrests_total_homeless_victims_pivot['juv_arrest']

arrests_total_homeless_victims_pivot['crimes_tot'] = arrests_total_homeless_victims_pivot.sum(axis=1)

arrests_total_homeless_victims_pivot['arrest_rate'] = (( arrests_total_homeless_victims_pivot['arrests_tot']\
                                       / arrests_total_homeless_victims_pivot['crimes_tot'] )*100).round(2)

In [82]:
arrests_total_homeless_victims_pivot.head(10)

status_code_description,year,adult_arrest,adult_other,invest_cont,juv_arrest,arrests_tot,crimes_tot,arrest_rate
0,2017,177,287,1305,2,179,1950,9.18
1,2018,338,442,2204,4,342,3330,10.27
2,2019,308,350,2361,3,311,3333,9.33
