In [1]:
import pandas as pd
import matplotlib as mpl
import geopandas as gpd
import descartes
import geojson
import json
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.float_format = '{:,}'.format

In [2]:
# ! jt -r

Reset css and font defaults in:
/Users/mhustiles/.jupyter/custom &
/Users/mhustiles/Library/Jupyter/nbextensions


In [3]:
# Load the arrest data CSV into a DataFrame.
arrests = pd.read_csv('/Users/mhustiles/Desktop/data/LA/Arrest_Data_from_2010_to_Present.csv')
# Normalize the headers to snake_case: trim, lowercase, spaces -> underscores,
# and drop parentheses. regex=False makes every pattern a literal string, so
# '(' and ')' are never interpreted as (invalid) regular expressions regardless
# of the pandas version's default for `regex`.
arrests.columns = (
    arrests.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

In [4]:
arrests.columns

Index(['report_id', 'arrest_date', 'time', 'area_id', 'area_name',
       'reporting_district', 'age', 'sex_code', 'descent_code',
       'charge_group_code', 'charge_group_description', 'arrest_type_code',
       'charge', 'charge_description', 'address', 'cross_street', 'location'],
      dtype='object')

In [5]:
# These columns are identifiers/codes rather than quantities, so store them as text.
text_columns = ['report_id', 'time', 'area_id', 'charge_group_code', 'reporting_district']
arrests[text_columns] = arrests[text_columns].astype(str)

In [6]:
# Parse the arrest date (month/day/year text) into real datetimes, then derive
# the calendar parts used for grouping later in the notebook.
arrests['arrest_date'] = pd.to_datetime(arrests['arrest_date'], format='%m/%d/%Y')
arrests['year'] = arrests['arrest_date'].dt.year
arrests['month'] = arrests['arrest_date'].dt.month
# `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` yields the
# same English day labels ('Monday', 'Tuesday', ...).
arrests['weekday'] = arrests['arrest_date'].dt.day_name()

In [7]:
# Count arrests per (charge code, charge description) pair, most frequent first.
charges = (
    arrests
    .groupby(['charge', 'charge_description'])
    .size()
    .sort_values(ascending=False)
    .reset_index()
)

In [8]:
# Keep only the arrests booked under charge code 41.18DLAMC.
charges_homeless = arrests.loc[arrests['charge'] == '41.18DLAMC']

In [9]:
charges_homeless.head(20)

Unnamed: 0,report_id,arrest_date,time,area_id,area_name,reporting_district,age,sex_code,descent_code,charge_group_code,charge_group_description,arrest_type_code,charge,charge_description,address,cross_street,location,year,month,weekday
172,190618578,2019-09-05,1000.0,6,Hollywood,637,33,M,W,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,1700 TAMARIND AV,,"(34.103, -118.3196)",2019,9,Thursday
419,190122466,2019-09-04,2145.0,1,Central,192,42,F,O,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,HILL,PICO,"(34.0377, -118.2621)",2019,9,Wednesday
572,190122462,2019-09-03,2045.0,1,Central,163,41,M,O,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,9TH,HILL,"(34.043, -118.2571)",2019,9,Tuesday
1209,191516619,2019-09-01,1010.0,15,N Hollywood,1511,51,M,W,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,13300 RAYMER ST,,"(34.2063, -118.4225)",2019,9,Sunday
1629,190122040,2019-08-30,1900.0,1,Central,164,30,M,H,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,8TH,HOPE,"(34.0446, -118.2507)",2019,8,Friday
3222,190121670,2019-08-24,1520.0,1,Central,111,65,M,B,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,ARCADIA,LOS ANGELES,"(34.0553, -118.2391)",2019,8,Saturday
3375,190121669,2019-08-24,1510.0,1,Central,118,37,M,H,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,ALAMEDA,LOS ANGELES,"(34.0663, -118.2302)",2019,8,Saturday
4215,190617782,2019-08-21,1630.0,6,Hollywood,636,28,M,W,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,YUCCA,MC CADDEN,"(34.1032, -118.3374)",2019,8,Wednesday
4687,190121298,2019-08-19,2035.0,1,Central,174,52,M,B,20.0,Disorderly Conduct,M,41.18DLAMC,SIT/LIE/SLEEP SIDEWALK OR STREET,9TH ST,HILL ST,"(34.0426, -118.2528)",2019,8,Monday
5364,190216889,2019-08-16,1520.0,2,Rampart,246,46,M,H,,,I,41.18DLAMC,,6TH ST,ALVARADO,"(34.0596, -118.2749)",2019,8,Friday


### Clean up the location field so latitude and longitude are in separate columns

In [10]:
# Strip the surrounding parentheses so each value reads "lat, lon".
# regex=False: '(' and ')' are literal characters here, not regex syntax.
arrests['location'] = (
    arrests['location']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Split every value on the comma in one vectorized pass. This replaces the
# previous row loop, which
#   * rebound `lat` (the altair_latimes module alias) to a list,
#   * referenced `np` without it being imported, and
#   * could leave the two lists with different lengths when a value had no comma.
location_parts = arrests['location'].str.split(',')

# Rows that are missing, or that do not contain at least two comma-separated
# parts, get NaN in both new columns.
has_both = location_parts.str.len() >= 2

# The values are kept as the split text (no numeric conversion), as before.
arrests['latitude'] = location_parts.str[0].where(has_both)
arrests['longitude'] = location_parts.str[1].where(has_both)

In [11]:
# Remove the raw location text and the cross-street column from the frame, in place.
arrests.drop(columns=['location', 'cross_street'], inplace=True)

In [12]:
arrests.head()

Unnamed: 0,report_id,arrest_date,time,area_id,area_name,reporting_district,age,sex_code,descent_code,charge_group_code,charge_group_description,arrest_type_code,charge,charge_description,address,year,month,weekday,latitude,longitude
0,192115378,2019-09-07,1330.0,21,Topanga,2134,70,M,W,16.0,Narcotic Drug Laws,M,11364HS,POSSESSION CNTL SUBSTANCE PARAPHERNALIA,22200 SHERMAN WY,2019,9,Saturday,34.201,-118.6103
1,191222114,2019-09-07,558.0,12,77th Street,1249,20,F,B,13.0,Prostitution/Allied,M,647(B)PC,PROSTITUTION,6900 DENVER AV,2019,9,Saturday,33.9774,-118.2838
2,5740147,2019-09-07,1500.0,14,Pacific,1431,56,F,W,8.0,Other Assaults,M,243(E)(1)PC,BATT EX-SPOUSE/FIANCEE/PERSN W/DATING REL,1700 PACIFIC AV,2019,9,Saturday,33.9872,-118.4718
3,5739903,2019-09-07,340.0,2,Rampart,246,27,M,H,8.0,Other Assaults,M,243(A)PC,BATTERY ON PERSON,400 S BONNIE BRAE ST,2019,9,Saturday,34.0598,-118.2734
4,5739905,2019-09-07,240.0,5,Harbor,517,45,F,H,22.0,Driving Under Influence,M,23152(A)VC,DRUNK DRIVING ALCOHOL/DRUGS,1000 N AVALON BL,2019,9,Saturday,33.7829,-118.2627


### What's the average age for men and women who get arrested? 

In [13]:
# Arrestee ages grouped by sex code (a groupby object; no aggregate applied yet).
mean_age = arrests.groupby('sex_code')['age']

In [14]:
mean_age.mean()

sex_code
F   31.811035978518742
M     34.8370015796835
Name: age, dtype: float64

### What's the maximum age of arrestees for each charge description?

In [15]:
# Arrestee ages grouped by charge description (a groupby object; no aggregate applied yet).
age_charge = arrests.groupby('charge_description')['age']

In [16]:
age_charge.max().round(0).sort_values(ascending=False)

charge_description
PARK REGULATIONS                              96
MURDER:FIRST DEGREE:SHOOT FROM VEHICLE        96
POSSESSION BURGLARY TOOLS                     96
DRINKING IN PUBLIC                            94
THEFT FROM VEHICLE                            94
CORPORAL INJURY ON SPOUSE/COHABITANT/ETC      94
DRINKING ALCOHOL*******                       93
ADW, NOT FIREARM, W/GBI                       93
DRUNK DRIVING ALCOHOL/DRUGS                   92
DRIVE W/LIC SUSPEND/REVOKE 4 OTHER REASON     92
PROSTITUTION                                  92
TERRORIZE CAUSING FEAR                        92
ILLEGAL POSSESSION OF SHOPPING CART           92
DISP/ETC DANG DRUG/DEVICE                     91
OPEN ALCOHOLIC BEV IN PUBLIC PARK/PLACE       91
HIT AND RUN:PROP DAMAGE                       90
FTA AFTER WRITTEN PROMISE                     90
LOS ANGELES MUNICIPAL CODE                    90
PRESENT IN PARK AFTER HOURS                   90
BATTERY ON PERSON                             90
A

In [17]:
# Month values grouped by arrest year (a groupby object; no aggregate applied yet).
month = arrests.groupby('year')['month']

In [18]:
month.count()

year
2010    162418
2011    157641
2012    163315
2013    152679
2014    139385
2015    126162
2016    118142
2017    107677
2018    104283
2019     65908
Name: month, dtype: int64