# Processing LAPD crimes database for other notebooks

### Import data tools

In [1]:
import pandas as pd
import pyarrow
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000

### Download historical data, if needed (in case there are amendments)

In [1]:
!wget https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD -P '/Users/mhustiles/data/data/LA/'

--2021-01-25 14:21:16--  https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD
Resolving data.lacity.org (data.lacity.org)... 52.206.68.26, 52.206.140.205, 52.206.140.199
Connecting to data.lacity.org (data.lacity.org)|52.206.68.26|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’

rows.csv?accessType     [         <=>        ] 510.55M  5.70MB/s    in 90s     

2021-01-25 14:22:47 (5.68 MB/s) - ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’ saved [535350143]



In [3]:
# !mv '/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/data/LA/Crime_Data_from_2010_to_Present.csv'

### Download current data

In [4]:
# !wget https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD -P '/Users/mhustiles/data/data/LA/'

In [5]:
# !mv '/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/data/LA/Crime_Data_from_2020_to_Present.csv'

### Read both datasets

In [48]:
# https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-Present/63jg-8b9z
# https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD
crimes_old = pd.read_csv('/Users/mhustiles/data/data/LA/Crime_Data_from_2010_to_Present.csv', \
                        dtype={'area_name':str, 'rpt_dist_no':str, 'weapon_used_cd':str, 'crm_cd':str, 
                              'premis_cd':str, 'area':str})

# https://data.lacity.org/A-Safe-City/Crime-Data-from-2020-to-Present/2nrs-mtv8
# https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD
crimes_new = pd.read_csv('/Users/mhustiles/data/data/LA/Crime_Data_from_2020_to_Present.csv', \
                        dtype={'area_name':str, 'rpt_dist_no':str, 'weapon_used_cd':str, 'crm_cd':str, 
                              'premis_cd':str, 'area':str})

In [49]:
df = pd.concat([crimes_new,crimes_old])

In [50]:
len(df)

1549227

In [51]:
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

In [52]:
df = df.drop(df[df.lat < 30].index)

### These data are super messy...

In [53]:
df.rename(columns = { 
'dr_no':'record_id',
'date_rptd':'date_reported',
'date_occ':'date_occurred',
'time_occ':'time_occurred',
'area':'division',
'area_name':'division_name',
'rpt_dist_no':'reporting_district',
'part_1_2':'part_type',
'crm_cd':'crime_code',
'crm_cd_desc':'crime_code_description',
'mocodes':'modus_operandi_code',
'vict_age':'victim_age',
'vict_sex':'victim_sex',
'vict_descent':'victim_descent',
'premis_cd':'premises_code',
'premis_desc':'premises_description',
'weapon_used_cd':'weapon_code',
'weapon_desc':'weapon_description',
'status':'status_code',
'status_desc':'status_code_description',
'crm_cd_1':'crm_cd_1',
'crm_cd_2':'crm_cd_2',
'crm_cd_3':'crm_cd_3',
'crm_cd_4':'crm_cd_4',
'location':'address',
'cross_street':'cross_street',
'lat':'latitude',
'lon':'longitude',
 }, inplace = True)

### ... contunued

In [54]:
df['date_reported'] = df['date_reported'].str.replace(' 12:00:00 AM','')

In [55]:
df['date_occurred'] = df['date_occurred'].str.replace(' 12:00:00 AM','')

In [56]:
df.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1
0,10304468,01/08/2020,01/08/2020,2230,3.0,Southwest,377.0,2.0,624.0,BATTERY - SIMPLE ASSAULT,0444 0913,36.0,F,B,501.0,SINGLE FAMILY DWELLING,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",AO,Adult Other,624.0,,,,1100 W 39TH PL,,34.0141,-118.2978,
1,190101086,01/02/2020,01/01/2020,330,1.0,Central,163.0,2.0,624.0,BATTERY - SIMPLE ASSAULT,0416 1822 1414,25.0,M,H,102.0,SIDEWALK,500.0,UNKNOWN WEAPON/OTHER WEAPON,IC,Invest Cont,624.0,,,,700 S HILL ST,,34.0459,-118.2545,
2,201220752,09/16/2020,09/16/2020,1230,12.0,77th Street,1259.0,2.0,745.0,VANDALISM - MISDEAMEANOR ($399 OR UNDER),2004 1820 0913 0329 1202,62.0,M,B,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",,,IC,Invest Cont,745.0,,,,700 E 73RD ST,,33.9739,-118.263,
3,191501505,01/01/2020,01/01/2020,1730,15.0,N Hollywood,1543.0,2.0,745.0,VANDALISM - MISDEAMEANOR ($399 OR UNDER),0329 1402,76.0,F,W,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",,,IC,Invest Cont,745.0,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,
4,191921269,01/01/2020,01/01/2020,415,19.0,Mission,1998.0,2.0,740.0,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",0329,31.0,X,X,409.0,BEAUTY SUPPLY STORE,,,IC,Invest Cont,740.0,,,,14400 TITUS ST,,34.2198,-118.4468,


In [57]:
df['date_reported'] = pd.to_datetime(df['date_reported'], format='%m/%d/%Y')

In [58]:
df['date_occurred'] = pd.to_datetime(df['date_occurred'], format='%m/%d/%Y')

In [59]:
df['year'] = df['date_occurred'].dt.year
df['quarter'] = df['date_occurred'].dt.quarter
df['day'] = df['date_occurred'].dt.day
df['month'] = df['date_occurred'].dt.month
df['weekday'] = df['date_occurred'].dt.weekday
df['monthname'] = df['date_occurred'].dt.month_name()

In [60]:
descent_recode = { 'A':"asian",'B':"black",'C':"asian",'D':"asian",'F':"asian",'G':"asian",\
                 'H':"hispanic",'I':"ai_an",'J':"asian",\
                 'K':"asian",'L':"asian",'O':"other",'P':"asian",'S':"asian",\
                 'U':"asian",'V':"asian",'W':"white",'X':"other",'Z':"asian" }

In [61]:
df['descent_description'] = df['victim_descent'].map(descent_recode)

In [62]:
victim_sex_recode = { 'F':"female",'M':"male",'X':"unknown",'-':"unknown",'N':"unknown",'H':"unknown" }

In [63]:
df['victim_sex'] = df['victim_sex'].map(victim_sex_recode)

In [64]:
df['premises_code'] = df['premises_code'].astype(str).str.replace('.0','', regex=False)
df['premises_code'] = df['premises_code'].astype(str).str.replace('.0','', regex=False)
df['crm_cd_1'] = df['crm_cd_1'].astype(str).str.replace('.0','', regex=False)
df['crm_cd_2'] = df['crm_cd_2'].astype(str).str.replace('.0','', regex=False)

In [65]:
df['premises_description'] = df['premises_description'].str.capitalize()
df['status_code_description'] = df['status_code_description'].str.lower().str.replace(' ','_')
df['crime_code_description'] = df['crime_code_description'].str.capitalize()
df['weapon_description'] = df['weapon_description'].str.capitalize()

In [66]:
monthnames = ['January','February','March','April','May','June','July','August','September','October','November','December']

In [67]:
df['time_occurred'] = df['time_occurred'].astype(str).str.replace('.0','', regex=False)
df['time_occurred'] = df['time_occurred'].astype(str).str.zfill(4)
df['hour'] = df['time_occurred'].astype(str).str[:2]
df['minute'] = df['time_occurred'].astype(str).str[2:]
df = df[df['hour'] != '0n']
df = df[df['hour'] != '24']

In [68]:
df['part_type'] = df['part_type'].astype(str).str.replace('.0', '', regex=False)

In [69]:
df['reporting_district'] = df['reporting_district'].astype(str).str.replace('.0', '', regex=False)

In [70]:
crimes = df.copy()

In [71]:
crimes.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute
0,10304468,2020-01-08,2020-01-08,2230,3.0,Southwest,377,2,624.0,Battery - simple assault,0444 0913,36.0,female,B,501,Single family dwelling,400.0,"Strong-arm (hands, fist, feet or bodily force)",AO,adult_other,624,,,,1100 W 39TH PL,,34.0141,-118.2978,,2020,1,8,1,2,January,black,22,30
1,190101086,2020-01-02,2020-01-01,330,1.0,Central,163,2,624.0,Battery - simple assault,0416 1822 1414,25.0,male,H,102,Sidewalk,500.0,Unknown weapon/other weapon,IC,invest_cont,624,,,,700 S HILL ST,,34.0459,-118.2545,,2020,1,1,1,2,January,hispanic,3,30
2,201220752,2020-09-16,2020-09-16,1230,12.0,77th Street,1259,2,745.0,Vandalism - misdeameanor ($399 or under),2004 1820 0913 0329 1202,62.0,male,B,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,,,,700 E 73RD ST,,33.9739,-118.263,,2020,3,16,9,2,September,black,12,30
3,191501505,2020-01-01,2020-01-01,1730,15.0,N Hollywood,1543,2,745.0,Vandalism - misdeameanor ($399 or under),0329 1402,76.0,female,W,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,,2020,1,1,1,2,January,white,17,30
4,191921269,2020-01-01,2020-01-01,415,19.0,Mission,1998,2,740.0,"Vandalism - felony ($400 & over, all church va...",0329,31.0,unknown,X,409,Beauty supply store,,,IC,invest_cont,740,,,,14400 TITUS ST,,34.2198,-118.4468,,2020,1,1,1,2,January,other,4,15


In [72]:
homicides = crimes[crimes['crime_code_description'].str.contains('Criminal homicide', na=False)]

In [73]:
hollenbeck_homicides = homicides[homicides['division_name'] == 'Hollenbeck']

In [74]:
hollenbeck_homicides.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute
29387,200405562,2020-02-10,2020-02-10,200,4.0,Hollenbeck,449,1,110.0,Criminal homicide,0913 1100 1402 0430 1820 0302 1309 0906,29.0,male,H,501,Single family dwelling,109.0,Semi-automatic pistol,AA,adult_arrest,110,,,,2600 MARENGO ST,,34.0545,-118.1992,,2020,1,10,2,0,February,hispanic,2,0
51374,200407038,2020-03-17,2020-03-17,905,4.0,Hollenbeck,478,1,110.0,Criminal homicide,0302 0334 0430 0202 0906 1100 2004 1407 1822 0341,26.0,male,H,102,Sidewalk,109.0,Semi-automatic pistol,AA,adult_arrest,110,998.0,,,900 GRANDE VISTA AV,,34.028,-118.2048,,2020,1,17,3,1,March,hispanic,9,5
60939,200401071,2020-09-26,2020-09-26,900,4.0,Hollenbeck,453,1,110.0,Criminal homicide,1822 0400 0416 0429 1202,62.0,male,H,101,Street,400.0,"Strong-arm (hands, fist, feet or bodily force)",IC,invest_cont,110,,,,1800 PENNSYLVANIA AV,,34.0475,-118.2169,,2020,3,26,9,5,September,hispanic,9,0
83533,200407880,2020-04-16,2020-04-15,2259,4.0,Hollenbeck,466,1,110.0,Criminal homicide,1402 1407 0946 1270 1809 1822 1100 0906 0430,45.0,male,H,108,Parking lot,102.0,Hand gun,AA,adult_arrest,110,998.0,,,2800 E CESAR E CHAVEZ AV,,34.0445,-118.2025,,2020,2,15,4,2,April,hispanic,22,59
84127,200408872,2020-05-15,2020-05-15,710,4.0,Hollenbeck,453,1,110.0,Criminal homicide,0906 1822 1100 0430 0334 1407 0302,40.0,male,H,109,Park/playground,102.0,Hand gun,IC,invest_cont,110,998.0,,,700 ECHANDIA ST,,34.0542,-118.2172,,2020,2,15,5,4,May,hispanic,7,10


In [75]:
hollenbeck_homicides.to_csv('output/hollenbeck_homicides.csv', index=False)

---

### Filter dataframe so it only inlcudes part I — or "major" — cases

In [76]:
crimes = crimes[crimes.part_type == '1']

In [77]:
crimes.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute
5,200100501,2020-01-02,2020-01-01,30,1.0,Central,163,1,121.0,"Rape, forcible",0413 1822 1262 1415,25.0,female,H,735,Night club (open evenings only),500.0,Unknown weapon/other weapon,IC,invest_cont,121,998.0,,,700 S BROADWAY,,34.0452,-118.2534,,2020,1,1,1,2,January,hispanic,0,30
6,200100502,2020-01-02,2020-01-02,1315,1.0,Central,161,1,442.0,Shoplifting - petty theft ($950 & under),1402 2004 0344 0387,23.0,male,H,404,Department store,,,IC,invest_cont,442,998.0,,,700 S FIGUEROA ST,,34.0483,-118.2631,,2020,1,2,1,3,January,hispanic,13,15
8,200100507,2020-01-04,2020-01-04,200,1.0,Central,101,1,341.0,"Theft-grand ($950.01 & over)excpt,guns,fowl,li...",1822 0344 1402,23.0,male,B,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,341,998.0,,,700 BERNARD ST,,34.0677,-118.2398,,2020,1,4,1,5,January,black,2,0
10,200100509,2020-01-04,2020-01-04,2200,1.0,Central,192,1,330.0,Burglary from vehicle,1822 1414 0344 1307,29.0,male,A,101,Street,306.0,Rock/thrown object,IC,invest_cont,330,,,,15TH,OLIVE,34.0359,-118.2648,,2020,1,4,1,5,January,asian,22,0
12,200100514,2020-01-05,2020-01-05,1355,1.0,Central,162,1,341.0,"Theft-grand ($950.01 & over)excpt,guns,fowl,li...",1822 0344 2032,41.0,male,A,503,Hotel,,,AA,adult_arrest,341,,,,800 S OLIVE ST,,34.0452,-118.2569,,2020,1,5,1,6,January,asian,13,55


### Crime codes

In [78]:
codes = crimes.groupby(['part_type', 'crime_code', 'crime_code_description']).agg('size').reset_index()

In [79]:
codes.head()

Unnamed: 0,part_type,crime_code,crime_code_description,0
0,1,110.0,Criminal homicide,2225
1,1,113.0,"Manslaughter, negligent",6
2,1,121.0,"Rape, forcible",7264
3,1,122.0,"Rape, attempted",853
4,1,210.0,Robbery,60332


### Recode crime_codes to identify 'part one' violent/property crimes_major

In [80]:
part_one_violent = ['110','113','121','122','210','220','230','231',\
                    '235','236','250','251','761','815','820','821']

In [81]:
part_one_property = ['310','320','330','331','341','343','345','350',
                     '351','352','353','410','420','421','433','440','441',
                     '442','443','444','445','450','451','452','453','470','471'
                     ,'472','473','474','475','480','485','487','510','520','522']

In [82]:
categories = []

for row in crimes['crime_code']:
    if row in part_one_violent:
        categories.append('pt_one_violent')
    elif row in part_one_property:
        categories.append('pt_one_property')
    else:
        categories.append('other')
        
crimes['part_category'] = categories

---

### Export clean table of major crimes for other notebooks

In [83]:
# crimes.reset_index().to_feather('/Users/mhustiles/data/data/LA/crimes.feather')

In [84]:
crimes.to_csv('/Users/mhustiles/data/data/LA/crimes.csv', index=False)

In [85]:
crimes.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute,part_category
5,200100501,2020-01-02,2020-01-01,30,1.0,Central,163,1,121.0,"Rape, forcible",0413 1822 1262 1415,25.0,female,H,735,Night club (open evenings only),500.0,Unknown weapon/other weapon,IC,invest_cont,121,998.0,,,700 S BROADWAY,,34.0452,-118.2534,,2020,1,1,1,2,January,hispanic,0,30,other
6,200100502,2020-01-02,2020-01-02,1315,1.0,Central,161,1,442.0,Shoplifting - petty theft ($950 & under),1402 2004 0344 0387,23.0,male,H,404,Department store,,,IC,invest_cont,442,998.0,,,700 S FIGUEROA ST,,34.0483,-118.2631,,2020,1,2,1,3,January,hispanic,13,15,other
8,200100507,2020-01-04,2020-01-04,200,1.0,Central,101,1,341.0,"Theft-grand ($950.01 & over)excpt,guns,fowl,li...",1822 0344 1402,23.0,male,B,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,341,998.0,,,700 BERNARD ST,,34.0677,-118.2398,,2020,1,4,1,5,January,black,2,0,other
10,200100509,2020-01-04,2020-01-04,2200,1.0,Central,192,1,330.0,Burglary from vehicle,1822 1414 0344 1307,29.0,male,A,101,Street,306.0,Rock/thrown object,IC,invest_cont,330,,,,15TH,OLIVE,34.0359,-118.2648,,2020,1,4,1,5,January,asian,22,0,other
12,200100514,2020-01-05,2020-01-05,1355,1.0,Central,162,1,341.0,"Theft-grand ($950.01 & over)excpt,guns,fowl,li...",1822 0344 2032,41.0,male,A,503,Hotel,,,AA,adult_arrest,341,,,,800 S OLIVE ST,,34.0452,-118.2569,,2020,1,5,1,6,January,asian,13,55,other
