# Processing LAPD crimes database for other notebooks

### Import data tools

In [1]:
import pandas as pd
import pyarrow
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000

### Download historical data, if needed (in case there are amendments)

In [2]:
!wget https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD -P '/Users/mhustiles/data/data/LA/'

--2021-03-27 19:15:26--  https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD
Resolving data.lacity.org (data.lacity.org)... 52.206.68.26, 52.206.140.199, 52.206.140.205
Connecting to data.lacity.org (data.lacity.org)|52.206.68.26|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’

rows.csv?accessType     [            <=>     ] 510.69M  4.80MB/s    in 2m 1s   

2021-03-27 19:17:30 (4.20 MB/s) - ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’ saved [535502578]



In [3]:
!mv '/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/data/LA/Crime_Data_from_2010_to_Present.csv'

### Download current data

In [4]:
!wget https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD -P '/Users/mhustiles/data/data/LA/'

--2021-03-27 19:17:30--  https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD
Resolving data.lacity.org (data.lacity.org)... 52.206.140.205, 52.206.140.199, 52.206.68.26
Connecting to data.lacity.org (data.lacity.org)|52.206.140.205|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’

rows.csv?accessType     [              <=>   ]  57.54M  5.77MB/s    in 12s     

2021-03-27 19:17:43 (4.87 MB/s) - ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’ saved [60334617]



In [5]:
!mv '/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/data/LA/Crime_Data_from_2020_to_Present.csv'

### Read both datasets

In [6]:
# https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-Present/63jg-8b9z
# https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD
crimes_old = pd.read_csv('/Users/mhustiles/data/data/LA/Crime_Data_from_2010_to_Present.csv', \
                        dtype={'area_name':str, 'rpt_dist_no':str, 'weapon_used_cd':str, 'crm_cd':str, 
                              'premis_cd':str, 'area':str})

# https://data.lacity.org/A-Safe-City/Crime-Data-from-2020-to-Present/2nrs-mtv8
# https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD
crimes_new = pd.read_csv('/Users/mhustiles/data/data/LA/Crime_Data_from_2020_to_Present.csv', \
                        dtype={'area_name':str, 'rpt_dist_no':str, 'weapon_used_cd':str, 'crm_cd':str, 
                              'premis_cd':str, 'area':str})

In [7]:
df = pd.concat([crimes_new,crimes_old])

In [8]:
len(df)

2352370

In [9]:
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

In [10]:
df = df.drop(df[df.lat < 30].index)

### These data are super messy...

In [11]:
df.rename(columns = { 
'dr_no':'record_id',
'date_rptd':'date_reported',
'date_occ':'date_occurred',
'time_occ':'time_occurred',
'area':'division',
'area_name':'division_name',
'rpt_dist_no':'reporting_district',
'part_1_2':'part_type',
'crm_cd':'crime_code',
'crm_cd_desc':'crime_code_description',
'mocodes':'modus_operandi_code',
'vict_age':'victim_age',
'vict_sex':'victim_sex',
'vict_descent':'victim_descent',
'premis_cd':'premises_code',
'premis_desc':'premises_description',
'weapon_used_cd':'weapon_code',
'weapon_desc':'weapon_description',
'status':'status_code',
'status_desc':'status_code_description',
'crm_cd_1':'crm_cd_1',
'crm_cd_2':'crm_cd_2',
'crm_cd_3':'crm_cd_3',
'crm_cd_4':'crm_cd_4',
'location':'address',
'cross_street':'cross_street',
'lat':'latitude',
'lon':'longitude',
 }, inplace = True)

### ... contunued

In [12]:
df['date_reported'] = df['date_reported'].str.replace(' 12:00:00 AM','')

In [13]:
df['date_occurred'] = df['date_occurred'].str.replace(' 12:00:00 AM','')

In [14]:
df.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1
0,10304468,01/08/2020,01/08/2020,2230,3.0,Southwest,377,2,624,BATTERY - SIMPLE ASSAULT,0444 0913,36,F,B,501.0,SINGLE FAMILY DWELLING,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",AO,Adult Other,624.0,,,,1100 W 39TH PL,,34.0141,-118.2978,
1,190101086,01/02/2020,01/01/2020,330,1.0,Central,163,2,624,BATTERY - SIMPLE ASSAULT,0416 1822 1414,25,M,H,102.0,SIDEWALK,500.0,UNKNOWN WEAPON/OTHER WEAPON,IC,Invest Cont,624.0,,,,700 S HILL ST,,34.0459,-118.2545,
2,201220752,09/16/2020,09/16/2020,1230,12.0,77th Street,1259,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),2004 1820 0913 0329 1202,62,M,B,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",,,IC,Invest Cont,745.0,,,,700 E 73RD ST,,33.9739,-118.263,
3,191501505,01/01/2020,01/01/2020,1730,15.0,N Hollywood,1543,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),0329 1402,76,F,W,502.0,"MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)",,,IC,Invest Cont,745.0,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,
4,191921269,01/01/2020,01/01/2020,415,19.0,Mission,1998,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",0329,31,X,X,409.0,BEAUTY SUPPLY STORE,,,IC,Invest Cont,740.0,,,,14400 TITUS ST,,34.2198,-118.4468,


In [15]:
df['date_reported'] = pd.to_datetime(df['date_reported'], format='%m/%d/%Y')

In [16]:
df['date_occurred'] = pd.to_datetime(df['date_occurred'], format='%m/%d/%Y')

In [17]:
df['year'] = df['date_occurred'].dt.year
df['quarter'] = df['date_occurred'].dt.quarter
df['day'] = df['date_occurred'].dt.day
df['month'] = df['date_occurred'].dt.month
df['weekday'] = df['date_occurred'].dt.weekday
df['monthname'] = df['date_occurred'].dt.month_name()

In [18]:
descent_recode = { 'A':"asian",'B':"black",'C':"asian",'D':"asian",'F':"asian",'G':"asian",\
                 'H':"hispanic",'I':"ai_an",'J':"asian",\
                 'K':"asian",'L':"asian",'O':"other",'P':"asian",'S':"asian",\
                 'U':"asian",'V':"asian",'W':"white",'X':"other",'Z':"asian" }

In [19]:
df['descent_description'] = df['victim_descent'].map(descent_recode)

In [20]:
victim_sex_recode = { 'F':"female",'M':"male",'X':"unknown",'-':"unknown",'N':"unknown",'H':"unknown" }

In [21]:
df['victim_sex'] = df['victim_sex'].map(victim_sex_recode)

In [22]:
df['premises_code'] = df['premises_code'].astype(str).str.replace('.0','', regex=False)
df['premises_code'] = df['premises_code'].astype(str).str.replace('.0','', regex=False)
df['crm_cd_1'] = df['crm_cd_1'].astype(str).str.replace('.0','', regex=False)
df['crm_cd_2'] = df['crm_cd_2'].astype(str).str.replace('.0','', regex=False)

In [23]:
df['premises_description'] = df['premises_description'].str.capitalize()
df['status_code_description'] = df['status_code_description'].str.lower().str.replace(' ','_')
df['crime_code_description'] = df['crime_code_description'].str.capitalize()
df['weapon_description'] = df['weapon_description'].str.capitalize()

In [24]:
monthnames = ['January','February','March','April','May','June','July','August','September','October','November','December']

In [25]:
df['time_occurred'] = df['time_occurred'].astype(str).str.replace('.0','', regex=False)
df['time_occurred'] = df['time_occurred'].astype(str).str.zfill(4)
df['hour'] = df['time_occurred'].astype(str).str[:2]
df['minute'] = df['time_occurred'].astype(str).str[2:]
df = df[df['hour'] != '0n']
df = df[df['hour'] != '24']

In [26]:
df['part_type'] = df['part_type'].astype(str).str.replace('.0', '', regex=False)

In [27]:
df['reporting_district'] = df['reporting_district'].astype(str).str.replace('.0', '', regex=False)

In [28]:
crimes = df.copy()

In [29]:
crimes.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute
0,10304468,2020-01-08,2020-01-08,2230,3.0,Southwest,377,2,624,Battery - simple assault,0444 0913,36,female,B,501,Single family dwelling,400.0,"Strong-arm (hands, fist, feet or bodily force)",AO,adult_other,624,,,,1100 W 39TH PL,,34.0141,-118.2978,,2020,1,8,1,2,January,black,22,30
1,190101086,2020-01-02,2020-01-01,330,1.0,Central,163,2,624,Battery - simple assault,0416 1822 1414,25,male,H,102,Sidewalk,500.0,Unknown weapon/other weapon,IC,invest_cont,624,,,,700 S HILL ST,,34.0459,-118.2545,,2020,1,1,1,2,January,hispanic,3,30
2,201220752,2020-09-16,2020-09-16,1230,12.0,77th Street,1259,2,745,Vandalism - misdeameanor ($399 or under),2004 1820 0913 0329 1202,62,male,B,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,,,,700 E 73RD ST,,33.9739,-118.263,,2020,3,16,9,2,September,black,12,30
3,191501505,2020-01-01,2020-01-01,1730,15.0,N Hollywood,1543,2,745,Vandalism - misdeameanor ($399 or under),0329 1402,76,female,W,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,,2020,1,1,1,2,January,white,17,30
4,191921269,2020-01-01,2020-01-01,415,19.0,Mission,1998,2,740,"Vandalism - felony ($400 & over, all church va...",0329,31,unknown,X,409,Beauty supply store,,,IC,invest_cont,740,,,,14400 TITUS ST,,34.2198,-118.4468,,2020,1,1,1,2,January,other,4,15


In [30]:
homicides = crimes[crimes['crime_code_description'].str.contains('Criminal homicide', na=False)]

In [31]:
hollenbeck_homicides = homicides[homicides['division_name'] == 'Hollenbeck']

In [32]:
hollenbeck_homicides.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute
29601,200405562,2020-02-10,2020-02-10,200,4.0,Hollenbeck,449,1,110,Criminal homicide,0913 1100 1402 0430 1820 0302 1309 0906,29,male,H,501,Single family dwelling,109.0,Semi-automatic pistol,AA,adult_arrest,110,,,,2600 MARENGO ST,,34.0545,-118.1992,,2020,1,10,2,0,February,hispanic,2,0
51769,200407038,2020-03-17,2020-03-17,905,4.0,Hollenbeck,478,1,110,Criminal homicide,0302 0334 0430 0202 0906 1100 2004 1407 1822 0341,26,male,H,102,Sidewalk,109.0,Semi-automatic pistol,AA,adult_arrest,110,998.0,,,900 GRANDE VISTA AV,,34.028,-118.2048,,2020,1,17,3,1,March,hispanic,9,5
61437,200401071,2020-09-26,2020-09-26,900,4.0,Hollenbeck,453,1,110,Criminal homicide,1822 0400 0416 0429 1202,62,male,H,101,Street,400.0,"Strong-arm (hands, fist, feet or bodily force)",IC,invest_cont,110,,,,1800 PENNSYLVANIA AV,,34.0475,-118.2169,,2020,3,26,9,5,September,hispanic,9,0
84319,200407880,2020-04-16,2020-04-15,2259,4.0,Hollenbeck,466,1,110,Criminal homicide,1402 1407 0946 1270 1809 1822 1100 0906 0430,45,male,H,108,Parking lot,102.0,Hand gun,AA,adult_arrest,110,998.0,,,2800 E CESAR E CHAVEZ AV,,34.0445,-118.2025,,2020,2,15,4,2,April,hispanic,22,59
84932,200408872,2020-05-15,2020-05-15,710,4.0,Hollenbeck,453,1,110,Criminal homicide,0906 1822 1100 0430 0334 1407 0302,40,male,H,109,Park/playground,102.0,Hand gun,IC,invest_cont,110,998.0,,,700 ECHANDIA ST,,34.0542,-118.2172,,2020,2,15,5,4,May,hispanic,7,10


In [33]:
hollenbeck_homicides.to_csv('output/hollenbeck_homicides.csv', index=False)

---

### Filter dataframe so it only inlcudes part I — or "major" — cases

In [34]:
crimes_part1 = crimes[crimes.part_type == '1']

In [35]:
crimes.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute
0,10304468,2020-01-08,2020-01-08,2230,3.0,Southwest,377,2,624,Battery - simple assault,0444 0913,36,female,B,501,Single family dwelling,400.0,"Strong-arm (hands, fist, feet or bodily force)",AO,adult_other,624,,,,1100 W 39TH PL,,34.0141,-118.2978,,2020,1,8,1,2,January,black,22,30
1,190101086,2020-01-02,2020-01-01,330,1.0,Central,163,2,624,Battery - simple assault,0416 1822 1414,25,male,H,102,Sidewalk,500.0,Unknown weapon/other weapon,IC,invest_cont,624,,,,700 S HILL ST,,34.0459,-118.2545,,2020,1,1,1,2,January,hispanic,3,30
2,201220752,2020-09-16,2020-09-16,1230,12.0,77th Street,1259,2,745,Vandalism - misdeameanor ($399 or under),2004 1820 0913 0329 1202,62,male,B,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,,,,700 E 73RD ST,,33.9739,-118.263,,2020,3,16,9,2,September,black,12,30
3,191501505,2020-01-01,2020-01-01,1730,15.0,N Hollywood,1543,2,745,Vandalism - misdeameanor ($399 or under),0329 1402,76,female,W,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,,2020,1,1,1,2,January,white,17,30
4,191921269,2020-01-01,2020-01-01,415,19.0,Mission,1998,2,740,"Vandalism - felony ($400 & over, all church va...",0329,31,unknown,X,409,Beauty supply store,,,IC,invest_cont,740,,,,14400 TITUS ST,,34.2198,-118.4468,,2020,1,1,1,2,January,other,4,15


### Crime codes

In [36]:
codes = crimes.groupby(['part_type', 'crime_code', 'crime_code_description']).agg('size').reset_index()

In [37]:
codes.head()

Unnamed: 0,part_type,crime_code,crime_code_description,0
0,1,110,Criminal homicide,3199
1,1,113,"Manslaughter, negligent",10
2,1,121,"Rape, forcible",11309
3,1,122,"Rape, attempted",1190
4,1,210,Robbery,92122


### Recode crime_codes to identify 'part one' violent/property crimes_major

In [38]:
part_one_violent = ['110','113','121','122','210','220','230','231',\
                    '235','236','250','251','761','815','820','821']

In [39]:
part_one_property = ['310','320','330','331','341','343','345','350',
                     '351','352','353','410','420','421','433','440','441',
                     '442','443','444','445','450','451','452','453','470','471'
                     ,'472','473','474','475','480','485','487','510','520','522']

In [40]:
categories = []

for row in crimes['crime_code']:
    if row in part_one_violent:
        categories.append('pt_one_violent')
    elif row in part_one_property:
        categories.append('pt_one_property')
    else:
        categories.append('other')
        
crimes['part_category'] = categories

---

### Export clean table of major crimes for other notebooks

In [41]:
# crimes.reset_index().to_feather('/Users/mhustiles/data/data/LA/crimes.feather')

In [42]:
crimes.to_csv('/Users/mhustiles/data/data/LA/crimes.csv', index=False)

In [43]:
crimes.head()

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,division.1,year,quarter,day,month,weekday,monthname,descent_description,hour,minute,part_category
0,10304468,2020-01-08,2020-01-08,2230,3.0,Southwest,377,2,624,Battery - simple assault,0444 0913,36,female,B,501,Single family dwelling,400.0,"Strong-arm (hands, fist, feet or bodily force)",AO,adult_other,624,,,,1100 W 39TH PL,,34.0141,-118.2978,,2020,1,8,1,2,January,black,22,30,other
1,190101086,2020-01-02,2020-01-01,330,1.0,Central,163,2,624,Battery - simple assault,0416 1822 1414,25,male,H,102,Sidewalk,500.0,Unknown weapon/other weapon,IC,invest_cont,624,,,,700 S HILL ST,,34.0459,-118.2545,,2020,1,1,1,2,January,hispanic,3,30,other
2,201220752,2020-09-16,2020-09-16,1230,12.0,77th Street,1259,2,745,Vandalism - misdeameanor ($399 or under),2004 1820 0913 0329 1202,62,male,B,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,,,,700 E 73RD ST,,33.9739,-118.263,,2020,3,16,9,2,September,black,12,30,other
3,191501505,2020-01-01,2020-01-01,1730,15.0,N Hollywood,1543,2,745,Vandalism - misdeameanor ($399 or under),0329 1402,76,female,W,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,,2020,1,1,1,2,January,white,17,30,other
4,191921269,2020-01-01,2020-01-01,415,19.0,Mission,1998,2,740,"Vandalism - felony ($400 & over, all church va...",0329,31,unknown,X,409,Beauty supply store,,,IC,invest_cont,740,,,,14400 TITUS ST,,34.2198,-118.4468,,2020,1,1,1,2,January,other,4,15,other
