# Processing LAPD crimes database for other notebooks

### Import data tools

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import json
import jenkspy
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
# Register and switch on the 'latimes' theme supplied by altair_latimes for Altair charts.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

# Raise the display limits so wide/long frames are shown without truncation.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 1000)



### Download historical data, if needed (in case there are amendments)

In [2]:
# !wget 'https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD' \
# -P '/Users/mhustiles/data/LAPD/'

In [3]:
# !mv '/Users/mhustiles/data/LAPD/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/LAPD/Crime_Data_from_2010_to_Present.csv'

### Download current data

In [4]:
# !wget 'https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD' -P '/Users/mhustiles/data/LAPD/'

In [5]:
# !mv '/Users/mhustiles/data/LAPD/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/LAPD/Crime_Data_from_2020_to_Present.csv'

### Read both datasets

In [6]:
# Columns that hold identifiers/codes and should be read as text rather than numbers.
# NOTE(review): these keys are the normalised snake_case names that the header clean-up
# further down produces — confirm they match the raw CSV headers, otherwise the dtypes
# are presumably not being applied at read time.
code_columns_as_text = {
    'area_name': str,
    'rpt_dist_no': str,
    'weapon_used_cd': str,
    'crm_cd': str,
    'premis_cd': str,
    'area': str,
}

# Historical file.
# https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-Present/63jg-8b9z
# https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD
crimes_old = pd.read_csv(
    '/Users/mhustiles/data/LAPD/Crime_Data_from_2010_to_Present.csv',
    dtype=code_columns_as_text,
)

# Current file.
# https://data.lacity.org/A-Safe-City/Crime-Data-from-2020-to-Present/2nrs-mtv8
# https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD
crimes_new = pd.read_csv(
    '/Users/mhustiles/data/LAPD/Crime_Data_from_2020_to_Present.csv',
    dtype=code_columns_as_text,
)

In [7]:
def _normalized_header(name):
    """Return a column header in the snake_case form used by the rest of this notebook.

    Surrounding whitespace is stripped, the text is lower-cased, spaces and hyphens
    become underscores, and parentheses are removed.
    """
    return (
        name.strip().lower()
        .replace(' ', '_')
        .replace('(', '').replace(')', '')
        .replace('-', '_')
    )


# Stack the current and historical files into one frame.
#
# Headers are normalised *before* stacking so that a column spelled slightly differently
# in the two files (stray whitespace, different case) lines up as ONE column. Stacking
# first would leave two same-named, half-empty columns once the headers are cleaned, and
# the later "drop duplicated columns" step would then keep only the first of them.
#
# ignore_index=True gives every row a unique label; without it each file contributes its
# own 0..n-1 labels, so any later label-based operation would hit rows from both files.
df = pd.concat(
    [
        crimes_new.rename(columns=_normalized_header),
        crimes_old.rename(columns=_normalized_header),
    ],
    ignore_index=True,
)

In [8]:
# Number of rows in the combined frame.
df.shape[0]

2448501

In [9]:
# Convert every header to snake_case: trim, lower-case, then apply the literal
# substitutions below one after another (space -> '_', drop parentheses, '-' -> '_').
tidy_headers = df.columns.str.strip().str.lower()
for unwanted, substitute in ((' ', '_'), ('(', ''), (')', ''), ('-', '_')):
    tidy_headers = tidy_headers.str.replace(unwanted, substitute, regex=False)
df.columns = tidy_headers

In [10]:
# Discard records whose latitude is below 30 (presumably placeholder / missing
# coordinates — TODO confirm). Rows with a missing latitude are kept.
#
# A boolean mask is used instead of df.drop(<index labels>): the frame is the result of
# concatenating two CSV loads, so index labels can repeat, and dropping by label would
# also remove every *other* row that happens to share a label with a bad row.
df = df[~(df['lat'] < 30)]

### These data are super messy...

In [11]:
# Map the terse source headers to descriptive names. Entries that map a name to itself
# are kept so the mapping doubles as a full inventory of the expected columns.
column_labels = {
    'dr_no': 'record_id',
    'date_rptd': 'date_reported',
    'date_occ': 'date_occurred',
    'time_occ': 'time_occurred',
    'area': 'division',
    'area_name': 'division_name',
    'rpt_dist_no': 'reporting_district',
    'part_1_2': 'part_type',
    'crm_cd': 'crime_code',
    'crm_cd_desc': 'crime_code_description',
    'mocodes': 'modus_operandi_code',
    'vict_age': 'victim_age',
    'vict_sex': 'victim_sex',
    'vict_descent': 'victim_descent',
    'premis_cd': 'premises_code',
    'premis_desc': 'premises_description',
    'weapon_used_cd': 'weapon_code',
    'weapon_desc': 'weapon_description',
    'status': 'status_code',
    'status_desc': 'status_code_description',
    'crm_cd_1': 'crm_cd_1',
    'crm_cd_2': 'crm_cd_2',
    'crm_cd_3': 'crm_cd_3',
    'crm_cd_4': 'crm_cd_4',
    'location': 'address',
    'cross_street': 'cross_street',
    'lat': 'latitude',
    'lon': 'longitude',
}
df = df.rename(columns=column_labels)

### ... continued

In [12]:
# Remove the literal ' 12:00:00 AM' text from the reported date so only the date remains.
reported_raw = df['date_reported']
df['date_reported'] = reported_raw.str.replace(' 12:00:00 AM', '', regex=False)

In [13]:
# Remove the literal ' 12:00:00 AM' text from the occurrence date so only the date remains.
occurred_raw = df['date_occurred']
df['date_occurred'] = occurred_raw.str.replace(' 12:00:00 AM', '', regex=False)

In [14]:
# Keep only the columns used downstream, in this fixed order.
ordered_columns = [
    'record_id',
    'date_reported',
    'date_occurred',
    'time_occurred',
    'division',
    'division_name',
    'reporting_district',
    'part_type',
    'crime_code',
    'crime_code_description',
    'modus_operandi_code',
    'victim_age',
    'victim_sex',
    'victim_descent',
    'premises_code',
    'premises_description',
    'weapon_code',
    'weapon_description',
    'status_code',
    'status_code_description',
    'crm_cd_1',
    'crm_cd_2',
    'crm_cd_3',
    'crm_cd_4',
    'address',
    'cross_street',
    'latitude',
    'longitude',
]
df = df[ordered_columns]

In [15]:
# Parse the reported date text (month/day/four-digit year) into datetimes.
reported_text = df['date_reported']
df['date_reported'] = pd.to_datetime(reported_text, format='%m/%d/%Y')

In [16]:
# Parse the occurrence date text (month/day/four-digit year) into datetimes.
occurred_text = df['date_occurred']
df['date_occurred'] = pd.to_datetime(occurred_text, format='%m/%d/%Y')

In [17]:
# Break the occurrence date into calendar parts for grouping in later analysis.
occurred = df['date_occurred'].dt
df['year'] = occurred.year
df['quarter'] = occurred.quarter
df['day'] = occurred.day
df['month'] = occurred.month
df['weekday'] = occurred.weekday  # pandas numbering: Monday=0 ... Sunday=6
df['monthname'] = occurred.month_name()

In [18]:
# Lookup from the one-letter victim descent code to a broad group label.
descent_recode = {
    'A': 'asian',
    'B': 'black',
    'C': 'asian',
    'D': 'asian',
    'F': 'asian',
    'G': 'asian',
    'H': 'hispanic',
    'I': 'ai_an',
    'J': 'asian',
    'K': 'asian',
    'L': 'asian',
    'O': 'other',
    'P': 'asian',
    'S': 'asian',
    'U': 'asian',
    'V': 'asian',
    'W': 'white',
    'X': 'other',
    'Z': 'asian',
}

In [19]:
# Translate each victim descent code to its group label; codes absent from the lookup
# come out as missing values.
descent_codes = df['victim_descent']
df['descent_description'] = descent_codes.map(descent_recode)

In [20]:
# Lookup from the raw victim sex code to a readable label; every code other than
# F and M listed here collapses to "unknown".
victim_sex_recode = {
    'F': 'female',
    'M': 'male',
    'X': 'unknown',
    '-': 'unknown',
    'N': 'unknown',
    'H': 'unknown',
}

In [21]:
# Replace the raw sex codes with readable labels; codes absent from the lookup come out
# as missing values.
sex_codes = df['victim_sex']
df['victim_sex'] = sex_codes.map(victim_sex_recode)

In [22]:
def _drop_float_suffix(values):
    """Return `values` as text with a trailing '.0' removed (e.g. 501.0 -> '501').

    Only a '.0' at the very end of the text is removed, so a '.0' that appears in the
    middle of a value is left alone. Missing values become the text 'nan', exactly as
    with a plain ``astype(str)``.
    """
    return values.astype(str).str.replace(r'\.0$', '', regex=True)


# Code columns that may have been parsed as floats: store them as plain integer-looking text.
# NOTE(review): the original cell cleaned 'premises_code' twice (a copy-paste duplicate);
# if another column was intended in its place, add it to this list.
for code_column in ['premises_code', 'crm_cd_1', 'crm_cd_2']:
    df[code_column] = _drop_float_suffix(df[code_column])

In [23]:
# Sentence-case the free-text description columns.
for text_column in ('premises_description', 'crime_code_description', 'weapon_description'):
    df[text_column] = df[text_column].str.capitalize()

# Turn the status description into a lower-case, underscore-separated slug.
status_text = df['status_code_description'].str.lower()
df['status_code_description'] = status_text.str.replace(' ', '_', regex=False)

In [24]:
# English month names in calendar order.
monthnames = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]

In [25]:
# Normalise the occurrence time to four-character HHMM text, then split it into parts.
clock_text = (
    df['time_occurred']
    .astype(str)
    .str.replace('.0', '', regex=False)
    .str.zfill(4)
)
df['time_occurred'] = clock_text
df['hour'] = clock_text.str[:2]
df['minute'] = clock_text.str[2:]

# Drop rows with an unusable hour: '0n' is what a missing time turns into
# ('nan' -> zero-filled '0nan' -> first two characters '0n'), and '24' is excluded too.
df = df[~df['hour'].isin(['0n', '24'])]

In [26]:
# Store the part type as text with any literal '.0' removed (e.g. 1.0 -> '1').
part_type_text = df['part_type'].astype(str)
df['part_type'] = part_type_text.str.replace('.0', '', regex=False)

In [27]:
# Store the reporting district as text with any literal '.0' removed.
district_text = df['reporting_district'].astype(str)
df['reporting_district'] = district_text.str.replace('.0', '', regex=False)

In [28]:
# Keep the first occurrence of every column name and discard any repeated ones.
first_occurrence = ~df.columns.duplicated()
crimes = df.loc[:, first_occurrence]

---

### Filter dataframe so it only includes part I — or "major" — cases

In [29]:
# Subset of records whose part type is the text '1'.
is_part_one = crimes['part_type'] == '1'
crimes_part1 = crimes[is_part_one]

### Crime codes

In [30]:
# Count records per (part type, crime code, description) combination; the count column
# is left with its default name, 0.
code_keys = ['part_type', 'crime_code', 'crime_code_description']
codes = crimes.groupby(code_keys).size().reset_index()

In [31]:
# Preview the first five crime-code combinations.
codes.head(5)

Unnamed: 0,part_type,crime_code,crime_code_description,0
0,1,110,Criminal homicide,3382
1,1,113,"Manslaughter, negligent",11
2,1,121,"Rape, forcible",11850
3,1,122,"Rape, attempted",1229
4,1,210,Robbery,95311


### Recode crime codes to identify 'part one' violent and property crimes

In [32]:
# Crime codes (as text) that are labelled 'pt_one_violent'.
part_one_violent = [
    '110', '113', '121', '122',
    '210', '220', '230', '231',
    '235', '236', '250', '251',
    '761', '815', '820', '821',
]

In [33]:
# Crime codes (as text) that are labelled 'pt_one_property'.
part_one_property = [
    '310', '320', '330', '331', '341', '343', '345',
    '350', '351', '352', '353',
    '410', '420', '421', '433',
    '440', '441', '442', '443', '444', '445',
    '450', '451', '452', '453',
    '470', '471', '472', '473', '474', '475',
    '480', '485', '487',
    '510', '520', '522',
]

In [34]:
# Label every record as part-one violent, part-one property, or other, based on its
# crime code.
#
# The code lists hold text such as '110', so the column is normalised to text first
# (and a trailing '.0' removed). That way the match still works if crime_code was parsed
# as a number (110 or 110.0) instead of the string '110' — otherwise every row would
# silently fall through to 'other'. Missing codes become 'nan' and are labelled 'other'.
crime_code_text = crimes['crime_code'].astype(str).str.replace(r'\.0$', '', regex=True)

# Vectorised equivalent of an if / elif / else over each row: the first matching
# condition wins, anything unmatched gets the default.
categories = np.select(
    [crime_code_text.isin(part_one_violent), crime_code_text.isin(part_one_property)],
    ['pt_one_violent', 'pt_one_property'],
    default='other',
).tolist()

crimes['part_category'] = categories

---

In [45]:
# Reporting districts (as text) used to select the USC-area subset.
usc = [
    '328', '338',
    '357', '358', '359',
    '378', '379',
]

In [46]:
# Records whose reporting district is one of the USC-area districts.
in_usc_area = crimes['reporting_district'].isin(usc)
usc_crimes = crimes.loc[in_usc_area]

In [49]:
# Write the USC-area subset and the full table to the usc project's data folder,
# without the index column.
for frame, destination in (
    (usc_crimes, '../../usc/data/lapd/lapd_crimes_2010_present_usc_area.csv'),
    (crimes, '../../usc/data/lapd/lapd_crimes_2010_present.csv'),
):
    frame.to_csv(destination, index=False)

### Export clean table of crimes (all part types) for other notebooks

In [50]:
# Write the full cleaned table (every row in `crimes`) to the local LAPD data folder,
# without the index column.
crimes.to_csv(
    path_or_buf='/Users/mhustiles/data/LAPD/lapd_crimes_2010_present.csv',
    index=False,
)

In [51]:
# Preview the first five rows of the exported table.
crimes.head(5)

Unnamed: 0,record_id,date_reported,date_occurred,time_occurred,division,division_name,reporting_district,part_type,crime_code,crime_code_description,modus_operandi_code,victim_age,victim_sex,victim_descent,premises_code,premises_description,weapon_code,weapon_description,status_code,status_code_description,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,address,cross_street,latitude,longitude,year,quarter,day,month,weekday,monthname,descent_description,hour,minute,part_category
0,10304468,2020-01-08,2020-01-08,2230,3.0,Southwest,377,2,624,Battery - simple assault,0444 0913,36,female,B,501,Single family dwelling,400.0,"Strong-arm (hands, fist, feet or bodily force)",AO,adult_other,624,,,,1100 W 39TH PL,,34.0141,-118.2978,2020,1,8,1,2,January,black,22,30,other
1,190101086,2020-01-02,2020-01-01,330,1.0,Central,163,2,624,Battery - simple assault,0416 1822 1414,25,male,H,102,Sidewalk,500.0,Unknown weapon/other weapon,IC,invest_cont,624,,,,700 S HILL ST,,34.0459,-118.2545,2020,1,1,1,2,January,hispanic,3,30,other
2,201220752,2020-09-16,2020-09-16,1230,12.0,77th Street,1259,2,745,Vandalism - misdeameanor ($399 or under),2004 1820 0913 0329 1202,62,male,B,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,,,,700 E 73RD ST,,33.9739,-118.263,2020,3,16,9,2,September,black,12,30,other
3,191501505,2020-01-01,2020-01-01,1730,15.0,N Hollywood,1543,2,745,Vandalism - misdeameanor ($399 or under),0329 1402,76,female,W,502,"Multi-unit dwelling (apartment, duplex, etc)",,,IC,invest_cont,745,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019,2020,1,1,1,2,January,white,17,30,other
4,191921269,2020-01-01,2020-01-01,415,19.0,Mission,1998,2,740,"Vandalism - felony ($400 & over, all church va...",0329,31,unknown,X,409,Beauty supply store,,,IC,invest_cont,740,,,,14400 TITUS ST,,34.2198,-118.4468,2020,1,1,1,2,January,other,4,15,other
