# Processing LAPD crimes database for other notebooks

### Import data tools

In [1]:
import pandas as pd
import pyarrow
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000

### Download historical data (in case there are amendments)

In [2]:
!wget https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD -P '/Users/mhustiles/data/data/LA/'

--2020-10-21 13:58:17--  https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD
Resolving data.lacity.org (data.lacity.org)... 52.206.68.26, 52.206.140.205, 52.206.140.199
Connecting to data.lacity.org (data.lacity.org)|52.206.68.26|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’

rows.csv?accessType     [       <=>          ] 510.38M  5.45MB/s    in 95s     

2020-10-21 13:59:53 (5.40 MB/s) - ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’ saved [535170965]



In [3]:
!mv '/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/data/LA/Crime_Data_from_2010_to_Present.csv'

### Download current data

In [4]:
!wget https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD -P '/Users/mhustiles/data/data/LA/'

--2020-10-21 13:59:53--  https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD
Resolving data.lacity.org (data.lacity.org)... 52.206.140.205, 52.206.140.199, 52.206.68.26
Connecting to data.lacity.org (data.lacity.org)|52.206.140.205|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’

rows.csv?accessType     [                 <=>]  37.99M  6.08MB/s    in 10s     

2020-10-21 14:00:04 (3.63 MB/s) - ‘/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD’ saved [39835455]



In [5]:
!mv '/Users/mhustiles/data/data/LA/rows.csv?accessType=DOWNLOAD' '/Users/mhustiles/data/data/LA/Crime_Data_from_2020_to_Present.csv'

### Read both datasets

In [6]:
# https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-Present/63jg-8b9z
# https://data.lacity.org/api/views/63jg-8b9z/rows.csv?accessType=DOWNLOAD
crimes_old = pd.read_csv('/Users/mhustiles/data/data/LA/Crime_Data_from_2010_to_Present.csv')

# https://data.lacity.org/A-Safe-City/Crime-Data-from-2020-to-Present/2nrs-mtv8
# https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD
crimes_new = pd.read_csv('/Users/mhustiles/data/data/LA/Crime_Data_from_2020_to_Present.csv')

In [29]:
crimes = pd.concat([crimes_new,crimes_old])

In [31]:
len(crimes)

2270995

In [32]:
crimes.columns = crimes.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

In [33]:
crimes = crimes.drop(crimes[crimes.lat < 30].index)

### These data are super messy...

In [34]:
crimes.rename(columns = { 
'dr_no':'record_id',
'date_rptd':'date_reported',
'date_occ':'date_occurred',
'time_occ':'time_occurred',
'area':'division',
'area_name':'division_name',
'rpt_dist_no':'reporting_district',
'part_1_2':'part_type',
'crm_cd':'crime_code',
'crm_cd_desc':'crime_code_description',
'mocodes':'modus_operandi_code',
'vict_age':'victim_age',
'vict_sex':'victim_sex',
'vict_descent':'victim_descent',
'premis_cd':'premises_code',
'premis_desc':'premises_description',
'weapon_used_cd':'weapon_code',
'weapon_desc':'weapon_description',
'status':'status_code',
'status_desc':'status_code_description',
'crm_cd_1':'crm_cd_1',
'crm_cd_2':'crm_cd_2',
'crm_cd_3':'crm_cd_3',
'crm_cd_4':'crm_cd_4',
'location':'address',
'cross_street':'cross_street',
'lat':'latitude',
'lon':'longitude',
 }, inplace = True)

### ... contunued

In [35]:
crimes['date_reported'] = crimes['date_reported'].str.replace(' 12:00:00 AM','')
crimes['date_occurred'] = crimes['date_occurred'].str.replace(' 12:00:00 AM','')

In [36]:
crimes['date_reported'] = pd.to_datetime(crimes['date_reported'], format='%m/%d/%Y')
crimes['date_occurred'] = pd.to_datetime(crimes['date_occurred'], format='%m/%d/%Y')

In [37]:
crimes['year'] = crimes['date_occurred'].dt.year
crimes['quarter'] = crimes['date_occurred'].dt.quarter
crimes['day'] = crimes['date_occurred'].dt.day
crimes['month'] = crimes['date_occurred'].dt.month
crimes['weekday'] = crimes['date_occurred'].dt.weekday
crimes['monthname'] = crimes['date_occurred'].dt.month_name()

In [38]:
descent_recode = { 'A':"asian",'B':"black",'C':"asian",'D':"asian",'F':"asian",'G':"asian",\
                 'H':"hispanic",'I':"ai_an",'J':"asian",\
                 'K':"asian",'L':"asian",'O':"other",'P':"asian",'S':"asian",\
                 'U':"asian",'V':"asian",'W':"white",'X':"other",'Z':"asian" }

In [39]:
crimes['descent_description'] = crimes['victim_descent'].map(descent_recode)

In [40]:
victim_sex_recode = { 'F':"female",'M':"male",'X':"unknown",'-':"unknown",'N':"unknown",'H':"unknown" }

In [41]:
crimes['victim_sex'] = crimes['victim_sex'].map(victim_sex_recode)

In [42]:
crimes['premises_code'] = crimes['premises_code'].astype(str).str.replace('.0','', regex=False)
crimes['premises_code'] = crimes['premises_code'].astype(str).str.replace('.0','', regex=False)
crimes['crm_cd_1'] = crimes['crm_cd_1'].astype(str).str.replace('.0','', regex=False)
crimes['crm_cd_2'] = crimes['crm_cd_2'].astype(str).str.replace('.0','', regex=False)

In [43]:
crimes['premises_description'] = crimes['premises_description'].str.capitalize()
crimes['status_code_description'] = crimes['status_code_description'].str.lower().str.replace(' ','_')
crimes['crime_code_description'] = crimes['crime_code_description'].str.capitalize()
crimes['weapon_description'] = crimes['weapon_description'].str.capitalize()

In [44]:
monthnames = ['January','February','March','April','May','June','July','August','September','October','November','December']

In [45]:
crimes['time_occurred'] = crimes['time_occurred'].astype(str).str.replace('.0','', regex=False)
crimes['time_occurred'] = crimes['time_occurred'].astype(str).str.zfill(4)
crimes['hour'] = crimes['time_occurred'].astype(str).str[:2]
crimes['minute'] = crimes['time_occurred'].astype(str).str[2:]
crimes = crimes[crimes['hour'] != '0n']
crimes = crimes[crimes['hour'] != '24']

In [46]:
homicides = crimes[crimes['crime_code_description'].str.contains('Criminal homicide', na=False)]

---

### Filter dataframe so it only inlcudes part I — or "major" — cases

In [51]:
crimes['part_type'] = crimes['part_type'].astype(str)

In [27]:
crimes = crimes[crimes.part_type == '1']

In [47]:
len(crimes)

2268442

### Crime codes

In [None]:
codes = crimes.groupby(['part_type', 'crime_code', 'crime_code_description']).agg('size').reset_index()

In [None]:
codes.head()

### Recode crime_codes to identify 'part one' violent/property crimes_major

In [None]:
part_one_violent = ['110','113','121','122','210','220','230','231',\
                    '235','236','250','251','761','815','820','821']

In [None]:
part_one_property = ['310','320','330','331','341','343','345','350',
                     '351','352','353','410','420','421','433','440','441',
                     '442','443','444','445','450','451','452','453','470','471'
                     ,'472','473','474','475','480','485','487','510','520','522']

In [None]:
categories = []

for row in crimes['crime_code']:
    if row in part_one_violent:
        categories.append('pt_one_violent')
    elif row in part_one_property:
        categories.append('pt_one_property')
    else:
        categories.append('other')
        
crimes['part_category'] = categories

---

### Export clean table of major crimes for other notebooks

In [None]:
crimes.reset_index().to_feather('/Users/mhustiles/data/data/LA/crimes.feather')