# Los Angeles Crime Records: 2010-2020
- This notebook combines the archival crime records with the current-year data and generates categorical (dummy) variables for year, month, crime category, and neighborhood.

#### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import missingno as msno
import requests
import geopandas
from mpl_toolkits.axes_grid1 import make_axes_locatable
import contextily as ctx
from shapely.geometry import Point

#### Read in and combine datasets

In [2]:
# Load the archival extract. Its 'AREA ' header carries a trailing space, so it
# is renamed to 'AREA' to match the column name used by the current-year file.
crime19 = (
    pd.read_csv('../datasets/crime19.csv')
    .rename(columns={'AREA ': 'AREA'})
)
print(crime19.shape)
print(crime19.columns)

(2114699, 28)
Index(['DR_NO', 'Date Rptd', 'DATE OCC', 'TIME OCC', 'AREA', 'AREA NAME',
       'Rpt Dist No', 'Part 1-2', 'Crm Cd', 'Crm Cd Desc', 'Mocodes',
       'Vict Age', 'Vict Sex', 'Vict Descent', 'Premis Cd', 'Premis Desc',
       'Weapon Used Cd', 'Weapon Desc', 'Status', 'Status Desc', 'Crm Cd 1',
       'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4', 'LOCATION', 'Cross Street', 'LAT',
       'LON'],
      dtype='object')


In [3]:
# Load the current-year extract and show its size and column names so they can
# be compared by eye with the archival file before the two are combined.
crime20 = pd.read_csv(filepath_or_buffer='../datasets/crime20.csv')
print(crime20.shape)
crime20.columns

(95405, 28)


Index(['DR_NO', 'Date Rptd', 'DATE OCC', 'TIME OCC', 'AREA', 'AREA NAME',
       'Rpt Dist No', 'Part 1-2', 'Crm Cd', 'Crm Cd Desc', 'Mocodes',
       'Vict Age', 'Vict Sex', 'Vict Descent', 'Premis Cd', 'Premis Desc',
       'Weapon Used Cd', 'Weapon Desc', 'Status', 'Status Desc', 'Crm Cd 1',
       'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4', 'LOCATION', 'Cross Street', 'LAT',
       'LON'],
      dtype='object')

#### Concatenate dataframes

In [4]:
# Stack the archival and current-year records into one frame (rows appended).
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each input
# keeps its own row labels, so the same label would appear twice in `crime`
# and label-based lookups or aligned assignments could hit the wrong rows.
crime = pd.concat([crime19, crime20], axis=0, ignore_index=True)
print(crime.shape)
# Transposed preview: one column per record, one row per field.
crime.head().T

(2210104, 28)


Unnamed: 0,0,1,2,3,4
DR_NO,1307355,11401303,70309629,90631215,100100501
Date Rptd,02/20/2010 12:00:00 AM,09/13/2010 12:00:00 AM,08/09/2010 12:00:00 AM,01/05/2010 12:00:00 AM,01/03/2010 12:00:00 AM
DATE OCC,02/20/2010 12:00:00 AM,09/12/2010 12:00:00 AM,08/09/2010 12:00:00 AM,01/05/2010 12:00:00 AM,01/02/2010 12:00:00 AM
TIME OCC,1350,45,1515,150,2100
AREA,13,14,13,6,1
AREA NAME,Newton,Pacific,Newton,Hollywood,Central
Rpt Dist No,1385,1485,1324,646,176
Part 1-2,2,2,2,2,1
Crm Cd,900,740,946,900,122
Crm Cd Desc,VIOLATION OF COURT ORDER,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",OTHER MISCELLANEOUS CRIME,VIOLATION OF COURT ORDER,"RAPE, ATTEMPTED"


### Crimes by year and month

- Create dummy (0/1) columns for the year and the month in which each crime occurred.

In [5]:
# Convert dates to datetime format
# The 'DATE OCC' column is overwritten with its parsed datetime values so that
# later cells can read the year and month of each record.
# NOTE(review): no format= is given, so pandas has to infer it; the preview rows
# look like 'MM/DD/YYYY hh:mm:ss AM' — confirm this holds for both input files
# before pinning an explicit format.
crime['DATE OCC'] = pd.to_datetime(crime['DATE OCC'])

##### Dummify year and month

In [6]:
# Add one 0/1 indicator column per occurrence year (2010..2020) and one per
# occurrence month (January..December), in that order.
#
# The year and month are extracted once with the vectorised .dt accessor and
# compared as whole columns, instead of walking every row in Python once per
# output column. Requires 'DATE OCC' to already be a datetime column.
occ_year = crime['DATE OCC'].dt.year
occ_month = crime['DATE OCC'].dt.month

for year in range(2010, 2021):
    # .to_numpy() makes the assignment positional, like the list-based form.
    crime[str(year)] = (occ_year == year).astype('int64').to_numpy()

# Spelled out rather than taken from the calendar module so that the column
# names never depend on the machine's locale.
month_names = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
for month_number, month_name in enumerate(month_names, start=1):
    crime[month_name] = (occ_month == month_number).astype('int64').to_numpy()

### Categories

In [7]:
# 'Crm Cd' values grouped into categories. Each top-level list (V, P) is the
# in-order concatenation of its sub-lists, so it is built from them here rather
# than being typed out a second time.
# NOTE(review): V*/P* presumably stand for violent / property offences, and
# mvt / btfv for vehicle-theft categories — confirm against the code book.

V_homicide = [110, 113]
V_rape = [121, 122, 815, 820, 821]
V_robbery = [210, 220]
V_assault_agg = [230, 231, 235, 236, 250, 251, 761, 926]
V_assault_sim = [435, 436, 437, 622, 623, 624, 625, 626, 627, 647, 763, 928, 930]
V = V_homicide + V_rape + V_robbery + V_assault_agg + V_assault_sim

P_burglary = [310, 320]
P_mvt = [510, 520, 433]
P_btfv = [330, 331, 410, 421]
P_theft_personal = [350, 351, 352, 353, 450, 451, 452, 453]
P_theft_other = [
    341, 343, 345,
    440, 441, 442, 443, 444, 445,
    470, 471, 472, 473, 474, 475,
    480, 485, 487, 491,
]
P = P_burglary + P_mvt + P_btfv + P_theft_personal + P_theft_other

In [8]:
# Add one 0/1 indicator column per crime-code group: 1 when the record's
# 'Crm Cd' is in that group's code list, 0 otherwise.
#
# The mapping keeps the output column names next to the lists they come from
# and fixes the column order (dicts preserve insertion order). Series.isin
# tests the whole column at once instead of scanning the code list per row.
crime_code_groups = {
    'V': V,
    'V_homicide': V_homicide,
    'V_rape': V_rape,
    'V_robbery': V_robbery,
    'V_assault_agg': V_assault_agg,
    'V_assault_sim': V_assault_sim,
    'P': P,
    'P_burglary': P_burglary,
    'P_mvt': P_mvt,
    'P_btfv': P_btfv,
    'P_theft_personal': P_theft_personal,
    'P_theft_other': P_theft_other,
}

for column_name, codes in crime_code_groups.items():
    # .to_numpy() makes the assignment positional, like the list-based form.
    crime[column_name] = crime['Crm Cd'].isin(codes).astype('int64').to_numpy()

### Neighborhoods

In [9]:
# Read the neighborhood boundary shapefile and rename its 'name' column to
# 'place'; 'place' is the column later used to label the per-neighborhood
# indicator columns.
gdf = (
    geopandas
    .read_file('../datasets//la-county-neighborhoods-current/l.a. county neighborhood (current).shp')
    .rename(columns={'name': 'place'})
)
print(gdf.shape)
gdf.head(3)

(272, 11)


Unnamed: 0,slug,set,kind,external_i,place,display_na,sqmi,type,name_1,slug_1,geometry
0,acton,L.A. County Neighborhoods (Current),L.A. County Neighborhood (Current),acton,Acton,Acton L.A. County Neighborhood (Current),39.3391089485,unincorporated-area,,,"POLYGON ((-118.20262 34.53899, -118.18947 34.5..."
1,adams-normandie,L.A. County Neighborhoods (Current),L.A. County Neighborhood (Current),adams-normandie,Adams-Normandie,Adams-Normandie L.A. County Neighborhood (Curr...,0.805350187789,segment-of-a-city,,,"POLYGON ((-118.30901 34.03741, -118.30041 34.0..."
2,agoura-hills,L.A. County Neighborhoods (Current),L.A. County Neighborhood (Current),agoura-hills,Agoura Hills,Agoura Hills L.A. County Neighborhood (Current),8.14676029818,standalone-city,,,"POLYGON ((-118.76193 34.16820, -118.72632 34.1..."


In [10]:
# Give every record a unique positional label 0..n-1, kept under the index
# name 'nu_index', so the integer lookups in the following cell are unambiguous.
crime = crime.reset_index(drop=True).rename_axis('nu_index')

# Build one shapely Point(x=LON, y=LAT) per record. Walking the two columns in
# lockstep avoids two label lookups on a pandas Series for every row.
crime['points'] = [
    Point(lon, lat) for lon, lat in zip(crime['LON'], crime['LAT'])
]

#### NOTE: this cell can take several hours to run

In [None]:
# Add one 0/1 indicator column per neighborhood, named after gdf['place']:
# 1 when the record's point lies within that neighborhood's polygon, else 0.
#
# The points are wrapped in a GeoSeries once, and each polygon is tested
# against all of them with a single GeoSeries.within call. This replaces an
# inner Python loop that looked up crime['points'][j] and gdf['geometry'][i]
# for every (record, neighborhood) pair.
# NOTE(review): a spatial join (geopandas.sjoin) would probably be faster
# still, but its handling of boundary points and duplicate place names would
# have to be checked against this loop first.
crime_points = geopandas.GeoSeries(crime['points'])

for place, boundary in zip(gdf['place'], gdf['geometry']):
    # .to_numpy() makes the assignment positional, like the list-based form.
    crime[place] = crime_points.within(boundary).astype('int64').to_numpy()

### Write to file

In [None]:
# Save the combined frame with all of its generated indicator columns; the row
# index is left out of the file.
crime.to_csv(path_or_buf='../datasets/crime_final.csv', index=False)