# Mapping 'sit-lie' arrests by LAPD: 2010-19

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
from earthpy import clip as cl
from altair import datum
import altair as alt
alt.renderers.enable('notebook')
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000

### Read in arrests data from city portal

In [2]:
# Load the LAPD arrests export into a DataFrame from a local copy of the CSV.
# The file can be downloaded from the city data portal:
# https://data.lacity.org/api/views/yru6-6re4/rows.csv?accessType=DOWNLOAD
# NOTE(review): the path below is absolute and specific to one machine; adjust
# it before running this notebook elsewhere.
arrests = pd.read_csv('/Users/mhustiles/data/data/LA/Arrest_Data_from_2010_to_Present.csv')

### Clean up arrests DataFrame

In [3]:
# Normalize column headers: trim, lowercase, spaces -> underscores, and drop
# parentheses. regex=False makes '(' and ')' plain characters rather than
# regular-expression syntax.
arrests.columns = (
    arrests.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Parse the arrest date and derive calendar parts from it.
arrests['arrest_date'] = pd.to_datetime(arrests['arrest_date'], format='%m/%d/%Y')
arrests['year'] = arrests['arrest_date'].dt.year
arrests['day'] = arrests['arrest_date'].dt.day
arrests['month'] = arrests['arrest_date'].dt.month
arrests['monthname'] = arrests['arrest_date'].dt.month_name()

# Identifier-like and calendar columns are handled as text from here on.
text_columns = ['report_id', 'time', 'area_id', 'charge_group_code',
                'reporting_district', 'year', 'month', 'day']
arrests[text_columns] = arrests[text_columns].astype(str)

# Values that were stored as floats stringify with a '.0' suffix; remove it.
arrests['charge_group_code'] = arrests['charge_group_code'].str.replace('.0', '', regex=False)

# day_name() is the method counterpart of month_name() above; the older
# `weekday_name` attribute is not available in pandas 1.0 and later.
arrests['weekday'] = arrests['arrest_date'].dt.day_name()

# Turn the time into a zero-padded 4-character string and split it into
# hour and minute.
arrests['time'] = arrests['time'].str.replace('.0', '', regex=False)
arrests['time'] = arrests['time'].str.zfill(4)
arrests['hour'] = arrests['time'].str[:2]
arrests['minute'] = arrests['time'].str[2:]

# Drop rows whose hour is unusable: '0n' is what a missing time ('nan') turns
# into after zero-padding and slicing, and '24' is not a valid hour.
# .copy() gives later cells an independent frame to add columns to, instead
# of a slice of the unfiltered data.
arrests = arrests[~arrests['hour'].isin(['0n', '24'])].copy()

# Flag arrests whose hour falls in the overnight window (22-23 and 00-05),
# then keep a separate frame containing only those rows.
night = ['01', '02', '03', '04', '05', '22', '23', '00']
arrests['night'] = arrests['hour'].isin(night)
night_arrests = arrests[arrests['night']]

# Separate location field into lat/lon
# Strip the surrounding parentheses; regex=False treats them as plain
# characters rather than regular-expression syntax.
arrests['location'] = (
    arrests['location']
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Split on commas in one vectorized pass instead of appending to Python lists
# row by row. reindex guarantees that both pieces exist as columns even when
# no row contains a comma; missing pieces are NaN.
# This also avoids creating a module-level list called `lat`, which would
# shadow the `altair_latimes as lat` import alias.
coordinate_parts = arrests['location'].str.split(',', expand=True).reindex(columns=[0, 1])

# First piece is the latitude, second the longitude. Missing or unparseable
# values become NaN; the final cast keeps both columns float64.
arrests['latitude'] = pd.to_numeric(coordinate_parts[0], errors='coerce').astype(float)
arrests['longitude'] = pd.to_numeric(coordinate_parts[1], errors='coerce').astype(float)

In [4]:
# Load the half-mile grid from its three on-disk exports: plain GeoJSON,
# the mapshaper JSON output, and a shapefile.
# The one-mile grid is currently disabled (see the commented-out lines).
# mile = gpd.read_file('input/mile.geojson')
# mile_gdf = gpd.GeoDataFrame(mile, geometry='geometry')
halfmile = gpd.read_file('input/halfmile.geojson')
halfmile_gdf = gpd.GeoDataFrame(halfmile, geometry='geometry')
halfmileshp = gpd.read_file('input/shp/halfmile.shp')
halfmile_mapshaper = gpd.read_file('input/halfmile-mapshaper.json')

DriverError: input/mile.geojson: No such file or directory

In [None]:
# Reproject both half-mile grids to EPSG:4326, the CRS assigned to the arrest
# points later in this notebook. Reprojecting the whole GeoDataFrame (rather
# than overwriting only its geometry column) keeps the frame's CRS metadata
# in step with the transformed coordinates.
halfmile_mapshaper = halfmile_mapshaper.to_crs(epsg=4326)
halfmileshp = halfmileshp.to_crs(epsg=4326)
# mile = mile.to_crs(epsg=4326)

In [None]:
# Quick visual check of the mapshaper half-mile grid.
halfmile_mapshaper.plot()

In [None]:
# Write the mapshaper half-mile grid back out as GeoJSON.
halfmile_mapshaper.to_file('output/halfmile_mapshaper.json', driver='GeoJSON')

In [None]:
# `mile_gdf` is never defined (its creation is commented out earlier in this
# notebook), so plotting it raised a NameError. Preview the half-mile grid
# that is actually loaded instead.
halfmile_gdf.plot()

### Isolate sit-lie cases from larger arrests dataframe

In [None]:
# Keep only arrests whose charge text contains the literal string '41.18'.
# regex=False stops '.' from acting as a wildcard (which would also match
# strings such as '41x18'); na=False treats rows with a missing charge as
# non-matches so the boolean mask never contains NaN.
sit_lie_all = arrests[arrests['charge'].str.contains('41.18', regex=False, na=False)]

### Convert the sit-lie cases to a geodataframe

In [None]:
# Turn each sit-lie arrest into a point feature: x comes from the longitude
# column and y from the latitude column.
sit_lie_all_gdf = gpd.GeoDataFrame(
    sit_lie_all,
    geometry=gpd.points_from_xy(
        x=sit_lie_all['longitude'],
        y=sit_lie_all['latitude'],
    ),
)

In [None]:
# Declare the CRS of the arrest points as EPSG:4326.
# NOTE(review): the {'init': ...} dict form is reported as deprecated by newer
# pyproj/geopandas releases — confirm against the installed versions before
# changing it.
sit_lie_all_gdf.crs = {'init' :'epsg:4326'}
# Source and target CRS are both EPSG:4326 here, so this reprojection is
# expected to leave the coordinates unchanged.
sit_lie_all_gdf['geometry'] = sit_lie_all_gdf['geometry'].to_crs(epsg=4326)

### Remove stray arrests outside the L.A. city boundary with earthpy's `clip` module

In [None]:
# Read the City of Los Angeles boundary file and plot it as a quick visual
# check.
la = gpd.read_file('input/la_city_boundary/la_city_boundary_1570551157861.geojson')
la.plot()

In [None]:
# Clip the sit-lie arrest points to the `la` boundary layer with earthpy's
# clip_shp.
# NOTE(review): clip_shp may not exist in newer earthpy releases — confirm
# the installed version.
sit_lie_all_gdf_clipped = cl.clip_shp(sit_lie_all_gdf, la)

In [None]:
# Quick visual check of the clipped arrest points.
sit_lie_all_gdf_clipped.plot()

### Join arrests data to hexbin geodataframes

In [None]:
# Spatially join each arrest point to the half-mile grid cell it falls within.
# NOTE(review): this joins the unclipped `sit_lie_all_gdf`, not
# `sit_lie_all_gdf_clipped` — confirm that is intended.
# NOTE(review): newer geopandas releases name this keyword `predicate`
# instead of `op` — confirm against the installed version.
sit_lie_halfmile = gpd.sjoin(sit_lie_all_gdf, halfmile_mapshaper, op = 'within')

In [None]:
# Preview the first rows of the joined frame.
sit_lie_halfmile.head()

In [None]:
# Count the joined arrests for every (year, id, area_name) combination,
# order the counts from largest to smallest, and expose them as a flat table
# with the count in a `total` column.
sit_lie_halfmile_grouped = (
    sit_lie_halfmile
    .groupby(['year', 'id', 'area_name'])
    .size()
    .sort_values(ascending=False)
    .reset_index(name='total')
)

In [None]:
# Preview the largest per-cell counts.
sit_lie_halfmile_grouped.head()

### Merge grouped arrests data back to binned geodataframe

In [None]:
# Attach the grouped counts to the grid geometries by matching on `id`.
# merge() is called without `how`, so pandas' default join type applies.
la_bins_sit_lie = halfmile_mapshaper.merge(sit_lie_halfmile_grouped, on='id')
# Show the 30 rows with the highest totals.
la_bins_sit_lie.sort_values(by='total', ascending=False).head(30)

In [None]:
# Set the default figure size before plotting: rcParams only apply to figures
# created after they are changed, so setting it after the plot call had no
# effect on this map.
plt.rcParams["figure.figsize"] = [20, 25]
la_bins_sit_lie.plot()

In [None]:
# Two trimmed views of the merged frame:
#   - a tabular one with just the id, count and year (for the CSV export)
#   - a spatial one that also keeps the area name and the geometry
la_bins_sit_lie_csv = la_bins_sit_lie[['id', 'total', 'year']]
la_bins_sit_lie_clean = la_bins_sit_lie[
    ['id', 'total', 'year', 'area_name', 'geometry']
]

In [None]:
# Export the trimmed spatial frame as GeoJSON.
la_bins_sit_lie_clean.to_file('output/la_bins_sit_lie_clean.geojson', driver='GeoJSON')

In [None]:
# Export the id/total/year table as CSV.
# to_csv() is called with its defaults, so the DataFrame index is written as
# the first column.
la_bins_sit_lie_csv.to_csv('output/la_bins_sit_lie_csv.csv')