In [None]:
%matplotlib inline
import sys
sys.path.insert(0, '../src')
import glob
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import os

from random import choices
from scipy import stats
from analyze import test
from geospatial import *

from pylab import rcParams
import statsmodels.api as sm
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Paths to the two cleaned CSVs, relative to the notebook's working directory.
crime_csv = '../data/cleaned/crime-processed.csv'
arrest_csv = '../data/cleaned/arrests-processed.csv'

crimes = pd.read_csv(crime_csv)
arrests = pd.read_csv(arrest_csv)

In [None]:
# read_gis is not defined in this notebook; presumably it arrives via
# `from geospatial import *` -- confirm. Later cells treat the result as a
# DataFrame (rename / drop / column selection).
census = read_gis()

In [None]:
# Rename the census columns in place using CENSUS_MAP. The mapping is defined
# outside this notebook (presumably supplied by one of the star imports -- confirm).
census.rename(columns=CENSUS_MAP, inplace=True)

In [None]:
# Collect every column whose name starts with 'H7' and drop them in one call
# instead of mutating the frame once per column while looping over it.
h7_columns = [name for name in census if name.startswith('H7')]
census.drop(h7_columns, axis=1, inplace=True)

# Fold the 'Others' values into 'Other'.
# NOTE: this is not idempotent -- re-running the cell without reloading
# `census` adds 'Others' to 'Other' a second time.
census['Other'] += census['Others']

In [None]:
# Columns carried into the per-APREC breakdown: the grouping key, the total,
# and one column per category.
div_columns = [
    'APREC',
    'Total',
    'White',
    'Black or African American',
    'American Indian and Alaska Native',
    'Asian',
    'Native Hawaiian and Other Pacific Islander',
    'Other',
    'Hispanic or Latino',
]
census_div = census[div_columns]

In [None]:
# Collapse to one row per APREC value by summing every selected column.
census_div = census_div.groupby('APREC').sum()

In [None]:
# Divide each row by its own 'Total', turning every column into a share of that
# row's total (the 'Total' column itself becomes 1). The result is only
# displayed as the cell output; it is not stored in a variable.
census_div.div(census_div['Total'], axis=0)

# Crime & Arrest Trends

In [None]:
# Parse 'Date Rptd' into datetimes; the daily counts below group on this column.
crimes['Date Rptd'] = pd.to_datetime(crimes['Date Rptd'])

In [None]:
# Number of crime records per 'Date Rptd' value, leaving out rows whose Year is 2020.
daily_crimes = crimes.loc[crimes.Year!=2020].groupby('Date Rptd').size()

In [None]:
# Raw daily counts plus 7-, 30- and 365-day rolling means on a single axes.
fig, ax = plt.subplots(figsize=(10, 6))
daily_crimes.plot(ax=ax, legend=False)
ax.set_title('Crime Trends in LA (2010-2019)')
for window in (7, 30, 365):
    daily_crimes.rolling(window=window).mean().plot(ax=ax)

# Vertical markers at 1 Jan 2013 and 1 Jan 2015. The legend below assigns its
# labels in drawing order, so the markers must be added after the four series
# to receive the last two labels.
ax.axvline(dt.datetime(2013, 1, 1), linewidth=3, color='purple')
ax.axvline(dt.datetime(2015, 1, 1), linewidth=3, color='yellow')
ax.legend(['Daily', 'Weekly', 'Monthly', 'Annually', '3 Divisions', 'All Divisions'])
plt.show()

In [None]:
# Daily crime counts restricted to rows with 2013 <= Year <= 2014.
in_scope = (crimes.Year >= 2013) & (crimes.Year <= 2014)
filtered_crimes = crimes.loc[in_scope].groupby('Date Rptd').size()

# Raw daily counts plus 7- and 30-day rolling means.
fig, ax = plt.subplots(figsize=(10, 6))
filtered_crimes.plot(ax=ax, legend=False)
ax.set_title('Crime Trends in LA (2013-2014)')
for window in (7, 30):
    filtered_crimes.rolling(window=window).mean().plot(ax=ax)
ax.legend(['Daily', 'Weekly', 'Monthly'])
plt.show()

In [None]:
# Parse 'Arrest Date' into datetimes, then count arrest records per date,
# leaving out rows whose Year is 2020.
arrests['Arrest Date'] = pd.to_datetime(arrests['Arrest Date'])
daily_arrests = arrests.loc[arrests.Year != 2020].groupby('Arrest Date').size()

# Raw daily counts plus 7-, 30- and 365-day rolling means on a single axes.
fig, ax = plt.subplots(figsize=(10, 6))
daily_arrests.plot(ax=ax, legend=False)
ax.set_title('Arrest Trends in LA (2010-2019)')
for window in (7, 30, 365):
    daily_arrests.rolling(window=window).mean().plot(ax=ax)

# Vertical markers at 1 Jan 2013 and 1 Jan 2015. They are drawn after the four
# series so that they pick up the last two legend labels.
ax.axvline(dt.datetime(2013, 1, 1), linewidth=3, color='purple')
ax.axvline(dt.datetime(2015, 1, 1), linewidth=3, color='yellow')

# Single legend call covering all six artists. The earlier four-label legend
# was replaced by this one before the figure was shown, so it has been removed.
ax.legend(['Daily', 'Weekly', 'Monthly', 'Annually', '3 Divisions', 'All Divisions'])
plt.show()

In [None]:
# Daily arrest counts restricted to rows with 2013 <= Year <= 2014.
in_scope = (arrests.Year >= 2013) & (arrests.Year <= 2014)
filtered_arrests = arrests.loc[in_scope].groupby('Arrest Date').size()

# Raw daily counts plus 7- and 30-day rolling means.
fig, ax = plt.subplots(figsize=(10, 6))
filtered_arrests.plot(ax=ax, legend=False)
ax.set_title('Arrest Trends in LA (2013-2014)')
for window in (7, 30):
    filtered_arrests.rolling(window=window).mean().plot(ax=ax)
ax.legend(['Daily', 'Weekly', 'Monthly'])
plt.show()

In [None]:
from analyze import *

In [None]:
def format_df(df, feat, scope=False, area=False, group='PredPol Deployed', group2=['Area Name', 'PredPol Deployed']):
    """Return the normalized distribution of ``feat`` within each group of ``df``.

    Parameters
    ----------
    df : DataFrame
        Must contain a ``Year`` column, the ``feat`` column and the grouping
        column(s) that end up being used.
    feat : str
        Column whose value shares are computed. When it equals
        ``'Descent Description'`` the grouping columns are replaced by
        ``'Reassigned Officer'`` and ``['Stop Division', 'Reassigned Officer']``
        regardless of what was passed for ``group`` / ``group2``.
    scope : bool
        True keeps rows with ``2013 <= Year <= 2014``; False keeps rows with
        ``Year != 2020``.
    area : bool
        False groups by ``group`` and returns one row per ``feat`` value with
        one column per group. True groups by ``group2`` and returns one row
        per ``group2`` combination with one column per ``feat`` value.
    group : str
        Grouping column used when ``area`` is False.
    group2 : list of str
        Grouping columns used when ``area`` is True.

    Returns
    -------
    DataFrame
        Proportions from ``value_counts(normalize=True)``; group/value
        combinations that never occur are NaN.
    """
    # This feature always uses its own grouping columns.
    if feat == 'Descent Description':
        group, group2 = 'Reassigned Officer', ['Stop Division', 'Reassigned Officer']

    # Pick the year filter first, then apply it once.
    if scope:
        keep = (df.Year >= 2013) & (df.Year <= 2014)
    else:
        keep = df.Year != 2020
    subset = df.loc[keep]

    keys = group2 if area else group
    shares = subset.groupby(keys)[feat].value_counts(normalize=True).unstack()
    # The single-key layout is transposed so that feature values become the rows.
    return shares if area else shares.T

# Preview: share of each 'Crime Type' within each 'PredPol Deployed' group,
# restricted to 2013-2014 (scope=True).
format_df(crimes, 'Crime Type', scope=True)

In [None]:
def test_overall(df, outpath, feat, scope=False):
    """Run ``test`` on every category of ``feat`` and save the results as CSV.

    The per-group distribution comes from ``format_df(df, scope=scope,
    feat=feat)``. For each row (one category of ``feat``), ``test`` is called
    with the category label and the row's entries at keys ``1`` and ``0``.
    The statistic and p-value are printed and collected, rounded to 5 decimals.

    Parameters
    ----------
    df : DataFrame
        Passed straight to ``format_df``.
    outpath : str
        Directory the CSV is written to (joined with the file name).
    feat : str
        Column whose distribution is tested. Known names pick a predefined
        label list (TYPES, CHARGES or RACES) for the output index; any other
        name falls back to the categories observed in the data.
    scope : bool
        Forwarded to ``format_df``; also selects the ``_2013-14`` file name.

    Notes
    -----
    ``test``, ``TYPES``, ``CHARGES`` and ``RACES`` are not defined in this
    notebook; presumably they come from the ``analyze`` / ``geospatial``
    imports -- confirm.
    """
    print('Testing overall distribution of {}.'.format(feat))
    types = format_df(df, scope=scope, feat=feat)
    statvals = []
    pvals = []
    for tp, row in types.iterrows():
        print('{}: {}'.format(feat, tp))
        try:
            stat, pval = test(tp, row[1], row[0])
        except IndexError:
            # Raised when the row has no entry for one of the two groups.
            # NOTE(review): a placeholder p-value of 0.0 reads as "maximally
            # significant" in the saved table; NaN may be a safer marker.
            stat, pval = 0.0, 0.0
        statvals.append(round(stat, 5))
        pvals.append(round(pval, 5))
        print('Statistic = ', stat)
        print('P-Value = {}\n'.format(pval))

    if feat in ('Crime Type', 'Charge Group Description'):
        idx = TYPES
    elif feat in ('Crime Charge', 'Arrest Type Code'):
        idx = CHARGES
    elif feat == 'Descent Description':
        idx = RACES
    else:
        # Previously `idx` was left unbound here and the function crashed
        # with UnboundLocalError after running every test. Label the rows
        # with the categories that were actually tested instead.
        idx = list(types.index)

    if scope:
        title = 'ovr_{}_dist_2013-14.csv'.format(feat)
    else:
        title = 'ovr_{}_dist.csv'.format(feat)

    results = {'Statistic': statvals, 'P-Value': pvals}
    outfile = os.path.join(outpath, title)
    try:
        pd.DataFrame(results, index=idx).to_csv(outfile)
    except ValueError:
        # Length mismatch between the label list and the results: retry with
        # the last label dropped (kept from the original behaviour).
        pd.DataFrame(results, index=idx[:-1]).to_csv(outfile)
    print('Complete.')

# Test the 'Crime Type' distribution on 2013-2014 data. With an empty outpath,
# os.path.join returns just the file name, so the CSV is written relative to
# the current working directory.
test_overall(crimes, '', 'Crime Type', scope=True)

In [None]:
# Same test for arrests, on 'Charge Group Description' (2013-2014 only); the
# CSV is written relative to the current working directory.
test_overall(arrests, '', 'Charge Group Description', scope=True)