# 0.0b Data Preparation - Geocoding
Garrett Eason, Chris Broll, Shilpa Rajbhandari<br>
*Note: the following can be memory intensive. You may need a computer with a lot of RAM, or an environment that uses less RAM than Jupyter Notebook (e.g. a plain Python IDE).*

In [1]:
#%% Packages
from __future__ import (absolute_import, division, print_function)
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import datetime as dt
import progressbar #used progressbar2

#%% Data
# Location of the project's Data folder in a Linux checkout and in a Windows checkout.
Linux = '/home/sade/Desktop/Git Cloned Repos/lights-and-crime/Lights and Crime Garrett/Data'
Windows = 'C:/Users/Sade/Documents/GitHub/lights-and-crime/Lights and Crime Garrett/Data'

# Folder that the spreadsheets are read from and written to; set to `Windows` when running there.
choice = Linux

## Data

In [2]:
# Inputs for the night-crime join.  DCR.xlsx (day crimes) is deliberately not
# read here: nothing in the night-crime section uses it before the namespace is
# reset, and skipping it keeps peak memory down in an already memory-hungry run.
isf = pd.read_excel(choice + '/Lights.xlsx') # (from 0.0a)
inv = pd.read_excel(choice + '/islims_inventory.xlsx') # (see 0.0a iSlims and City Work Data)
wo = pd.read_excel(choice + '/islims_workorders.xlsx') # (see 0.0a iSlims and City Work Data)
NCR = pd.read_excel(choice + '/NCR.xlsx') # (from 0.0a)

## Night Crimes

In [3]:
#%% Prepping for Geojoin

# Lights -> work orders (on WoID) -> inventory (on inventoryID), both as left
# merges; the listed columns are discarded after each merge.
wo = wo.rename(columns={'woID': 'WoID'})
isf_wo = pd.merge(isf, wo, how='left', on='WoID').drop(
    ['srchAssetID', 'gpscoordinateX', 'gpscoordinateY', 'initialproblemID',
     'resolveddatetime', 'entereddate', 'finalresolutionID'],
    axis=1)
isf_wo_inv = pd.merge(isf_wo, inv, how='left', on='inventoryID').drop(
    ['gpscoordinateX', 'gpscoordinateY'], axis=1)

# Setting up data into geopandas
# Lights: one point per row from gpsX/gpsY, keeping the first row for each WoID.
geometry = list(map(Point, zip(isf_wo_inv['gpsX'], isf_wo_inv['gpsY'])))
gLights = GeoDataFrame(isf_wo_inv, geometry=geometry).drop_duplicates(subset=['WoID'])
# Night crimes: one point per row from the X/Y columns.
geometry = list(map(Point, zip(NCR['X'], NCR['Y'])))
gNCR = GeoDataFrame(NCR, geometry=geometry)

BUFFER = .000625 # 1/4th of a city block in radius of Maryland coordinates.
#BUFFER = .00125 # 1/2 of a city block in radius of Maryland coordinates.

# Replace each light's point geometry with a buffer of radius BUFFER centred on
# that point; every other column of gLights is carried over unchanged.
gLights_Buff = gLights.assign(geometry=gLights.geometry.buffer(BUFFER))

In [4]:
#%% Geojoin - Note* this takes up a decent amount of memory, don't use if you are on a bad computer.

# Left geojoin by buffer: every buffered light is kept and paired with each
# night crime that sjoin matches to its buffer.
Matched_NLights = gpd.sjoin(gLights_Buff, gNCR, how='left')

In [5]:
#%% Filtering (Note: 'flag' takes up the most cpu time/ skip if you already have the data)

# Counter to be used: starts at 0 on every matched row.
Matched_NLights['Crime_LO_intime'] = [0]*len(Matched_NLights)

# Keep only rows that have both a work-order completion date and a crime report
# date, then renumber the surviving rows from 0.
Matched_NLights = (
    Matched_NLights
    .dropna(subset=['WoCompleted', 'REPORT_DAT'])
    .reset_index(drop=True)
)

# Flagging possible lights that influenced crime:
def flag(z):
    """Mark rows whose work order was completed within ten days of the crime report.

    For every row the absolute gap between ``WoCompleted`` and ``REPORT_DAT``
    is taken; where the whole-day part of that gap is 10 or less,
    ``Crime_LO_intime`` is set to 1.  All other rows keep their current value.

    ``WoCompleted`` entries that are not datetimes (some values are coded
    incorrectly as ``'%Y-%m-%dT%H:%M:%S.%fZ'`` text) are parsed and written
    back to the column before the comparison.

    Parameters
    ----------
    z : pandas.DataFrame
        Must contain the columns ``WoCompleted``, ``REPORT_DAT`` and
        ``Crime_LO_intime``.  It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    ValueError
        If a text ``WoCompleted`` value does not match the expected format.
    TypeError
        If a ``WoCompleted`` value is neither a datetime nor text.
    """
    # Work on whole columns instead of row by row: per-row .loc reads and
    # writes are very slow on a large join result, and column arithmetic does
    # not depend on the frame having a 0..n-1 index.
    completed = list(z['WoCompleted'])
    if not all(isinstance(value, dt.datetime) for value in completed):
        # Some values coded incorrectly: parse only the entries that are not
        # already datetimes and leave the rest exactly as they are.
        z['WoCompleted'] = [
            value if isinstance(value, dt.datetime)
            else dt.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ')
            for value in completed
        ]

    gap = (pd.to_datetime(z['WoCompleted']) - pd.to_datetime(z['REPORT_DAT'])).abs()
    # `.values` makes the mask positional, so duplicate index labels cannot
    # interfere with the assignment.
    z.loc[(gap.dt.days <= 10).values, 'Crime_LO_intime'] = 1
    return z

In [None]:
# Run the flagging pass over the night-crime matches and keep the frame it returns.
Matched_NLights = flag(Matched_NLights)

In [7]:
# Rough Hit Ratio: share of matched rows flagged as within the time window.
# Printed explicitly because a bare expression that is not the last statement
# of a cell is evaluated and thrown away; .mean() also gives NaN rather than
# raising ZeroDivisionError when no rows are left.
print('Rough hit ratio:', Matched_NLights['Crime_LO_intime'].mean())

# Lights matched with a crime nearby outside of likely bulb outage
# (unflagged rows, first row per work order; join bookkeeping columns removed).
Matched_NLights0 = (
    Matched_NLights[Matched_NLights['Crime_LO_intime'] == 0]
    .drop_duplicates(subset='WoID', keep='first')
    .drop(['index_right', 'geometry'], axis=1)
)

# Lights matched with a crime nearby within timeframe of light outage
# (flagged rows, first row per OBJECTID; join bookkeeping columns removed).
Matched_NLights1 = (
    Matched_NLights[Matched_NLights['Crime_LO_intime'] == 1]
    .drop_duplicates(subset='OBJECTID', keep='first')
    .drop(['index_right', 'geometry'], axis=1)
)

In [9]:
#%% To excel
# Write both night-crime tables into the data folder.
Matched_NLights0.to_excel(excel_writer=choice + '/geoLights0.xlsx')
Matched_NLights1.to_excel(excel_writer=choice + '/geoLights1.xlsx')

## Clearing Memory

In [10]:
# IPython magic: clear every name from the interactive namespace without asking
# for confirmation (-f), so that the night-crime frames can be released from memory.
# Imports and data have to be loaded again afterwards.
%reset -f

## Day Crimes

In [1]:
#%% Packages
from __future__ import (absolute_import, division, print_function)
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import datetime as dt
import progressbar #used progressbar2

#%% Data
# Location of the project's Data folder in a Linux checkout and in a Windows checkout.
Linux = '/home/sade/Desktop/Git Cloned Repos/lights-and-crime/Lights and Crime Garrett/Data'
Windows = 'C:/Users/Sade/Documents/GitHub/lights-and-crime/Lights and Crime Garrett/Data'

# Folder that the spreadsheets are read from and written to; set to `Windows` when running there.
choice = Linux

In [2]:
# Read the five input workbooks from the data folder, in this order:
#   Lights.xlsx            (from 0.0a)
#   islims_inventory.xlsx  (see 0.0a iSlims and City Work Data)
#   islims_workorders.xlsx (see 0.0a iSlims and City Work Data)
#   NCR.xlsx               (from 0.0a)
#   DCR.xlsx               (from 0.0a)
# NOTE(review): NCR is not referenced by the day-crime cells that follow --
# confirm whether it can be dropped here to save memory.
isf, inv, wo, NCR, DCR = (
    pd.read_excel(choice + workbook)
    for workbook in ('/Lights.xlsx',
                     '/islims_inventory.xlsx',
                     '/islims_workorders.xlsx',
                     '/NCR.xlsx',
                     '/DCR.xlsx')
)

In [3]:
#%% Prepping for Geojoin

# Lights -> work orders (on WoID) -> inventory (on inventoryID), both as left
# merges; the listed columns are discarded after each merge.
wo = wo.rename(columns={'woID': 'WoID'})
isf_wo = pd.merge(isf, wo, how='left', on='WoID').drop(
    ['srchAssetID', 'gpscoordinateX', 'gpscoordinateY', 'initialproblemID',
     'resolveddatetime', 'entereddate', 'finalresolutionID'],
    axis=1)
isf_wo_inv = pd.merge(isf_wo, inv, how='left', on='inventoryID').drop(
    ['gpscoordinateX', 'gpscoordinateY'], axis=1)

# Setting up data into geopandas
# Lights: one point per row from gpsX/gpsY, keeping the first row for each WoID.
geometry = list(map(Point, zip(isf_wo_inv['gpsX'], isf_wo_inv['gpsY'])))
gLights = GeoDataFrame(isf_wo_inv, geometry=geometry).drop_duplicates(subset=['WoID'])
# Day crimes: one point per row from the X/Y columns.
geometry = list(map(Point, zip(DCR['X'], DCR['Y'])))
gDCR = GeoDataFrame(DCR, geometry=geometry)

BUFFER = .000625 # 1/4th of a city block in radius of Maryland coordinates.
#BUFFER = .00125 # 1/2 of a city block in radius of Maryland coordinates.

# Replace each light's point geometry with a buffer of radius BUFFER centred on
# that point; every other column of gLights is carried over unchanged.
gLights_Buff = gLights.assign(geometry=gLights.geometry.buffer(BUFFER))

In [None]:
#%% Geojoin - Note* this takes up a decent amount of memory, don't use if you are on a bad computer.

# Left geojoin by buffer: every buffered light is kept and paired with each
# day crime that sjoin matches to its buffer.
Matched_DLights = gpd.sjoin(gLights_Buff, gDCR, how='left')

In [5]:
#%% Filtering (Note: 'flag' takes up the most cpu time/ skip if you already have the data)

# Counter to be used: starts at 0 on every matched row.
Matched_DLights['Crime_LO_intime'] = [0]*len(Matched_DLights)

# Keep only rows that have both a work-order completion date and a crime report
# date, then renumber the surviving rows from 0.
Matched_DLights = (
    Matched_DLights
    .dropna(subset=['WoCompleted', 'REPORT_DAT'])
    .reset_index(drop=True)
)

# Flagging possible lights that influenced crime:
def flag(z):
    """Mark rows whose work order was completed within ten days of the crime report.

    For every row the absolute gap between ``WoCompleted`` and ``REPORT_DAT``
    is taken; where the whole-day part of that gap is 10 or less,
    ``Crime_LO_intime`` is set to 1.  All other rows keep their current value.

    ``WoCompleted`` entries that are not datetimes (some values are coded
    incorrectly as ``'%Y-%m-%dT%H:%M:%S.%fZ'`` text) are parsed and written
    back to the column before the comparison.

    Parameters
    ----------
    z : pandas.DataFrame
        Must contain the columns ``WoCompleted``, ``REPORT_DAT`` and
        ``Crime_LO_intime``.  It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    ValueError
        If a text ``WoCompleted`` value does not match the expected format.
    TypeError
        If a ``WoCompleted`` value is neither a datetime nor text.
    """
    # Work on whole columns instead of row by row: per-row .loc reads and
    # writes are very slow on a large join result, and column arithmetic does
    # not depend on the frame having a 0..n-1 index.
    completed = list(z['WoCompleted'])
    if not all(isinstance(value, dt.datetime) for value in completed):
        # Some values coded incorrectly: parse only the entries that are not
        # already datetimes and leave the rest exactly as they are.
        z['WoCompleted'] = [
            value if isinstance(value, dt.datetime)
            else dt.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ')
            for value in completed
        ]

    gap = (pd.to_datetime(z['WoCompleted']) - pd.to_datetime(z['REPORT_DAT'])).abs()
    # `.values` makes the mask positional, so duplicate index labels cannot
    # interfere with the assignment.
    z.loc[(gap.dt.days <= 10).values, 'Crime_LO_intime'] = 1
    return z

In [None]:
# Run the flagging pass over the day-crime matches and keep the frame it returns.
Matched_DLights = flag(Matched_DLights)

In [None]:
# Rough Hit Ratio: share of matched rows flagged as within the time window.
# Printed explicitly because a bare expression that is not the last statement
# of a cell is evaluated and thrown away; .mean() also gives NaN rather than
# raising ZeroDivisionError when no rows are left.
print('Rough hit ratio:', Matched_DLights['Crime_LO_intime'].mean())

# Lights matched with a crime nearby outside of likely bulb outage
# (unflagged rows, first row per work order; join bookkeeping columns removed).
Matched_DLights0 = (
    Matched_DLights[Matched_DLights['Crime_LO_intime'] == 0]
    .drop_duplicates(subset='WoID', keep='first')
    .drop(['index_right', 'geometry'], axis=1)
)

# Lights matched with a crime nearby within timeframe of light outage
# (flagged rows, first row per OBJECTID; join bookkeeping columns removed).
Matched_DLights1 = (
    Matched_DLights[Matched_DLights['Crime_LO_intime'] == 1]
    .drop_duplicates(subset='OBJECTID', keep='first')
    .drop(['index_right', 'geometry'], axis=1)
)

In [None]:
#%% To excel
# Write both day-crime tables into the data folder.
Matched_DLights0.to_excel(excel_writer=choice + '/geoDLights0.xlsx')
Matched_DLights1.to_excel(excel_writer=choice + '/geoDLights1.xlsx')