In [1]:
import time
import pandas as pd
import numpy as np

import geopandas as gpd
from shapely.geometry import Point, Polygon

import warnings
warnings.filterwarnings('ignore')

import yaml
# Load connection settings from the local YAML config file.
with open('config.yml') as f:
    # safe_load only builds plain Python types (dicts, lists, strings, numbers).
    # Bare yaml.load(f) can instantiate arbitrary objects from the file and is
    # rejected outright (missing Loader argument) by PyYAML >= 6.
    config = yaml.safe_load(f)

# One settings mapping per database, keyed by the names used in config.yml.
dwp = config['dcsedwp']
dcgis = config['dcgisprd']

In [2]:
import datetime as dt
# Today's date as a YYYY/MM/DD string.
date = dt.date.today().strftime("%Y/%m/%d")
print(date)

2018/07/11


#### Connect to DC Data Warehouse (location of 311 data)

In [3]:
import cx_Oracle
# Build the DSN from the host/port/service name in the config, then open the
# connection with the configured credentials.
dsn_tns = cx_Oracle.makedsn(
    dwp['host'],
    dwp['port'],
    service_name=dwp['service_name'],
)
dcsedwp = cx_Oracle.connect(
    user=dwp['username'],
    password=dwp['password'],
    dsn=dsn_tns,
)

### Get Census Blocks

In [4]:
# Read the 2010 census blocks and keep only the identifier, population,
# area and geometry columns used later in the notebook.
blocks = gpd.read_file('data/Census_Blocks__2010.geojson')[
    ['GEOID', 'BLOCK', 'BLKGRP', 'P0010001', 'SqMiles', 'ACRES', 'geometry']
]

In [5]:
# Per-block table indexed by GEOID with two derived columns:
#   pop_density = P0010001 / SqMiles
#   tot_pop     = P0010001
# The two source columns are dropped once the derived ones exist.
blks = (
    blocks[['GEOID', 'P0010001', 'SqMiles']]
    .set_index('GEOID')
    .assign(
        pop_density=lambda frame: frame['P0010001']*1.0/frame['SqMiles'],
        tot_pop=lambda frame: frame['P0010001'],
    )
    .drop(['P0010001', 'SqMiles'], axis=1)
)

#### Check most recent 311 data

In [6]:
# Latest service order date among requests that have usable coordinates
# (non-null, longitude < 0, latitude > 0).
pd.read_sql(sql='''
select 
max(serviceorderdate)
from EDW_CIRM.VW_SERVICE_REQUESTS_DC_MSSQL
where longitude is not null and latitude is not null
and cast(longitude as float) < 0 and cast(latitude as float) > 0
''', con=dcsedwp)

Unnamed: 0,MAX(SERVICEORDERDATE)
0,2018-07-08 20:08:06


#### Check Lat-Long Data

In [7]:
# Average / max / min of longitude and latitude for requests opened on or
# after 2017-03-18, as a quick range check on the coordinates.
pd.read_sql(sql='''
select 
avg(longitude) as avg_long
, max(longitude) as max_long
, min(longitude) as min_long
, avg(latitude) as avg_lat
, max(latitude) as max_lat
, min(latitude) as min_lat

from EDW_CIRM.VW_SERVICE_REQUESTS_DC_MSSQL
where longitude is not null and latitude is not null
and serviceorderdate >= TO_DATE('2017-03-18', 'yyyy-mm-dd')

''', con=dcsedwp)

Unnamed: 0,AVG_LONG,MAX_LONG,MIN_LONG,AVG_LAT,MAX_LAT,MIN_LAT
0,-77.013069,-76.909531,-77.114206,38.912211,38.99554,38.812739


#### Pull Outcome Data

In [8]:
# Pull the rodent-related 311 requests ('Rat Abatement' and 'Rodent Inspection
# and Treatment') opened on or after 2015-08-01 and resolved before the run
# date, restricted to rows with usable coordinates.
#
# The progress timestamps are formatted from time.gmtime() so that the literal
# "+0000" suffix is true; formatting time.localtime() with that suffix labels
# local wall-clock time as UTC.
print('Started pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
# The run date is passed as a bind variable instead of being spliced into the
# SQL text, and the TO_DATE mask uses '/' separators to match the
# "%Y/%m/%d" string held in `date`.
rats = pd.read_sql('''
select 
sr_history_id
, service_request_id__c
, servicecode
, servicecodedescription
, serviceorderdate
, serviceorderstatus
, inspectiondate
, resolution
, resolutiondate
, servicenotes
, ward
, longitude
, latitude
from EDW_CIRM.VW_SERVICE_REQUESTS_DC_MSSQL
where serviceorderdate >= TO_DATE('2015-08-01', 'yyyy-mm-dd')
and resolutiondate < TO_DATE(:run_date, 'yyyy/mm/dd')
and (servicecodedescription = 'Rat Abatement'
or servicecodedescription = 'Rodent Inspection and Treatment')
and longitude is not null and latitude is not null
and cast(longitude as float) < 0 and cast(latitude as float) > 0
''', dcsedwp, params={'run_date': date})
print('Finished pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

Started pulling data at Wed, 11 Jul 2018 20:16:47 +0000
Finished pulling data at Wed, 11 Jul 2018 20:17:01 +0000


In [9]:
# Phrases in the service notes that are coded as rodent activity (1).
_ACTIVITY_PHRASES = (
    'baited',
    'treatment',
    'treated',
    'found rat burrows',
)

# Phrases in the service notes that are coded as no activity (0).
# Matching is by substring, so 'no rat' already covers 'no rats',
# 'no rat burrow(s)', 'no rats burrows', 'no rat holes' and 'no rat activity',
# and 'found mice' already covers 'found mice in home'.
_NO_ACTIVITY_PHRASES = (
    'found mice',
    'no evidence',
    'no rat',
    'no burrows',
    'no rodent activity',
    'no activity',
    'no visible',
    'gate locked',
    'fenced and no permission',
    'no access',
)


def outcome(x):
    """Code free-text service notes as a binary activity outcome.

    Matching is a case-insensitive substring search. Activity phrases are
    checked before no-activity phrases, so a note containing both kinds is
    coded 1.

    Parameters
    ----------
    x : str or None
        Service notes for one request. Any non-string value (None, NaN, ...)
        is treated as missing.

    Returns
    -------
    int or None
        1 if an activity phrase is present, 0 if only a no-activity phrase is
        present, None if the notes are missing or match no known phrase.
    """
    # Missing notes may arrive as None or as a float NaN; neither has .lower().
    if not isinstance(x, str):
        return None

    notes = x.lower()
    if any(phrase in notes for phrase in _ACTIVITY_PHRASES):
        return 1
    if any(phrase in notes for phrase in _NO_ACTIVITY_PHRASES):
        return 0
    return None

In [10]:
# Parse the service order date once and derive the calendar columns from it.
order_ts = pd.to_datetime(rats['SERVICEORDERDATE'])
rats['week'] = order_ts.dt.week
rats['year'] = order_ts.dt.year
rats['month'] = order_ts.dt.month

In [11]:
# Every row counts as one call; `activity` is the coded outcome of its notes.
rats['calls'] = 1
rats['activity'] = rats['SERVICENOTES'].apply(outcome)

# Keep only closed requests that actually have service notes.
is_closed = rats['SERVICEORDERSTATUS'] == 'Closed'
has_notes = rats['SERVICENOTES'].notnull()
closed = rats[is_closed & has_notes]

In [12]:
# Mean of `activity` over closed requests ordered before 2017-09-01.
closed.loc[
    pd.to_datetime(closed['SERVICEORDERDATE']) < pd.to_datetime('2017-9-01'),
    'activity',
].mean()

0.45542744656917883

## Match 311 Rodent Data to Census Blocks

In [13]:
## Build one point per closed request from its (longitude, latitude) pair
lon_lat_pairs = zip(closed['LONGITUDE'].apply(float), closed['LATITUDE'].apply(float))
geometry = [Point(lon, lat) for lon, lat in lon_lat_pairs]
crs = {'init': 'epsg:4326'}
points = gpd.GeoDataFrame(closed, crs=crs, geometry=geometry)

## Left-join the request points onto the census blocks they intersect
geo_rats = gpd.sjoin(left_df=blocks, right_df=points, how='left', op='intersects')

In [15]:
# Write the block-level join, indexed by GEOID, to a gzip-compressed CSV.
geo_rats.set_index('GEOID').to_csv(
    path_or_buf='data/rats_to_blocks.csv.gz',
    compression='gzip',
)