In [1]:
import time
import pandas as pd
import numpy as np

import geopandas as gpd
from shapely.geometry import Point, Polygon

import warnings
warnings.filterwarnings('ignore')

import yaml
# Read connection settings from the local YAML file.  safe_load is used
# instead of a bare yaml.load so that only plain YAML data (mappings, lists,
# scalars) is constructed and no explicit Loader argument is required.
with open('config.yml') as f:
    config = yaml.safe_load(f)

# Settings for the two Oracle databases the notebook connects to.
dwp = config['dcsedwp']
dcgis = config['dcgisprd']

import datetime as dt

# Run date as a YYYY/MM/DD string; the SQL pulls further down use it as
# their upper date bound.
date = dt.date.today().strftime("%Y/%m/%d")
print(date)

2018/07/11


#### Connect to DC Data Warehouse 

In [2]:
import cx_Oracle

# Build the DSN from the `dcsedwp` settings, then open the connection.
dsn_tns = cx_Oracle.makedsn(
    dwp['host'],
    dwp['port'],
    service_name=dwp['service_name'],
)
dcsedwp = cx_Oracle.connect(dwp['username'], dwp['password'], dsn_tns)

#### Connect to DC GIS

In [3]:
import cx_Oracle

# Build the DSN from the `dcgisprd` settings, then open the connection.
# Note that `dsn_tns` is reassigned here.
dsn_tns = cx_Oracle.makedsn(
    dcgis['host'],
    dcgis['port'],
    service_name=dcgis['service_name'],
)
dcgisprd = cx_Oracle.connect(dcgis['username'], dcgis['password'], dsn_tns)

## Pull Census Block Data

In [4]:
# Load the 2010 Census block polygons and keep only the columns used later.
blocks = gpd.read_file('data/Census_Blocks__2010.geojson')[
    ['GEOID', 'BLOCK', 'BLKGRP', 'P0010001', 'SqMiles', 'ACRES', 'geometry']
]

In [5]:
# Per-block table indexed by GEOID with two derived columns:
#   pop_density = P0010001 / SqMiles   (the * 1.0 forces float division)
#   tot_pop     = P0010001
# The two source columns are dropped once the derived ones exist.
blks = (
    blocks[['GEOID', 'P0010001', 'SqMiles']]
    .set_index('GEOID')
    .assign(
        pop_density=lambda frame: frame['P0010001'] * 1.0 / frame['SqMiles'],
        tot_pop=lambda frame: frame['P0010001'],
    )
    .drop(['P0010001', 'SqMiles'], axis=1)
)

## DCRA Data

### Pull DCRA BBL Data

In [6]:
# Pull the Basic Business License points whose start date is on or before the
# run date held in `date` (a YYYY/MM/DD string).  The date is passed to Oracle
# as the named bind variable :run_date instead of being concatenated into the
# SQL text, so the statement stays constant and needs no quote escaping.
# NOTE(review): the progress stamps print local time beside a literal "+0000".
print('Started pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime()))
bbl = pd.read_sql('''
select
    OBJECTID,
    BBL_LICENSE_FACT_ID,
    LICENSESTATUS,
    LICENSECATEGORY,
    LICENSE_START_DATE,
    LICENSE_EXPIRATION_DATE,
    LICENSE_ISSUE_DATE,
    LASTMODIFIEDDATE,
    SITEADDRESS,
    LATITUDE,
    LONGITUDE,
    XCOORD,
    YCOORD,
    ZIPCODE,
    MARADDRESSREPOSITORYID
from DCRA_DATA.BASICBUSINESSLICENSEPT
where license_start_date <= TO_DATE(:run_date, 'YYYY/MM/DD')
and license_start_date IS NOT NULL
''', dcgisprd, params={'run_date': date})
print('Finished pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime()))

Started pulling data at Wed, 11 Jul 2018 21:18:40 +0000
Finished pulling data at Wed, 11 Jul 2018 21:18:53 +0000


In [7]:
licenses = {
'Ambulance':'other'
,'Apartment':'multifamily_rental'
,'Asbestos Business':'other'
,'Athletic Exhibition':'other'
,'Auction Sale':'other'
,'Auction Sale DPW MPD':'other'
,'Auction Sales Temporary':'other'
,'Auctioneer':'other'
,'Auctioneer DC':'other'
,'Auctioneer Temporary':'other'
,'Auto Rental':'other'
,'Auto Wash':'water'
,'Automobile Repossessor':'other'
,'Automobile Repossessor - Bus':'other'
,'Bakery':'food'
,'Barber Chair':'other'
,'Barber Shop':'other'
,'Beauty Booth':'other'
,'Beauty Shop':'other'
,'Beauty Shop Braiding':'other'
,'Beauty Shop Electrology':'other'
,'Beauty Shop Esthetics':'other'
,'Beauty Shop Nails':'other'
,'Bed and Breakfast':'hotel'
,'Billiard Parlor':'other'
,'Boarding House':'hotel'
,'Bowling Alley':'other'
,'Bulk Fuel Above Ground Tank':'other'
,'Bulk Fuel Storage Plant':'other'
,'Candy Manufacturing':'food'
,'Carnival and Fair':'other'
,'Caterers':'food'
,'Charitable Exempt':'other'
,'Charitable Solicitation':'other'
,'Cigarette Retail':'other'
,'Cigarette Wholesale':'other'
,'Circus':'animals'
,'Commission Merchant Food':'food'
,'Consumer Goods (Auto Repair)':'other'
,'Consumer Goods (Elect Repair)':'other'
,'Cooperative Association':'other'
,'Delicatessen':'food'
,'Driving School':'other'
,'Dry Cleaners':'other'
,'Employer Paid Personnel Serv':'other'
,'Employment Agency':'other'
,'Employment Counseling':'other'
,'Fireworks Sales':'other'
,'Fireworks Sales Temp':'other'
,'Fireworks Wholesale':'other'
,'Food Products':'food'
,'Food Vending Machine':'food'
,'Funeral Establishment':'other'
,'Gasoline Dealer':'gas'
,'Gen Contr-Construction Mngr':'other'
,'General Business Licenses':'other'
,'Grocery Store':'food'
,'Health Spa':'other'
,'Health Spa Sales':'other'
,'Home Improvement Contractor':'other'
,'Home Improvement Salesman':'other'
,'Horse Drawn Carriage Trade':'animals'
,'Hotel':'hotel'
,'Ice Cream Manufacture':'food'
,'Inn And Motel':'hotel'
,'Kerosene':'other'
,'Marine Food Product Wholesale':'food'
,'Marine Food Retail':'food'
,'Massage Establishment':'other'
,'Mattress Sale':'other'
,'Mattress Storage':'other'
,'Mechanical Amusement Machine':'other'
,'Mobile Delicatessen':'other'
,'Motion Picture Theatre':'other'
,'Motor Vehicle Dealer':'other'
,'Motor Vehicle Reinsp. Station':'other'
,'Motor Vehicle Reinspector':'other'
,'Motor Vehicle Salesman':'other'
,'Moving And Storage':'storage'
,'New and Used Tire Dealer':'other'
,'One Family Rental':'single_family_rental'
,'Outdoor Signs':'other'
,'Parking Facility':'parking'
,'Parking Facility Attendant':'parking'
,'Patent Medicine':'other'
,'Pawnbrokers':'other'
,'Pesticide Applicator':'other'
,'Pesticide Operator':'other'
,'Pesticide Public Applicator':'other'
,'Pesticide Public Operator':'other'
,'Pet Shop':'animals'
,'Power Laundry':'other'
,'Public Hall':'other'
,'Public School Cafeteria':'food'
,'Pyroxylin':'other'
,'Restaurant':'restaurant'
,'Rooming House':'hotel'
,'Secondhand Dealers (A)':'other'
,'Secondhand Dealers (B)':'other'
,'Secondhand Dealers (C)':'other'
,'Secondhand Dealers (Temp)':'other'
,'Security Agency (Firm)':'other'
,'Security Agent (Person)':'other'
,'Security Alarm Agent':'other'
,'Security Alarm Dealer':'other'
,'Solicitor':'other'
,'Solid Waste Collection':'waste'
,'Solid Waste Vehicle':'waste'
,'Solvent Sales':'other'
,'Special Events':'events'
,'Swimming Pool':'pool'
,'Swimming Pool DC':'pool'
,'Theater (Live)':'other'
,'Tow Truck':'other'
,'Tow Truck Business':'other'
,'Tow Truck Storage Lot':'other'
,'Two Family Rental':'two_family_rental'
,'Used Car Buyer Seller':'other'
,'Used Car Lot':'other'
,'Used Car Seller Only':'other'
,'Valet Parking':'other'
,'Varsol Sales':'other'
}

In [8]:
# Translate each raw license category into its group label, one-hot encode the
# groups as `bbl_<group>` columns, and join those columns back by row index.
bbl = bbl.assign(license=bbl['LICENSECATEGORY'].map(licenses))
license_dummies = pd.get_dummies(bbl['license'], prefix='bbl')
license_dummies = license_dummies.set_index(bbl.index)
bbl = bbl.merge(
    license_dummies,
    how='left',
    left_index=True,
    right_index=True,
)
# Constant marker column: every row is one license record.
bbl['bbl'] = 1

In [9]:
# Print the row count before and after keeping one row per BBL_LICENSE_FACT_ID.
print(bbl.shape[0])
bbl = bbl.drop_duplicates(subset=['BBL_LICENSE_FACT_ID'])
print(bbl.shape[0])

122143
122143


In [10]:
## Parse the license start and expiration dates into datetime columns
bbl['license_start_date'] = pd.to_datetime(bbl['LICENSE_START_DATE'])
bbl['license_exp_date'] = pd.to_datetime(bbl['LICENSE_EXPIRATION_DATE'])

In [11]:
## Build one (longitude, latitude) Point per license row, tagged as EPSG:4326
crs = {'init': 'epsg:4326'}
geometry = [
    Point(lon, lat)
    for lon, lat in zip(bbl['LONGITUDE'].apply(float), bbl['LATITUDE'].apply(float))
]
points = gpd.GeoDataFrame(bbl, geometry=geometry, crs=crs)

In [12]:
## Match license points to the Census block polygons they intersect
# time.strftime falls back to the current local time when no time tuple is given.
print('Started spatial join at ' + time.strftime("%a, %d %b %Y %H:%M:%S +0000"))
# Move the current index into a regular column before joining.
points = points.reset_index()
# The left join keeps every block row; the filter then keeps only the rows
# that carry a license start date.
geo_bbl = gpd.sjoin(blocks, points, how='left', op='intersects')
geo_bbl = geo_bbl.loc[geo_bbl['license_start_date'].notnull()]
print('Finished spatial join at ' + time.strftime("%a, %d %b %Y %H:%M:%S +0000"))

Started spatial join at Wed, 11 Jul 2018 21:18:55 +0000
Finished spatial join at Wed, 11 Jul 2018 21:19:02 +0000


In [13]:
# Write the block-matched license rows to a gzip-compressed CSV.
geo_bbl.to_csv(
    'data/bbls_to_blocks.csv.gz',
    compression='gzip',
)

### Pull DCRA Building Permits

In [14]:
# Pull building permits issued from 2015-08-01 up to, but not including, the
# run date held in `date`.  `date` is formatted YYYY/MM/DD, so the TO_DATE
# mask for it uses the same layout; the original 'yyyy-mm-dd' mask worked only
# because Oracle tolerates mismatched punctuation.  The value is passed as the
# named bind variable :run_date instead of being concatenated into the SQL.
# NOTE(review): the progress stamps print local time beside a literal "+0000".
print('Started pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime()))
df2 = pd.read_sql('''
select
    OBJECTID,
    DCRAINTERNALNUMBER,
    ISSUE_DATE,
    PERMIT_ID,
    PERMIT_TYPE_NAME,
    PERMIT_SUBTYPE_NAME,
    PERMIT_CATEGORY_NAME,
    APPLICATION_STATUS_NAME,
    FULL_ADDRESS,
    ZONING,
    LATITUDE,
    LONGITUDE
from DCRA_DATA.BUILDINGPERMITPT
where ISSUE_DATE >= TO_DATE('2015-08-01', 'yyyy-mm-dd')
and ISSUE_DATE < TO_DATE(:run_date, 'YYYY/MM/DD')
''', dcgisprd, params={'run_date': date})
print('Finished pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime()))

Started pulling data at Wed, 11 Jul 2018 21:19:44 +0000
Finished pulling data at Wed, 11 Jul 2018 21:19:59 +0000


In [15]:
# Print the most frequent PERMIT_ID values before and after keeping one row
# per (PERMIT_ID, ISSUE_DATE) pair.
print(df2['PERMIT_ID'].value_counts().head(5))
df2 = df2.drop_duplicates(subset=['PERMIT_ID', 'ISSUE_DATE'])
print(df2['PERMIT_ID'].value_counts().head(5))

D1800361    1
E1705731    1
E1806349    1
P1803775    1
P1806342    1
Name: PERMIT_ID, dtype: int64
D1800361    1
E1705731    1
E1806349    1
P1803775    1
P1806342    1
Name: PERMIT_ID, dtype: int64


In [16]:
# Keep construction permits only and renumber the rows 0..n-1.
permits = df2.loc[df2['PERMIT_TYPE_NAME'] == 'CONSTRUCTION']
permits = permits.set_index(np.arange(len(permits)))
permits['issue_date'] = permits['ISSUE_DATE']

# One 0/1 indicator column per permit subtype; multiplying the boolean
# comparison by 1 turns True/False into 1/0.
for flag_column, subtype_name in [
    ('dcrapermit_new_building', 'NEW BUILDING'),
    ('dcrapermit_demolition', 'DEMOLITION'),
    ('dcrapermit_raze', 'RAZE'),
    ('dcrapermit_addition', 'ADDITION'),
    ('dcrapermit_retain_wall', 'RETAINING WALL'),
    ('dcrapermit_excavation', 'EXCAVATION ONLY'),
    ('dcrapermit_pool', 'SWIMMING POOL'),
    ('dcrapermit_garage', 'GARAGE'),
]:
    permits[flag_column] = 1*(permits['PERMIT_SUBTYPE_NAME'] == subtype_name)

In [17]:
## Build one (longitude, latitude) Point per permit row, tagged as EPSG:4326
crs = {'init': 'epsg:4326'}
geometry = [
    Point(lon, lat)
    for lon, lat in zip(permits['LONGITUDE'].apply(float), permits['LATITUDE'].apply(float))
]
points = gpd.GeoDataFrame(permits, geometry=geometry, crs=crs)

## Match permit points to the Census block polygons they intersect
# time.strftime falls back to the current local time when no time tuple is given.
print('Started spatial join at ' + time.strftime("%a, %d %b %Y %H:%M:%S +0000"))
# Move the current index into a regular column before joining.
points = points.reset_index()
# The left join keeps every block row; the filter then keeps only the rows
# that carry a permit issue date.
geo_permits = gpd.sjoin(blocks, points, how='left', op='intersects')
geo_permits = geo_permits.loc[geo_permits['issue_date'].notnull()]
print('Finished spatial join at ' + time.strftime("%a, %d %b %Y %H:%M:%S +0000"))

Started spatial join at Wed, 11 Jul 2018 21:20:00 +0000
Finished spatial join at Wed, 11 Jul 2018 21:20:03 +0000


In [18]:
# Write the block-matched permit rows to a gzip-compressed CSV.
geo_permits.to_csv(
    'data/permits_to_blocks.csv.gz',
    compression='gzip',
)