# RFR - 2018 Data Only

In [1]:
import os
import pandas as pd
import numpy as np
import datetime
import urllib
import requests
from sodapy import Socrata
import geopandas as gp
import matplotlib.pyplot as plt

import shapely
import geopandas as gp
from geopandas import GeoDataFrame
from fiona.crs import from_epsg
from shapely.geometry import Point, MultiPoint
import io
from geopandas.tools import sjoin
from shapely.ops import nearest_points


import warnings
warnings.filterwarnings(action = 'once')

### Read in Zoning

In [2]:
zoning = gp.GeoDataFrame.from_file('nyczoning/nyzd.shp')
zoning.to_crs(epsg=4326, inplace=True)

In [3]:
def simplifying_zone(x):
    if x in ['PLAYGROUND','PARK','PUBLIC PLACE','BALL FIELD' ,'BPC']:
        return 'P'
    if '/' in x:
        return 'O'
    if x[:3] == 'R10':
        return x[:3]
    else:
        return x[:2]

In [4]:
zoning['simple_zone'] = zoning['ZONEDIST'].map(simplifying_zone)


In [5]:
zoning.drop(columns=['ZONEDIST','Shape_Leng','Shape_Area'],inplace=True)

### Read in DOB Permits & Census Block Groups

In [6]:
constr_cbg = gp.GeoDataFrame.from_file('Data/permits_blkgrp/permits.shp')
constr_cbg.to_crs(epsg=4326, inplace=True)


### Read in Census Block Group Shapefile

In [7]:
cbg = gp.GeoDataFrame.from_file('Data/nyc_census_clipped/nyc_census_clipped.shp')

In [8]:
cbg.drop(columns=['OBJECTID', 'STATEFP', 'COUNTYFP', 'TRACTCE', 'BLKGRPCE','NAMELSAD', 'MTFCC', 'FUNCSTAT', 'ALAND', 'AWATER', 'INTPTLAT',
       'INTPTLON','GEOID_1', 'B02001e1', 'B02001m1', 'B02001e2', 'B02001m2', 'B02001e3',
       'B02001m3', 'B02001e4', 'B02001m4', 'B02001e5', 'B02001m5', 'B02001e6',
       'B02001m6', 'B02001e7', 'B02001m7', 'B02001e8', 'B02001m8', 'B02001e9',
       'B02001m9', 'B02001e10', 'B02001m10', 'B02008e1', 'B02008m1',
       'B02009e1', 'B02009m1', 'B02010e1', 'B02010m1', 'B02011e1', 'B02011m1',
       'B02012e1', 'B02012m1', 'B02013e1', 'B02013m1', 'C02003e1', 'C02003m1',
       'C02003e2', 'C02003m2', 'C02003e3', 'C02003m3', 'C02003e4', 'C02003m4',
       'C02003e5', 'C02003m5', 'C02003e6', 'C02003m6', 'C02003e7', 'C02003m7',
       'C02003e8', 'C02003m8', 'C02003e9', 'C02003m9', 'C02003e10',
       'C02003m10', 'C02003e11', 'C02003m11', 'C02003e12', 'C02003m12',
       'C02003e13', 'C02003m13', 'C02003e14', 'C02003m14', 'C02003e15',
       'C02003m15', 'C02003e16', 'C02003m16', 'C02003e17', 'C02003m17',
       'C02003e18', 'C02003m18', 'C02003e19', 'C02003m19', 'OBJECTID_1'],inplace=True)

In [9]:
cbg = cbg.merge(constr_cbg.groupby('GEOID_Data')['total_popu', 'Count_', 'GEOID', 'per_capita',
       'white_po_1', 'african__1', 'asian_po_1', 'native_h_1', 'american_1'].mean().reset_index())

In [10]:
cbg.drop(columns=['GEOID','Shape_Leng','Shape_Area'],inplace=True)

### Construction Permits

In [12]:
constr_cbg = constr_cbg[(constr_cbg.Job_Type == 'NB') | 
                        (constr_cbg.Job_Type == 'A1') | 
                        (constr_cbg.Job_Type == 'DM')][[ 'Job_Type', 
                        'Work_Type', 'Permit_Typ','Issuance_D', 'Expiration', 'Job_Start_', 'JobStartDa',
       'ExpireDate',  'IssueDate', 'ExpireYear', 'GEOID_Data']]


### Weather

In [8]:
weather = pd.read_csv('Data/2018weather.csv', usecols = ['Date','Avg_T','PRECIP',
       'NEW_SNOW'])

In [9]:
def clean_precip(x):
    try:
        return float(x)
    except:
        return 0

In [10]:
weather['Date']=pd.to_datetime(weather['Date'])
weather['Date'] = weather['Date'].apply(lambda x: x.date())
weather['PRECIP'] = weather['PRECIP'].apply(clean_precip)
weather['NEW_SNOW'] = weather['NEW_SNOW'].apply(clean_precip)
weather.head()

Unnamed: 0,Date,Avg_T,PRECIP,NEW_SNOW
0,2018-01-01,13.0,0.0,0.0
1,2018-01-02,19.5,0.0,0.0
2,2018-01-03,23.0,0.0,0.0
3,2018-01-04,24.0,0.76,9.8
4,2018-01-05,14.0,0.0,0.0
5,2018-01-06,9.5,0.0,0.0
6,2018-01-07,11.5,0.0,0.0
7,2018-01-08,24.0,0.01,0.0
8,2018-01-09,37.0,0.0,0.0
9,2018-01-10,36.5,0.0,0.0


### Read in Pluto - Census Merged Shapefile

### Complaints

In [15]:
complaints = pd.read_csv('Data/311DEPcomplaints.csv', usecols=['address_type','borough','city',
       'closed_date', 'community_board','created_date',
       'cross_street_1', 'cross_street_2', 'descriptor', 'due_date',
       'facility_type', 'incident_address', 'incident_zip',
       'intersection_street_1', 'intersection_street_2', 'latitude',
       'location_type', 'longitude', 'resolution_action_updated_date',
       'resolution_description', 'status', 'street_name', 'unique_key'])

  interactivity=interactivity, compiler=compiler, result=result)


complaints.dropna(subset=['longitude', 'latitude'],inplace=True)
complaints['createdate'] = pd.to_datetime(complaints['created_date'])
complaints = complaints[(complaints['createdate'] >= pd.Timestamp(2018,1,1)) &
                        (complaints['createdate'] < pd.Timestamp(2019,1,1))]

In [16]:
complaints.dropna(subset=['longitude', 'latitude'],inplace=True)
complaints['createdate'] = pd.to_datetime(complaints['created_date'])
complaints = complaints[complaints.createdate >= datetime.datetime(2018,1,1)]
complaints = complaints[complaints.createdate < datetime.datetime(2019,1,1)]
complaints['lonlat']=list(zip(complaints.longitude.astype(float), complaints.latitude.astype(float)))
complaints['geometry']=complaints[['lonlat']].applymap(lambda x:shapely.geometry.Point(x))
crs = {'init':'epsg:4326', 'no_defs': True}
complaints = gp.GeoDataFrame(complaints, crs=crs, geometry=complaints['geometry'])


In [17]:
complaints.drop(columns=['address_type', 'borough', 'city', 'closed_date', 'community_board',
       'created_date', 'cross_street_1', 'cross_street_2','due_date', 'facility_type', 'incident_address', 'incident_zip',
       'intersection_street_1', 'intersection_street_2', 'latitude',
       'location_type', 'longitude', 'resolution_action_updated_date',
                         'status', 'street_name', 'unique_key'],inplace=True)



In [18]:
def TOD_shifts(x):
    if x.hour <=6:
        return 'M'
    if x.hour >6 & x.hour<19:
        return 'D'
    if x.hour >=19 :
        return 'N'

In [19]:
complaints['Date'] =  complaints.createdate.apply(lambda x: x.date())

In [20]:
complaints['TOD'] = complaints.createdate.apply(TOD_shifts)

## JOIN DATA

In [21]:
complaints = sjoin(complaints,zoning).drop(columns='index_right')

In [22]:
complaints = sjoin(complaints, cbg).drop(columns=['createdate','resolution_description',
                                                 'lonlat','index_right'])

  warn('CRS of frames being joined does not match!')


In [23]:
complaints.rename(columns={'day':'Date'}, inplace=True)

In [24]:
complaints = complaints.merge(weather, on='Date')

In [27]:
zones_cbg = pd.DataFrame(complaints.groupby(['GEOID_Data','Date'])['simple_zone'].agg(pd.Series.mode))


In [28]:
import numpy
def takeOne(x):
    if type(x) == numpy.ndarray:
        return x[0]
    else:
        return x

In [29]:
zones_cbg['simple_zone'] = zones_cbg['simple_zone'].apply(takeOne)

In [30]:
complaints = complaints.groupby(['GEOID_Data','Date']).sum()

In [33]:
complaints = complaints.merge(zones_cbg, on=['GEOID_Data','Date'])

In [None]:
complaints = pd.get_dummies(complaints,'simple_zone')

In [36]:
complaints['constr_permits'] = 0

In [49]:
constr_cbg['JobStartDa'] = pd.to_datetime(constr_cbg['JobStartDa'])
constr_cbg['ExpireDate'] = pd.to_datetime(constr_cbg['ExpireDate'])


In [58]:
complaints.index[1]

('15000US360050002001', Timestamp('2018-05-11 00:00:00'))

In [61]:
complaints['constr_permits'][complaints.index[2][0]][complaints.index[2][1]]

0