In [2]:
import pandas as pd # version '0.18.1'
from datetime import datetime,timedelta
import time
import ast

This notebook assigns each complaint a binary value indicating whether there was an AHV permit nearby. For each complaint, it iterates through the permits and checks whether any permit is within 200 metres of the complaint (the radius applied in `filterdistance` below) and was issued for a time window that contains the time of the complaint.

In [3]:
# Load the complaints table from '2019_week.csv' into a DataFrame.
complaints19 = pd.read_csv('2019_week.csv')

In [14]:
# Display the (rows, columns) size of the complaints table.
complaints19.shape

(833, 41)

In [17]:
# Parse 'Created Date' into datetimes; filtered_time compares this column
# against each permit's start/end timestamps.
complaints19['Created Date'] = pd.to_datetime(complaints19['Created Date'])

In [18]:
# Load the AHV permit records (with geometry) from CSV into a DataFrame.
ahv_2019 = pd.read_csv('ahv_data_with_geo_20190622_20190630.csv')

In [19]:
# Display the (rows, columns) size of the AHV permit table.
ahv_2019.shape

(4540, 15)

In [20]:
# List the permit table's column names; 'requested_for_date_ranges' and
# 'the_geom' are the ones read by filtered_time and filterdistance.
ahv_2019.columns

Index(['ahv_id', 'timestamp_utc', 'building_identification_number', 'status',
       'residence_within_200_feet', 'enclosed_building_work',
       'full_or_partial_demolition', 'crane_use', 'requested_for_date_ranges',
       'apply_reason', 'work_description', 'the_geom', 'house_number',
       'street_name', 'zip_code'],
      dtype='object')

In [21]:
# Parse 'timestamp_utc' into datetimes so the table can be sorted by it.
ahv_2019['timestamp_utc'] = pd.to_datetime(ahv_2019['timestamp_utc'])

In [22]:
# Preview the five most recent permit rows that have no missing values.
# Nothing is assigned, so ahv_2019 itself is left unchanged.
ahv_2019.dropna().sort_values('timestamp_utc',ascending=False).head()

Unnamed: 0,ahv_id,timestamp_utc,building_identification_number,status,residence_within_200_feet,enclosed_building_work,full_or_partial_demolition,crane_use,requested_for_date_ranges,apply_reason,work_description,the_geom,house_number,street_name,zip_code
4531,895400,2019-07-02 00:15:23.180243,1086107.0,PENDING DOB REVIEW,True,True,False,False,"{""07/06/2019"": [""9:00 AM"", ""5:00 PM""], ""07/07/...",CONSTRUCTION ACTIVITIES WITH MINIMAL NOISE IMPACT,"GENERAL CONSTRUCTION, MECHANICAL, PLUMBING AND...",POINT (-73.9733516657426 40.79044104921986),203,W 90 ST,10024.0
4530,895400,2019-07-02 00:15:23.180243,1086107.0,PENDING DOB REVIEW,True,True,False,False,"{""07/06/2019"": [""9:00 AM"", ""5:00 PM""], ""07/07/...",CONSTRUCTION ACTIVITIES WITH MINIMAL NOISE IMPACT,"GENERAL CONSTRUCTION, MECHANICAL, PLUMBING AND...",POINT (-73.97362627275918 40.7905563800375),201,W 90 ST,10024.0
4529,895400,2019-07-02 00:15:23.180243,1086107.0,PENDING DOB REVIEW,True,True,False,False,"{""07/06/2019"": [""9:00 AM"", ""5:00 PM""], ""07/07/...",CONSTRUCTION ACTIVITIES WITH MINIMAL NOISE IMPACT,"GENERAL CONSTRUCTION, MECHANICAL, PLUMBING AND...",POINT (-73.97313410813321 40.790576314930966),622,AMSTERDAM AVE,10024.0
4528,895400,2019-07-02 00:15:23.180243,1086107.0,PENDING DOB REVIEW,True,True,False,False,"{""07/06/2019"": [""9:00 AM"", ""5:00 PM""], ""07/07/...",CONSTRUCTION ACTIVITIES WITH MINIMAL NOISE IMPACT,"GENERAL CONSTRUCTION, MECHANICAL, PLUMBING AND...",POINT (-73.97321602091303 40.79046365460613),620,AMSTERDAM AVE,10024.0
4527,895399,2019-07-02 00:15:22.828550,1035421.0,PENDING DOB REVIEW,True,False,True,False,"{""07/03/2019"": [""6:00 PM"", ""7:00 AM""], ""07/04/...",PUBLIC SAFETY,INTERIOR DEMOLITION / DEMOLITION WASTE LOAD OUT,POINT (-73.97658179551124 40.75623043429942),401,MADISON AVE,10017.0


In [23]:
def filtered_time(complaint, ahv_2019):
    '''
    Return the index labels of the AHV permits whose requested work window
    contains the complaint's creation time.

    Input : complaint - single complaint (row or mapping) exposing a
                        'Created Date' timestamp
            ahv_2019  - dataframe of AHV permits; each
                        'requested_for_date_ranges' cell is a dict literal
                        {date: [start, end]} whose times use '%I:%M %p'
    Output : list of AHV permit index labels, one entry per matching window
             (a permit can therefore appear more than once)

    Permit rows containing any missing value are skipped (dropna), and both
    window bounds are exclusive.
    '''
    created = complaint['Created Date']
    column_list = []
    for index, permit in ahv_2019.dropna().iterrows():
        # Parse the dict literal once per permit rather than once per lookup.
        date_ranges = ast.literal_eval(permit.requested_for_date_ranges)
        for day, window in date_ranges.items():
            start_label, end_label = window[0], window[1]
            day_start = pd.to_datetime(day)
            start_clock = datetime.strptime(start_label, '%I:%M %p')
            end_clock = datetime.strptime(end_label, '%I:%M %p')
            # Build the bounds with datetime arithmetic; gluing a second
            # clock string onto "YYYY-MM-DD 00:00:00" and re-parsing it
            # depends on the parser tolerating two time-of-day fields.
            start_time = day_start + timedelta(hours=start_clock.hour,
                                               minutes=start_clock.minute)
            end_time = day_start + timedelta(hours=end_clock.hour,
                                             minutes=end_clock.minute)
            # A window that starts in the PM and ends in the AM runs
            # overnight, so its end falls on the following day.
            if "PM" in start_label and "AM" in end_label:
                end_time = end_time + timedelta(days=1)
            if created > start_time and created < end_time:
                column_list.append(index)
    return column_list

In [24]:
def return_dist(complaint_lat,complaint_lon,ahv_lat,ahv_lon):
    '''
    Great-circle (haversine) distance between a complaint and an AHV permit.
    Input : complaint latitude, complaint longitude, permit latitude,
            permit longitude -- all in degrees (converted to radians here)
    Output : Distance in metres
    '''
    import math

    earth_radius_km = 6373.0
    phi1, lam1, phi2, lam2 = (
        math.radians(degrees)
        for degrees in (complaint_lat, complaint_lon, ahv_lat, ahv_lon)
    )

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine term followed by the central angle between the two points.
    haversine = math.sin(delta_phi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lam / 2)**2
    central_angle = 2 * math.atan2(math.sqrt(haversine), math.sqrt(1 - haversine))

    # The radius is in kilometres, so scale the arc length to metres.
    return earth_radius_km * central_angle * 1000

In [25]:
def filterdistance(complaint, ahv_df, max_distance_m=200):
    '''
    Return the index labels of the AHV permits located closer than
    max_distance_m metres to the complaint.

    Input : complaint      - single complaint exposing Latitude and
                             Longitude attributes
            ahv_df         - dataframe of AHV permits whose 'the_geom'
                             column holds text of the form 'POINT (lon lat)'
            max_distance_m - search radius in metres (default 200)
    Output : list of AHV permit index labels within the radius
    '''
    complaint_lat = float(complaint.Latitude)
    complaint_lon = float(complaint.Longitude)
    ahv_list = []
    for index, permit in ahv_df.iterrows(): #for each ahv record
        # Strip the 'POINT (' ... ')' wrapper once; longitude comes first.
        coords = permit.the_geom.replace('POINT (','').replace(')','').split(' ')
        ahv_lon = float(coords[0])
        ahv_lat = float(coords[1])
        if return_dist(complaint_lat, complaint_lon, ahv_lat, ahv_lon) < max_distance_m:
            ahv_list.append(index)

    return ahv_list

In [None]:
# Flag every complaint with AHV = 1 when at least one permit is both active at
# the complaint's creation time and within filterdistance's radius.
complaints19['AHV'] = 0
start = time.time()
for index, i in complaints19.iterrows():
    # Index labels of permits whose requested window contains this complaint.
    time_indices = filtered_time(i, ahv_2019)
    # .loc selects by label, matching the labels filtered_time returns
    # (.ix is deprecated and removed in pandas 1.0).
    ahv_final = ahv_2019.loc[time_indices]
    space_indices = filterdistance(i, ahv_final)
    if len(space_indices) > 0:
        # Single .loc call avoids chained assignment on a column view.
        complaints19.loc[index, 'AHV'] = 1

print("Time to run: {} seconds".format(time.time()-start))

In [31]:
# Write the complaints table (including the AHV flag column) to CSV.
# NOTE(review): presumably the 'ahv_assigned_files/' directory must already
# exist before this cell runs -- confirm.
complaints19.to_csv('ahv_assigned_files/ahv_assigned_complaints1.csv')