In [54]:
import pandas as pd
import numpy as np
import feather
import re
pd.options.display.max_columns = None

# Load and Clean the Data

## Ticket Data for 2020 and 2021

- Load preprocessed files from feather

In [3]:
ticket20 = feather.read_dataframe('ticket_2020.feather')
# ticket19 = feather.read_dataframe('ticket_2019.feather')

### Peek at the Data

In [4]:
ticket20.sample(5)

Unnamed: 0,Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Issuing Agency,Street Code1,Street Code2,Street Code3,Vehicle Expiration Date,Violation Location,Violation Precinct,Issuer Precinct,Issuer Code,Issuer Command,Issuer Squad,Violation Time,Time First Observed,Violation County,Violation In Front Of Or Opposite,House Number,Street Name,Intersecting Street,Date First Observed,Law Section,Sub Division,Violation Legal Code,Days Parking In Effect,From Hours In Effect,To Hours In Effect,Vehicle Color,Unregistered Vehicle?,Vehicle Year,Meter Number,Feet From Curb,Violation Post Code,Violation Description,No Standing or Stopping Violation,Hydrant Violation,Double Parking Violation
7088245,8785984115,HBG2517,NY,PAS,12/20/2019 12:00:00 AM,21,SUBN,NISSA,T,42420,26190,26220,20210502,40.0,40,40,352093,T201,O,1002A,,BX,F,652.0,Jackson Ave,,0,408,D1,,Y Y,0930A,1100A,BK,,2013,,0,J,,,,
1594373,4661501901,HCC9055,NY,PAS,08/05/2019 12:00:00 AM,36,SUBN,NISSA,V,0,0,0,0,,0,0,0,,,0800P,,BK,,,WB FLATLANDS AVE @ E,77TH ST,0,1180,B,T,,,,WH,,2019,,0,,PHTO SCHOOL ZN SPEED VIOLATION,,,
10252271,4680877303,JEB8356,NY,PAS,03/11/2020 12:00:00 AM,36,SUBN,SUBAR,V,0,0,0,0,,0,0,0,,,1253P,,QN,,,EB HILLSIDE AVE @ 25,1ST ST,0,1180,B,T,,,,RD,,2013,,0,,PHTO SCHOOL ZN SPEED VIOLATION,,,
6787618,1411268106,JHD2442,NY,PAS,12/12/2019 12:00:00 AM,14,SUBN,LINCO,P,45050,47568,40602,20210501,120.0,120,120,957382,0120,0000,0215P,,R,F,55.0,RICHMOND TERRACE,,0,408,F2,,BBBBBBB,ALL,ALL,,0.0,0,-,0,,,,,
5690992,8738373610,HKM6825,NY,OMS,11/12/2019 12:00:00 AM,14,2DSD,SMART,T,53970,58730,18950,20200930,90.0,90,90,356345,T301,X,0706A,,K,F,274.0,Keap St,,0,408,E3,,SCHOOL,0700A,0400P,WH,,2016,,0,SPCL,,,,


In [5]:
ticket20.shape

(12495734, 43)

In [43]:
# ticket19.sample(5)

Unnamed: 0,Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Issuing Agency,Street Code1,Street Code2,Street Code3,Vehicle Expiration Date,Violation Location,Violation Precinct,Issuer Precinct,Issuer Code,Issuer Command,Issuer Squad,Violation Time,Time First Observed,Violation County,Violation In Front Of Or Opposite,House Number,Street Name,Intersecting Street,Date First Observed,Law Section,Sub Division,Violation Legal Code,Days Parking In Effect,From Hours In Effect,To Hours In Effect,Vehicle Color,Unregistered Vehicle?,Vehicle Year,Meter Number,Feet From Curb,Violation Post Code,Violation Description,No Standing or Stopping Violation,Hydrant Violation,Double Parking Violation
5815938,8708361654,2350756,IN,PAS,12/05/2018,47,DELV,FRUEH,T,10810,34110,34130,88880088,13.0,13,13,368490,T102,N,1246P,,NY,F,151,8th Ave,,0,408,C4,,YYYYYY,0700A,0700P,WHITE,,0,,0,7,,,,
5274120,8692217074,HXA3576,NY,PAS,12/26/2018,74,SUBN,FORD,T,24590,0,0,20200227,103.0,103,103,362232,T402,F,0631P,,Q,I,W,170th St,20ft S/of 93rd Ave,0,408,I4,,YYYYYYY,,,BK,,2005,,0,43,,,,
5201912,8710262763,HFS5209,NY,PAS,12/09/2018,14,CONV,CADIL,T,35070,0,0,20200612,20.0,20,20,356225,T103,L,1222P,,NY,I,N,W 65th St,5ft E/of Riverside B,0,408,C,,YYYYYYY,,,BK,,2008,,0,21,,,,
7859406,8660572609,HAE9471,NY,PAS,03/25/2019,71,4DSD,FORD,T,67730,14880,62930,20191027,61.0,61,61,367955,T302,G,1214P,,K,F,2601,Ocean Pky,,0,408,E2,,YYYYYYY,,,BK,,0,,0,41,,,,
3432713,8580856930,BLANKPLATE,99,999,10/11/2018,74,MCY,YAMAH,T,53030,55530,82930,88880088,81.0,81,81,346348,T730,B,0914A,,K,F,579,Jefferson Ave,,0,408,E3,,YYYYYYY,,,RED,,0,,0,1,,,,


In [54]:
# ticket19.shape

(11467506, 43)

### Step 1 - Drop Missing/Blank Street Codes (for now)

Since we plan to use street codes to get a location of each ticket, we drop every ticket with no street code in Street Code1, Street Code2, Street Code3 (note: these represent ~30% of the 12mn rows - not an insignificant amount of data - potentially can group by Violation Location/Precinct perhaps?)

In [147]:
ticket20_clean=ticket20[(ticket20['Street Code1']!=0) | (ticket20['Street Code2']!=0) | (ticket20['Street Code3']!=0)].copy()

### Step 2 - Add Leading Zeros Back to Street Codes - ensure 5 digit

In [148]:
ticket20_clean['Street Code1'] = ticket20_clean['Street Code1'].apply('{0:0>5}'.format)
ticket20_clean['Street Code2'] = ticket20_clean['Street Code2'].apply('{0:0>5}'.format)
ticket20_clean['Street Code3'] = ticket20_clean['Street Code3'].apply('{0:0>5}'.format)

In [149]:
ticket20_clean.head()

Unnamed: 0,Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Issuing Agency,Street Code1,Street Code2,Street Code3,Vehicle Expiration Date,Violation Location,Violation Precinct,Issuer Precinct,Issuer Code,Issuer Command,Issuer Squad,Violation Time,Time First Observed,Violation County,Violation In Front Of Or Opposite,House Number,Street Name,Intersecting Street,Date First Observed,Law Section,Sub Division,Violation Legal Code,Days Parking In Effect,From Hours In Effect,To Hours In Effect,Vehicle Color,Unregistered Vehicle?,Vehicle Year,Meter Number,Feet From Curb,Violation Post Code,Violation Description,No Standing or Stopping Violation,Hydrant Violation,Double Parking Violation
0,1477633194,J58JKX,NJ,PAS,05/08/1972 12:00:00 AM,16,SDN,HONDA,P,8730,5130,5280,0,72.0,72,504,342924,T504,0,0523P,,K,F,270,43 ST,,0,408,E2,,YYYYYBB,0800A,0400P,BK,0.0,0,-,0,,,,,
1,1449715424,KRE6058,PA,PAS,08/29/1977 12:00:00 AM,98,SUBN,ME/BE,P,86530,71800,73110,0,77.0,77,77,961115,0077,0,0428P,,K,F,1953,UNION ST,,0,408,F1,,BBBBBBB,ALL,ALL,BLK,0.0,0,-,0,,,,,
2,1455779155,444326R,NJ,PAS,10/03/1988 12:00:00 AM,20,SDN,LEXUS,P,27030,41330,69230,0,88.0,88,730,535422,T730,0,0625A,,K,O,45,CLERMONT AVENUE,,0,408,D,,BBBBBBB,ALL,ALL,BLACK,0.0,0,-,0,,,,,
3,1458800908,F728330,OH,PAS,01/03/1990 12:00:00 AM,21,SDN,CHEVR,P,33030,93630,58730,0,90.0,90,301,355074,T301,0,1106A,,K,F,218,DIVISION AVE,,0,408,C,,BYBBYBB,1100A,1230P,,0.0,0,-,0,,,,,
4,1466038676,FMY9090,NY,PAS,02/14/1990 12:00:00 AM,21,SUBN,JEEP,S,45130,23930,68130,20210915,90.0,90,0,668676,KNBO,0,1253A,,K,F,850,GRAND ST,,0,408,D1,,BYBBYBB,1200A,0300A,GREY,0.0,2015,-,0,,,,,


In [150]:
ticket20_clean['Violation County'].unique()

array(['K', 'Q', 'BX', None, 'NY', 'R', 'BK', 'QN', 'MN', 'K   F', 'ST',
       'QUEEN', 'KINGS', 'QNS'], dtype=object)

### Step 3 - Map Violation County to Borough Code

Code Borough InTicket

- 1 Manhattan NY/MN
- 2 Bronx BX
- 3 Brooklyn K/BK/KINGS
- 4 Queens Q/QN/QUEEN/QNS
- 5 Staten Island R/ST
- 6 Nassau County
- 7 Westchester
- 8 New Jersey

Drop the 'K   F' - only one data point

In [151]:
# ticket20_clean[ticket20_clean['Violation County']=='K   F']
ticket20_clean.drop([94098],inplace=True)

In [152]:
ticket20_clean['Violation County'].unique()

array(['K', 'Q', 'BX', None, 'NY', 'R', 'BK', 'QN', 'MN', 'ST', 'QUEEN',
       'KINGS', 'QNS'], dtype=object)

In [153]:
boro_dict = {'NY':'1',
             'MN':'1',
             'BX':'2',
             'K':'3',
             'BK':'3',
             'KINGS':'3',
             'Q':'4',
             'QN':'4',
             'QUEEN':'4',
             'QNS':'4',
             'R':'5',
             'ST':'5',
             'None':'0'
            }

In [154]:
ticket20_clean['Boro Code'] = ticket20_clean['Violation County'].map(boro_dict)
ticket20_clean.head()

Unnamed: 0,Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Issuing Agency,Street Code1,Street Code2,Street Code3,Vehicle Expiration Date,Violation Location,Violation Precinct,Issuer Precinct,Issuer Code,Issuer Command,Issuer Squad,Violation Time,Time First Observed,Violation County,Violation In Front Of Or Opposite,House Number,Street Name,Intersecting Street,Date First Observed,Law Section,Sub Division,Violation Legal Code,Days Parking In Effect,From Hours In Effect,To Hours In Effect,Vehicle Color,Unregistered Vehicle?,Vehicle Year,Meter Number,Feet From Curb,Violation Post Code,Violation Description,No Standing or Stopping Violation,Hydrant Violation,Double Parking Violation,Boro Code
0,1477633194,J58JKX,NJ,PAS,05/08/1972 12:00:00 AM,16,SDN,HONDA,P,8730,5130,5280,0,72.0,72,504,342924,T504,0,0523P,,K,F,270,43 ST,,0,408,E2,,YYYYYBB,0800A,0400P,BK,0.0,0,-,0,,,,,,3
1,1449715424,KRE6058,PA,PAS,08/29/1977 12:00:00 AM,98,SUBN,ME/BE,P,86530,71800,73110,0,77.0,77,77,961115,0077,0,0428P,,K,F,1953,UNION ST,,0,408,F1,,BBBBBBB,ALL,ALL,BLK,0.0,0,-,0,,,,,,3
2,1455779155,444326R,NJ,PAS,10/03/1988 12:00:00 AM,20,SDN,LEXUS,P,27030,41330,69230,0,88.0,88,730,535422,T730,0,0625A,,K,O,45,CLERMONT AVENUE,,0,408,D,,BBBBBBB,ALL,ALL,BLACK,0.0,0,-,0,,,,,,3
3,1458800908,F728330,OH,PAS,01/03/1990 12:00:00 AM,21,SDN,CHEVR,P,33030,93630,58730,0,90.0,90,301,355074,T301,0,1106A,,K,F,218,DIVISION AVE,,0,408,C,,BYBBYBB,1100A,1230P,,0.0,0,-,0,,,,,,3
4,1466038676,FMY9090,NY,PAS,02/14/1990 12:00:00 AM,21,SUBN,JEEP,S,45130,23930,68130,20210915,90.0,90,0,668676,KNBO,0,1253A,,K,F,850,GRAND ST,,0,408,D1,,BYBBYBB,1200A,0300A,GREY,0.0,2015,-,0,,,,,,3


### Step 4 - Create lookup String to get Street Names

In [155]:
ticket20_clean['Street1LU'] = 'PF'+ticket20_clean['Boro Code']+ticket20_clean['Street Code1']
ticket20_clean['Street2LU'] = 'PF'+ticket20_clean['Boro Code']+ticket20_clean['Street Code2']
ticket20_clean['Street3LU'] = 'PF'+ticket20_clean['Boro Code']+ticket20_clean['Street Code3']

In [156]:
ticket20_clean.head()

Unnamed: 0,Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Issuing Agency,Street Code1,Street Code2,Street Code3,Vehicle Expiration Date,Violation Location,Violation Precinct,Issuer Precinct,Issuer Code,Issuer Command,Issuer Squad,Violation Time,Time First Observed,Violation County,Violation In Front Of Or Opposite,House Number,Street Name,Intersecting Street,Date First Observed,Law Section,Sub Division,Violation Legal Code,Days Parking In Effect,From Hours In Effect,To Hours In Effect,Vehicle Color,Unregistered Vehicle?,Vehicle Year,Meter Number,Feet From Curb,Violation Post Code,Violation Description,No Standing or Stopping Violation,Hydrant Violation,Double Parking Violation,Boro Code,Street1LU,Street2LU,Street3LU
0,1477633194,J58JKX,NJ,PAS,05/08/1972 12:00:00 AM,16,SDN,HONDA,P,8730,5130,5280,0,72.0,72,504,342924,T504,0,0523P,,K,F,270,43 ST,,0,408,E2,,YYYYYBB,0800A,0400P,BK,0.0,0,-,0,,,,,,3,PF308730,PF305130,PF305280
1,1449715424,KRE6058,PA,PAS,08/29/1977 12:00:00 AM,98,SUBN,ME/BE,P,86530,71800,73110,0,77.0,77,77,961115,0077,0,0428P,,K,F,1953,UNION ST,,0,408,F1,,BBBBBBB,ALL,ALL,BLK,0.0,0,-,0,,,,,,3,PF386530,PF371800,PF373110
2,1455779155,444326R,NJ,PAS,10/03/1988 12:00:00 AM,20,SDN,LEXUS,P,27030,41330,69230,0,88.0,88,730,535422,T730,0,0625A,,K,O,45,CLERMONT AVENUE,,0,408,D,,BBBBBBB,ALL,ALL,BLACK,0.0,0,-,0,,,,,,3,PF327030,PF341330,PF369230
3,1458800908,F728330,OH,PAS,01/03/1990 12:00:00 AM,21,SDN,CHEVR,P,33030,93630,58730,0,90.0,90,301,355074,T301,0,1106A,,K,F,218,DIVISION AVE,,0,408,C,,BYBBYBB,1100A,1230P,,0.0,0,-,0,,,,,,3,PF333030,PF393630,PF358730
4,1466038676,FMY9090,NY,PAS,02/14/1990 12:00:00 AM,21,SUBN,JEEP,S,45130,23930,68130,20210915,90.0,90,0,668676,KNBO,0,1253A,,K,F,850,GRAND ST,,0,408,D1,,BYBBYBB,1200A,0300A,GREY,0.0,2015,-,0,,,,,,3,PF345130,PF323930,PF368130


## Load Street Names DB and Create Lookup to StreetName Dict

In [157]:
stnames = pd.read_csv('/Users/stuartong/uberticketsyelp/Data/snd20d/snd20Dcow.txt')
stnames.columns=['Raw']

# from SND_metadata.pdf - location of stnames and Lookup prefixes
stnames['Street Name']=stnames['Raw'].str[2:34].str.strip()
stnames['Lookup']=stnames['Raw'].str[34:42]
stnames_dict= pd.Series(stnames['Street Name'].values,index=stnames['Lookup']).to_dict()

In [158]:
stnames

Unnamed: 0,Raw,Street Name,Lookup
0,11 0 BOND STREET PF1006890101...,0 BOND STREET,PF100689
1,11 1 AVENUE PF1100100101...,1 AVENUE,PF110010
2,11 1 AVENUE LOOP PF1317200102...,1 AVENUE LOOP,PF131720
3,11 1 AVENUE LOWER NB ROADBED VF1100100701...,1 AVENUE LOWER NB ROADBED,VF110010
4,11 1 AVENUE NORTHBOUND ROADBED VF1100100601...,1 AVENUE NORTHBOUND ROADBED,VF110010
...,...,...,...
106756,15ZENI PLACE VS5341050102...,ZENI PLACE,VS534105
106757,15ZEPHYR AVENUE PF5566960101...,ZEPHYR AVENUE,PF556696
106758,15ZEV PLACE PF5703670101...,ZEV PLACE,PF570367
106759,15ZOE STREET PF5567150101...,ZOE STREET,PF556715


In [159]:
stnames_dict

{'PF100689': '0 BOND STREET',
 'PF110010': '1 AVENUE',
 'PF131720': '1 AVENUE LOOP',
 'VF110010': 'VETERANS WAY',
 'PF101127': '1 AVENUE TUNNEL',
 'PF113655': '1 DRIVE',
 'VS112740': '1 PARK',
 'VS100058': 'PRECINCT    1 STATIONHOUSE',
 'PF110015': '1 PLACE',
 'PF100058': '1 PRECINCT STATIONHOUSE',
 'VS101232': 'FIRST ST GARDEN',
 '        ': 'TREMONT AVENUE WB ROADBED',
 'VS101346': 'ONE VANDERBILT',
 'VS111115': 'WSH',
 'PF110110': '2 AVENUE',
 'VF110110': 'YITZHAK RABIN WAY',
 'VS110780': 'SECOND AVENUE NB ENTRANCE HRD',
 'VF127310': 'WHITE PLAINS ROAD LINE',
 'VS132622': 'TWO BRIDGES URA SITE    7 HOUSE',
 'PF113470': '2 DRIVE',
 'PF110115': '2 PLACE',
 'VS120490': 'ROOSEVELT DRIVE SB ROADBED',
 'VF197710': '3 AVE BRDG PED AND BIKE PATH',
 'VS197510': 'WEST INTREPID MUSEUM PED OVPS',
 'VF197510': 'WEST 178-179 STREET PED OVERPASS',
 'PF110210': '3 AVENUE',
 'VS197850': 'THIRD AVENUE BRIDGE APPROACH',
 'VS197710': 'THIRD AVENUE BRIDGE BIKE PATH',
 'VS110785': 'THIRD AVENUE BRIDGE SB

### Step 5 - Get Street Names 

In [160]:
ticket20_clean['Street1'] = ticket20_clean['Street1LU'].map(stnames_dict)
ticket20_clean['Street2'] = ticket20_clean['Street2LU'].map(stnames_dict)
ticket20_clean['Street3'] = ticket20_clean['Street3LU'].map(stnames_dict)

In [162]:
ticket20_clean.sample(20)

Unnamed: 0,Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Issuing Agency,Street Code1,Street Code2,Street Code3,Vehicle Expiration Date,Violation Location,Violation Precinct,Issuer Precinct,Issuer Code,Issuer Command,Issuer Squad,Violation Time,Time First Observed,Violation County,Violation In Front Of Or Opposite,House Number,Street Name,Intersecting Street,Date First Observed,Law Section,Sub Division,Violation Legal Code,Days Parking In Effect,From Hours In Effect,To Hours In Effect,Vehicle Color,Unregistered Vehicle?,Vehicle Year,Meter Number,Feet From Curb,Violation Post Code,Violation Description,No Standing or Stopping Violation,Hydrant Violation,Double Parking Violation,Boro Code,Street1LU,Street2LU,Street3LU,Street1,Street2,Street3
5741008,8753276530,GXF4667,NY,PAS,11/13/2019 12:00:00 AM,37,4DSD,NISSA,T,64190,0,0,20210409,109.0,109,109,367318,T401,U,1220P,1201P,Q,I,N,Station Rd,40ft W/of 162nd St,0,408,D1,,YYYYYY,0900A,0700P,GY,,2015,,0,06-A,,,,,4,PF464190,PF400000,PF400000,STATION ROAD,,
7241701,1464681430,M72ANA,NJ,PAS,12/26/2019 12:00:00 AM,31,SUBN,GMC,P,34290,10510,10610,0,,0,401,962611,0401,0000,0549P,,NY,F,122,W 26 STREET,,0,408,E9,,YYYYYBB,0700A,0700P,BLUE,0.0,0,-,0,,,,,,1,PF134290,PF110510,PF110610,WEST 26 STREET,AVENUE OF THE AMERICAS,7 AVENUE
1202178,8620729421,37662MK,NY,COM,07/26/2019 12:00:00 AM,69,VAN,CHEVR,T,10210,17910,17930,20200531,17.0,17,17,346167,T106,F,1134A,,NY,F,750,3rd Ave,,0,408,H1,,YYYYYY,0700A,0400P,WH,,2018,,0,10,,,,,1,PF110210,PF117910,PF117930,3 AVENUE,EAST 46 STREET,EAST 47 STREET
7345501,8748436860,HXN2923,NY,PAS,12/28/2019 12:00:00 AM,38,2DSD,VOLKS,T,34790,10610,13610,20200227,18.0,18,18,363509,T106,J,0834A,1200P,NY,F,200,W 51st St,,0,408,I3,,Y,0800A,1200A,WH,,2017,,0,21,,,,,1,PF134790,PF110610,PF113610,WEST 51 STREET,7 AVENUE,BROADWAY
5281816,4670376378,GTE4171,NY,PAS,11/01/2019 12:00:00 AM,36,4DSD,INFIN,V,0,10810,0,0,,0,0,0,,,1136A,,QN,,,WB HEMPSTEAD AVE @ 9,8TH AVE,0,1180,B,T,,,,BK,,2011,,0,,PHTO SCHOOL ZN SPEED VIOLATION,,,,4,PF400000,PF410810,PF400000,,VAN WYCK EXPRESSWAY EXIT 9 NB,
9907642,8805567036,X64LYD,NJ,PAS,03/04/2020 12:00:00 AM,14,4DSD,TOYOT,T,42730,30430,48430,88880088,75.0,75,75,357357,T301,I,0411P,,K,O,3364,Fulton St,,0,408,E2,,YYYYYYY,,,GREY,,0,,0,C,,,,,3,PF342730,PF330430,PF348430,FULTON STREET,CRESCENT STREET,HEMLOCK STREET
6294934,8647843198,HZK8022,NY,PAS,11/26/2019 12:00:00 AM,38,SUBN,BMW,T,51090,22890,23020,20200528,103.0,103,103,370818,T402,G,0218P,,Q,F,144-21,Jamaica Ave,,0,408,C3,,YYYYYY,0900A,0700A,GY,,2018,,0,17,,,,,4,PF451090,PF422890,PF423020,JAMAICA AVENUE,144 PLACE,145 STREET
3079551,8742485915,XCYV14,NJ,PAS,09/10/2019 12:00:00 AM,42,VAN,FORD,T,17190,0,0,88880088,9.0,9,9,358901,T101,N,0219P,,NY,I,N,E 10th St,5ft W/of Broadway,0,408,H1,,YYYYY,0830A,0600P,YELLO,,0,,0,K,,,,,1,PF117190,PF100000,PF100000,EAST 10 STREET,,
6213581,8788868035,92400MD,NY,COM,11/25/2019 12:00:00 AM,46,DELV,WORKH,T,25390,18910,18930,20200228,23.0,23,23,361347,T103,M,1134A,,NY,F,1407,Madison Ave,,0,408,I4,,YYYYYYY,,,WH,,2004,,0,A,,,,,1,PF125390,PF118910,PF118930,MADISON AVENUE,EAST 97 STREET,EAST 98 STREET
10293681,8815102097,GEP5740,NY,PAS,03/12/2020 12:00:00 AM,21,4DSD,ME/BE,T,89730,80330,56030,20210306,77.0,77,77,369612,T301,W,0836A,,K,F,824B,Washington Ave,,0,408,J3,,YYYYYY,0830A,0900A,GY,,2017,,0,19,,,,,3,PF389730,PF380330,PF356030,WASHINGTON AVENUE,ST JOHNS PLACE,LINCOLN PLACE


## Load Centerline Database

In [163]:
cl_df= pd.read_csv('/Users/stuartong/uberticketsyelp/Data/Centerline.csv')
cl_df.sample(10)

Unnamed: 0,the_geom,L_LOW_HN,PHYSICALID,L_HIGH_HN,R_LOW_HN,R_HIGH_HN,L_ZIP,R_ZIP,L_BLKFC_ID,R_BLKFC_ID,ST_LABEL,STATUS,BIKE_LANE,BOROCODE,ST_WIDTH,CREATED,MODIFIED,TRAFDIR,RW_TYPE,FRM_LVL_CO,TO_LVL_CO,SNOW_PRI,PRE_MODIFI,PRE_DIRECT,PRE_TYPE,POST_TYPE,POST_DIREC,POST_MODIF,FULL_STREE,ST_NAME,BIKE_TRAFD,SHAPE_Leng
96479,MULTILINESTRING ((-73.8103684877966 40.8245048...,,98901,,,,10465.0,10465.0,1422600634,1422609152,DOCK ST,2,,2,20,11/29/2007 12:00:00 AM +0000,03/17/2017 12:00:00 AM +0000,TW,1,13,13,V,,,,ST,,,DOCK ST,DOCK,,53.872311
8330,MULTILINESTRING ((-73.93163814734841 40.808050...,,176159,,,,10451.0,10451.0,0,0,3 AVENUE BRIDGE BIKE PATH,2,1.0,2,0,05/26/2011 12:00:00 AM +0000,04/19/2018 12:00:00 AM +0000,NV,3,17,17,V,,,,PATH,,,THIRD AVENUE BR BIKE PATH,THIRD AVENUE BR BIKE,TW,241.830151
113713,MULTILINESTRING ((-73.87623300358646 40.768085...,23-001,8981,23-009,23-000,23-008,11369.0,11369.0,102265463,102267264,94 ST,2,,4,50,11/29/2007 12:00:00 AM +0000,04/15/2019 12:00:00 AM +0000,TW,1,13,13,H,,,,ST,,,94 ST,94,,144.573649
58893,MULTILINESTRING ((-73.8808139802381 40.6627262...,,190561,,,,11208.0,11208.0,2022601099,2022601112,LINDEN BLVD,2,,3,32,11/29/2007 12:00:00 AM +0000,03/15/2021 12:00:00 AM +0000,FT,1,13,13,C,,,,BLVD,,,LINDEN BLVD,LINDEN,,627.672214
98197,MULTILINESTRING ((-73.97988298176301 40.783800...,0,3007,0,2220,2236,10024.0,10024.0,1322607781,1322602619,BROADWAY,2,,1,40,11/29/2007 12:00:00 AM +0000,04/15/2019 12:00:00 AM +0000,FT,1,13,13,C,,,,,,,BROADWAY,BROADWAY,,285.871357
100966,MULTILINESTRING ((-74.10302593083756 40.615781...,2,102945,30,1,31,10301.0,10301.0,1622607675,1622610241,NIAGARA ST,2,,5,30,11/29/2007 12:00:00 AM +0000,03/17/2017 12:00:00 AM +0000,TF,1,13,13,H,,,,ST,,,NIAGARA ST,NIAGARA,,356.526641
31204,MULTILINESTRING ((-73.96842220291677 40.611920...,,124108,,,,11230.0,11230.0,1822609837,1822609729,AVE O,2,,3,42,11/29/2007 12:00:00 AM +0000,08/28/2020 12:00:00 AM +0000,TW,1,13,13,C,,,,AVE,,,O AVE,O,,37.169895
77471,MULTILINESTRING ((-73.93663370560812 40.585536...,41,102191,41,42,42,11235.0,11235.0,0,0,LOSEE TER,2,,3,5,11/29/2007 12:00:00 AM +0000,11/03/2015 12:00:00 AM +0000,NV,6,13,13,,,,,TER,,,LOSEE TER,LOSEE,,130.385122
99190,MULTILINESTRING ((-73.74585697785389 40.697777...,207-001,7533,207-099,207-000,207-098,11411.0,11411.0,72264772,72264920,116 AVE,2,,4,38,11/29/2007 12:00:00 AM +0000,08/28/2020 12:00:00 AM +0000,TW,1,13,13,C,,,,AVE,,,116 AVE,116,,258.785262
73696,MULTILINESTRING ((-74.006298417107 40.72254950...,430,656,468,429,475,10013.0,10013.0,1222601121,1222602756,CANAL ST,2,,1,62,11/29/2007 12:00:00 AM +0000,04/15/2019 12:00:00 AM +0000,TW,1,13,13,C,,,,ST,,,CANAL ST,CANAL,,505.348076


In [11]:
cl_df= pd.read_csv('/Users/stuartong/uberticketsyelp/Data/Centerline.csv')

In [12]:
cl_df

Unnamed: 0,the_geom,L_LOW_HN,PHYSICALID,L_HIGH_HN,R_LOW_HN,R_HIGH_HN,L_ZIP,R_ZIP,L_BLKFC_ID,R_BLKFC_ID,ST_LABEL,STATUS,BIKE_LANE,BOROCODE,ST_WIDTH,CREATED,MODIFIED,TRAFDIR,RW_TYPE,FRM_LVL_CO,TO_LVL_CO,SNOW_PRI,PRE_MODIFI,PRE_DIRECT,PRE_TYPE,POST_TYPE,POST_DIREC,POST_MODIF,FULL_STREE,ST_NAME,BIKE_TRAFD,SHAPE_Leng
0,MULTILINESTRING ((-73.87861544017795 40.861915...,,164809,,,,10458.0,10458.0,0,0,MITSUBISHI WILD WETLAND TRL,2,,2,0,08/19/2014 12:00:00 AM +0000,12/22/2015 12:00:00 AM +0000,NV,6,13,13,,,,,TRL,,,MITSUBISHI WILD WETLAND TRL,MITSUBISHI WILD WETLAND,,1026.077523
1,MULTILINESTRING ((-73.7729030190404 40.7778042...,215-001,6110,215-027,215-000,215-026,11360.0,11360.0,112261166,112262650,28 AVE,2,,4,35,11/29/2007 12:00:00 AM +0000,08/28/2020 12:00:00 AM +0000,TW,1,13,13,S,,,,AVE,,,28 AVE,28,,258.859740
2,MULTILINESTRING ((-73.90711253281893 40.905186...,5631,61140,5699,5602,5698,10471.0,10471.0,1522604870,1522601877,ARLINGTON AVE,2,,2,42,11/29/2007 12:00:00 AM +0000,08/28/2020 12:00:00 AM +0000,TW,1,13,13,H,,,,AVE,,,ARLINGTON AVE,ARLINGTON,,454.932922
3,MULTILINESTRING ((-73.98181677514282 40.687329...,317,145494,399,316,360,11217.0,11217.0,1922603730,1922612977,SCHERMERHORN ST,2,2.0,3,50,11/29/2007 12:00:00 AM +0000,04/15/2019 12:00:00 AM +0000,TW,1,13,13,C,,,,ST,,,SCHERMERHORN ST,SCHERMERHORN,TW,609.424375
4,MULTILINESTRING ((-73.8300230194527 40.7140059...,120-011,12438,120-011,0,0,11415.0,11415.0,92261717,92269521,QUEENS BLVD,2,,4,40,11/29/2007 12:00:00 AM +0000,04/15/2019 12:00:00 AM +0000,TF,1,13,13,C,,,,BLVD,,,QUEENS BLVD,QUEENS,,47.399228
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120049,MULTILINESTRING ((-73.93548534289174 40.826725...,2682,19130,2698,0,0,10039.0,10039.0,1322602332,1322607645,ADAM CLAYTON POWELL JR BLVD,2,,1,24,11/29/2007 12:00:00 AM +0000,08/28/2020 12:00:00 AM +0000,TF,1,13,13,C,,,,BLVD,,,ADAM POWELL BLVD,ADAM POWELL,,326.475365
120050,MULTILINESTRING ((-73.86135368776299 40.724607...,62-001,34628,62-099,62-000,62-098,11374.0,11374.0,92268709,92265209,BOELSEN CRES,2,,4,30,11/29/2007 12:00:00 AM +0000,08/28/2020 12:00:00 AM +0000,TW,1,13,13,S,,,,CRES,,,BOELSEN CRES,BOELSEN,,431.092532
120051,MULTILINESTRING ((-73.72436994161143 40.725305...,,22078,,,,11426.0,11426.0,72260009,72260119,CROSS ISLAND PKWY,2,,4,32,06/16/2009 12:00:00 AM +0000,08/28/2020 12:00:00 AM +0000,TF,2,9,9,C,,,,,,,CIP,CIP,,328.648160
120052,MULTILINESTRING ((-74.17256103080061 40.602977...,2,101850,198,1,199,10314.0,10314.0,1622606072,1622606138,SPARK PL,2,,5,24,11/29/2007 12:00:00 AM +0000,03/17/2017 12:00:00 AM +0000,TF,1,13,13,C,,,,PL,,,SPARK PL,SPARK,,710.949045


In [37]:
cl_df[(cl_df['FULL_STREE'].str.contains('63 ST')) & (cl_df['BOROCODE']==1) & (cl_df['RW_TYPE']==1)]['the_geom'].values.tolist()

['MULTILINESTRING ((-73.94224145731903 40.8378434363573, -73.9430091189847 40.838167191816105, -73.94324855801871 40.83826867971469, -73.94373631643596 40.838469720598646))',
 'MULTILINESTRING ((-73.96251524054111 40.763112733624226, -73.96155703699257 40.762708370255645))',
 'MULTILINESTRING ((-73.95914883433508 40.761698733149856, -73.9583419202339 40.76136331545021))',
 'MULTILINESTRING ((-73.9647554491405 40.7640570700971, -73.96251524054111 40.763112733624226))',
 'MULTILINESTRING ((-73.95733230614904 40.76100332691441, -73.95704972900496 40.76089447250021, -73.95688540628484 40.760826085028114))',
 'MULTILINESTRING ((-73.97118520979612 40.76677028966978, -73.96958441466369 40.76609086532323))',
 'MULTILINESTRING ((-73.94010227609465 40.83694508452738, -73.94204096173274 40.83776058068131))',
 'MULTILINESTRING ((-73.98011181561736 40.770528368678384, -73.98204042141927 40.77134357833112))',
 'MULTILINESTRING ((-73.95790133695364 40.76118089671778, -73.95779088519572 40.76117998567

In [38]:
cl_df[(cl_df['FULL_STREE'].str.contains('2 AVE')) & (cl_df['BOROCODE']==1) & (cl_df['RW_TYPE']==1)]['the_geom'].values.tolist()

['MULTILINESTRING ((-73.98942914226258 40.726187462929246, -73.98898482054337 40.72677887514424))',
 'MULTILINESTRING ((-73.97126234449263 40.75112224029439, -73.97080361265334 40.75174820121419))',
 'MULTILINESTRING ((-73.98590859659755 40.73101907998302, -73.98575300286302 40.731232945334646, -73.98544389943555 40.731657806760126))',
 'MULTILINESTRING ((-73.98767161543276 40.728595104268415, -73.9872497770919 40.72918616120114))',
 'MULTILINESTRING ((-73.96988589832242 40.75299884867012, -73.96943102550703 40.753623508485795))',
 'MULTILINESTRING ((-73.97636377328153 40.74409868112593, -73.97588117681829 40.74477922428447))',
 'MULTILINESTRING ((-73.96760620834938 40.75613198849283, -73.96715254341488 40.75676519476172))',
 'MULTILINESTRING ((-73.95836533252375 40.768797733573905, -73.95786679270606 40.76948404767775))',
 'MULTILINESTRING ((-73.98311380691291 40.73483804730168, -73.98270105748783 40.73543184572241))',
 'MULTILINESTRING ((-73.9324129069313 40.80441537406221, -73.93218