In [9]:
import pandas as pd
import numpy as np
import feather
import pickle
import pickle5
import re
import sqlite3
import geopandas as gpd

# Note: pandarallel works well on macOS but has issues on Windows.
# See the Windows requirements at https://github.com/nalepae/pandarallel
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True)

# Display settings: show every column of a frame and render floats with
# three decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# Open the project's SQLite database and keep a cursor on it.
# sqlite3.connect() creates the file named in the quotes if it does not
# exist yet.
con = sqlite3.connect('streetsofnyc.db')
cur = con.cursor()

INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


## Load LION - Street Data

In [2]:
lion = gpd.read_file("Data/LION/LION.shp")

# The *_Hyphen columns hold the address limits as text. Some values contain
# a '-'; only the portion before the dash is kept, converted to float.
hyphen_sources = {
    'l_lowadd': 'LLo_Hyphen',
    'l_highadd': 'LHi_Hyphen',
    'r_lowadd': 'RLo_Hyphen',
    'r_highadd': 'RHi_Hyphen',
}
for target_col, source_col in hyphen_sources.items():
    lion[target_col] = lion[source_col].str.split('-').str[0].astype(float)

# Blank out zero low addresses so they cannot win the min() below.
# The columns are float at this point, so the value to replace is the number
# 0 (a string key '0' never matches), and the result is assigned back rather
# than relying on an in-place edit of a column selection.
# NOTE(review): assumes a low address of 0 means "no address on this side" -
# confirm against the LION data dictionary.
for low_col in ('r_lowadd', 'l_lowadd'):
    lion[low_col] = lion[low_col].replace(0, np.nan)

# Combined lower and upper address limit for each street segment, taken
# across the left and right sides (NaN on one side is ignored).
lion['c_lowadd'] = lion[['l_lowadd', 'r_lowadd']].min(axis=1, skipna=True)
lion['c_highadd'] = lion[['l_highadd', 'r_highadd']].max(axis=1)

lion.head(5)

Unnamed: 0,OBJECTID,Street,SAFStreetN,FeatureTyp,SegmentTyp,IncExFlag,RB_Layer,NonPed,TrafDir,TrafSrc,SpecAddr,FaceCode,SeqNum,StreetCode,SAFStreetC,LGC1,LGC2,LGC3,LGC4,LGC5,LGC6,LGC7,LGC8,LGC9,BOE_LGC,SegmentID,SegCount,LocStatus,LZip,RZip,LBoro,RBoro,L_CD,R_CD,LATOMICPOL,RATOMICPOL,LCT2010,LCT2010Suf,RCT2010,RCT2010Suf,LCB2010,LCB2010Suf,RCB2010,RCB2010Suf,LCT2000,LCT2000Suf,RCT2000,RCT2000Suf,LCB2000,LCB2000Suf,RCB2000,RCB2000Suf,LCT1990,LCT1990Suf,RCT1990,RCT1990Suf,LAssmDist,LElectDist,RAssmDist,RElectDist,SplitElect,LSchlDist,RSchlDist,SplitSchl,LSubSect,RSubSect,SanDistInd,MapFrom,MapTo,BoroBndry,MH_RI_Flag,XFrom,YFrom,XTo,YTo,ArcCenterX,ArcCenterY,CurveFlag,Radius,NodeIDFrom,NodeIDTo,NodeLevelF,NodeLevelT,ConParity,Twisted,RW_TYPE,PhysicalID,GenericID,NYPDID,FDNYID,LBlockFace,RBlockFace,LegacyID,Status,StreetWidt,StreetWi_1,StreetWi_2,BikeLane,BIKE_TRAFD,ACTIVE_FLA,POSTED_SPE,Snow_Prior,Number_Tra,Number_Par,Number_Tot,Carto_Disp,FCC,ROW_Type,LLo_Hyphen,LHi_Hyphen,RLo_Hyphen,RHi_Hyphen,FromLeft,ToLeft,FromRight,ToRight,Join_ID,L_PD_Servi,R_PD_Servi,TRUCK_ROUT,Shape__Len,geometry,l_lowadd,l_highadd,r_lowadd,r_highadd,c_lowadd,c_highadd
0,1,EAST 168 STREET,,0,U,,B,,T,DOT,,2510,3070,226700,,1,,,,,,,,,1,78126,1,X,10456,10456,2.0,2.0,203,203,402,101,149,,185,,3001,,2000,,149,,137,,4000,,1000,,149,,137,,79,40,79,40,,9,9,,1B,1B,,3D,3D,,,1010964,241812,1011265,241555,0,0,,0,47740,9045677,M,M,,,1,35231.0,30694.0,,,1422600653,1422602017,78126,2,34.0,34.0,,,,,25,S,2,,4,,,,599.0,699.0,596.0,716.0,599,699,596,716,2251001000000,,,,396.031,"LINESTRING (-73.90347 40.83036, -73.90238 40.8...",599.0,699.0,596.0,716.0,596.0,716.0
1,2,WEST 192 STREET,,0,U,,B,,A,DOT,,7984,40,274810,,1,,,,,,,,,1,79796,1,,10468,10468,2.0,2.0,207,207,302,104,265,,265,,2000,,1004,,265,,265,,3001,,1003,,265,,265,,78,45,78,59,,10,10,,1A,1A,,3C,3C,,,1011577,255024,1011335,255164,0,0,,0,48679,48678,M,M,,,1,35248.0,30711.0,,,1522607129,1522607721,79796,2,30.0,30.0,,,,,25,S,1,,3,,,,58.0,98.0,63.0,99.0,58,98,63,99,2798401000000,,,,279.361,"LINESTRING (-73.90120 40.86662, -73.90207 40.8...",58.0,98.0,63.0,99.0,58.0,99.0
2,3,UNION AVENUE,,0,U,,B,,W,DOT,,7280,130,270420,,1,,,,,,,,,1,77356,4,X,10459,10459,2.0,2.0,203,203,402,401,135,,131,,2000,,3006,,135,,131,,4000,,4001,,135,,131,,79,46,79,26,,12,12,,1A,1A,,6C,6C,,,1011601,239640,1011786,240230,0,0,,0,47288,47822,M,M,,,1,35252.0,30715.0,,,1422603726,1422604132,77356,2,34.0,34.0,,,,,25,S,1,,3,,,,1017.0,1079.0,1016.0,1084.0,1017,1079,1016,1084,2728001000000,,,,618.327,"LINESTRING (-73.90118 40.82440, -73.90051 40.8...",1017.0,1079.0,1016.0,1084.0,1016.0,1084.0
3,4,UNION AVENUE,BEHAGEN PLAYGROUND COMFORT STA,0,U,,B,,W,DOT,X,7280,130,270420,212795.0,1,,,,,,,,,1,77356,4,X,10459,10459,2.0,2.0,203,203,402,401,135,,131,,2000,,3006,,135,,131,,4000,,4001,,135,,131,,79,46,79,26,,12,12,,1A,1A,,6C,6C,,,1011601,239640,1011786,240230,0,0,,0,47288,47822,M,M,,,1,35252.0,30715.0,,,1422603726,1422604132,77356,2,34.0,34.0,,,,,25,S,1,,3,,,,,,,,0,0,0,0,21279502000000X,,,,618.327,"LINESTRING (-73.90118 40.82440, -73.90051 40.8...",,,,,,
4,5,UNION AVENUE,BEHAGEN PLAYGROUND FIELD NORTH,0,U,,B,,W,DOT,X,7280,130,270420,212795.0,1,,,,,,,,,1,77356,4,X,10459,10459,2.0,2.0,203,203,402,401,135,,131,,2000,,3006,,135,,131,,4000,,4001,,135,,131,,79,46,79,26,,12,12,,1A,1A,,6C,6C,,,1011601,239640,1011786,240230,0,0,,0,47288,47822,M,M,,,1,35252.0,30715.0,,,1422603726,1422604132,77356,2,34.0,34.0,,,,,25,S,1,,3,,,,,,,,0,0,0,0,21279503000000X,,,,618.327,"LINESTRING (-73.90118 40.82440, -73.90051 40.8...",,,,,,


In [3]:
%%time

# Write the LION frame to the database as table 'LION', replacing any
# previous copy. The geometry column is dropped first because geometry
# data is not supported in SQLite.
lion.drop(columns=['geometry']).to_sql(
    name='LION',
    con=con,
    if_exists='replace',
    index=False,
)

CPU times: user 19.5 s, sys: 1.1 s, total: 20.6 s
Wall time: 23 s


## Load cd_indicators - Community District Demographics

In [4]:
# Read the community-district indicators CSV into a DataFrame.
cd_indicators = pd.read_csv(
    filepath_or_buffer='Data/cd_demographics/cd_indicators_overall.csv'
)

In [14]:
# Preview only the first rows: with display.max_columns set to None a bare
# frame would render every row at full width.
cd_indicators.head(10)

Unnamed: 0,the_geom,cartodb_id,the_geom_webmercator,acres,acs_tooltip,acs_tooltip_2,acs_tooltip_3,area_sqmi,borocd,cb_email,cb_website,cd_full_title,cd_short_title,cd_son_fy2018,cd_tot_bldgs,cd_tot_resunits,count_hosp_clinic,count_libraries,count_parks,count_public_schools,crime_count,crime_count_boro,crime_count_nyc,crime_per_1000,crime_per_1000_boro,crime_per_1000_nyc,female_10_14,female_15_19,female_20_24,female_25_29,female_30_34,female_35_39,female_40_44,female_45_49,female_5_9,female_50_54,female_55_59,female_60_64,female_65_69,female_70_74,female_75_79,female_80_84,female_85_over,female_under_5,fp_100_area,fp_100_bldg,fp_100_cost_burden,fp_100_cost_burden_value,fp_100_mhhi,fp_100_mortg_value,fp_100_openspace,fp_100_ownerocc,fp_100_ownerocc_value,fp_100_permortg,fp_100_pop,fp_100_rent_burden,fp_100_rent_burden_value,fp_100_resunits,fp_100_openspace2,fp_500_bldg,fp_500_cost_burden,fp_500_cost_burden_value,fp_500_mhhi,fp_500_mortg_value,fp_500_openspace,fp_500_ownerocc,fp_500_ownerocc_value,fp_500_permortg,fp_500_pop,fp_500_rent_burden,fp_500_rent_burden_value,fp_500_resunits,lep_rate,lep_rate_boro,lep_rate_nyc,lot_area_commercial_office,lot_area_industrial_manufacturing,lot_area_mixed_use,lot_area_open_space,lot_area_other_no_data,lot_area_parking,lot_area_public_facility_institution,lot_area_res_1_2_family_bldg,lot_area_res_multifamily_elevator,lot_area_res_multifamily_walkup,lot_area_transportation_utility,lot_area_vacant,lots_commercial_office,lots_industrial_manufacturing,lots_mixed_use,lots_open_space,lots_other_no_data,lots_parking,lots_public_facility_institution,lots_res_1_2_family_bldg,lots_res_multifamily_elevator,lots_res_multifamily_walkup,lots_total,lots_transportation_utility,lots_vacant,male_10_14,male_15_19,male_20_24,male_25_29,male_30_34,male_35_39,male_40_44,male_45_49,male_5_9,male_50_54,male_55_59,male_60_64,male_65_69,male_70_74,male_75_79,male_80_84,male_85_over,male_under_5,mean_commute,mean_commute_boro,mean_commute_nyc,moe_bach_deg,moe
_bach_deg_boro,moe_bach_deg_nyc,moe_foreign_born,moe_hh_rent_burd,moe_hh_rent_burd_boro,moe_hh_rent_burd_nyc,moe_lep_rate,moe_lep_rate_boro,moe_lep_rate_nyc,moe_mean_commute,moe_mean_commute_boro,moe_mean_commute_nyc,moe_over65_rate,moe_over65_rate_boro,moe_over65_rate_nyc,moe_poverty_rate,moe_under18_rate,moe_under18_rate_boro,moe_under18_rate_nyc,moe_unemployment_nyc,moe_unemployment_boro,moe_unemployment,neighborhoods,over65_rate,over65_rate_boro,over65_rate_nyc,pct_asian_nh,pct_bach_deg,pct_bach_deg_boro,pct_bach_deg_nyc,pct_black_nh,pct_clean_strts,pct_clean_strts_boro,pct_clean_strts_nyc,pct_foreign_born,pct_hh_rent_burd,pct_hh_rent_burd_boro,pct_hh_rent_burd_nyc,pct_hispanic,pct_other_nh,pct_served_parks,pct_white_nh,pop_2000,pop_2010,pop_acs,pop_change_00_10,poverty_rate,poverty_rate_boro,poverty_rate_nyc,puma,shared_puma,shared_puma_cd,son_issue_1,son_issue_2,son_issue_3,total_lot_area,under18_rate,under18_rate_boro,under18_rate_nyc,unemployment_boro,unemployment,unemployment_nyc,v_pluto,v_acs,v_facdb,v_crime
0,,1,,976.3,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.5,101,man01@cb.nyc.gov,www1.nyc.gov/site/manhattancb1/index.page,Manhattan Community District 1,Manhattan CD 1,,1725,45629,10,2,13,19,1055,26271,92480,6.8,16.1,11,1.1,2.4,3.9,7.2,6.5,4.7,3.6,2.9,1.8,2.3,2.4,2.6,2.4,1.8,1.0,0.7,1.2,2.7,0.85,653,0.285,2164,121000,4900,0.048,0.221,7599,0.645,16733,0.353,7367,19756,0.048,1036,0.285,2913,118000,6272,0.04,0.242,10238,0.613,29221,0.35,8750,28097,6.2,15.8,23.1,,,,,,,,,,,,,359,4,635,23,17,23,72,34,167,66,1461,36,25,1.4,1.8,2.8,5.7,6.3,5.2,3.6,3.6,1.7,2.7,2.7,2.7,2.0,1.3,0.9,0.7,0.8,2.6,25.4,32.1,41.2,0.9,0.4,0.2,0.9,1.6,0.5,0.3,0.7,0.2,0.1,0.6,0.2,0.1,0.6,0,0,0.7,0.6,0,0,0.1,0.1,0.3,"Battery Park City, Civic Center, Ellis Island,...",12.8,15.8,14.1,15.5,82.2,60.8,37.4,2.2,95.4,94.1,95.1,23.3,30.1,36.4,44.2,6.5,3.6,100,72.2,34420,60978,154636,0.77,8.8,14.4,19.8,3810,True,Manhattan CD 2,Infrastructure resiliency,Traffic,Other,2881166.373,12.7,14.4,20.9,3.8,2.8,4.4,20v4,Y2014-2018,6/24/20,2019
1,,2,,865.9,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.4,102,bgormley@cb.nyc.gov,www.nyc.gov/html/mancb2,Manhattan Community District 2,Manhattan CD 2,,5256,60610,19,3,6,8,2313,26271,92480,15.0,16.1,11,1.1,2.4,3.9,7.2,6.5,4.7,3.6,2.9,1.8,2.3,2.4,2.6,2.4,1.8,1.0,0.7,1.2,2.7,0.21,357,0.285,2164,121000,4900,0.001,0.221,7599,0.645,5038,0.353,7367,4672,0.001,777,0.285,2913,118000,6272,0.002,0.242,10238,0.613,10198,0.35,8750,7620,6.2,15.8,23.1,,,,,,,,,,,,,594,13,1766,29,29,52,173,628,444,921,4725,26,50,1.4,1.8,2.8,5.7,6.3,5.2,3.6,3.6,1.7,2.7,2.7,2.7,2.0,1.3,0.9,0.7,0.8,2.6,25.4,32.1,41.2,0.9,0.4,0.2,0.9,1.6,0.5,0.3,0.7,0.2,0.1,0.6,0.2,0.1,0.6,0,0,0.7,0.6,0,0,0.1,0.1,0.3,"Greenwich Village, Hudson Square, Little Italy...",12.8,15.8,14.1,15.5,82.2,60.8,37.4,2.2,96.3,94.1,95.1,23.3,30.1,36.4,44.2,6.5,3.6,100,72.2,93119,90016,154636,-0.03,8.8,14.4,19.8,3810,True,Manhattan CD 1,Parks,Schools,Senior services,2231472.12,12.7,14.4,20.9,3.8,2.8,4.4,20v4,Y2014-2018,6/24/20,2019
2,,3,,1076.9,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.7,103,mn03@cb.nyc.gov,www.nyc.gov/html/mancb3,Manhattan Community District 3,Manhattan CD 3,,4644,82557,41,5,13,42,2360,26271,92480,15.2,16.1,11,1.5,2.9,4.2,6.4,4.1,3.2,3.0,3.2,1.2,2.8,3.3,3.4,2.9,2.1,1.6,1.6,2.1,1.5,0.35,514,0.155,360,18000,932,0.122,0.112,2318,0.402,32294,0.463,8169,21117,0.122,819,0.161,500,21000,1315,0.063,0.107,3104,0.424,47273,0.458,11382,27544,28.6,15.8,23.1,,,,,,,,,,,,,298,23,2076,124,12,56,276,65,255,926,4259,39,109,1.8,2.5,3.7,6.2,5.6,3.9,2.9,3.3,1.6,3.0,2.8,2.5,2.3,2.0,1.3,1.0,1.0,1.5,31.3,32.1,41.2,1.2,0.4,0.2,1.1,1.8,0.5,0.3,1.0,0.2,0.1,0.7,0.2,0.1,0.7,0,0,1.1,0.7,0,0,0.1,0.1,0.4,"Chinatown, East Village, Lower East Side, NoHo...",17.9,15.8,14.1,30.6,43.3,60.8,37.4,8.4,91.0,94.1,95.1,34.5,38.4,36.4,44.2,25.1,2.6,100,33.3,164407,163277,154995,-0.01,19.3,14.4,19.8,3809,False,,Affordable housing,Senior services,Homelessness,2985434.526,11.4,14.4,20.9,3.8,3.6,4.4,20v4,Y2014-2018,6/24/20,2019
3,,4,,1131.6,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.8,104,jbodine@cb.nyc.gov,www.nyc.gov/mcb4,Manhattan Community District 4,Manhattan CD 4,,3829,85706,38,2,7,29,2792,26271,92480,18.1,16.1,11,0.9,1.7,4.0,7.2,6.4,3.8,3.0,2.9,1.0,2.3,2.7,2.7,2.1,2.1,1.4,0.7,0.9,1.8,0.49,377,0.244,327,105000,907,0.007,0.086,1339,0.677,2957,0.423,5007,9668,0.007,600,0.246,458,103000,1229,0.016,0.102,1860,0.661,9609,0.435,5962,17213,9.7,15.8,23.1,,,,,,,,,,,,,479,55,1008,25,8,73,163,162,320,960,3417,94,70,0.8,1.1,3.3,7.0,7.2,5.1,4.8,3.9,1.0,3.6,3.7,3.1,2.4,1.4,1.1,0.8,0.6,1.5,26.7,32.1,41.2,1.2,0.4,0.2,1.2,1.9,0.5,0.3,1.0,0.2,0.1,0.6,0.2,0.1,0.7,0,0,0.8,0.6,0,0,0.1,0.1,0.4,"Chelsea, Clinton, Hudson Yards",13.4,15.8,14.1,17.2,74.1,60.8,37.4,5.2,95.6,94.1,95.1,30.8,33.9,36.4,44.2,15.5,3.0,94,59.1,87479,103245,154496,0.18,11.3,14.4,19.8,3807,True,Manhattan CD 5,Affordable housing,"Land use trends (zoning, development, neighbor...",Traffic,3110433.364,8.2,14.4,20.9,3.8,3.5,4.4,20v4,Y2014-2018,6/24/20,2019
4,,5,,1005.4,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.6,105,office@cb5.org,www.cb5.org,Manhattan Community District 5,Manhattan CD 5,,3127,49436,23,7,3,13,4538,26271,92480,29.4,16.1,11,0.9,1.7,4.0,7.2,6.4,3.8,3.0,2.9,1.0,2.3,2.7,2.7,2.1,2.1,1.4,0.7,0.9,1.8,0.0,0,0.0,327,0,907,0.0,0.0,1339,0.0,0,0.0,5007,0,0.0,0,0.0,458,0,1229,0.0,0.0,1860,0.0,0,0.0,5962,0,9.7,15.8,23.1,,,,,,,,,,,,,1643,12,780,7,17,38,113,15,186,76,3018,22,109,0.8,1.1,3.3,7.0,7.2,5.1,4.8,3.9,1.0,3.6,3.7,3.1,2.4,1.4,1.1,0.8,0.6,1.5,26.7,32.1,41.2,1.2,0.4,0.2,1.2,1.9,0.5,0.3,1.0,0.2,0.1,0.6,0.2,0.1,0.7,0,0,0.8,0.6,0,0,0.1,0.1,0.4,"Flatiron, Gramercy Park, Herald Square, Midtow...",13.4,15.8,14.1,17.2,74.1,60.8,37.4,5.2,94.4,94.1,95.1,30.8,33.9,36.4,44.2,15.5,3.0,95,59.1,44028,51673,154496,0.17,11.3,14.4,19.8,3807,True,Manhattan CD 4,Traffic,Trash removal & cleanliness,Homelessness,2741102.661,8.2,14.4,20.9,3.8,3.5,4.4,20v4,Y2014-2018,6/24/20,2019
5,,6,,888.5,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.4,106,Office@cbsix.org,www.cbsix.org,Manhattan Community District 6,Manhattan CD 6,,3056,100985,48,3,6,16,1715,26271,92480,11.7,16.1,11,1.0,2.0,4.8,8.4,5.4,3.7,3.1,2.5,1.2,2.9,2.6,3.1,3.0,2.7,1.8,1.3,1.7,1.8,0.29,172,0.193,358,92000,758,0.017,0.16,1854,0.409,9455,0.408,3416,18432,0.017,222,0.176,303,88000,649,0.004,0.122,1720,0.377,14084,0.441,4872,20058,5.8,15.8,23.1,,,,,,,,,,,,,319,1,940,37,4,23,209,260,418,498,2812,41,62,1.1,1.3,4.1,7.3,5.7,3.8,2.7,3.0,1.0,2.4,2.9,2.2,2.5,1.9,1.2,1.0,0.9,1.8,27.1,32.1,41.2,1.3,0.4,0.2,1.4,2.0,0.5,0.3,0.8,0.2,0.1,0.7,0.2,0.1,0.8,0,0,0.9,0.8,0,0,0.1,0.1,0.4,"Beekman Place, Gramercy Park, Murray Hill, Pet...",18.2,15.8,14.1,16.6,80.5,60.8,37.4,3.4,96.5,94.1,95.1,23.2,35.1,36.4,44.2,7.3,3.0,91,69.7,136152,142745,146915,0.05,9.8,14.4,19.8,3808,False,,Affordable housing,Parks,Homelessness,2412762.607,8.8,14.4,20.9,3.8,2.5,4.4,20v4,Y2014-2018,6/24/20,2019
6,,7,,1220.2,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.9,107,office@cb7.org,www.nyc.gov/mcb7,Manhattan Community District 7,Manhattan CD 7,,4719,129548,18,4,4,35,1860,26271,92480,9.5,16.1,11,2.1,1.5,2.3,4.4,5.3,4.2,4.1,3.4,2.4,3.5,3.5,3.6,3.8,3.2,1.9,1.3,2.0,2.7,1.08,17,0.171,554,87000,1777,0.196,0.295,3249,0.547,1436,0.375,2132,2197,0.196,6,0.175,572,88000,1777,0.295,0.274,3267,0.544,1436,0.369,2341,3099,8.4,15.8,23.1,,,,,,,,,,,,,120,4,787,36,1,17,208,475,794,1921,4426,12,51,1.8,1.4,1.2,3.9,4.4,3.4,3.3,3.3,1.7,3.3,2.8,3.0,2.8,2.3,1.1,0.8,1.4,2.8,32.2,32.1,41.2,1.2,0.4,0.2,1.1,1.8,0.5,0.3,1.0,0.2,0.1,0.6,0.2,0.1,0.8,0,0,0.8,0.6,0,0,0.1,0.1,0.5,"Lincoln Square, Manhattan Valley, Upper West Side",20.6,15.8,14.1,9.1,77.0,60.8,37.4,5.4,97.0,94.1,95.1,22.1,31.4,36.4,44.2,14.6,2.9,100,68.0,207699,209084,195143,0.01,9.2,14.4,19.8,3806,False,,Affordable housing,Schools,Social services,3860415.293,15.5,14.4,20.9,3.8,3.3,4.4,20v4,Y2014-2018,6/24/20,2019
7,,8,,1266.3,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,2.0,108,info@cb8m.com,www.cb8m.com,Manhattan Community District 8,Manhattan CD 8,,5950,151357,40,5,18,20,2192,26271,92480,10.2,16.1,11,1.6,1.1,2.3,7.0,6.9,4.2,3.2,3.3,2.3,3.3,3.0,2.9,3.5,3.0,2.5,1.6,1.8,2.4,0.28,193,0.28,855,72000,1558,0.031,0.194,3058,0.509,7223,0.473,5253,15996,0.031,264,0.249,928,73000,1912,0.024,0.214,3730,0.513,10647,0.473,5631,20696,6.1,15.8,23.1,,,,,,,,,,,,,260,11,1596,8,15,27,302,1053,953,1184,5520,42,69,1.8,1.3,1.5,3.6,4.7,3.9,2.8,3.0,1.9,3.0,2.7,2.5,2.4,1.9,1.6,1.2,1.4,2.9,31.4,32.1,41.2,1.3,0.4,0.2,1.1,2.1,0.5,0.3,0.7,0.2,0.1,0.7,0.2,0.1,0.7,0,0,0.6,0.6,0,0,0.1,0.1,0.3,"Carnegie Hill, Lenox Hill, Roosevelt Island, U...",20.8,15.8,14.1,10.2,80.5,60.8,37.4,2.3,97.2,94.1,95.1,23.6,32.5,36.4,44.2,9.7,2.5,95,75.3,217063,219920,215449,0.01,7.2,14.4,19.8,3805,False,,Affordable housing,Parks,Schools,3521415.105,14.6,14.4,20.9,3.8,2.0,4.4,20v4,Y2014-2018,6/24/20,2019
8,,9,,961.7,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.5,109,eprince@cb9m.org,www.cb9m.org,Manhattan Community District 9,Manhattan CD 9,,2608,45968,19,3,6,18,1450,26271,92480,11.2,16.1,11,1.8,3.7,6.9,6.8,4.2,3.4,2.6,3.0,2.0,3.2,2.2,2.7,2.1,1.8,1.4,1.3,1.1,2.1,0.11,31,0.162,158,32000,625,0.03,0.119,977,0.64,0,0.516,3393,0,0.03,38,0.159,182,34000,691,0.038,0.125,1144,0.604,0,0.531,3885,42,20.8,15.8,23.1,,,,,,,,,,,,,76,16,326,17,4,25,196,379,374,937,2466,25,91,2.3,3.1,5.6,6.6,4.5,3.3,2.4,3.0,2.0,3.1,2.4,2.4,1.6,1.0,0.8,0.7,0.5,2.3,36.5,32.1,41.2,1.7,0.4,0.2,1.5,1.8,0.5,0.3,1.3,0.2,0.1,0.8,0.2,0.1,0.8,0,0,1.4,0.8,0,0,0.1,0.1,0.6,"Hamilton Heights, Manhattanville, Morningside ...",12.4,15.8,14.1,8.8,45.4,60.8,37.4,21.5,93.6,94.1,95.1,34.6,45.1,36.4,44.2,38.8,3.3,100,27.6,111724,110193,129467,-0.01,20.7,14.4,19.8,3802,False,,Affordable housing,Crime,Unemployment,2403301.958,15.1,14.4,20.9,3.8,3.8,4.4,20v4,Y2014-2018,6/24/20,2019
9,,10,,897.1,American Community Survey 2014-2018 5-Year Est...,American Community Survey (ACS) 2013-2017 5-ye...,2010 Census population counts for floodplain a...,1.4,110,MN10CB@cb.nyc.gov,www.nyc.gov/html/mancb10,Manhattan Community District 10,Manhattan CD 10,,4623,61032,39,5,7,22,1950,26271,92480,14.2,16.1,11,2.8,2.5,3.7,5.9,5.5,4.3,3.4,3.5,2.7,3.8,3.2,2.8,2.1,1.6,0.8,1.0,1.4,3.5,0.12,53,0.108,60,25000,41,0.016,0.126,555,0.074,2372,0.457,1652,5576,0.016,112,0.159,102,25000,103,0.016,0.097,642,0.16,7880,0.506,2845,7669,11.3,15.8,23.1,,,,,,,,,,,,,117,9,719,61,9,28,275,606,305,2017,4342,14,182,2.4,2.2,3.6,4.9,4.2,3.6,3.5,3.0,2.6,3.7,2.5,2.3,1.4,0.9,0.8,0.2,0.3,3.6,37.2,32.1,41.2,1.5,0.4,0.2,1.4,1.7,0.5,0.3,1.0,0.2,0.1,0.7,0.2,0.1,0.5,0,0,1.2,0.9,0,0,0.1,0.1,0.6,Central Harlem,10.4,15.8,14.1,3.4,38.6,60.8,37.4,55.3,91.3,94.1,95.1,23.4,38.6,36.4,44.2,23.8,3.3,99,14.2,107109,115723,137181,0.08,20.2,14.4,19.8,3803,False,,Affordable housing,"Commercial development (retail mix, small busi...",Trash removal & cleanliness,2475466.121,20.4,14.4,20.9,3.8,6.2,4.4,20v4,Y2014-2018,6/24/20,2019


In [5]:
%%time

# Write the cd_indicators frame to the database as table 'cd_indic',
# replacing any previous copy; the DataFrame index is not stored.
cd_indicators.to_sql(
    name='cd_indic',
    con=con,
    if_exists='replace',
    index=False,
)

CPU times: user 35 ms, sys: 12.1 ms, total: 47.1 ms
Wall time: 73.4 ms


## Load weather data
Preprocessed in a separate file by Moutaz

In [11]:
# Load the pickled weather data. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it open.
# NOTE(review): unpickling can execute arbitrary code - only load pickle
# files that come from a trusted source.
with open('Data/weather.pkl', 'rb') as weather_file:
    weather = pickle5.load(weather_file)

In [12]:
weather

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,DATE,Precipitation,Snowfall,Snow/depth,Water_equivalent_snow_on_ground,Fog/ice_fog/freezing_fog
0,USC00280907,"BOONTON 1 SE, NJ US",40.892,-74.396,2018-01-01,0.000,0.000,0.000,0.000,0.000
1,USC00280907,"BOONTON 1 SE, NJ US",40.892,-74.396,2018-01-02,0.000,0.000,0.000,0.000,0.000
2,USC00280907,"BOONTON 1 SE, NJ US",40.892,-74.396,2018-01-03,0.000,0.000,0.000,0.000,0.000
3,USC00280907,"BOONTON 1 SE, NJ US",40.892,-74.396,2018-01-04,0.000,0.000,0.000,0.000,0.000
4,USC00280907,"BOONTON 1 SE, NJ US",40.892,-74.396,2018-01-05,0.560,5.000,5.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...
98316,USW00054743,"CALDWELL ESSEX CO AIRPORT, NJ US",40.876,-74.283,2021-04-15,0.220,0.000,0.000,0.000,0.000
98317,USW00054743,"CALDWELL ESSEX CO AIRPORT, NJ US",40.876,-74.283,2021-04-16,0.020,0.000,0.000,0.000,0.000
98318,USW00054743,"CALDWELL ESSEX CO AIRPORT, NJ US",40.876,-74.283,2021-04-17,0.000,0.000,0.000,0.000,0.000
98319,USW00054743,"CALDWELL ESSEX CO AIRPORT, NJ US",40.876,-74.283,2021-04-18,0.000,0.000,0.000,0.000,0.000


In [13]:
%%time

# Write the weather frame to the database as table 'weather', replacing
# any previous copy; the DataFrame index is not stored.
weather.to_sql(
    name='weather',
    con=con,
    if_exists='replace',
    index=False,
)

CPU times: user 392 ms, sys: 98.9 ms, total: 491 ms
Wall time: 560 ms


## Load Collision Data

Preprocessed in a separate file by Sheila

In [20]:
# Read the two collision CSV files (collision20 and collision19).
col20 = pd.read_csv(filepath_or_buffer='Data/collisions/collision20.csv')
col19 = pd.read_csv(filepath_or_buffer='Data/collisions/collision19.csv')

# Stack both files into one frame, collision20 rows first.
col_combined = pd.concat(objs=[col20, col19])

# Write the combined frame to the database as table 'collision', replacing
# any previous copy; the DataFrame index is not stored.
col_combined.to_sql(
    name='collision',
    con=con,
    if_exists='replace',
    index=False,
)

## Create LION + Demographics Table
This query creates a new table, LION_Dem, that takes the key street features from LION and adds the community district demographic data, joined on LION's L_CD.

This table must be deleted first if it already exists.

In [None]:
%%time

# Build LION_Dem: selected street-segment columns from LION (alias b) plus
# selected community-district indicators from cd_indic (alias a), matched on
# LION.L_CD = cd_indic.borocd. The LEFT OUTER JOIN keeps LION rows that have
# no matching district. pct_non_white_nh is derived as 100 - pct_white_nh.
query='''
CREATE TABLE LION_Dem AS
SELECT 
b.OBJECTID,b.Street,b.FeatureTyp,b.SegmentTyp,b.NonPed,b.TrafDir,b.LocStatus,b.LZip,b.RZip,b.LBoro,b.RBoro,
b.L_CD,b.R_CD,b.CurveFlag,b.Radius,b.RW_Type,b.PhysicalID,b.StreetWidt,b.BikeLane,b.BIKE_Trafd,b.Number_Tra,
b.Number_Par,b.Number_Tot,b.Posted_Spe,b.Truck_Rout,b.c_lowadd,b.c_highadd,b.StreetCode,
a.cd_short_title,a.cd_tot_bldgs,a.cd_tot_resunits,a.crime_count,a.crime_per_1000,a.lep_rate,a.lots_commercial_office,a.lots_industrial_manufacturing,
a.lots_mixed_use,a.lots_open_space,a.lots_parking,a.lots_total,a.mean_commute,a.over65_rate,a.under18_rate,
a.pct_bach_deg,a.pct_foreign_born,a.pct_hh_rent_burd,a.pct_white_nh,a.pct_black_nh,(100-a.pct_white_nh) AS pct_non_white_nh,
a.poverty_rate,a.unemployment 
FROM LION b
LEFT OUTER JOIN cd_indic a
ON b.L_CD = a.borocd
'''

# Drop any previous copy first: a plain CREATE TABLE raises
# "table LION_Dem already exists" when this cell is re-run, whereas the
# source tables are rebuilt with if_exists='replace'.
con.execute('DROP TABLE IF EXISTS LION_Dem')
con.execute(query)

## Merge Tickets with Streets and Demographics to New Table

This query creates a new table, ticketstreetdem, which combines the ticket data with street and demographic details.

Delete this table first if it already exists; otherwise the query leaves the existing table unchanged.

In [None]:
%%time

# Build ticketstreetdem: selected ticket columns (alias a) plus every
# LION_Dem column (alias b) for the street segment whose StreetCode equals
# the ticket's Street1LU and whose combined address range
# [c_lowadd, c_highadd] contains the ticket's `House Number Clean`.
#
# The range test is part of the ON clause. As a WHERE filter it removed the
# NULL-filled rows that LEFT OUTER JOIN produces for unmatched tickets, so
# the join silently behaved like an inner join. Tickets without a matching
# segment are now kept, with NULL street/demographic columns.
# NOTE(review): if only matched tickets are wanted, use INNER JOIN instead.
#
# IF NOT EXISTS leaves an already existing table untouched, so the table
# has to be dropped beforehand to rebuild it.
query='''
CREATE TABLE IF NOT EXISTS ticketstreetdem AS
SELECT a.`Summons Number`,a.`Violation Code`,a.`Clean Violation Des`,a.`Issue Date`,a.`Violation Time`,b.*
FROM tickets a
LEFT OUTER JOIN LION_Dem b
ON a.Street1LU = b.StreetCode
AND b.c_lowadd<=a.`House Number Clean`
AND b.c_highadd>=a.`House Number Clean`
'''
con.execute(query)