### Standard Python and R imports

In [1]:
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

%matplotlib inline  
from matplotlib import rcParams
rcParams['figure.figsize'] = (16, 100)

import warnings
from rpy2.rinterface import RRuntimeWarning
warnings.filterwarnings("ignore") # Ignore all warnings
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [2]:
%%javascript
// Disable auto-scrolling
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
%%R

# My commonly used R imports

require('tidyverse')

R[write to console]: Loading required package: tidyverse



── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [4]:
pd.set_option('display.max_columns', None)

### Load the data

In [5]:
# URL: https://data.cityofnewyork.us/City-Government/Building-Footprints/5zhs-2jue/about_data

In [6]:
%%R
merged_data_new <- read.csv('merged_data_new.csv')

In [7]:
%%R
Building_Footprints <- read.csv('Building Footprints.csv')

In [8]:
%%R
merged_with_footprint <- full_join(merged_data_new, Building_Footprints, 
                               by = c("BIN.Number" = "BIN")) 

In [9]:
%%R
# Create a binary variable 'active_shed' based on GEOID
merged_with_footprint <- merged_with_footprint %>%
  mutate(active_shed = ifelse(is.na(GEOID), 0, 1))  # 0 for no active shed, 1 for active shed

In [10]:
%%R
# Save the merged data as a CSV file:
write.csv(merged_with_footprint, "merged_with_footprint.csv", row.names = FALSE)

In [11]:
merged_with_footprint = pd.read_csv("merged_with_footprint.csv")
merged_with_footprint.head(-30)

Unnamed: 0,GEOID,active_shed_licenses,Job.Number,Borough.Name,Count.Permits,First.Permit.Date,Current.Date,Age..in.years.,Permit.Expiration.Date,Sidewalk.Shed.Linear.Feet,Construction.Material,Current.Job.Status,BIN.Number,Community.Board,Latitude.Point,Longitude.Point,House.Number,Street.Name,Borough.Digit,Block,Lot,Applicant.Business.Name,ProCert,Source,activity,Commercial,STATE,COUNTY,TRACT,BLOCK,NAME.x,population_estimate,black_african_estimate,occupied_estimate,vacant_estimate,owner_occupied_estimate,renter_occupied_estimate,owner_income_estimate,renter_income_estimate,population_moe,black_african_moe,occupied_moe,vacant_moe,owner_occupied_moe,renter_occupied_moe,owner_income_moe,renter_income_moe,the_geom,NAME.y,CNSTRCT_YR,LSTMODDATE,LSTSTATYPE,DOITT_ID,HEIGHTROOF,FEAT_CODE,GROUNDELEV,SHAPE_AREA,SHAPE_LEN,BASE_BBL,MPLUTO_BBL,GEOMSOURCE,GLOBALID,active_shed
0,3.600500e+10,1.0,X01112467-I1,Bronx,,2024-10-18,2025-05-03,0.536986,2025-06-02,35.0,,Permit Entire,2020305.0,209.0,40.81055,-73.85100,216,HUSSON AVENUE,2.0,3460.0,13.0,MSM ENGINEERING SERVICES PLLC,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,400.0,2002.0,"Census Tract 4, Bronx County, New York",6000.0,1572.0,2199.0,58.0,1505.0,694.0,99121.0,,903.0,462.0,312.0,47.0,191.0,347.0,30528.0,,MULTIPOLYGON (((-73.85099338682896 40.81058940...,,1901.0,08/22/2017,Constructed,682075.0,20.44000,2100.0,11.0,0.0,0.0,2.034600e+09,2.034600e+09,Photogramm,{FE86AF37-43DA-4C27-8FF4-5F5A6584D885},1
1,3.600500e+10,3.0,X00974554-I1,Bronx,,2023-12-13,2025-05-03,1.386301,2025-12-02,342.0,,Permit Entire,2130803.0,209.0,40.82163,-73.85979,1847,SEWARD AVENUE,2.0,3600.0,30.0,ASHRAF CORP,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,1600.0,1000.0,"Census Tract 16, Bronx County, New York",6038.0,2593.0,2187.0,0.0,356.0,1831.0,100833.0,35774.0,665.0,531.0,260.0,18.0,119.0,260.0,44227.0,6704.0,,,,,,,,,,,,,,,,1
2,3.600500e+10,3.0,X00974557-I1,Bronx,,2023-12-13,2025-05-03,1.386301,2025-12-02,350.0,,Permit Entire,2130800.0,209.0,40.81969,-73.86113,1841,SEWARD AVENUE,2.0,3600.0,50.0,..,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,1600.0,1000.0,"Census Tract 16, Bronx County, New York",6038.0,2593.0,2187.0,0.0,356.0,1831.0,100833.0,35774.0,665.0,531.0,260.0,18.0,119.0,260.0,44227.0,6704.0,,,,,,,,,,,,,,,,1
3,3.600500e+10,3.0,X00002036-I1,Bronx,,2017-11-11,2025-05-03,7.476712,2025-05-15,110.0,,Permit Entire,2000765.0,201.0,40.80814,-73.92984,9,BRUCKNER BOULEVARD,2.0,2317.0,19.0,JOEL PHAGOO P.E. PLLC,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,1901.0,1006.0,"Census Tract 19.01, Bronx County, New York",2168.0,904.0,885.0,46.0,0.0,885.0,,55924.0,263.0,246.0,108.0,33.0,13.0,108.0,,12028.0,MULTIPOLYGON (((-73.92983345477573 40.80829915...,,1931.0,08/22/2017,Constructed,166782.0,27.64000,2100.0,9.0,0.0,0.0,2.023170e+09,2.023170e+09,Photogramm,{B2396520-C561-4F0B-97A4-F7F09F394335},1
4,3.600500e+10,3.0,X00677889-I1,Bronx,,2022-02-11,2025-05-03,3.221918,2025-07-24,350.0,,Permit Entire,2117112.0,201.0,40.80565,-73.92635,331,EAST 132 STREET,2.0,2295.0,7501.0,ENTHINK ENGINEERING LLC,1.0,DOB NOW,Local Law 11,Other Zoning Districts,36.0,5.0,1901.0,1012.0,"Census Tract 19.01, Bronx County, New York",2168.0,904.0,885.0,46.0,0.0,885.0,,55924.0,263.0,246.0,108.0,33.0,13.0,108.0,,12028.0,MULTIPOLYGON (((-73.92682884567984 40.80577481...,,2009.0,01/24/2019,Constructed,1252450.0,103.46217,2100.0,13.0,0.0,0.0,2.022950e+09,2.022958e+09,Photogramm,{9AA265EE-F334-463F-81A9-B3ABE481F715},1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1083763,,,,,,,,,,,,,5117018.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-74.16387443590938 40.58601992...,,1985.0,08/22/2017,Constructed,805065.0,35.00000,2100.0,42.0,0.0,0.0,5.024010e+09,5.024018e+09,Other (Man,{8E548335-0EDF-48E3-AF74-0C50E7F97EAC},0
1083764,,,,,,,,,,,,,5117052.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-74.16381076494602 40.58552891...,,1985.0,08/22/2017,Constructed,35281.0,38.00000,2100.0,44.0,0.0,0.0,5.024010e+09,5.024018e+09,Other (Man,{2B7074BD-79DA-4EE1-8961-69F02C75D984},0
1083765,,,,,,,,,,,,,5117049.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-74.16367038393751 40.58567637...,,1985.0,08/22/2017,Constructed,845898.0,40.00000,2100.0,43.0,0.0,0.0,5.024010e+09,5.024018e+09,Other (Man,{7E28860A-32BD-4669-9479-92D8568EBE92},0
1083766,,,,,,,,,,,,,5117016.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-74.16378171491016 40.58611706...,,1985.0,08/22/2017,Constructed,608894.0,34.00000,2100.0,42.0,0.0,0.0,5.024010e+09,5.024018e+09,Other (Man,{DA0E9033-62A5-4F2C-A8BC-A3B0C4463C63},0


In [12]:
no_construction_year = merged_with_footprint[merged_with_footprint['CNSTRCT_YR'].isna()]
no_construction_year

Unnamed: 0,GEOID,active_shed_licenses,Job.Number,Borough.Name,Count.Permits,First.Permit.Date,Current.Date,Age..in.years.,Permit.Expiration.Date,Sidewalk.Shed.Linear.Feet,Construction.Material,Current.Job.Status,BIN.Number,Community.Board,Latitude.Point,Longitude.Point,House.Number,Street.Name,Borough.Digit,Block,Lot,Applicant.Business.Name,ProCert,Source,activity,Commercial,STATE,COUNTY,TRACT,BLOCK,NAME.x,population_estimate,black_african_estimate,occupied_estimate,vacant_estimate,owner_occupied_estimate,renter_occupied_estimate,owner_income_estimate,renter_income_estimate,population_moe,black_african_moe,occupied_moe,vacant_moe,owner_occupied_moe,renter_occupied_moe,owner_income_moe,renter_income_moe,the_geom,NAME.y,CNSTRCT_YR,LSTMODDATE,LSTSTATYPE,DOITT_ID,HEIGHTROOF,FEAT_CODE,GROUNDELEV,SHAPE_AREA,SHAPE_LEN,BASE_BBL,MPLUTO_BBL,GEOMSOURCE,GLOBALID,active_shed
1,3.600500e+10,3.0,X00974554-I1,Bronx,,2023-12-13,2025-05-03,1.386301,2025-12-02,342.0,,Permit Entire,2130803.0,209.0,40.82163,-73.85979,1847,SEWARD AVENUE,2.0,3600.0,30.0,ASHRAF CORP,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,1600.0,1000.0,"Census Tract 16, Bronx County, New York",6038.0,2593.0,2187.0,0.0,356.0,1831.0,100833.0,35774.0,665.0,531.0,260.0,18.0,119.0,260.0,44227.0,6704.0,,,,,,,,,,,,,,,,1
2,3.600500e+10,3.0,X00974557-I1,Bronx,,2023-12-13,2025-05-03,1.386301,2025-12-02,350.0,,Permit Entire,2130800.0,209.0,40.81969,-73.86113,1841,SEWARD AVENUE,2.0,3600.0,50.0,..,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,1600.0,1000.0,"Census Tract 16, Bronx County, New York",6038.0,2593.0,2187.0,0.0,356.0,1831.0,100833.0,35774.0,665.0,531.0,260.0,18.0,119.0,260.0,44227.0,6704.0,,,,,,,,,,,,,,,,1
75,3.600500e+10,3.0,X01079144-I1,Bronx,,2024-07-15,2025-05-03,0.797260,2025-06-26,270.0,,Permit Entire,2022155.0,209.0,40.81968,-73.86323,714,BEACH AVENUE,2.0,3598.0,12.0,PAUL G. JONES P.E.,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,3800.0,1001.0,"Census Tract 38, Bronx County, New York",1313.0,355.0,343.0,14.0,146.0,197.0,121000.0,,153.0,232.0,25.0,19.0,55.0,58.0,21236.0,,,,,,,,,,,,,,,,,1
94,3.600500e+10,8.0,X01199911-I1,Bronx,,2025-03-25,2025-05-03,0.104110,2025-10-06,27.0,,Permit Entire,2130939.0,201.0,40.81164,-73.91844,463,EAST 143 STREET,2.0,2288.0,70.0,..,1.0,DOB NOW,Construction or Maintenance,Other Zoning Districts,36.0,5.0,4100.0,2000.0,"Census Tract 41, Bronx County, New York",5798.0,2436.0,2318.0,86.0,190.0,2128.0,91316.0,17477.0,1391.0,945.0,339.0,99.0,155.0,348.0,30577.0,7204.0,,,,,,,,,,,,,,,,1
178,3.600501e+10,7.0,X01128605-I1,Bronx,,2024-10-18,2025-05-03,0.536986,2025-08-27,160.0,,Permit Entire,2130373.0,204.0,40.82619,-73.92164,869,CONCOURSE VILLAGE WEST,2.0,2459.0,49.0,MJE PROFESSIONAL SERVICES,1.0,DOB NOW,Construction or Maintenance,Commercial District/Overlay,36.0,5.0,5902.0,1000.0,"Census Tract 59.02, Bronx County, New York",2730.0,998.0,1172.0,17.0,371.0,801.0,74026.0,35662.0,480.0,406.0,201.0,29.0,152.0,191.0,8841.0,10886.0,,,,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1083221,,,,,,,,,,,,,1090511.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-73.94625319720076 40.84705412...,,,03/14/2025,Constructed,1283782.0,0.000000,2100.0,8.0,0.0,0.0,1.021400e+09,1.021400e+09,Other (Man,{09BDB8EB-B32D-402F-A6CD-1F1C2F82413F},0
1083277,,,,,,,,,,,,,3397444.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-73.99984919403173 40.68862382...,,,01/22/2019,Constructed,1114440.0,73.179002,2100.0,16.0,0.0,0.0,3.003040e+09,3.003048e+09,Photogramm,{9B137E5C-68B4-408A-B449-E5BF57F3B6E3},0
1083304,,,,,,,,,,,,,4461648.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-73.87578515464118 40.74229868...,,,04/07/2025,,1303308.0,10.000000,5110.0,50.0,0.0,0.0,4.015860e+09,4.015860e+09,,{813BF228-46B1-496A-9759-5A98F5A85EB3},0
1083749,,,,,,,,,,,,,3414135.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MULTIPOLYGON (((-73.99119497770594 40.68891604...,,,01/22/2019,Constructed,1113606.0,72.000000,2100.0,43.0,0.0,0.0,3.002780e+09,3.002788e+09,Other (Man,{FC13BA9F-ABC8-4B21-9006-01E4CF46ECC8},0


In [13]:
%%R 

# print count of merged_with_footprint
print(nrow(merged_with_footprint))

merged_with_footprint <- merged_with_footprint %>% 
  filter(!is.na(active_shed) & !is.na(CNSTRCT_YR))

print(nrow(merged_with_footprint))

[1] 1083798
[1] 1072928


In [14]:
%%R

library(DescTools) 

logistic_model <- glm(active_shed ~ CNSTRCT_YR + HEIGHTROOF + GROUNDELEV, 
                      data=merged_with_footprint,
                      family = binomial)

print(summary(logistic_model))


Call:
glm(formula = active_shed ~ CNSTRCT_YR + HEIGHTROOF + GROUNDELEV, 
    family = binomial, data = merged_with_footprint)

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept) 20.6111856  0.8007185  25.741  < 2e-16 ***
CNSTRCT_YR  -0.0137038  0.0004161 -32.938  < 2e-16 ***
HEIGHTROOF   0.0215923  0.0001933 111.692  < 2e-16 ***
GROUNDELEV   0.0015891  0.0002983   5.328 9.95e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 91180  on 1072525  degrees of freedom
Residual deviance: 77075  on 1072522  degrees of freedom
  (402 observations deleted due to missingness)
AIC: 77083

Number of Fisher Scoring iterations: 8



In [15]:
%%R 
print(PseudoR2(logistic_model, which="McFadden"))

# CNSTRCT_YR: 0.002195937
# GROUNDELEV: 1.339341e-06 
# HEIGHTROOF: 0.141834 

# CNSTRCT_YR + GROUNDELEV: 0.002251043 
# CNSTRCT_YR + HEIGHTROOF: 0.1539
# HEIGHTROOF + GROUNDELEV: 0.1421619 

# CNSTRCT_YR + HEIGHTROOF + GROUNDELEV: 0.1542526

 McFadden 
0.1546922 


In [16]:
%%R 

df_with_predictions <- merged_with_footprint %>% 
    mutate(
        prediction_proba = predict(logistic_model, newdata = merged_with_footprint, type = "response"),
        prediction = ifelse(prediction_proba > 0.0365, 1, 0)
    )

df_with_predictions

           GEOID active_shed_licenses   Job.Number Borough.Name Count.Permits
1    36005000400                    1 X01112467-I1        Bronx            NA
2    36005001901                    3 X00002036-I1        Bronx            NA
3    36005001901                    3 X00677889-I1        Bronx            NA
4    36005001901                    3 X01059624-I1        Bronx            NA
5    36005001902                    8 X00372376-I1        Bronx            NA
6    36005001902                    8 X00763478-I1        Bronx            NA
7    36005001902                    8 X01013799-I1        Bronx            NA
8    36005001902                    8 X01057734-I1        Bronx            NA
9    36005001902                    8 X01082169-I1        Bronx            NA
10   36005001902                    8 X01140314-I1        Bronx            NA
11   36005001902                    8 X01166927-I1        Bronx            NA
12   36005001902                    8 X08026975-I1        Bronx 

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



             2025-06-25
560         2023-06-13   2025-05-03    1.887671233             2025-06-25
561         2023-06-13   2025-05-03    1.887671233             2025-06-25
562         2023-06-13   2025-05-03    1.887671233             2025-06-25
563         2023-06-13   2025-05-03    1.887671233             2025-06-25
564         2023-06-13   2025-05-03    1.887671233             2025-06-25
565         2023-06-13   2025-05-03    1.887671233             2025-06-25
566         2022-12-23   2025-05-03    2.358904110             2025-09-29
567         2022-12-22   2025-05-03    2.361643836             2025-08-14
568         2024-01-05   2025-05-03    1.323287671             2026-01-02
569         2024-04-10   2025-05-03    1.060273973             2025-12-22
570         2024-04-17   2025-05-03    1.041095890             2025-07-15
571         2024-06-06   2025-05-03    0.904109589             2025-06-06
572         2025-02-11   2025-05-03    0.219178082             2025-09-29
573         20

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



    2092723             209       40.82032       -73.87576          820
50      2092721             209       40.82046       -73.87709          875
51      2092719             209       40.82005       -73.87905          820
52      2092722             209       40.82005       -73.87905          880
53      2092720             209       40.82046       -73.87709          825
54      2004084             201       40.81085       -73.90531          500
55      2004091             201       40.81122       -73.90440          527
56      2128608             201       40.81209       -73.90506          828
57      2128607             201       40.81171       -73.90534          500
58      2003665             201       40.80887       -73.91494          592
59      2003874             201       40.80763       -73.90890          781
60      2003657             201       40.80809       -73.91389          615
61      2003662             201       40.80843       -73.91332          345
62      2091294 

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



  5440    1
404            EAST TREMONT AVENUE             2  5435    5
405             SOUTHERN BOULEVARD             2  2724  161
406                 RANDALL AVENUE             2  5481   48
407                    BOSTON ROAD             2  2991   48
408               EAST  174 STREET             2  2998  135
409                    VYSE AVENUE             2  2991   91
410            EAST TREMONT AVENUE             2  5427    8
411                 CROTONA AVENUE             2  2942    1
412                  FULTON AVENUE             2  2930    1
413                    CLAY AVENUE             2  2889    5
414               EAST  173 STREET             2  2888   39
415                BATHGATE AVENUE             2  2913    1
416                  FULTON AVENUE             2  2929   82
417               EAST  171 STREET             2  2927   46
418                  FULTON AVENUE             2  2927   36
419               EAST  162 STREET             2  2421   22
420               EAST  162 

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)




1499          AP PROFESSIONAL ENG PLLC       1 DOB NOW
1500          AP PROFESSIONAL ENG PLLC       1 DOB NOW
1501                  LAWSON CHRISTIAN       1 DOB NOW
1502                ASR ENGINEERING PC       1 DOB NOW
1503           ENTHINK ENGINEERING LLC       1 DOB NOW
1504       AK CONSULTING SERVICES  INC       0 DOB NOW
1505                                PR       1 DOB NOW
1506                  LAWSON CHRISTIAN       1 DOB NOW
1507         MJE PROFESSIONAL SERVICES       1 DOB NOW
1508                ASR ENGINEERING PC       1 DOB NOW
1509      PAUL PERDEK  PROF. ENG. PLLC       1 DOB NOW
1510           ENTHINK ENGINEERING LLC       1 DOB NOW
1511                ASR ENGINEERING PC       1 DOB NOW
1512                ASR ENGINEERING PC       1 DOB NOW
1513                ASR ENGINEERING PC       1 DOB NOW
1514         MJE PROFESSIONAL SERVICES       1 DOB NOW
1515                 AME SERVICES INC.       0 DOB NOW
1516      RIGID STRUCTURAL DESIGN  LLC       1 DOB NOW
1517     

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)




588   6000 Census Tract 210.02, Bronx County, New York                8192
589   6000 Census Tract 210.02, Bronx County, New York                8192
590   6000 Census Tract 210.02, Bronx County, New York                8192
591   6000 Census Tract 210.02, Bronx County, New York                8192
592   6000 Census Tract 210.02, Bronx County, New York                8192
593   6000 Census Tract 210.02, Bronx County, New York                8192
594   4000    Census Tract 211, Bronx County, New York                6036
595   3000    Census Tract 211, Bronx County, New York                6036
596   1000    Census Tract 211, Bronx County, New York                6036
597   4002    Census Tract 211, Bronx County, New York                6036
598   5000    Census Tract 212, Bronx County, New York                5157
599   5000    Census Tract 212, Bronx County, New York                5157
600   5000    Census Tract 212, Bronx County, New York                5157
601   5000    Census Tra

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



                     2392                    NA
360                        2                     2024                    NA
361                        2                     2024                    NA
362                        2                     2024                    NA
363                        2                     2024                    NA
364                        2                     2024                    NA
365                        2                     2024                    NA
366                        2                     2024                    NA
367                        2                     2024                    NA
368                        2                     2024                    NA
369                        2                     2024                    NA
370                        2                     2024                    NA
371                        2                     2024                    NA
372                        2            

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



          241
1250                  28850            825               972          241
1251                  28850            825               972          241
1252                  28850            825               972          241
1253                  28850            825               972          241
1254                  28850            825               972          241
1255                  28850            825               972          241
1256                  28850            825               972          241
1257                  28850            825               972          241
1258                  28850            825               972          241
1259                  28850            825               972          241
1260                  28850            825               972          241
1261                  28850            825               972          241
1262                  28850            825               972          241
1263                  28

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



              9951
1147              9951
1148              7190
1149              7190
1150              3045
1151              3045
1152             15197
1153             15197
1154              5001
1155              5001
1156              5001
1157              5001
1158              5001
1159              5001
1160              5001
1161              5001
1162              5001
1163              5001
1164              5001
1165              5001
1166             12995
1167             12995
1168             12995
1169             12995
1170             12995
1171             21572
1172             21572
1173             21572
1174             21572
1175             21572
1176             21572
1177             21572
1178             21572
1179             21572
1180              8288
1181              8288
1182              7460
1183              7460
1184              7460
1185              7460
1186              7460
1187              7460
1188              7460
1189           

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



   185762  140.98000      2100         39
40                Constructed    22589  143.24890      2100         21
41                Constructed   225837  146.54000      2100         28
42                Constructed   231380  146.49429      2100         38
43                Constructed   710023  146.93000      2100         43
44                Constructed    68753  178.67000      2100         49
45                Constructed   297008   64.36000      2100         40
46                Constructed   470858   59.02000      2100         33
47                Constructed   636210   60.32000      2100         44
48                Constructed   446661   57.57000      2100         46
49                Constructed   267812  173.24000      2100         18
50                Constructed   333309  169.50000      2100         17
51                Constructed    78665  169.73000      2100         17
52                Constructed   482800  168.73000      2100         18
53                Constructed   762

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



 2029290082 Photogramm
417           0         0 2029270046 2029270046 Photogramm
418           0         0 2029270036 2029270036 Photogramm
419           0         0 2024210022 2024210022 Photogramm
420           0         0 2024210024 2024210024 Photogramm
421           0         0 2024220021 2024220021 Photogramm
422           0         0 2024220004 2024220004 Photogramm
423           0         0 2024230053 2024230053 Photogramm
424           0         0 2024390001 2024390001 Photogramm
425           0         0 2024390090 2024390090 Photogramm
426           0         0 2024390080 2024390080 Photogramm
427           0         0 2024340080 2024340080 Other (Man
428           0         0 2024340074 2024340074 Photogramm
429           0         0 2024390090 2024390090 Photogramm
430           0         0 2024390080 2024390080 Photogramm
431           0         0 2024390001 2024390001 Photogramm
432           0         0 2024370024 2024370024 Photogramm
433           0         0 2024370

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



           1      0.015662412
1106 {CBB3DB85-B285-409F-ADA1-7E51B572B279}           1      0.008769683
1107 {26A4F0E4-88E6-47E0-8478-787F592148CF}           1      0.006487389
1108 {A489C09F-85EF-4BEA-A8D3-B657FEBE2471}           1      0.006336785
1109 {5E314456-1150-41F5-91BC-049DC012E018}           1      0.007239490
1110 {CECC1E9C-FACC-4F4D-9E41-6542AD1F0396}           1      0.011861752
1111 {07CC2770-0158-46BD-899F-B5A054C49067}           1      0.011929561
1112 {CB21F904-868D-4DA6-A0B6-B12721881007}           1      0.012355492
1113 {196E29C8-2936-4E5D-BA7F-BA5C6613DC0D}           1      0.064595945
1114 {B0AB66F0-8857-4E94-8A64-B867738BA2A2}           1      0.059944009
1115 {B5AA4A98-350D-4230-A6C9-52F1051935BB}           1      0.013641854
1116 {7B45F0A5-ABB0-4385-8061-A4421BC602F9}           1      0.008504292
1117 {CB710C5A-9241-4C4D-B183-BB6F3D829C08}           1      0.012897005
1118 {4126167A-8D95-46BD-A24E-F80DE81EBFA6}           1      0.014092718
1119 {79032BD5-EC04-4

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [17]:
%%R 
colnames(df_with_predictions)

 [1] "GEOID"                     "active_shed_licenses"     
 [3] "Job.Number"                "Borough.Name"             
 [5] "Count.Permits"             "First.Permit.Date"        
 [7] "Current.Date"              "Age..in.years."           
 [9] "Permit.Expiration.Date"    "Sidewalk.Shed.Linear.Feet"
[11] "Construction.Material"     "Current.Job.Status"       
[13] "BIN.Number"                "Community.Board"          
[15] "Latitude.Point"            "Longitude.Point"          
[17] "House.Number"              "Street.Name"              
[19] "Borough.Digit"             "Block"                    
[21] "Lot"                       "Applicant.Business.Name"  
[23] "ProCert"                   "Source"                   
[25] "activity"                  "Commercial"               
[27] "STATE"                     "COUNTY"                   
[29] "TRACT"                     "BLOCK"                    
[31] "NAME.x"                    "population_estimate"      
[33] "black_african_esti

#### Outlier buildings:

In [21]:
%%R 
# this is based on the logistics model
df_with_predictions %>%
  filter(prediction == 1, active_shed == 1) %>%       
  arrange(CNSTRCT_YR) %>%                      
  select(CNSTRCT_YR, GROUNDELEV, prediction_proba, Age..in.years., active_shed_licenses, Job.Number, House.Number, Street.Name, Borough.Name) %>%
  head(10) 

   CNSTRCT_YR GROUNDELEV prediction_proba Age..in.years. active_shed_licenses
1        1807          5       0.04328061      1.2136986                   23
2        1813         42       0.04356312      1.9972603                   21
3        1819          6       0.04537271      0.6547945                   23
4        1819          6       0.04540078      0.6520548                   23
5        1819          5       0.04210177      0.4520548                   23
6        1829         39       0.04408301      1.4684932                   21
7        1830         87       0.04069715      0.5452055                   10
8        1831         27       0.04806410      0.4219178                   24
9        1832         43       0.04427896      0.1178082                   18
10       1835          9       0.04179784      1.8986301                   15
     Job.Number House.Number      Street.Name Borough.Name
1  M00982341-I1          160     SOUTH STREET    Manhattan
2  M00865840-I1         

In [22]:
%%R 

df_with_predictions %>%
  filter(prediction == 1, active_shed == 1) %>%       
  arrange(-GROUNDELEV) %>%                      
  select(CNSTRCT_YR, GROUNDELEV, prediction_proba, Age..in.years., active_shed_licenses, Job.Number, House.Number, Street.Name, Borough.Name) %>%
  head(10) 

   CNSTRCT_YR GROUNDELEV prediction_proba Age..in.years. active_shed_licenses
1        1961        238       0.16084636      0.5369863                    4
2        1939        235       0.08424024      3.1917808                    8
3        1973        218       0.16202190      3.5123288                    6
4        1973        213       0.09017517      3.7698630                    6
5        1926        206       0.06727767      3.6904110                   12
6        1926        206       0.06839446      3.6876712                   12
7        1929        202       0.06361176      6.9013699                    7
8        1929        202       0.06361176      0.9506849                    7
9        1939        201       0.13029866      3.1917808                    8
10       1939        197       0.11084236      3.1917808                    8
     Job.Number House.Number       Street.Name Borough.Name
1  X01125006-I1         5800  ARLINGTON AVENUE        Bronx
2  M00680578-I1       

In [20]:
%%R 

df_with_predictions %>%
  filter(prediction == 1, active_shed == 1) %>%       
  arrange(-Age..in.years.) %>%                      
  select(CNSTRCT_YR, GROUNDELEV, prediction_proba, Age..in.years., active_shed_licenses, Job.Number, House.Number, Street.Name, Borough.Name) %>%
  head(10) 

   CNSTRCT_YR GROUNDELEV prediction_proba Age..in.years. active_shed_licenses
1        1914         26       0.04362753       13.37534                    8
2        1923         70       0.06167600       13.36712                    6
3        1931         18       0.03850009       13.17808                   15
4        1965         24       0.99730255       13.03288                   28
5        1915         76       0.08020376       11.64932                   29
6        1904         60       0.11178395       11.41370                   27
7        1913         35       0.28586435       10.40274                   17
8        1855         26       0.03763289       10.09589                   28
9        1973         22       0.11436440       10.05753                    8
10       1973         15       0.11803367       10.03562                    8
   Job.Number House.Number       Street.Name Borough.Name
1   420516873        41-43         28 STREET       Queens
2   220161278          900

In [23]:
%%R 

df_with_predictions %>%
  filter(prediction == 1, active_shed == 1) %>%       
  arrange(-active_shed_licenses) %>%                      
  select(CNSTRCT_YR, GROUNDELEV, prediction_proba, Age..in.years., active_shed_licenses, Job.Number, House.Number, Street.Name, Borough.Name) %>%
  head(10) 

   CNSTRCT_YR GROUNDELEV prediction_proba Age..in.years. active_shed_licenses
1        1915         31       0.08106536       8.054795                   46
2        1861         27       0.04043039       7.909589                   46
3        1948         19       0.18377090       4.394521                   46
4        1910         32       0.61968898       3.572603                   46
5        1860         31       0.04871951       3.594521                   46
6        1920         23       0.08458811       3.339726                   46
7        1915          9       0.05666409       3.241096                   46
8        1865         19       0.04396510       3.038356                   46
9        1948         19       0.18377090       3.041096                   46
10       1929         18       0.50076828       1.904110                   46
     Job.Number House.Number     Street.Name Borough.Name
1     140629862          349        BROADWAY    Manhattan
2     140638139           

In [24]:
%%R 

df_with_predictions %>%
  filter(prediction == 1, active_shed == 1) %>%       
  arrange(-prediction_proba) %>%                      
  select(CNSTRCT_YR, GROUNDELEV, prediction_proba, Age..in.years., active_shed_licenses, Job.Number, House.Number, Street.Name, Borough.Name) %>%
  head(10) 

   CNSTRCT_YR GROUNDELEV prediction_proba Age..in.years. active_shed_licenses
1        2022         40        0.9999998      9.3041096                    8
2        2022         40        0.9999998      5.1479452                    8
3        1932         15        0.9999926      2.5589041                   23
4        1930         31        0.9999912      0.9506849                   23
5        1913         33        0.9999875      1.2136986                   28
6        1987         75        0.9999564      0.3452055                   37
7        1970         58        0.9999488      0.2684932                   21
8        2003         35        0.9999267      0.6054795                   24
9        2003         35        0.9999267      0.9424658                   24
10       1931         68        0.9999232      1.0356164                   17
     Job.Number House.Number      Street.Name Borough.Name
1     321298387            9   DE KALB AVENUE     Brooklyn
2  B00326128-I1         

In [23]:
%%R 

conf_mat <- table(df_with_predictions$prediction, df_with_predictions$active_shed)
print(conf_mat)

# Extract values from confusion matrix
TP <- conf_mat["1", "1"]
FP <- conf_mat["1", "0"]
FN <- conf_mat["0", "1"]

# Precision and recall calculations
precision <- TP / (TP + FP) * 100
recall <- TP / (TP + FN) * 100

# Print them
cat("Precision:", round(precision, 3), "\n")
cat("Recall:", round(recall, 3), "\n")

   
          0       1
  0 1059363    5677
  1    5501    2006
Precision: 26.722 
Recall: 26.11 


In [24]:
%%R 
f1 <- 2 * (precision * recall) / (precision + recall)
cat("F1 Score:", round(f1, 3), "\n")

F1 Score: 26.412 
