In [1]:
import requests
import pandas as pd
# Notebook-wide pandas display settings:
#   - no cap on the number of columns shown
#   - at most 50 rows shown
#   - no truncation of cell text
#   - floats rendered with three decimals
pd.options.display.max_columns = None
pd.options.display.max_rows = 50
pd.options.display.max_colwidth = None
pd.options.display.float_format = lambda value: '%.3f' % value


from requests.exceptions import ReadTimeout, HTTPError
import time
from time import sleep
from sodapy import Socrata

In [2]:
def get_opendata(dataset_identifier, limit=1000000, max_retries=5, backoff=2, client=None):
    """Download every row of a dataset from data.cityofnewyork.us, page by page.

    Pages are requested with ``limit``/``offset`` until the service returns an
    empty page. A failed request is retried up to ``max_retries`` times in a
    row; the retry counter is reset after every successful page.

    Parameters
    ----------
    dataset_identifier : str
        Socrata dataset id, e.g. ``"3ub5-4ph8"``.
    limit : int, default 1000000
        Page size sent with each request.
    max_retries : int, default 5
        Number of consecutive retries allowed before giving up.
    backoff : int or float, default 2
        Base delay in seconds; the n-th consecutive retry waits
        ``backoff * 2 ** (n - 1)`` seconds.
    client : object, optional
        Pre-built client exposing ``get(dataset_identifier=, limit=, offset=)``.
        When omitted, a ``Socrata`` client is created here and closed before
        returning. A client passed in by the caller is left open.

    Returns
    -------
    list
        All rows retrieved. If the retry budget is exhausted, the rows
        collected so far are returned (best effort) and a message reporting
        the partial result is printed.
    """
    owns_client = client is None
    if owns_client:
        client = Socrata("data.cityofnewyork.us", app_token=None)

    # NOTE(review): Socrata's paging guidance recommends an explicit, stable
    # order (e.g. order=":id") when a dataset spans several pages - confirm
    # before relying on multi-page pulls.
    all_response = []
    offset = 0
    retry = 0

    try:
        while True:
            # Only the network call is guarded, so bookkeeping bugs are not
            # mistaken for transient request failures.
            try:
                response = client.get(
                    dataset_identifier=dataset_identifier,
                    limit=limit,
                    offset=offset,
                )
            except Exception as e:
                retry += 1
                # Decide whether to give up *before* sleeping: no point in
                # waiting when no further attempt will be made.
                if retry > max_retries:
                    print("Max retries reached. Failed")
                    print(
                        f"Returning partial result ({len(all_response)} rows); "
                        f"last error: {e}"
                    )
                    break
                print(f"Retry No. {retry} due to error: {e}")
                time.sleep(backoff * 2 ** (retry - 1))  # Exponential backoff
                continue

            if not response:
                # An empty page means the previous page was the last one.
                break

            all_response.extend(response)
            offset += limit
            retry = 0  # Reset retry after a successful request
            print("Working")
            print(f'Retrieved {len(all_response)} rows of data')
    finally:
        # Release the HTTP session only if it was opened by this call.
        if owns_client:
            client.close()

    return all_response


In [3]:
# Dataset identifiers passed to get_opendata.
nycha_residential_address, nycha_development_data_book = "3ub5-4ph8", "evjd-dqpz"

# Fetch the address listing first, then the development data book.
nycha_residential_address_json = get_opendata(
    dataset_identifier=nycha_residential_address
)
nycha_development_data_book_json = get_opendata(
    dataset_identifier=nycha_development_data_book
)



Working
Retrieved 3054 rows of data




Working
Retrieved 346 rows of data


In [4]:
import pandas as pd
# Tighten the display limits for the exploration below; these replace the
# unlimited column-width / column-count settings applied in the first cell.
pd.options.display.max_colwidth = 200
pd.options.display.max_columns = 20

In [23]:
# df1: residential address rows, df2: development data book rows.
df1, df2 = (
    pd.DataFrame(data=nycha_residential_address_json),
    pd.DataFrame(data=nycha_development_data_book_json),
)

In [24]:
# Summarise df1 (built from nycha_residential_address_json): columns, non-null counts and dtypes.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3054 entries, 0 to 3053
Data columns (total 26 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   development                     3054 non-null   object
 1   tds                             3054 non-null   object
 2   building                        3054 non-null   object
 3   stairhall_                      3054 non-null   object
 4   borough                         3054 non-null   object
 5   house                           3054 non-null   object
 6   street                          3054 non-null   object
 7   address                         3054 non-null   object
 8   city                            3054 non-null   object
 9   state                           3054 non-null   object
 10  zip_code                        3054 non-null   object
 11  bin                             3054 non-null   object
 12  block                           3054 non-null   

In [25]:
# Summarise df2 (built from nycha_development_data_book_json): columns, non-null counts and dtypes.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 346 entries, 0 to 345
Data columns (total 52 columns):
 #   Column                                     Non-Null Count  Dtype 
---  ------                                     --------------  ----- 
 0   data_as_of                                 346 non-null    object
 1   development                                346 non-null    object
 2   hud_amp_                                   343 non-null    object
 3   tds_                                       344 non-null    object
 4   consolidated_tds_                          340 non-null    object
 5   development_edp_                           339 non-null    object
 6   operating_edp_                             339 non-null    object
 7   hud__                                      340 non-null    object
 8   program                                    346 non-null    object
 9   method                                     343 non-null    object
 10  type                                  

In [26]:
# Number of distinct development names in each frame (df1 first, then df2).
print(
    df1["development"].nunique(),
    df2["development"].nunique(),
    sep="\n",
)

242
346


In [27]:
# Boolean mask over df1's columns: True where the lower-cased column name
# also occurs among df2's lower-cased column names.
columns_mask = (
    df1.columns
    .str.lower()
    .isin(df2.columns.str.lower())
)

# Column names the two frames have in common.
df1.columns[columns_mask]

Index(['development', 'borough', 'us_congressional_district'], dtype='object')

In [29]:
# Strip commas from total_population, parse the values as numbers and
# total them as an integer.
(
    df2["total_population"]
    .str.replace(',', '')
    .pipe(pd.to_numeric)
    .sum()
    .astype(int)
)

365353

In [30]:
# Keep only the join key and the population column; df2 is rebound to the
# narrowed frame from here on.
df2 = df2.loc[:, ['development', 'total_population']]
df2.shape

(346, 2)

In [31]:
# Left-join the df1 columns onto df2 by development name, then keep only
# the first joined row for each development.
merged = (
    pd.merge(df2, df1, how='left', on='development')
    .drop_duplicates(subset='development')
)
merged.shape

(346, 27)

In [33]:
# Distinct development names remaining after the merge and de-duplication.
merged["development"].nunique()

346

In [32]:
# Summarise the merged frame: columns, non-null counts and dtypes.
merged.info()

<class 'pandas.core.frame.DataFrame'>
Index: 346 entries, 0 to 3157
Data columns (total 27 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   development                     346 non-null    object
 1   total_population                338 non-null    object
 2   tds                             242 non-null    object
 3   building                        242 non-null    object
 4   stairhall_                      242 non-null    object
 5   borough                         242 non-null    object
 6   house                           242 non-null    object
 7   street                          242 non-null    object
 8   address                         242 non-null    object
 9   city                            242 non-null    object
 10  state                           242 non-null    object
 11  zip_code                        242 non-null    object
 12  bin                             242 non-null    object

In [37]:
# Rows of the merged frame with no latitude value.
merged.loc[merged['latitude'].isna()]

Unnamed: 0,development,total_population,tds,building,stairhall_,borough,house,street,address,city,...,neighborhood_tabulation_area,neighborhood_tabulation_area_1,community_district,city_council_district,state_assembly_district,state_senate_district,us_congressional_district,latitude,longitude,privately_managed
183,BOSTON ROAD PLAZA,255,,,,,,,,,...,,,,,,,,,,
184,BOSTON SECOR,1182,,,,,,,,,...,,,,,,,,,,
708,DOUGLASS,3898,,,,,,,,,...,,,,,,,,,,
855,EASTCHESTER GARDENS,1852,,,,,,,,,...,,,,,,,,,,
955,LA PRECIOSA,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3153,WASHINGTON HEIGHTS REHAB PHASE IV (D),60,,,,,,,,,...,,,,,,,,,,
3154,WEEKSVILLE GARDENS,697,,,,,,,,,...,,,,,,,,,,
3155,WILLIAMS PLAZA,1290,,,,,,,,,...,,,,,,,,,,
3156,WILLIAMSBURG,2873,,,,,,,,,...,,,,,,,,,,


In [46]:
# df1 rows whose lower-cased development name contains 'east'.
df1.loc[lambda frame: frame["development"].str.lower().str.contains('east')]

Unnamed: 0,development,tds,building,stairhall_,borough,house,street,address,city,state,...,neighborhood_tabulation_area,neighborhood_tabulation_area_1,community_district,city_council_district,state_assembly_district,state_senate_district,us_congressional_district,latitude,longitude,privately_managed
0,1010 EAST 178TH STREET,180,1,013,BRONX,1010,EAST 178TH STREET,1010 EAST 178TH STREET,BRONX,NY,...,BX0601,West Farms,6,15,87,32,15,40.840795,-73.880298,
367,BRYANT AVENUE-EAST 174TH STREET,235,1,015,BRONX,1705,BRYANT AVENUE,1705 BRYANT AVENUE,BRONX,NY,...,BX0303,Crotona Park East,3,17,79,32,14,40.835934,-73.885745,YES
646,COLLEGE AVENUE-EAST 165TH STREET,236,1,002,BRONX,1020,COLLEGE AVENUE,1020 COLLEGE AVENUE,BRONX,NY,...,BX0401,Concourse-Concourse Village,4,16,77,32,15,40.829265,-73.915109,
736,EAST 152ND STREET-COURTLANDT AVENUE,237,2,011,BRONX,370,EAST 153RD STREET,370 EAST 153RD STREET,BRONX,NY,...,BX0102,Melrose,1,17,84,29,15,40.818853,-73.917644,
737,EAST 152ND STREET-COURTLANDT AVENUE,237,1,010,BRONX,372,EAST 152ND STREET,372 EAST 152ND STREET,BRONX,NY,...,BX0102,Melrose,1,17,84,29,15,40.818140,-73.917880,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2212,RED HOOK EAST,4,7,023,BROOKLYN,774,HENRY STREET,774 HENRY STREET,BROOKLYN,NY,...,BK0601,Carroll Gardens-Cobble Hill-Gowanus-Red Hook,6,38,51,26,10,40.675198,-74.004683,
2213,RED HOOK EAST,4,9,029,BROOKLYN,123,LORRAINE STREET,123 LORRAINE STREET,BROOKLYN,NY,...,BK0601,Carroll Gardens-Cobble Hill-Gowanus-Red Hook,6,38,51,26,10,40.675198,-74.004683,
2214,RED HOOK EAST,4,5,017,BROOKLYN,748,HENRY STREET,748 HENRY STREET,BROOKLYN,NY,...,BK0601,Carroll Gardens-Cobble Hill-Gowanus-Red Hook,6,38,51,26,10,40.675198,-74.004683,
2636,TELLER AVENUE-EAST 166TH STREET,223,1,001,BRONX,1100,TELLER AVENUE,1100 TELLER AVENUE,BRONX,NY,...,BX0401,Concourse-Concourse Village,4,16,77,32,15,40.830031,-73.912611,
