In [None]:
# Install Request 
pip install request

In [None]:
import pandas as pd
import requests

# Key sent as the `key=` query parameter on every Census API request below.
# Replace the placeholder with a real key before running.
API_KEY = "<API Key>"

# ZIP codes (as 5-character strings) whose rows are kept from the Census
# responses. Stored as a set because the download step does membership tests.
nyc_zips = {
    '10001', '10002', '10003', '10004', '10005', '10006', '10007', '10009', '10010',
    '10011', '10012', '10013', '10014', '10016', '10017', '10018', '10019', '10021', '10022', '10023',
    '10024', '10025', '10026', '10027', '10028', '10029', '10030', '10031', '10032', '10033', '10034',
    '10035', '10036', '10037', '10038', '10039', '10040', '10044', '10065', '10069', '10075', '10128',
    '10301', '10302', '10303', '10304', '10305', '10306', '10307', '10308', '10309', '10310', '10312',
    '10314', '10451', '10452', '10453', '10454', '10455', '10456', '10457', '10458', '10459', '10460',
    '10461', '10462', '10463', '10464', '10465', '10466', '10467', '10468', '10469', '10470', '10471',
    '10472', '10473', '10474', '10475', '11004', '11005', '11101', '11102', '11103', '11104', '11105',
    '11106', '11354', '11355', '11356', '11357', '11358', '11360', '11361', '11362', '11363', '11364',
    '11365', '11366', '11367', '11368', '11369', '11370', '11372', '11373', '11374', '11375', '11377',
    '11378', '11379', '11385', '11411', '11412', '11413', '11414', '11415', '11416', '11417', '11418',
    '11419', '11420', '11421', '11422', '11423', '11426', '11427', '11428', '11429', '11432', '11433',
    '11434', '11435', '11436', '11691', '11692', '11693', '11694', '11697', '11201', '11203', '11204',
    '11205', '11206', '11207', '11208', '11209', '11210', '11211', '11212', '11213', '11214', '11215',
    '11216', '11217', '11218', '11219', '11220', '11221', '11222', '11223', '11224', '11225', '11226',
    '11228', '11229', '11230', '11231', '11232', '11233', '11234', '11235', '11236', '11237', '11238',
    '11239',
}

# Years substituted into the `/data/{year}/acs/acs5` endpoint path.
years = [2019, 2020, 2021, 2022]

# Census variable code -> column label used in the final table.
variables = {
    "B01001_001E": "Total Population",
    "B01001_002E": "Male Population",
    "B01001_026E": "Female Population",
}

# Download population-by-sex estimates for every listed ZIP code and year.
#
# Each data row returned by the API becomes one dict in `records`, keyed by
# the API's own header names plus a "Year" entry. The download is
# best-effort: a failed or malformed response is reported and skipped so that
# one bad request does not abort the rest.

REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is given
ZCTA_COLUMN = "zip code tabulation area"  # geography column name in the API header row
# Everything a failed request or an unexpectedly shaped payload can raise.
FETCH_ERRORS = (requests.RequestException, ValueError, LookupError, TypeError)

records = []
var_string = ",".join(variables)  # identical for every request, so build it once


def _redact(error):
    """Return ``str(error)`` with the API key masked.

    requests error messages can echo the full request URL, which contains
    ``key=...``; masking keeps the key out of saved notebook output.
    """
    text = str(error)
    return text.replace(API_KEY, "<redacted>") if API_KEY else text


def _fetch_rows(session, year, zcta):
    """Query the ACS 5-year endpoint for one year and return ``(rows, status)``.

    ``zcta`` is a single ZCTA code, or ``"*"`` to request all of them.
    ``rows`` is a list with one dict per data row (header name -> value, plus
    ``"Year"``) when the HTTP status is 200, and ``None`` otherwise.
    ``status`` is the HTTP status code. Network and parsing errors propagate
    to the caller.
    """
    url = (
        f"https://api.census.gov/data/{year}/acs/acs5"
        f"?get=NAME,{var_string}&for=zip%20code%20tabulation%20area:{zcta}&key={API_KEY}"
    )
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        return None, response.status_code
    payload = response.json()
    header, data_rows = payload[0], payload[1:]  # first row holds the column names
    return [dict(zip(header, row), Year=year) for row in data_rows], response.status_code


# One Session for the whole run so the many requests reuse a connection.
with requests.Session() as session:
    for year in years:
        print(f"\n📦 Fetching data for {year}")

        if year == 2019:
            # Query all ZCTAs once for 2019 and keep only the listed ZIP codes.
            # NOTE(review): later years may accept the same wildcard query,
            # which would replace hundreds of requests -- confirm before changing.
            try:
                rows, status = _fetch_rows(session, year, "*")
                if rows is None:
                    print(f"❌ {year} wildcard query failed: {status}")
                else:
                    # Look the ZCTA up by header name rather than by position.
                    records.extend(
                        [row for row in rows if row[ZCTA_COLUMN] in nyc_zips]
                    )
            except FETCH_ERRORS as e:
                print(f"❌ Error in {year} wildcard query: {_redact(e)}")
            continue

        # ZIP-by-ZIP for 2020–2022; sorted so progress/error output is reproducible.
        for zcta in sorted(nyc_zips):
            try:
                rows, status = _fetch_rows(session, year, zcta)
            except FETCH_ERRORS as e:
                print(f"❌ Error {year} ZIP {zcta}: {_redact(e)}")
                continue
            if rows is None:
                print(f"❌ {year} ZIP {zcta}: {status}")
            elif not rows:
                print(f"❌ Error {year} ZIP {zcta}: response contained no data row")
            else:
                records.append(rows[0])  # a single-ZCTA query yields one data row

# Build the population table from the downloaded records, attach it to the
# existing income dataset, and write the combined file.

# Derive the column labels from `variables` so the rename and the numeric
# conversion cannot drift apart from the list of requested variables.
population_columns = list(variables.values())
output_columns = ["ZIP", "Year", *population_columns]

# Build DataFrame
df = pd.DataFrame(records)
df = df.rename(columns={"zip code tabulation area": "ZIP", **variables})

# reindex (rather than df[[...]]) so that an empty download -- every request
# failed -- still yields the expected columns instead of raising KeyError.
df = df.reindex(columns=output_columns)

# Convert numerics (the API returns every value as a string)
for col in population_columns:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Merge with existing income dataset. ZIP is read as text so that a blank
# cell cannot turn the column into floats ("10001.0"), which would never match.
df_income = pd.read_csv("nyc_zip_income_2019_2022_merged.csv", dtype={"ZIP": str})

df["ZIP"] = df["ZIP"].astype(str)
df_income["ZIP"] = df_income["ZIP"].astype(str).str.strip()

# Failed downloads are only printed during the fetch; surface their effect here.
population_keys = set(zip(df["ZIP"], df["Year"]))
unmatched = sum(
    key not in population_keys
    for key in zip(df_income["ZIP"], df_income["Year"])
)
if unmatched:
    print(f"\n⚠️ {unmatched} income rows have no matching population record")

# Left join: every income row is kept; population columns are NaN where missing.
df_final = pd.merge(df_income, df, on=["ZIP", "Year"], how="left")

# Save final file
df_final.to_csv("nyc_zip_income_pop_gender_2019_2022.csv", index=False)
print("\n✅ Final saved: nyc_zip_income_pop_gender_2019_2022.csv")