In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
import requests

In [3]:
# Define the base URL for the Census API
BASE_URL = "https://api.census.gov/data/2023/acs/acs5"

# Define the variables to request
variables = [
    "NAME",
    "B01003_001E",  # Total Population
    "B03002_003E",  # White alone, Non-Hispanic
    "B03002_004E",  # Black or African American alone, Non-Hispanic
    "B03002_012E",  # Hispanic or Latino (of any race)
    "B03002_006E",  # Asian alone, Non-Hispanic
    "B19013_001E"   # Median Household Income
]

# Define the API parameters
params = {
    "get": ",".join(variables),
    "for": "tract:*",
    "in": "state:06",  # California FIPS code is 06
    "key": "acd34ae166e6b374193071dac5d9090c7825cd3f"  # Replace with your Census API Key
}

# Make the API request
response = requests.get(BASE_URL, params=params)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    # Convert response to DataFrame
    df = pd.DataFrame(data[1:], columns=data[0])
    # Rename columns for readability
    df.rename(columns={
        "NAME": "Name",
        "B01003_001E": "Total_Population",
        "B03002_003E": "White_NonHispanic",
        "B03002_004E": "Black_NonHispanic",
        "B03002_012E": "Hispanic",
        "B03002_006E": "Asian_NonHispanic",
        "B19013_001E": "Median_Household_Income",
        "state": "State",
        "county": "County",
        "tract": "Tract"
    }, inplace=True)

# Display the first few rows
df.head()

Unnamed: 0,Name,Total_Population,White_NonHispanic,Black_NonHispanic,Hispanic,Asian_NonHispanic,Median_Household_Income,State,County,Tract
0,Census Tract 4001; Alameda County; California,3094,2107,137,200,462,250001,6,1,400100
1,Census Tract 4002; Alameda County; California,2093,1408,43,196,256,225880,6,1,400200
2,Census Tract 4003; Alameda County; California,5727,3365,524,497,609,157731,6,1,400300
3,Census Tract 4004; Alameda County; California,4395,2645,433,604,422,159612,6,1,400400
4,Census Tract 4005; Alameda County; California,3822,1696,911,557,306,96250,6,1,400500


In [4]:
# Extract text before the first semicolon
df["Census_Tract_Clean"] = df["Name"].str.split(";").str[0].str.strip()

# Drop the original NAME column if no longer needed
df.drop(columns=["Name"], inplace=True)

# Display cleaned data
df

Unnamed: 0,Total_Population,White_NonHispanic,Black_NonHispanic,Hispanic,Asian_NonHispanic,Median_Household_Income,State,County,Tract,Census_Tract_Clean
0,3094,2107,137,200,462,250001,06,001,400100,Census Tract 4001
1,2093,1408,43,196,256,225880,06,001,400200,Census Tract 4002
2,5727,3365,524,497,609,157731,06,001,400300,Census Tract 4003
3,4395,2645,433,604,422,159612,06,001,400400,Census Tract 4004
4,3822,1696,911,557,306,96250,06,001,400500,Census Tract 4005
...,...,...,...,...,...,...,...,...,...,...
9124,2072,1328,137,284,99,57356,06,115,040902,Census Tract 409.02
9125,3451,2469,45,739,0,67009,06,115,041001,Census Tract 410.01
9126,3365,2560,31,295,58,96414,06,115,041002,Census Tract 410.02
9127,2156,1749,39,237,0,38828,06,115,041101,Census Tract 411.01


In [5]:
# Ensure numeric data types
df[["Total_Population", "White_NonHispanic", "Black_NonHispanic", "Asian_NonHispanic", "Hispanic"]] = \
    df[["Total_Population", "White_NonHispanic", "Black_NonHispanic", "Asian_NonHispanic", "Hispanic"]].apply(pd.to_numeric, errors="coerce")

# Compute People of Color (POC) Share
df["POC_Share"] = 1 - (df["White_NonHispanic"] / df["Total_Population"])
df["POC_Share"] = df["POC_Share"].fillna(0)  # Replace NaNs with 0

# Assign maj_poc_flag: 1 if majority POC, else 0
df["maj_poc_flag"] = (df["POC_Share"] > 0.5).astype(int)

# Compute majority group flags
df["majority_white_flag"] = (df["White_NonHispanic"] / df["Total_Population"] > 0.5).astype(int)
df["majority_black_flag"] = (df["Black_NonHispanic"] / df["Total_Population"] > 0.5).astype(int)
df["majority_asian_flag"] = (df["Asian_NonHispanic"] / df["Total_Population"] > 0.5).astype(int)
df["majority_hispanic_flag"] = (df["Hispanic"] / df["Total_Population"] > 0.5).astype(int)

# Display updated DataFrame
df.head()

Unnamed: 0,Total_Population,White_NonHispanic,Black_NonHispanic,Hispanic,Asian_NonHispanic,Median_Household_Income,State,County,Tract,Census_Tract_Clean,POC_Share,maj_poc_flag,majority_white_flag,majority_black_flag,majority_asian_flag,majority_hispanic_flag
0,3094,2107,137,200,462,250001,6,1,400100,Census Tract 4001,0.319005,0,1,0,0,0
1,2093,1408,43,196,256,225880,6,1,400200,Census Tract 4002,0.327281,0,1,0,0,0
2,5727,3365,524,497,609,157731,6,1,400300,Census Tract 4003,0.412432,0,1,0,0,0
3,4395,2645,433,604,422,159612,6,1,400400,Census Tract 4004,0.39818,0,1,0,0,0
4,3822,1696,911,557,306,96250,6,1,400500,Census Tract 4005,0.556253,1,0,0,0,0


In [6]:
# Ensure 'Median_Household_Income' is numeric
df["Median_Household_Income"] = pd.to_numeric(df["Median_Household_Income"], errors="coerce")

# Step 1: Calculate the median income per county
county_median_income = df.groupby("County")["Median_Household_Income"].median().reset_index()
county_median_income.rename(columns={"Median_Household_Income": "County_Median_Income"}, inplace=True)

# Step 2: Merge county median income back into the main DataFrame
df = df.merge(county_median_income, on="County", how="left")

# Step 3: Create the below-median-income flag
df["bel_med_inc_flag"] = (df["Median_Household_Income"] < df["County_Median_Income"]).astype(int)

# Display the updated DataFrame
df.head()

Unnamed: 0,Total_Population,White_NonHispanic,Black_NonHispanic,Hispanic,Asian_NonHispanic,Median_Household_Income,State,County,Tract,Census_Tract_Clean,POC_Share,maj_poc_flag,majority_white_flag,majority_black_flag,majority_asian_flag,majority_hispanic_flag,County_Median_Income,bel_med_inc_flag
0,3094,2107,137,200,462,250001,6,1,400100,Census Tract 4001,0.319005,0,1,0,0,0,123107.0,0
1,2093,1408,43,196,256,225880,6,1,400200,Census Tract 4002,0.327281,0,1,0,0,0,123107.0,0
2,5727,3365,524,497,609,157731,6,1,400300,Census Tract 4003,0.412432,0,1,0,0,0,123107.0,0
3,4395,2645,433,604,422,159612,6,1,400400,Census Tract 4004,0.39818,0,1,0,0,0,123107.0,0
4,3822,1696,911,557,306,96250,6,1,400500,Census Tract 4005,0.556253,1,0,0,0,0,123107.0,1


## Redlining 2023

In [9]:
hcd_2023_redlined = pd.read_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/data/hcd_2023_redlined.csv')
hcd_2023_redlined

  hcd_2023_redlined = pd.read_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/data/hcd_2023_redlined.csv')


Unnamed: 0,Join_Count,TARGET_FID,Join_Count_1,TARGET_FID_1,JURIS_NAME,CNTY_NAME,YEAR,PRIOR_APN,APN,STREET_ADDRESS,...,city_1,state_1,city_survey,category,grade,label,residential,commercial,industrial,fill
0,1,1,0,1,ADELANTO,San Bernardino,2023,,310310314,10352 San Marcos Ct,...,,,,,,,,,,
1,1,2,0,2,ADELANTO,San Bernardino,2023,,310310315,10368 San Marcos Ct,...,,,,,,,,,,
2,1,3,0,3,ADELANTO,San Bernardino,2023,,310310320,18786 Hampton Ln,...,,,,,,,,,,
3,1,4,0,4,ADELANTO,San Bernardino,2023,,310310320,18792 Hampton Ln,...,,,,,,,,,,
4,1,5,0,5,ADELANTO,San Bernardino,2023,,310310329,10353 San Marcos Ct,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123755,1,123756,0,123756,YUCCA VALLEY,San Bernardino,2023,,0598-601-10,58855 MEREDITH CT,...,,,,,,,,,,
123756,1,123757,0,123757,YUCCA VALLEY,San Bernardino,2023,,0598-601-12,58871 MEREDITH CT,...,,,,,,,,,,
123757,1,123758,0,123758,YUCCA VALLEY,San Bernardino,2023,,0601-021-43,57750 PAXTON RD,...,,,,,,,,,,
123758,1,123759,0,123759,YUCCA VALLEY,San Bernardino,2023,,0601-081-66,59045 WILCOX LN,...,,,,,,,,,,


In [10]:
# Ensure NO_BUILDING_PERMITS is numeric to avoid errors
hcd_2023_redlined["NO_BUILDING_PERMITS"] = pd.to_numeric(hcd_2023_redlined["NO_BUILDING_PERMITS"], errors="coerce")

# Group by NAMELSAD and sum NO_BUILDING_PERMITS
grouped_hcd_2023 = hcd_2023_redlined.groupby("NAMELSAD", as_index=False)["NO_BUILDING_PERMITS"].sum()

# Display the grouped dataset
grouped_hcd_2023.head()

Unnamed: 0,NAMELSAD,NO_BUILDING_PERMITS
0,Census Tract 1,60
1,Census Tract 1.01,441
2,Census Tract 1.02,176
3,Census Tract 1.03,24
4,Census Tract 1.04,154


In [11]:
# Ensure both key columns are strings
#grouped_hcd_2023["NAMELSAD"] = grouped_hcd_2023["NAMELSAD"].astype(str).str.strip()
#df["Census_Tract_Clean"] = df["Census_Tract_Clean"].astype(str).str.strip()

# Perform the merge on NAMELSAD and Census_Tract_Clean
race_income_tract = df.merge(grouped_hcd_2023, left_on="Census_Tract_Clean", right_on="NAMELSAD", how="inner")

# Display the merged DataFrame
race_income_tract.head()
#race_income_dash_df.to_csv('race_income_dash_df.csv')

Unnamed: 0,Total_Population,White_NonHispanic,Black_NonHispanic,Hispanic,Asian_NonHispanic,Median_Household_Income,State,County,Tract,Census_Tract_Clean,POC_Share,maj_poc_flag,majority_white_flag,majority_black_flag,majority_asian_flag,majority_hispanic_flag,County_Median_Income,bel_med_inc_flag,NAMELSAD,NO_BUILDING_PERMITS
0,3094,2107,137,200,462,250001,6,1,400100,Census Tract 4001,0.319005,0,1,0,0,0,123107.0,0,Census Tract 4001,0
1,2093,1408,43,196,256,225880,6,1,400200,Census Tract 4002,0.327281,0,1,0,0,0,123107.0,0,Census Tract 4002,4
2,5727,3365,524,497,609,157731,6,1,400300,Census Tract 4003,0.412432,0,1,0,0,0,123107.0,0,Census Tract 4003,3
3,4395,2645,433,604,422,159612,6,1,400400,Census Tract 4004,0.39818,0,1,0,0,0,123107.0,0,Census Tract 4004,10
4,3822,1696,911,557,306,96250,6,1,400500,Census Tract 4005,0.556253,1,0,0,0,0,123107.0,1,Census Tract 4005,10


In [12]:
# Ensure NO_BUILDING_PERMITS is numeric
race_income_tract["NO_BUILDING_PERMITS"] = pd.to_numeric(race_income_tract["NO_BUILDING_PERMITS"], errors="coerce")

# Step 1: Calculate ADUs in majority POC tracts per county
majority_poc_tracts = race_income_tract[race_income_tract["maj_poc_flag"] == 1]
adu_majority_poc = majority_poc_tracts.groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
adu_majority_poc.rename(columns={"NO_BUILDING_PERMITS": "ADUs_in_Majority_POC_Tracts"}, inplace=True)

# Step 2: Calculate total ADUs per county
total_adus = race_income_tract.groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
total_adus.rename(columns={"NO_BUILDING_PERMITS": "Total_ADUs_in_County"}, inplace=True)

# Step 3: Merge both datasets
adu_share_df = total_adus.merge(adu_majority_poc, on="County", how="left")

# Step 4: Calculate the share of ADUs in majority POC tracts
adu_share_df["Share_ADUs_in_Majority_POC_Tracts"] = (
    adu_share_df["ADUs_in_Majority_POC_Tracts"] / adu_share_df["Total_ADUs_in_County"]
).fillna(0).round(3) * 100  # Convert to percentage

# Display the final DataFrame
adu_share_df.head()

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Majority_POC_Tracts,Share_ADUs_in_Majority_POC_Tracts
0,1,6220,5608.0,90.2
1,3,54,,0.0
2,5,1640,90.0,5.5
3,7,5185,567.0,10.9
4,9,1060,,0.0


In [13]:
# Step 1: Count the number of majority POC census tracts per county
maj_poc_tract_count = race_income_tract[race_income_tract["maj_poc_flag"] == 1].groupby("County", as_index=False)["Census_Tract_Clean"].nunique()

# Rename column for clarity
maj_poc_tract_count.rename(columns={"Census_Tract_Clean": "Majority_POC_Tracts_Count"}, inplace=True)

# Step 2: Merge with the existing ADU share DataFrame
adu_share_df = adu_share_df.merge(maj_poc_tract_count, on="County", how="left")

# Fill NaN values with 0 (if no majority POC tracts in a county)
adu_share_df["Majority_POC_Tracts_Count"] = adu_share_df["Majority_POC_Tracts_Count"].fillna(0).astype(int)

# Display the updated DataFrame
adu_share_df.head()

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Majority_POC_Tracts,Share_ADUs_in_Majority_POC_Tracts,Majority_POC_Tracts_Count
0,1,6220,5608.0,90.2,278
1,3,54,,0.0,0
2,5,1640,90.0,5.5,1
3,7,5185,567.0,10.9,7
4,9,1060,,0.0,0


In [14]:
# Step 1: Count the total number of census tracts per county
total_tracts_per_county = race_income_tract.groupby("County", as_index=False)["Census_Tract_Clean"].nunique()
total_tracts_per_county.rename(columns={"Census_Tract_Clean": "Total_Tracts_in_County"}, inplace=True)

# Step 2: Merge total tracts with the existing DataFrame
adu_share_df = adu_share_df.merge(total_tracts_per_county, on="County", how="left")

# Step 3: Calculate the share of majority POC tracts in each county
adu_share_df["Share_Majority_POC_Tracts"] = (
    adu_share_df["Majority_POC_Tracts_Count"] / adu_share_df["Total_Tracts_in_County"]
).fillna(0).round(3) * 100  # Convert to percentage

# Display the updated DataFrame
adu_share_df.head()

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Majority_POC_Tracts,Share_ADUs_in_Majority_POC_Tracts,Majority_POC_Tracts_Count,Total_Tracts_in_County,Share_Majority_POC_Tracts
0,1,6220,5608.0,90.2,278,343,81.0
1,3,54,,0.0,0,1,0.0
2,5,1640,90.0,5.5,1,10,10.0
3,7,5185,567.0,10.9,7,54,13.0
4,9,1060,,0.0,0,14,0.0


In [15]:
adu_share_df["Share_ADUs_in_Majority_POC_Tracts"] = adu_share_df["Share_ADUs_in_Majority_POC_Tracts"].fillna(0)
adu_share_df.head()

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Majority_POC_Tracts,Share_ADUs_in_Majority_POC_Tracts,Majority_POC_Tracts_Count,Total_Tracts_in_County,Share_Majority_POC_Tracts
0,1,6220,5608.0,90.2,278,343,81.0
1,3,54,,0.0,0,1,0.0
2,5,1640,90.0,5.5,1,10,10.0
3,7,5185,567.0,10.9,7,54,13.0
4,9,1060,,0.0,0,14,0.0


In [16]:
adu_share_df = adu_share_df.fillna(0)

# Ensure all numeric columns are properly converted
cols_to_convert = ["ADUs_in_Majority_POC_Tracts", "Share_ADUs_in_Majority_POC_Tracts"]
adu_share_df[cols_to_convert] = adu_share_df[cols_to_convert].apply(pd.to_numeric, errors="coerce").fillna(0)


In [17]:
# Dictionary mapping California county FIPS codes to county names
county_fips_to_name = {
    "001": "Alameda County",
    "003": "Alpine County",
    "005": "Amador County",
    "007": "Butte County",
    "009": "Calaveras County",
    "011": "Colusa County",
    "013": "Contra Costa County",
    "015": "Del Norte County",
    "017": "El Dorado County",
    "019": "Fresno County",
    "021": "Glenn County",
    "023": "Humboldt County",
    "025": "Imperial County",
    "027": "Inyo County",
    "029": "Kern County",
    "031": "Kings County",
    "033": "Lake County",
    "035": "Lassen County",
    "037": "Los Angeles County",
    "039": "Madera County",
    "041": "Marin County",
    "043": "Mariposa County",
    "045": "Mendocino County",
    "047": "Merced County",
    "049": "Modoc County",
    "051": "Mono County",
    "053": "Monterey County",
    "055": "Napa County",
    "057": "Nevada County",
    "059": "Orange County",
    "061": "Placer County",
    "063": "Plumas County",
    "065": "Riverside County",
    "067": "Sacramento County",
    "069": "San Benito County",
    "071": "San Bernardino County",
    "073": "San Diego County",
    "075": "San Francisco County",
    "077": "San Joaquin County",
    "079": "San Luis Obispo County",
    "081": "San Mateo County",
    "083": "Santa Barbara County",
    "085": "Santa Clara County",
    "087": "Santa Cruz County",
    "089": "Shasta County",
    "091": "Sierra County",
    "093": "Siskiyou County",
    "095": "Solano County",
    "097": "Sonoma County",
    "099": "Stanislaus County",
    "101": "Sutter County",
    "103": "Tehama County",
    "105": "Trinity County",
    "107": "Tulare County",
    "109": "Tuolumne County",
    "111": "Ventura County",
    "113": "Yolo County",
    "115": "Yuba County"
}

# Apply the mapping to your DataFrame
adu_share_df["County"] = adu_share_df["County"].map(county_fips_to_name)

# Display the updated DataFrame
adu_share_df

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Majority_POC_Tracts,Share_ADUs_in_Majority_POC_Tracts,Majority_POC_Tracts_Count,Total_Tracts_in_County,Share_Majority_POC_Tracts
0,Alameda County,6220,5608.0,90.2,278,343,81.0
1,Alpine County,54,0.0,0.0,0,1,0.0
2,Amador County,1640,90.0,5.5,1,10,10.0
3,Butte County,5185,567.0,10.9,7,54,13.0
4,Calaveras County,1060,0.0,0.0,0,14,0.0
5,Colusa County,1109,1109.0,100.0,6,6,100.0
6,Contra Costa County,3346,2310.0,69.0,127,213,59.6
7,Del Norte County,1329,10.0,0.8,1,8,12.5
8,El Dorado County,806,28.0,3.5,3,54,5.6
9,Fresno County,12128,11084.0,91.4,176,211,83.4


In [22]:
adu_share_df["State"] = "California"
adu_share_df

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Majority_POC_Tracts,Share_ADUs_in_Majority_POC_Tracts,Majority_POC_Tracts_Count,Total_Tracts_in_County,Share_Majority_POC_Tracts,State
0,Alameda County,6220,5608.0,90.2,278,343,81.0,California
1,Alpine County,54,0.0,0.0,0,1,0.0,California
2,Amador County,1640,90.0,5.5,1,10,10.0,California
3,Butte County,5185,567.0,10.9,7,54,13.0,California
4,Calaveras County,1060,0.0,0.0,0,14,0.0,California
5,Colusa County,1109,1109.0,100.0,6,6,100.0,California
6,Contra Costa County,3346,2310.0,69.0,127,213,59.6,California
7,Del Norte County,1329,10.0,0.8,1,8,12.5,California
8,El Dorado County,806,28.0,3.5,3,54,5.6,California
9,Fresno County,12128,11084.0,91.4,176,211,83.4,California


In [35]:
adu_share_df.to_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/outputs/race_dash.csv')

# California and Bay Area Breakdowns

In [37]:
# Convert County column to integer for proper filtering
race_income_tract["County"] = pd.to_numeric(race_income_tract["County"], errors='coerce').fillna(0).astype(int)

# Ensure numeric conversion of race columns
numeric_columns = ["Asian_NonHispanic", "Black_NonHispanic", "White_NonHispanic", "Hispanic", "Total_Population", "NO_BUILDING_PERMITS"]
race_income_tract[numeric_columns] = race_income_tract[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

# Step 1: Create Majority Racial Group Flags
for race in ["asian", "black", "white", "hispanic"]:
    race_col = f"{race.capitalize()}_NonHispanic" if race != "hispanic" else "Hispanic"
    flag_col = f"majority_{race}_flag"
    race_income_tract[flag_col] = ((race_income_tract[race_col] / race_income_tract["Total_Population"]) > 0.5).astype(int)

# Step 1.1: Create a Flag for Tracts with No Clear Majority
race_income_tract["no_majority_flag"] = (race_income_tract[["majority_asian_flag", "majority_black_flag", "majority_white_flag", "majority_hispanic_flag"]].sum(axis=1) == 0).astype(int)

# Step 2: Compute ADU breakdowns for majority racial group tracts
adu_majority = {}
for race in ["asian", "black", "white", "hispanic"]:
    col_name = f"majority_{race}_flag"
    adu_majority[race] = race_income_tract[race_income_tract[col_name] == 1].groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
    adu_majority[race].rename(columns={"NO_BUILDING_PERMITS": f"ADUs_in_Majority_{race.capitalize()}_Tracts"}, inplace=True)

# Compute ADU breakdown for tracts with no clear majority
adu_no_majority = race_income_tract[race_income_tract["no_majority_flag"] == 1].groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
adu_no_majority.rename(columns={"NO_BUILDING_PERMITS": "ADUs_in_No_Majority_Tracts"}, inplace=True)

# Step 3: Compute total ADUs per county
total_adus_county = race_income_tract.groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
total_adus_county.rename(columns={"NO_BUILDING_PERMITS": "Total_ADUs_in_County"}, inplace=True)

# Step 4: Merge data into a final DataFrame
race_adu_df = total_adus_county
for race in ["asian", "black", "white", "hispanic"]:
    race_adu_df = race_adu_df.merge(adu_majority[race], on="County", how="left")
race_adu_df = race_adu_df.merge(adu_no_majority, on="County", how="left")

# Step 5: Compute the correct share of ADUs in majority racial group tracts
total_adus_in_majority_tracts = race_adu_df[
    [f"ADUs_in_Majority_{race}_Tracts" for race in ["Asian", "Black", "White", "Hispanic"]] + ["ADUs_in_No_Majority_Tracts"]
].sum(axis=1)

total_adus_in_majority_tracts = total_adus_in_majority_tracts.replace(0, 1)  # Prevent division by zero

for race in ["Asian", "Black", "White", "Hispanic"]:
    adu_col = f"ADUs_in_Majority_{race}_Tracts"
    share_col = f"Share_ADUs_in_Majority_{race}_Tracts"
    race_adu_df[share_col] = (
        race_adu_df[adu_col] / total_adus_in_majority_tracts
    ).fillna(0).round(3) * 100

# Compute share of ADUs in tracts with no clear majority
race_adu_df["Share_ADUs_in_No_Majority_Tracts"] = (
    race_adu_df["ADUs_in_No_Majority_Tracts"] / total_adus_in_majority_tracts
).fillna(0).round(3) * 100

# Step 6: Compute racial breakdowns for California as a whole
total_tracts_ca = len(race_income_tract)
total_majority = {race: race_income_tract[f"majority_{race}_flag"].sum() for race in ["asian", "black", "white", "hispanic"]}
total_no_majority = race_income_tract["no_majority_flag"].sum()

total_majority_sum = sum(total_majority.values()) + total_no_majority or 1  # Prevent division by zero

# Compute majority racial tract shares, ensuring sum to 100%
share_majority_ca = {
    race: ((total_majority[race] / total_majority_sum) * 100) if total_majority_sum > 0 else 0
    for race in ["asian", "black", "white", "hispanic"]
}
share_majority_ca["No_Majority"] = (total_no_majority / total_majority_sum) * 100 if total_majority_sum > 0 else 0

# Store California-wide results
california_race_breakdown = pd.DataFrame({
    "Region": ["California"],
    "Total_Tracts": [total_tracts_ca],
    **{f"Majority_{race.capitalize()}_Tracts": [total_majority[race]] for race in total_majority},
    "No_Majority_Tracts": [total_no_majority],
    **{f"Share_Majority_{race.capitalize()}_Tracts": [share_majority_ca[race]] for race in share_majority_ca}
})

# Display the updated DataFrames
california_race_breakdown.head()


Unnamed: 0,Region,Total_Tracts,Majority_Asian_Tracts,Majority_Black_Tracts,Majority_White_Tracts,Majority_Hispanic_Tracts,No_Majority_Tracts,Share_Majority_Asian_Tracts,Share_Majority_Black_Tracts,Share_Majority_White_Tracts,Share_Majority_Hispanic_Tracts,Share_Majority_No_majority_Tracts
0,California,8037,407,47,2656,2645,2282,5.064079,0.584795,33.047157,32.91029,28.393679


In [39]:
# Convert County column to integer for proper filtering
race_income_tract["County"] = pd.to_numeric(race_income_tract["County"], errors='coerce').fillna(0).astype(int)

# Ensure numeric conversion of race columns
numeric_columns = ["Asian_NonHispanic", "Black_NonHispanic", "White_NonHispanic", "Hispanic", "Total_Population", "NO_BUILDING_PERMITS"]
race_income_tract[numeric_columns] = race_income_tract[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

# Step 1: Create Majority Racial Group Flags
for race in ["asian", "black", "white", "hispanic"]:
    race_col = f"{race.capitalize()}_NonHispanic" if race != "hispanic" else "Hispanic"
    flag_col = f"majority_{race}_flag"
    race_income_tract[flag_col] = ((race_income_tract[race_col] / race_income_tract["Total_Population"]) > 0.5).astype(int)

# Step 1.1: Create a Flag for Tracts with No Clear Majority
race_income_tract["no_majority_flag"] = (race_income_tract[["majority_asian_flag", "majority_black_flag", "majority_white_flag", "majority_hispanic_flag"]].sum(axis=1) == 0).astype(int)

# Step 2: Compute ADU breakdowns for majority racial group tracts
adu_majority = {}
for race in ["asian", "black", "white", "hispanic"]:
    col_name = f"majority_{race}_flag"
    adu_majority[race] = race_income_tract[race_income_tract[col_name] == 1].groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
    adu_majority[race].rename(columns={"NO_BUILDING_PERMITS": f"ADUs_in_Majority_{race.capitalize()}_Tracts"}, inplace=True)

# Compute ADU breakdown for tracts with no clear majority
adu_no_majority = race_income_tract[race_income_tract["no_majority_flag"] == 1].groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
adu_no_majority.rename(columns={"NO_BUILDING_PERMITS": "ADUs_in_No_Majority_Tracts"}, inplace=True)

# Step 3: Compute total ADUs per county
total_adus_county = race_income_tract.groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
total_adus_county.rename(columns={"NO_BUILDING_PERMITS": "Total_ADUs_in_County"}, inplace=True)

# Step 4: Merge data into a final DataFrame
race_adu_df = total_adus_county
for race in ["asian", "black", "white", "hispanic"]:
    race_adu_df = race_adu_df.merge(adu_majority[race], on="County", how="left")
race_adu_df = race_adu_df.merge(adu_no_majority, on="County", how="left")

# Step 5: Compute the correct share of ADUs in majority racial group tracts
total_adus_in_majority_tracts = race_adu_df[
    [f"ADUs_in_Majority_{race}_Tracts" for race in ["Asian", "Black", "White", "Hispanic"]] + ["ADUs_in_No_Majority_Tracts"]
].sum(axis=1)

total_adus_in_majority_tracts = total_adus_in_majority_tracts.replace(0, 1)  # Prevent division by zero

for race in ["Asian", "Black", "White", "Hispanic"]:
    adu_col = f"ADUs_in_Majority_{race}_Tracts"
    share_col = f"Share_ADUs_in_Majority_{race}_Tracts"
    race_adu_df[share_col] = (
        race_adu_df[adu_col] / total_adus_in_majority_tracts
    ).fillna(0).round(3) * 100

# Compute share of ADUs in tracts with no clear majority
race_adu_df["Share_ADUs_in_No_Majority_Tracts"] = (
    race_adu_df["ADUs_in_No_Majority_Tracts"] / total_adus_in_majority_tracts
).fillna(0).round(3) * 100

# Compute total ADUs in majority race tracts for California
adu_majority_ca = {race: adu_majority[race][f"ADUs_in_Majority_{race.capitalize()}_Tracts"].sum() if not adu_majority[race].empty else 0 for race in ["asian", "black", "white", "hispanic"]}
adus_no_majority_ca = adu_no_majority["ADUs_in_No_Majority_Tracts"].sum() if not adu_no_majority.empty else 0

# Store California-wide results
california_race_breakdown = pd.DataFrame({
    "Region": ["California"],
    "Total_Tracts": [len(race_income_tract)],
    **{f"ADUs_in_Majority_{race.capitalize()}_Tracts": [adu_majority_ca[race]] for race in adu_majority_ca},
    "ADUs_in_No_Majority_Tracts": [adus_no_majority_ca],
    **{f"Share_ADUs_in_Majority_{race.capitalize()}_Tracts": [race_adu_df[f"Share_ADUs_in_Majority_{race}_Tracts"].mean()] for race in ["Asian", "Black", "White", "Hispanic"]},
    "Share_ADUs_in_No_Majority_Tracts": [race_adu_df["Share_ADUs_in_No_Majority_Tracts"].mean()]
})

# Display the updated DataFrames
california_race_breakdown.head()

Unnamed: 0,Region,Total_Tracts,ADUs_in_Majority_Asian_Tracts,ADUs_in_Majority_Black_Tracts,ADUs_in_Majority_White_Tracts,ADUs_in_Majority_Hispanic_Tracts,ADUs_in_No_Majority_Tracts,Share_ADUs_in_Majority_Asian_Tracts,Share_ADUs_in_Majority_Black_Tracts,Share_ADUs_in_Majority_White_Tracts,Share_ADUs_in_Majority_Hispanic_Tracts,Share_ADUs_in_No_Majority_Tracts
0,California,8037,7384,1284,94888,95778,83513,1.151724,0.1,55.998276,22.22069,20.524138


## Bay Area Breakdown

In [42]:
# Define Bay Area county codes (as integers, without leading zeros)
bay_area_counties = [1, 13, 41, 55, 75, 81, 85, 95, 97]

# Convert County column to integer for proper filtering
race_income_tract["County"] = pd.to_numeric(race_income_tract["County"], errors='coerce').fillna(0).astype(int)

# Filter for Bay Area counties
bay_area_data = race_income_tract[race_income_tract["County"].isin(bay_area_counties)]

# Ensure numeric conversion of race columns
numeric_columns = ["Asian_NonHispanic", "Black_NonHispanic", "White_NonHispanic", "Hispanic", "Total_Population", "NO_BUILDING_PERMITS"]
bay_area_data[numeric_columns] = bay_area_data[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

# Step 1: Create Majority Racial Group Flags
for race in ["asian", "black", "white", "hispanic"]:
    race_col = f"{race.capitalize()}_NonHispanic" if race != "hispanic" else "Hispanic"
    flag_col = f"majority_{race}_flag"
    bay_area_data[flag_col] = ((bay_area_data[race_col] / bay_area_data["Total_Population"]) > 0.5).astype(int)

# Step 1.1: Create a Flag for Tracts with No Clear Majority
bay_area_data["no_majority_flag"] = (bay_area_data[["majority_asian_flag", "majority_black_flag", "majority_white_flag", "majority_hispanic_flag"]].sum(axis=1) == 0).astype(int)

# Step 2: Compute ADU breakdowns for majority racial group tracts
adu_majority = {}
for race in ["asian", "black", "white", "hispanic"]:
    col_name = f"majority_{race}_flag"
    adu_majority[race] = bay_area_data[bay_area_data[col_name] == 1]["NO_BUILDING_PERMITS"].sum()

# Compute ADU breakdown for tracts with no clear majority
adu_no_majority = bay_area_data[bay_area_data["no_majority_flag"] == 1]["NO_BUILDING_PERMITS"].sum()

# Step 3: Compute total ADUs in Bay Area
total_adus_bay = bay_area_data["NO_BUILDING_PERMITS"].sum()

# Step 4: Compute total tracts and majority tract counts
total_tracts_bay = len(bay_area_data)
total_majority = {race: bay_area_data[f"majority_{race}_flag"].sum() for race in ["asian", "black", "white", "hispanic"]}
total_no_majority = bay_area_data["no_majority_flag"].sum()

# Compute ADU shares, ensuring correct recalculation
corrected_total_adus_in_majority_tracts = sum(adu_majority.values()) + adu_no_majority or 1  # Prevent division by zero
share_adu_bay = {
    race: ((adu_majority[race] / corrected_total_adus_in_majority_tracts) * 100) if corrected_total_adus_in_majority_tracts > 0 else 0
    for race in ["asian", "black", "white", "hispanic"]
}
share_adu_no_majority = (adu_no_majority / corrected_total_adus_in_majority_tracts) * 100 if corrected_total_adus_in_majority_tracts > 0 else 0

# Compute majority racial tract shares, ensuring no division errors
share_majority_bay = {
    race: ((total_majority[race] / total_tracts_bay) * 100) if total_tracts_bay > 0 else 0
    for race in ["asian", "black", "white", "hispanic"]
}
share_majority_no_majority = (total_no_majority / total_tracts_bay) * 100 if total_tracts_bay > 0 else 0

# Store Bay Area-wide results
bay_area_race_breakdown = pd.DataFrame({
    "Region": ["Bay Area"],
    "Total_Tracts": [total_tracts_bay],
    **{f"Majority_{race.capitalize()}_Tracts": [total_majority[race]] for race in total_majority},
    "No_Majority_Tracts": [total_no_majority],
    **{f"Share_ADUs_in_Majority_{race.capitalize()}_Tracts": [share_adu_bay[race]] for race in share_adu_bay},
    "Share_ADUs_in_No_Majority_Tracts": [share_adu_no_majority],
    **{f"Share_Majority_{race.capitalize()}_Tracts": [share_majority_bay[race]] for race in share_majority_bay},
    "Share_Majority_No_Majority_Tracts": [share_majority_no_majority]
})

# Display the updated DataFrames
bay_area_race_breakdown.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bay_area_data[numeric_columns] = bay_area_data[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bay_area_data[flag_col] = ((bay_area_data[race_col] / bay_area_data["Total_Population"]) > 0.5).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-v

Unnamed: 0,Region,Total_Tracts,Majority_Asian_Tracts,Majority_Black_Tracts,Majority_White_Tracts,Majority_Hispanic_Tracts,No_Majority_Tracts,Share_ADUs_in_Majority_Asian_Tracts,Share_ADUs_in_Majority_Black_Tracts,Share_ADUs_in_Majority_White_Tracts,Share_ADUs_in_Majority_Hispanic_Tracts,Share_ADUs_in_No_Majority_Tracts,Share_Majority_Asian_Tracts,Share_Majority_Black_Tracts,Share_Majority_White_Tracts,Share_Majority_Hispanic_Tracts,Share_Majority_No_Majority_Tracts
0,Bay Area,1571,230,5,490,171,675,8.734163,0.549389,27.447023,11.607243,51.662182,14.640356,0.318269,31.190325,10.884787,42.966264


# Income Dash

In [45]:
# Step 1: Filter for tracts below median income (bel_med_inc_flag == 1)
below_median_tracts = race_income_tract[race_income_tract["bel_med_inc_flag"] == 1]

# Step 2: Compute ADUs permitted in below-median income tracts per county
adu_below_median = below_median_tracts.groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
adu_below_median.rename(columns={"NO_BUILDING_PERMITS": "ADUs_in_Below_Median_Tracts"}, inplace=True)

# Step 3: Compute total ADUs per county
total_adus_county = race_income_tract.groupby("County", as_index=False)["NO_BUILDING_PERMITS"].sum()
total_adus_county.rename(columns={"NO_BUILDING_PERMITS": "Total_ADUs_in_County"}, inplace=True)

# Step 4: Merge ADU counts (below median and total) per county
income_adu_df = total_adus_county.merge(adu_below_median, on="County", how="left")

# Step 5: Compute the share of ADUs in below-median income tracts
income_adu_df["Share_ADUs_in_Below_Median_Tracts"] = (
    income_adu_df["ADUs_in_Below_Median_Tracts"] / income_adu_df["Total_ADUs_in_County"]
).fillna(0).round(3) * 100  # Convert to percentage

# Step 6: Count total and below-median tracts per county
total_tracts_county = race_income_tract.groupby("County", as_index=False)["Census_Tract_Clean"].nunique()
total_tracts_county.rename(columns={"Census_Tract_Clean": "Total_Tracts_in_County"}, inplace=True)

below_median_tracts_county = below_median_tracts.groupby("County", as_index=False)["Census_Tract_Clean"].nunique()
below_median_tracts_county.rename(columns={"Census_Tract_Clean": "Below_Median_Tracts_Count"}, inplace=True)

# Step 7: Merge tract counts
income_adu_df = income_adu_df.merge(total_tracts_county, on="County", how="left")
income_adu_df = income_adu_df.merge(below_median_tracts_county, on="County", how="left")

# Fill NaN values with 0 (if no below-median income tracts exist in a county)
income_adu_df["Below_Median_Tracts_Count"] = income_adu_df["Below_Median_Tracts_Count"].fillna(0).astype(int)

# Step 8: Compute the share of below-median income tracts per county
income_adu_df["Share_Below_Median_Tracts"] = (
    income_adu_df["Below_Median_Tracts_Count"] / income_adu_df["Total_Tracts_in_County"]
).fillna(0).round(3) * 100  # Convert to percentage

# Display the updated DataFrame
income_adu_df.head()

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Below_Median_Tracts,Share_ADUs_in_Below_Median_Tracts,Total_Tracts_in_County,Below_Median_Tracts_Count,Share_Below_Median_Tracts
0,1,6220,3267.0,52.5,343,175,51.0
1,3,54,,0.0,1,0,0.0
2,5,1640,798.0,48.7,10,5,50.0
3,7,5185,2879.0,55.5,54,27,50.0
4,9,1060,892.0,84.2,14,7,50.0


In [47]:
med_income_short = race_income_tract.loc[:, ['County', 'County_Median_Income']].drop_duplicates(subset=['County'])
med_income_short

Unnamed: 0,County,County_Median_Income
0,1,123107.0
343,3,110781.0
344,5,82304.5
354,7,65683.0
408,9,82443.5
422,11,70653.0
428,13,129298.5
641,15,59549.0
649,17,99433.0
703,19,66301.0


In [49]:
income_adu_df = income_adu_df.merge(med_income_short, on='County', how='left')  # Change 'left' to 'inner' if needed
income_adu_df

Unnamed: 0,County,Total_ADUs_in_County,ADUs_in_Below_Median_Tracts,Share_ADUs_in_Below_Median_Tracts,Total_Tracts_in_County,Below_Median_Tracts_Count,Share_Below_Median_Tracts,County_Median_Income
0,1,6220,3267.0,52.5,343,175,51.0,123107.0
1,3,54,,0.0,1,0,0.0,110781.0
2,5,1640,798.0,48.7,10,5,50.0,82304.5
3,7,5185,2879.0,55.5,54,27,50.0,65683.0
4,9,1060,892.0,84.2,14,7,50.0,82443.5
5,11,1109,405.0,36.5,6,3,50.0,70653.0
6,13,3346,1542.0,46.1,213,103,48.4,129298.5
7,15,1329,605.0,45.5,8,3,37.5,59549.0
8,17,806,357.0,44.3,54,26,48.1,99433.0
9,19,12128,8026.0,66.2,211,106,50.2,66301.0


In [51]:
income_adu_df.to_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/outputs/income_dash.csv')

 # California Stats

In [64]:
# Total ADUs in majority POC areas
total_adus_poc = race_income_tract.loc[race_income_tract["maj_poc_flag"] == 1, "NO_BUILDING_PERMITS"].sum()

# Total ADUs in below-median-income areas
total_adus_low_income = race_income_tract.loc[race_income_tract["bel_med_inc_flag"] == 1, "NO_BUILDING_PERMITS"].sum()

print("Total ADUs in majority POC areas:", int(total_adus_poc))
print("Total ADUs in below-median-income areas:", int(total_adus_low_income))


Total ADUs in majority POC areas: 187925
Total ADUs in below-median-income areas: 148985


In [70]:
# Total ADUs built overall
total_adus_overall = race_income_tract["NO_BUILDING_PERMITS"].sum()

print("Total ADUs built overall:", int(total_adus_overall))


Total ADUs built overall: 282847


In [54]:
race_income_tract.head()

Unnamed: 0,Total_Population,White_NonHispanic,Black_NonHispanic,Hispanic,Asian_NonHispanic,Median_Household_Income,State,County,Tract,Census_Tract_Clean,...,maj_poc_flag,majority_white_flag,majority_black_flag,majority_asian_flag,majority_hispanic_flag,County_Median_Income,bel_med_inc_flag,NAMELSAD,NO_BUILDING_PERMITS,no_majority_flag
0,3094,2107,137,200,462,250001,6,1,400100,Census Tract 4001,...,0,1,0,0,0,123107.0,0,Census Tract 4001,0,0
1,2093,1408,43,196,256,225880,6,1,400200,Census Tract 4002,...,0,1,0,0,0,123107.0,0,Census Tract 4002,4,0
2,5727,3365,524,497,609,157731,6,1,400300,Census Tract 4003,...,0,1,0,0,0,123107.0,0,Census Tract 4003,3,0
3,4395,2645,433,604,422,159612,6,1,400400,Census Tract 4004,...,0,1,0,0,0,123107.0,0,Census Tract 4004,10,0
4,3822,1696,911,557,306,96250,6,1,400500,Census Tract 4005,...,1,0,0,0,0,123107.0,1,Census Tract 4005,10,1


In [56]:
# Step 1: Compute total ADUs in below-median income tracts statewide
total_adus_below_median_state = race_income_tract.loc[race_income_tract["bel_med_inc_flag"] == 1, "NO_BUILDING_PERMITS"].sum()

# Step 2: Compute total ADUs in majority POC tracts statewide
total_adus_maj_poc_state = race_income_tract.loc[race_income_tract["maj_poc_flag"] == 1, "NO_BUILDING_PERMITS"].sum()

# Step 3: Compute total ADUs permitted statewide
total_adus_state = race_income_tract["NO_BUILDING_PERMITS"].sum()

# Step 4: Compute the share of ADUs in below-median income tracts statewide
share_adus_below_median_state = (total_adus_below_median_state / total_adus_state) * 100 if total_adus_state > 0 else 0

# Step 5: Compute the share of ADUs in majority POC tracts statewide
share_adus_maj_poc_state = (total_adus_maj_poc_state / total_adus_state) * 100 if total_adus_state > 0 else 0

# Step 6: Count the number of below-median income tracts statewide
below_median_tracts_state = race_income_tract.loc[race_income_tract["bel_med_inc_flag"] == 1, "Census_Tract_Clean"].nunique()

# Step 7: Count the number of majority POC tracts statewide
maj_poc_tracts_state = race_income_tract.loc[race_income_tract["maj_poc_flag"] == 1, "Census_Tract_Clean"].nunique()

# Step 8: Count total census tracts statewide
total_tracts_state = race_income_tract["Census_Tract_Clean"].nunique()

# Step 9: Compute the share of below-median income tracts statewide
share_below_median_tracts_state = (below_median_tracts_state / total_tracts_state) * 100 if total_tracts_state > 0 else 0

# Step 10: Compute the share of majority POC tracts statewide
share_maj_poc_tracts_state = (maj_poc_tracts_state / total_tracts_state) * 100 if total_tracts_state > 0 else 0

# Step 11: Compute the statewide median income
state_median_income = race_income_tract["Median_Household_Income"].median()

# Step 12: Create a summary DataFrame for the state of California
state_summary = pd.DataFrame({
    "Region": ["California"],
    "Statewide_Median_Income": [state_median_income],
    
    # Total ADUs
    "Total_ADUs_Statewide": [total_adus_state],
    
    # Below-Median Income ADU Stats
    "ADUs_in_Below_Median_Tracts_Statewide": [total_adus_below_median_state],
    "Share_ADUs_in_Below_Median_Tracts_Statewide": [round(share_adus_below_median_state, 3)],
    
    # Majority POC ADU Stats
    "ADUs_in_Maj_POC_Tracts_Statewide": [total_adus_maj_poc_state],
    "Share_ADUs_in_Maj_POC_Tracts_Statewide": [round(share_adus_maj_poc_state, 3)],
    
    # Census Tract Stats
    "Total_Tracts_Statewide": [total_tracts_state],
    "Below_Median_Tracts_Statewide": [below_median_tracts_state],
    "Share_Below_Median_Tracts_Statewide": [round(share_below_median_tracts_state, 3)],
    "Maj_POC_Tracts_Statewide": [maj_poc_tracts_state],
    "Share_Maj_POC_Tracts_Statewide": [round(share_maj_poc_tracts_state, 3)]
})

# Display the final state-level statistics
state_summary

Unnamed: 0,Region,Statewide_Median_Income,Total_ADUs_Statewide,ADUs_in_Below_Median_Tracts_Statewide,Share_ADUs_in_Below_Median_Tracts_Statewide,ADUs_in_Maj_POC_Tracts_Statewide,Share_ADUs_in_Maj_POC_Tracts_Statewide,Total_Tracts_Statewide,Below_Median_Tracts_Statewide,Share_Below_Median_Tracts_Statewide,Maj_POC_Tracts_Statewide,Share_Maj_POC_Tracts_Statewide
0,California,94403.0,282847,148985,52.673,187925,66.441,6320,3271,51.756,4517,71.472


# Bay Area Stats

In [59]:
# Define Bay Area county FIPS codes
bay_area_fips = ["001", "013", "041", "055", "075", "081", "085", "095", "097"]

# Ensure County FIPS codes are formatted as strings (important for matching)
race_income_tract["County"] = race_income_tract["County"].astype(str).str.zfill(3)

# Filter dataset for Bay Area counties using FIPS codes
bay_area_df = race_income_tract[race_income_tract["County"].isin(bay_area_fips)]

# Debug: Check if filtering worked
print(f"Rows in Bay Area dataset: {len(bay_area_df)}")

# Step 1: Compute total ADUs in below-median income tracts in the Bay Area
total_adus_below_median_bay = bay_area_df.loc[bay_area_df["bel_med_inc_flag"] == 1, "NO_BUILDING_PERMITS"].sum()

# Step 2: Compute total ADUs in majority POC tracts in the Bay Area
total_adus_maj_poc_bay = bay_area_df.loc[bay_area_df["maj_poc_flag"] == 1, "NO_BUILDING_PERMITS"].sum()

# Step 3: Compute total ADUs permitted in the Bay Area
total_adus_bay = bay_area_df["NO_BUILDING_PERMITS"].sum()

# Step 4: Compute the share of ADUs in below-median income tracts in the Bay Area
share_adus_below_median_bay = (total_adus_below_median_bay / total_adus_bay) * 100 if total_adus_bay > 0 else 0

# Step 5: Compute the share of ADUs in majority POC tracts in the Bay Area
share_adus_maj_poc_bay = (total_adus_maj_poc_bay / total_adus_bay) * 100 if total_adus_bay > 0 else 0

# Step 6: Count the number of below-median income tracts in the Bay Area
below_median_tracts_bay = bay_area_df.loc[bay_area_df["bel_med_inc_flag"] == 1, "Census_Tract_Clean"].nunique()

# Step 7: Count the number of majority POC tracts in the Bay Area
maj_poc_tracts_bay = bay_area_df.loc[bay_area_df["maj_poc_flag"] == 1, "Census_Tract_Clean"].nunique()

# Step 8: Count total census tracts in the Bay Area
total_tracts_bay = bay_area_df["Census_Tract_Clean"].nunique()

# Step 9: Compute the share of below-median income tracts in the Bay Area
share_below_median_tracts_bay = (below_median_tracts_bay / total_tracts_bay) * 100 if total_tracts_bay > 0 else 0

# Step 10: Compute the share of majority POC tracts in the Bay Area
share_maj_poc_tracts_bay = (maj_poc_tracts_bay / total_tracts_bay) * 100 if total_tracts_bay > 0 else 0

# Step 11: Compute the Bay Area median household income
bay_area_median_income = bay_area_df["Median_Household_Income"].median()

# Step 12: Create a summary DataFrame for the Bay Area
bay_area_summary = pd.DataFrame({
    "Region": ["Bay Area"],
    "Bay_Area_Median_Income": [bay_area_median_income],
    
    # Total ADUs
    "Total_ADUs_Bay_Area": [total_adus_bay],
    
    # Below-Median Income ADU Stats
    "ADUs_in_Below_Median_Tracts_Bay_Area": [total_adus_below_median_bay],
    "Share_ADUs_in_Below_Median_Tracts_Bay_Area": [round(share_adus_below_median_bay, 3)],
    
    # Majority POC ADU Stats
    "ADUs_in_Maj_POC_Tracts_Bay_Area": [total_adus_maj_poc_bay],
    "Share_ADUs_in_Maj_POC_Tracts_Bay_Area": [round(share_adus_maj_poc_bay, 3)],
    
    # Census Tract Stats
    "Total_Tracts_Bay_Area": [total_tracts_bay],
    "Below_Median_Tracts_Bay_Area": [below_median_tracts_bay],
    "Share_Below_Median_Tracts_Bay_Area": [round(share_below_median_tracts_bay, 3)],
    "Maj_POC_Tracts_Bay_Area": [maj_poc_tracts_bay],
    "Share_Maj_POC_Tracts_Bay_Area": [round(share_maj_poc_tracts_bay, 3)]
})

# Display the final Bay Area statistics
bay_area_summary

Rows in Bay Area dataset: 1571


Unnamed: 0,Region,Bay_Area_Median_Income,Total_ADUs_Bay_Area,ADUs_in_Below_Median_Tracts_Bay_Area,Share_ADUs_in_Below_Median_Tracts_Bay_Area,ADUs_in_Maj_POC_Tracts_Bay_Area,Share_ADUs_in_Maj_POC_Tracts_Bay_Area,Total_Tracts_Bay_Area,Below_Median_Tracts_Bay_Area,Share_Below_Median_Tracts_Bay_Area,Maj_POC_Tracts_Bay_Area,Share_Maj_POC_Tracts_Bay_Area
0,Bay Area,135517.0,35676,17751,49.756,25884,72.553,1571,769,48.95,1081,68.81


In [72]:
total_adus_bay

35676

In [74]:
total_adus_below_median_bay

17751

In [76]:
total_adus_maj_poc_bay

25884

# Normalizing Redlining Maps

In [62]:
hcd_2023_ALL = pd.read_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/data/hcd_2023_ALL.csv')

  hcd_2023_ALL = pd.read_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/data/hcd_2023_ALL.csv')


In [123]:
# Filter for relevant grades
hcd_2023_ALL = hcd_2023_ALL[hcd_2023_ALL['grade'].isin(['A', 'B', 'C', 'D'])].copy()

# Ensure necessary columns are properly formatted
hcd_2023_ALL['grade'] = hcd_2023_ALL['grade'].astype(str)  # Ensure grade is a string
hcd_2023_ALL['NO_BUILDING_PERMITS'] = pd.to_numeric(hcd_2023_ALL['NO_BUILDING_PERMITS'], errors='coerce').fillna(0)

# Compute total number of developments (rows) per city
city_adus_total = hcd_2023_ALL.groupby('JURIS_NAME', as_index=False).agg(
    total_adus=('JURIS_NAME', 'count')  # Count number of rows (developments) per city
)

# Compute ADUs by redlining grade per city
city_adus_by_grade = hcd_2023_ALL.groupby(['JURIS_NAME', 'grade'], as_index=False).agg(
    adus_in_grade=('JURIS_NAME', 'count')  # Count number of rows (developments) per grade
)

# Merge with total ADUs per city
city_adus_by_grade = city_adus_by_grade.merge(city_adus_total, on='JURIS_NAME', how='left')

# Calculate share of ADUs in each redlining grade per city based on row count
city_adus_by_grade['share_adus_in_grade'] = (
    city_adus_by_grade['adus_in_grade'] / city_adus_by_grade['total_adus'] * 100
).round(1)

# Pivot the data to show shares for A, B, C, D as separate columns per city
city_adus_pivot = city_adus_by_grade.pivot(index='JURIS_NAME', columns='grade', values='share_adus_in_grade').reset_index()

# Rename columns for clarity (fill missing values with 0)
city_adus_pivot = city_adus_pivot.rename(columns=lambda x: f"share_adus_{x}" if x in ['A', 'B', 'C', 'D'] else x).fillna(0)

In [125]:
# Remove rows where all share columns (A, B, C, D) are 0
share_columns = ['share_adus_A', 'share_adus_B', 'share_adus_C', 'share_adus_D']
city_adus_pivot = city_adus_pivot.loc[city_adus_pivot[share_columns].any(axis=1)]
city_adus_pivot

grade,JURIS_NAME,share_adus_A,share_adus_B,share_adus_C,share_adus_D
0,ALAMEDA,0.0,12.1,67.0,20.9
1,ALAMEDA COUNTY,0.0,0.0,50.0,50.0
2,ALBANY,0.0,75.0,25.0,0.0
3,ALHAMBRA,6.8,29.3,61.0,2.9
4,ARCADIA,21.4,11.9,66.7,0.0
...,...,...,...,...,...
106,TORRANCE,38.8,0.0,42.9,18.4
107,WEST HOLLYWOOD,25.0,6.9,65.3,2.8
108,WEST SACRAMENTO,0.0,0.0,0.0,100.0
109,WHITTIER,3.4,11.9,55.9,28.8


In [127]:
# Convert from wide to long format for Tableau
redlined_city_adus_long = city_adus_pivot.melt(
    id_vars=["JURIS_NAME"],  # Keep city name
    value_vars=["share_adus_A", "share_adus_B", "share_adus_C", "share_adus_D"],  # Pivot these columns
    var_name="Redlining_Grade",  # New column for A, B, C, D, NA categories
    value_name="ADU_Share"  # New column for ADU share values
)

# Rename redlining grade values for clarity
redlined_city_adus_long['Redlining_Grade'] = redlined_city_adus_long['Redlining_Grade'].str.replace("share_adus_", "")
#redlined_city_adus_long['Redlining_Grade'] = redlined_city_adus_long['Redlining_Grade'].replace({"nan": "NA"})  # Rename NaN column

# Save the cleaned dataset for Tableau
redlined_city_adus_long
redlined_city_adus_long.to_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/outputs/city_adus_pivot_gradesonly_normalized.csv', index=False)

## By holc area id

In [135]:
holc_redlining = pd.read_csv("/Users/mckenziediep/Documents/Github/bac_adu_dash/data/redlined_maps_by_holc.csv")
holc_redlining.head()

Unnamed: 0,JURIS_NAME,CNTY_NAME,YEAR,APN,STREET_ADDRESS,PROJECT_NAME,JURS_TRACKING_ID,UNIT_CAT,TENURE,NO_BUILDING_PERMITS,...,city_1,state_1,city_survey,category,grade,label,residential,commercial,industrial,fill
0,ADELANTO,San Bernardino,2023,310310314,10352 San Marcos Ct,Alyssa Bottinelli,CAMINO-2023-1736,SFD,Owner,1,...,,,,,,,,,,
1,ADELANTO,San Bernardino,2023,310310315,10368 San Marcos Ct,Alyssa Bottinelli,CAMINO-2023-1737,SFD,Owner,1,...,,,,,,,,,,
2,ADELANTO,San Bernardino,2023,310310320,18786 Hampton Ln,Alyssa Bottinelli,CAMINO-2023-192,SFD,Owner,1,...,,,,,,,,,,
3,ADELANTO,San Bernardino,2023,310310320,18792 Hampton Ln,Alyssa Bottinelli,CAMINO-2023-191,SFD,Owner,1,...,,,,,,,,,,
4,ADELANTO,San Bernardino,2023,310310329,10353 San Marcos Ct,Alyssa Bottinelli,CAMINO-2023-1738,SFD,Owner,1,...,,,,,,,,,,


In [139]:
# Filter out rows where 'grade' is blank or NaN
holc_redlining = holc_redlining[holc_redlining['grade'].notna() & (holc_redlining['grade'] != '')]
holc_redlining.to_csv('/Users/mckenziediep/Documents/Github/bac_adu_dash/outputs/holc_redlining_filtered.csv')