In [1]:
import pandas as pd
import geopandas as gpd

pd.set_option('display.max_columns', 1000, 'display.width', 1000, 'display.max_rows',1000)

In [2]:
demographic_vars = [
    'race_percent_white', 'race_percent_black', 'race_percent_native',
    'race_percent_asian', 'race_percent_pacific_islander', 'race_percent_hispanic',
    'race_percent_other', 'income_percent_below_15K', 'income_percent_15K_35K',
    'income_percent_35K_65K', 'income_percent_above_65K', 'total',
    'male_percent', 'female_percent', 'disability_percent',
    'male_disability_percent', 'female_disability_percent'
]

def compute_weighted_averages(group):
    weighted_data = {}
    weighted_data['registered_voters'] = group['registered_voters'].iloc[0]
    weighted_data['ballots_cast'] = group['ballots_cast'].iloc[0]
    weighted_data['voter_turnout'] = group['voter_turnout'].iloc[0]
    weighted_data['year'] = group['year_1'].iloc[0]
    weighted_data['is_midterm'] = group['is_midterm'].iloc[0]
    weighted_data['name'] = group['NAME_1'].iloc[0]
    weighted_data['type'] = group['TYPE'].iloc[0]
    weighted_data['label'] = group['LABEL'].iloc[0]
    weighted_data['district_1'] = group['DISTRICT_1'].iloc[0]
    weighted_data['ward_1'] = group['WARD_1'].iloc[0]
    weighted_data['municode_1'] = group['MUNICODE_1'].iloc[0]
    weighted_data['mwd_nopa_1'] = group['MWD_NOPA_1'].iloc[0]
    weighted_data['opa_muni_1'] = group['OPA_MUNI_1'].iloc[0]
    weighted_data['mwd_pad_1'] = group['MWD_PAD_1'].iloc[0]
    weighted_data['pseud4_12'] = group['Pseud4_12'].iloc[0]
    weighted_data['pseudonu_5'] = group['PseudoNu_5'].iloc[0]
    weighted_data['muni_war_1'] = group['Muni_War_1'].iloc[0]
    for var in demographic_vars:
        # Calculate weighted average
        weighted_sum = (group[var] * group['area_ratio']).sum()
        total_weight = group['area_ratio'].sum()
        weighted_avg = weighted_sum / total_weight if total_weight != 0 else 0
        weighted_data[var] = weighted_avg
    # Add other aggregated fields
    weighted_data['tracts'] = group['tract'].nunique()    
    return pd.Series(weighted_data)


In [3]:
census_tracts = gpd.read_file('tl_2024_42_tract/tl_2024_42_tract.shp')

demo_data = pd.read_csv('total_demographics.csv')
disability_data = pd.read_csv('total_disability.csv').drop(columns=['Unnamed: 0'])

In [4]:
merged_data = pd.merge(demo_data, disability_data, on=['tract', 'year'])
merged_data['parsed_tract'] = merged_data['tract'].apply(lambda x: x.split(', ')[0].split(';')[0])

merged_data = pd.merge(merged_data, census_tracts, left_on='parsed_tract', right_on='NAMELSAD', how='left')
merged_data = gpd.GeoDataFrame(merged_data, geometry='geometry').to_crs('EPSG:3395')

precinct_data = gpd.read_file('relevant_precincts.geojson').to_crs('EPSG:3395')

In [5]:
data = []
for year in [2014, 2016, 2018, 2020, 2022]:
    curr_precinct_data = precinct_data[precinct_data['year'] == year].copy()
    curr_merged_data = merged_data[merged_data['year'] == year].copy()

    curr_intersection = gpd.overlay(curr_precinct_data, curr_merged_data, how='intersection')
    curr_intersection['intersection_area'] = curr_intersection['geometry'].to_crs('EPSG:3395').area

    precinct_areas = curr_precinct_data.copy()
    precinct_areas['precinct_area'] = precinct_areas['geometry'].to_crs('EPSG:3395').area
    precinct_areas = precinct_areas[['district', 'precinct_area']]

    curr_intersection = curr_intersection.merge(precinct_areas, on='district')
    curr_intersection['area_ratio'] = curr_intersection['intersection_area'] / curr_intersection['precinct_area']

    weighted_data = curr_intersection.groupby('district').apply(compute_weighted_averages).reset_index()
    data.append(weighted_data)

final_data = pd.concat(data)

  weighted_data = curr_intersection.groupby('district').apply(compute_weighted_averages).reset_index()
  weighted_data = curr_intersection.groupby('district').apply(compute_weighted_averages).reset_index()
  weighted_data = curr_intersection.groupby('district').apply(compute_weighted_averages).reset_index()
  weighted_data = curr_intersection.groupby('district').apply(compute_weighted_averages).reset_index()
  weighted_data = curr_intersection.groupby('district').apply(compute_weighted_averages).reset_index()


In [7]:
final_data.to_csv('final_data.csv', index=False)