In [9]:
# Import modules
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import regex as re
import requests
import json
from bs4 import BeautifulSoup
import numpy as np

In [10]:
# Load the three election workbooks used by the rest of the notebook.
elections = pd.read_excel('elections.xlsx')
geocodes = pd.read_excel('elections_with_geocodes.xlsx')
tracts = pd.read_excel('elections_with_tracts.xlsx')

# ACS data-profile groups requested for every tract (used as `group(...)` in the API query).
groups = ['DP02', 'DP03', 'DP05']

# Census API key. Read from the environment so the credential is never stored in
# the notebook or its version history.
# NOTE(review): a key was previously hard-coded here; treat it as exposed and rotate it.
key = os.environ.get('CENSUS_API_KEY')
if not key:
    raise RuntimeError(
        "Set the CENSUS_API_KEY environment variable to your Census API key "
        "before running this notebook."
    )

In [11]:
# Build tracts_df: one row per unique (state, county, tract, year) combination
# that appears in an election with a district number.
#
# Each entry of tracts['tracts'] is read as a comma-separated list of
# "<state>_<county>_<tract>" identifiers.
tracts_dict = {'state': [], 'county': [], 'tract': [], 'year': []}

for index, row in elections.iterrows():
    # Elections without a district number are skipped.
    if pd.isna(row['District #']):
        continue

    # Look up the tract list recorded for the same source file.
    tract_row = tracts[tracts['filename'] == row['filename']]
    if tract_row.empty:
        # No tract record for this election; indexing .iloc[0] here would raise IndexError.
        continue

    tract_string = tract_row['tracts'].iloc[0]
    if pd.isna(tract_string):
        continue

    for new_tract in tract_string.split(','):
        # Tolerate "a_b_c, d_e_f" and trailing commas.
        new_tract = new_tract.strip()
        if not new_tract:
            continue

        new_tract_split = new_tract.split('_')
        if len(new_tract_split) < 3:
            raise ValueError(
                f"Malformed tract id {new_tract!r} for filename {row['filename']!r}; "
                "expected '<state>_<county>_<tract>'"
            )

        tracts_dict['state'].append(new_tract_split[0])
        tracts_dict['county'].append(new_tract_split[1])
        tracts_dict['tract'].append(new_tract_split[2])
        tracts_dict['year'].append(row['year'])

# The same tract can be listed for several elections in the same year; keep one
# row per combination and renumber the rows 0..n-1.
tracts_df = pd.DataFrame(tracts_dict)
tracts_df = tracts_df.drop_duplicates()
tracts_df = tracts_df.reset_index(drop=True)

In [14]:
# Accumulator for the per-tract ACS records (one dict per row of tracts_df).
# The download loop skips every row whose index is < len(acs_data), so re-running
# this cell discards the records collected so far and restarts the download from row 0.
acs_data = []

In [16]:
# Download ACS 5-year data-profile values for every (tract, year) row of
# tracts_df and append one record per row to acs_data.
#
# The loop is resumable: rows already covered by acs_data are skipped, so the
# cell can be re-run after an interruption or an error without refetching.

REQUEST_TIMEOUT_SECONDS = 30  # without a timeout a stalled connection blocks the cell forever
VINTAGES_TO_TRY = 3           # the preferred vintage plus the two preceding ones


def fetch_acs_profile(state, county, tract, query_year):
    """Fetch every group in `groups` for one tract from one ACS vintage.

    Returns a dict mapping each returned variable whose name ends in a digit
    followed by ``E`` or ``PE`` to its value for the tract. Names ending in
    ``EA``/``PEA`` are not matched by the pattern and are dropped.

    Raises on any failed request or unparsable response, so the caller never
    receives a partially fetched vintage.
    """
    estimates = {}
    for group in groups:
        url = f"https://api.census.gov/data/{query_year}/acs/acs5/profile?get=group({group})&for=tract:{tract}&in=state:{state}+county:{county}&key={key}"

        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        data = json.loads(response.text)

        # data[0] holds the variable names, data[1] the values for the requested tract.
        header, values = data[0], data[1]
        for name, value in zip(header, values):
            if re.search(r'\d(?:E|PE)$', name) is not None:
                estimates[name] = value
    return estimates


for index, row in tracts_df.iterrows():
    # Resume support: this row was already fetched in a previous run of the cell.
    if index < len(acs_data):
        continue

    if index % 10 == 0:
        print(f"{index} of {len(tracts_df)}")

    # Left-pad the identifiers to the widths used in the API query.
    state = row['state'].zfill(2)
    county = row['county'].zfill(3)
    tract = row['tract'].zfill(6)
    year = row['year']

    data_dict = {'state': state,
                 'county': county,
                 'tract': tract,
                 'year': year}

    # Preferred vintage is election year + 2, clamped to 2009..2022.
    # NOTE(review): year 2006 maps to 2008, below the 2009 floor used for
    # earlier years -- confirm whether the first threshold should be 2007.
    if year <= 2005:
        adjusted_year = 2009
    elif year >= 2021:
        adjusted_year = 2022
    else:
        adjusted_year = year + 2

    # Try the preferred vintage first, then fall back to earlier ones.
    last_error = None
    for query_year in range(adjusted_year, adjusted_year - VINTAGES_TO_TRY, -1):
        try:
            estimates = fetch_acs_profile(state, county, tract, query_year)
        except Exception as e:
            last_error = e
            continue
        # Merge only after every group succeeded, so vintages are never mixed.
        data_dict.update(estimates)
        break
    else:
        # Every vintage failed. Raise instead of calling exit(): the exception stops
        # the cell but keeps acs_data in memory, so a re-run resumes at this row.
        raise RuntimeError(
            f"No ACS data for state {state}, county {county}, tract {tract} "
            f"in vintages {adjusted_year - VINTAGES_TO_TRY + 1}-{adjusted_year}: {last_error}"
        ) from last_error

    acs_data.append(data_dict)

3170 of 6476
3180 of 6476
3190 of 6476
3200 of 6476
3210 of 6476
3220 of 6476
3230 of 6476
3240 of 6476
3250 of 6476
3260 of 6476
3270 of 6476
3280 of 6476
3290 of 6476
3300 of 6476
3310 of 6476
3320 of 6476
3330 of 6476
3340 of 6476
3350 of 6476
3360 of 6476
3370 of 6476
3380 of 6476
3390 of 6476
3400 of 6476
3410 of 6476
3420 of 6476
3430 of 6476
3440 of 6476
3450 of 6476
3460 of 6476
3470 of 6476
3480 of 6476
3490 of 6476
3500 of 6476
3510 of 6476
3520 of 6476
3530 of 6476
3540 of 6476


KeyboardInterrupt: 

In [None]:
# Assemble the collected ACS records into one table and write it to acs_data.csv.
ID_COLUMNS = ['state', 'county', 'tract', 'year']

# Records may not all carry the same variables, so take the union of their keys.
# Sorting gives a stable column order; iterating a set of strings can produce a
# different order on every interpreter run.
value_columns = sorted({name for record in acs_data for name in record} - set(ID_COLUMNS))

# Identifier columns first, then the ACS variables.
columns = ID_COLUMNS + value_columns

# Passing `columns` orders the frame and leaves variables missing from a record
# empty, so no per-record fill step is needed. It also yields a header-only
# frame when acs_data is empty.
df = pd.DataFrame(acs_data, columns=columns)

# save as csv
df.to_csv('acs_data.csv', index=False)