In [1]:
# ------------ LIBRARIES ------------ #
import pandas as pd
import geopandas as gpd
import numpy as np
import json
import requests as req
from urllib.request import urlopen
from copy import deepcopy
import os
from functools import reduce

In [2]:
# ------------ FOLDER PATHS ------------ #

# Specify your main folder path here (a trailing slash is optional)
main_folder = "/main/folder"


# Create folder paths from your main folder path.
# os.path.join supplies the separator between main_folder and each subfolder
# (plain concatenation would yield e.g. "/main/folderassets/"), and the final ""
# argument leaves a trailing separator so that later code can keep appending
# file names directly to these folder paths.
assets_folder = os.path.join(main_folder, "assets", "")

censusdata_folder = os.path.join(main_folder, "censusdata", "")
placelevel_data_folder = os.path.join(censusdata_folder, "Place Level Data", "")
yearlevel_data_folder = os.path.join(censusdata_folder, "Year Level Data", "")
contract_rent_data_folder = os.path.join(censusdata_folder, "Contract Rent Data", "")
rent_burden_data_folder = os.path.join(censusdata_folder, "Rent Burden Data", "")

masterfiles_folder = os.path.join(main_folder, "masterfiles", "")


# Create the necessary folders (folders that already exist are left untouched)
folders = [main_folder, assets_folder, censusdata_folder, yearlevel_data_folder, placelevel_data_folder,
           contract_rent_data_folder, rent_burden_data_folder, masterfiles_folder]
for folder in folders:
    os.makedirs(folder, exist_ok=True)

        
# ------------ Notes ------------ #
# The assets folder will be where your map and GeoJSON files will be stored. The
# censusdata folder will contain all the census data that you deal with, including
# the directly extracted datasets, the cleaned datasets, and metadata. From within
# this censusdata folder, we create four folders:
#
# (1) Place Level Data, which contains census data by places in Los Angeles County
# (2) Year Level Data, which contains the place-level data regrouped by year
# (3) Contract Rent Data, which contains contract rent data
# (4) Rent Burden Data, which contains rent burden data
#
# The masterfiles folder is created directly inside the main folder. When we create
# our websites, we will use data from the masterfiles folder.
#
# Be sure that the appropriate folder paths are referenced here and when you create
# the Dash app(s)!

In [5]:
# ------------ 2020 FIPS CODES FOR CALIFORNIA AND LOS ANGELES COUNTY ------------ #
# Read the Census Bureau's pipe-delimited 2020 place list for California, keeping
# the state and place codes as text so that their leading zeros survive.
txt_file_url = "https://www2.census.gov/geo/docs/reference/codes2020/place/st06_ca_place2020.txt"
ca2020 = pd.read_csv(txt_file_url, sep='|', dtype={'STATEFP': str, 'PLACEFP': str})
ca2020['PLACE_FIPS'] = ca2020['STATEFP'] + ca2020['PLACEFP']

# Identifier columns first, every other column afterwards in its original order
columns = ['STATE', 'STATEFP', 'PLACEFP', 'PLACE_FIPS']
ca2020 = ca2020[columns + [col for col in ca2020.columns if col not in columns]]

# Strip the place-type labels from the place names
for place_type in (' CDP', ' city'):
    ca2020['PLACENAME'] = ca2020['PLACENAME'].str.replace(place_type, "")

CA_2020_FIPS_filepath = censusdata_folder + "CA_2020_FIPS.csv"
ca2020.to_csv(CA_2020_FIPS_filepath, index=False)

# ALWAYS pass the converters argument when reading this file back, lest the FIPS
# codes be parsed as integers!
CA_2020_FIPS = pd.read_csv(
    CA_2020_FIPS_filepath,
    converters={'STATEFP': str, 'PLACEFP': str, 'PLACE_FIPS': str},
)

# Cities and Census Designated Places (CDPs) in Los Angeles County.
# NOTE(review): this is an exact match on the COUNTIES field; a place whose COUNTIES
# value is anything other than exactly 'Los Angeles County' is not selected - confirm
# that this is intended.
in_los_angeles_county = CA_2020_FIPS['COUNTIES'] == ('Los Angeles County')
LosAngelesCounty_2020_FIPS = CA_2020_FIPS[in_los_angeles_county]
LosAngelesCounty_2020_FIPS_filepath = censusdata_folder + "LosAngelesCounty_2020_FIPS.csv"
LosAngelesCounty_2020_FIPS.to_csv(LosAngelesCounty_2020_FIPS_filepath, index=False)

# Dictionary mapping each place-level FIPS code to its place name
LosAngelesCounty_dict = LosAngelesCounty_2020_FIPS.set_index('PLACE_FIPS')['PLACENAME'].to_dict()

## Functions

In [10]:
# ------------ FUNCTION #1 (REQUIRED; extract) ------------ #
# Purpose: To download ACS data and metadata

def ACS_data_extraction(initial_year, final_year, ACS_ID, FIPS):
    """
    Download, clean, and document one ACS 5-year table for one place.

    For every year from initial_year through final_year (inclusive), this function
    accomplishes three tasks:
    (1) Extracting US Census Bureau ACS data for a place (using FIPS) from the
        Census Bureau API and saving the raw response,
    (2) Cleaning the aforementioned ACS data and saving it as a second CSV file, and
    (3) Extracting as well as formatting the metadata (variable labels)
        corresponding to each ACS dataset.

    Note that by executing this function, you also create a nested folder whose
    path is as follows:

    placelevel_data_folder + <place name without spaces>/<ACS_ID>/

    The data will be stored in this nested folder, so be sure this updated path is
    reflected in your later code!

    Years for which the API response is empty are skipped: the raw file is deleted
    and neither a cleaned file nor a metadata file is written for that year.

    Parameters
    ----------
    initial_year, final_year : int
        First and last year to download (both inclusive).
    ACS_ID : str
        ACS table (group) ID, e.g. 'B25057'.
    FIPS : str
        Place-level FIPS code; must be a key of LosAngelesCounty_dict.

    Raises
    ------
    KeyError
        If FIPS is not a key of LosAngelesCounty_dict.
    """
    place = LosAngelesCounty_dict[FIPS].replace(" ", "")

    folder_path = f'{placelevel_data_folder}{place}/{ACS_ID}/'
    os.makedirs(folder_path, exist_ok=True)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
    }
    # Placeholder codes (as numbers or as text) that are treated as missing values
    value_dict = {-222222222: np.nan, -333333333: np.nan, -666666666: np.nan, '-222222222': np.nan, '-333333333': np.nan, '-666666666': np.nan}

    for year in map(str, range(initial_year, final_year + 1)):
        # Data extraction
        data_url = f"https://api.census.gov/data/{year}/acs/acs5?get=group({ACS_ID})&ucgid=pseudo(1600000US{FIPS}$1400000)"
        file_path = f"{folder_path}{ACS_ID}_{year}_{place}.csv"
        # Both the HTTP response and the file are closed when the block exits
        with req.get(data_url, headers=headers, stream=True) as r, open(file_path, "wb") as file:
            for block in r.iter_content(chunk_size=1024):
                if block:
                    file.write(block)

        # Data cleaning
        try:
            df = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            # Nothing was returned for this year: discard the empty download
            os.remove(file_path)
            continue

        if df.empty or df.shape[1] == 0:
            os.remove(file_path)
            continue

        # Drop the columns whose names end in 'A'
        df = df.drop(columns=list(df.filter(regex='A$')))
        # The response is a JSON array saved verbatim, so brackets and quotes are
        # left over in the column names and in the text values
        df = df.rename(columns=lambda x: x.strip(']["'))
        for name in df.columns.to_list():
            if df[name].dtype == 'object':
                # regex=False: these are literal characters ('["' is not a valid
                # regular expression)
                df[name] = df[name].str.replace('["', "", regex=False)
                df[name] = df[name].str.replace('"', "", regex=False)
                df[name] = df[name].replace('[null', np.nan)
            if name == 'GEO_ID':
                df[name] = df[name].str.replace('1400000US', "", regex=False)
                df[name] = df[name].astype('int64')
            if FIPS in name:
                df[name] = df[name].astype('int64')
        df['YEAR'] = int(year)
        df['FIPS'] = FIPS
        df = df.replace(value_dict)

        ordered_columns = ['YEAR', 'FIPS', 'GEO_ID', 'NAME']
        col_list = list(df.filter(regex=f'^{ACS_ID}'))
        df = df[ordered_columns + col_list]

        cleaned_file_path = f"{folder_path}{ACS_ID}_{year}_cleaned_{place}.csv"
        df.to_csv(cleaned_file_path, index=False)

        # Metadata extraction
        url = f"https://api.census.gov/data/{year}/acs/acs5/groups/{ACS_ID}.json"
        # Fetch the metadata once and close the connection afterwards
        with urlopen(url) as response:
            data_json = json.loads(response.read())

        column_labels = {key: val.get('label') for key, val in data_json['variables'].items()}

        metadata = pd.DataFrame.from_dict(column_labels, orient='index')
        metadata = metadata.rename(columns={0: "Label"})
        metadata.index.name = 'ID'
        # Keep only the variables whose label does not mention "Annotation"
        metadata = metadata[metadata["Label"].str.contains("Annotation") == False]
        df_new_rows = pd.DataFrame({'Label': ['Year', 'Place-level FIPS code']}, index=['YEAR', 'FIPS'])
        metadata = pd.concat([metadata, df_new_rows])
        metadata_file = f"{folder_path}{ACS_ID}_{year}_metadata.csv"
        metadata.to_csv(metadata_file, index=True)

In [12]:
# ------------ FUNCTION #2 (REQUIRED; mass extract) ------------ #
# Purpose: To perform the previous function (downloading ACS data and metadata) en masse
def ACS_mass_data_extraction(initial_year, final_year, ACS_ID):
    """
    Run ACS_data_extraction for ALL cities/CDPs in Los Angeles County.

    The three arguments (initial_year, final_year, ACS_ID) are handed on unchanged
    to ACS_data_extraction; the fourth argument of that function, the FIPS code,
    is taken in turn from every key of LosAngelesCounty_dict.
    """
    for place_fips in list(LosAngelesCounty_dict.keys()):
        ACS_data_extraction(initial_year, final_year, ACS_ID, place_fips)

In [14]:
# ------------ FUNCTION #3 (optional) ------------ #
# Purpose: To group and collect data by their ACS ID and FIPS code
def get_acs_data(initial_year, final_year, ACS_ID, FIPS):
    """
    Collect the cleaned ACS data of one table and one place into a dictionary.

    Each key is a year (int) whose corresponding value is the cleaned dataset of
    that year as a DataFrame, so you can index the returned dictionary by the year
    of interest. The FIPS column is read as text so that its leading zero is kept.

    Years without a cleaned file are left out of the dictionary rather than
    raising an error, because ACS_data_extraction writes no cleaned file for a
    year whose download turned out to be empty.

    Parameters
    ----------
    initial_year, final_year : int
        First and last year to load (both inclusive).
    ACS_ID : str
        ACS table (group) ID, e.g. 'B25057'.
    FIPS : str
        Place-level FIPS code; must be a key of LosAngelesCounty_dict.

    Returns
    -------
    dict
        Mapping of year -> DataFrame for every year that has a cleaned file.
    """
    place = LosAngelesCounty_dict[FIPS].replace(" ", "")
    folder_path = f"{placelevel_data_folder}{place}/{ACS_ID}/"

    acs_data = dict()
    for year in range(initial_year, final_year + 1):
        file = f"{folder_path}{ACS_ID}_{year}_cleaned_{place}.csv"
        if not os.path.exists(file):
            # No cleaned data was produced for this year
            continue
        acs_data[year] = pd.read_csv(file, converters={'FIPS': str})

    return acs_data

In [16]:
# ------------ FUNCTION #4 (REQUIRED; collect on ACS) ------------ #
# Purpose: To concatenate the cleaned FIPS files by their ACS ID
def concatenate_ACS_files(ACS_ID, FIPS):
    """
    Combine all cleaned yearly files of one ACS table and one place into one CSV.

    Every file in placelevel_data_folder/<place>/<ACS_ID>/ whose name contains
    "cleaned" is read and stacked. A PLACE column holding the place name is added,
    the identifier columns are moved to the front, the rows are sorted by YEAR and
    GEO_ID, and the result is written to

    censusdata_folder + <ACS_ID>/<ACS_ID>_<place name without spaces>.csv

    A place without any cleaned file (ACS_data_extraction writes none for empty
    downloads) is skipped with a message and no output file is written.

    Parameters
    ----------
    ACS_ID : str
        ACS table (group) ID, e.g. 'B25057'.
    FIPS : str
        Place-level FIPS code; must be a key of LosAngelesCounty_dict.
    """
    placename = LosAngelesCounty_dict[FIPS]
    place = placename.replace(" ", "")
    extraction_path = f"{placelevel_data_folder}{place}/{ACS_ID}/"
    matching_files = [file for file in sorted(os.listdir(extraction_path)) if "cleaned" in file]

    if not matching_files:
        print(f"No cleaned {ACS_ID} files found for {placename}; skipping.")
        return

    # Read everything first and concatenate once
    combined_df = pd.concat(
        [pd.read_csv(extraction_path + file) for file in matching_files],
        ignore_index=True,
    )
    combined_df['PLACE'] = placename

    ordered_columns = ['YEAR', 'FIPS', 'PLACE', 'GEO_ID', 'NAME']
    other_columns = [col for col in combined_df.columns.to_list() if col not in ordered_columns]
    combined_df = combined_df[ordered_columns + other_columns]

    combined_df = combined_df.sort_values(by=['YEAR', 'GEO_ID'])

    ACS_folder_path = f"{censusdata_folder}{ACS_ID}/"
    os.makedirs(ACS_folder_path, exist_ok=True)

    download_path = ACS_folder_path + f"{ACS_ID}_{place}.csv"
    combined_df.to_csv(download_path, index=False)

In [18]:
# ------------ FUNCTION #5 (REQUIRED; mass collect) ------------ #
# Purpose: To perform the previous function (concatenating the cleaned FIPS files by ACS ID) en masse
def mass_concatenate_ACS_files(ACS_ID):
    """
    Run concatenate_ACS_files for the given ACS table once per place, using every
    FIPS code that is a key of LosAngelesCounty_dict.
    """
    for place_fips in list(LosAngelesCounty_dict.keys()):
        concatenate_ACS_files(ACS_ID, place_fips)

In [20]:
# ------------ FUNCTION #6 (REQUIRED; decompose by year and ACS) ------------ #
# Purpose: To decompose the previously collected data by year and ACS ID
def concatenate_by_year(ACS_ID):
    """
    Regroup the per-place files of one ACS table into one file per year.

    All files in censusdata_folder/<ACS_ID>/ whose name contains ACS_ID are stacked,
    the NAME column is shortened to the text before its first ',' or ';', and the
    rows of each year are written to

    yearlevel_data_folder + <ACS_ID>_<year>.csv

    Parameters
    ----------
    ACS_ID : str
        ACS table (group) ID, e.g. 'B25057'.

    Raises
    ------
    FileNotFoundError
        If the folder does not exist or holds no file for ACS_ID.
    """
    folder_path = f"{censusdata_folder}{ACS_ID}/"
    files = [file for file in sorted(os.listdir(folder_path)) if ACS_ID in file]
    if not files:
        raise FileNotFoundError(f"No {ACS_ID} files found in {folder_path}")

    # Read everything first and concatenate once
    combined_df = pd.concat(
        [pd.read_csv(f"{folder_path}{file}") for file in files],
        ignore_index=True,
    )

    # Keep only the leading part of the name; both ',' and ';' are handled as
    # separators (presumably the separator differs between ACS releases)
    combined_df['NAME'] = combined_df['NAME'].str.split(',').str[0]
    combined_df['NAME'] = combined_df['NAME'].str.split(';').str[0]

    for year in combined_df['YEAR'].unique().tolist():
        year_df = combined_df[combined_df['YEAR'] == year]
        download_path = f"{yearlevel_data_folder}{ACS_ID}_{year}.csv"
        year_df.to_csv(download_path, index=False)

In [22]:
# ------------ FUNCTION #7 (REQUIRED; masterfile creation) ------------ #
# Purpose: To concatenate all the files in a category into masterfiles
def masterfile_concatenate_by_year(list_ACS, category):
    """
    Merge the year-level files of several ACS tables into masterfiles.

    This function takes two arguments
    (1) list_ACS, which represents a list of the ACS codes you wish to merge
    (2) category, which is either "contract_rent" or "rent_burden"

    If "contract_rent" is specified, list the ACS codes: B25057, B25058, B25059.
    If "rent_burden" is specified, list the ACS codes: B25070, B25072.

    For every ACS code, all files in yearlevel_data_folder whose name contains that
    code are stacked. The stacked tables are then left-merged, in the order given
    by list_ACS, on YEAR, FIPS, PLACE, GEO_ID and NAME. The rows of each year are
    written to

    masterfiles_folder + <category>_masterfile_<year>.csv

    Returns
    -------
    pandas.DataFrame
        The merged masterfile covering all years.
    """
    merge_keys = ['YEAR', 'FIPS', 'PLACE', 'GEO_ID', 'NAME']
    yearlevel_files = sorted(os.listdir(yearlevel_data_folder))

    # One stacked table per ACS code, held in a plain list. Building variable
    # names with exec() is unnecessary, runs interpolated text as code, and does
    # not work from Python 3.13 on, where names assigned by one exec() call inside
    # a function are no longer visible to the next one.
    combined_df_list = []
    for ACS_ID in list_ACS:
        frames = [
            pd.read_csv(f"{yearlevel_data_folder}{file}")
            for file in yearlevel_files
            if ACS_ID in file
        ]
        combined_df_list.append(pd.concat(frames, ignore_index=True) if frames else pd.DataFrame())

    masterfile = reduce(lambda left, right: pd.merge(left, right, on=merge_keys, how='left'), combined_df_list)

    for year in masterfile['YEAR'].unique().tolist():
        year_df = masterfile[masterfile['YEAR'] == year]
        download_path = f"{masterfiles_folder}{category}_masterfile_{year}.csv"
        year_df.to_csv(download_path, index=False)

    return masterfile

In [24]:
# ------------ FUNCTION #8 (optional) ------------ #
# Purpose: To obtain map data on each place
def get_mapdata(ACS_ID, FIPS):
    """
    Write the census tract geometries of one place to its own map file.

    We make use of the Census Bureau Geographies website.
    Link: https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html
    Note that we extract 2024 census tracts for the state of California.

    The tracts are read from LosAngelesCounty_census_tracts.json in the assets
    folder and filtered down to the GEO_IDs that occur in the combined data file
    censusdata_folder/<ACS_ID>/<ACS_ID>_<place>.csv. The result is written as
    GeoJSON to

    assets_folder + Place Level Mapdata/mapdata_<place name without spaces>.json

    A place without a combined data file is skipped with a message.

    Parameters
    ----------
    ACS_ID : str
        ACS table (group) ID whose combined file supplies the GEO_IDs.
    FIPS : str
        Place-level FIPS code; must be a key of LosAngelesCounty_dict.
    """
    place = LosAngelesCounty_dict[FIPS].replace(" ", "")

    file_path = f"{censusdata_folder}{ACS_ID}/{ACS_ID}_{place}.csv"
    if not os.path.exists(file_path):
        print(f"No {ACS_ID} data file found for {place}; skipping its map data.")
        return
    df = pd.read_csv(file_path)

    LosAngelesCounty_census_tracts = gpd.read_file(assets_folder + "LosAngelesCounty_census_tracts.json")

    # NOTE(review): this assumes both GEO_ID columns hold the same type - confirm
    mask = LosAngelesCounty_census_tracts['GEO_ID'].isin(df['GEO_ID'])
    gdf = LosAngelesCounty_census_tracts[mask]

    folder_path = f"{assets_folder}Place Level Mapdata/"
    os.makedirs(folder_path, exist_ok=True)
    filename = f"{folder_path}mapdata_{place}.json"
    # State the driver explicitly, as the other GeoJSON writers in this notebook
    # do, instead of relying on it being inferred from the file extension
    gdf.to_file(filename, driver='GeoJSON', index=False)

In [26]:
# ------------ FUNCTION #9 (optional) ------------ #
# Purpose: To perform the previous function (getting map data on each place) en masse
def mass_get_mapdata(ACS_ID):
    """
    Run get_mapdata for the given ACS table once per place, i.e. for every FIPS
    code that is a key of LosAngelesCounty_dict (the places listed as being in
    Los Angeles County).
    """
    for place_fips in list(LosAngelesCounty_dict.keys()):
        get_mapdata(ACS_ID, place_fips)

In [28]:
# ------------ FUNCTION #10 (optional; mastergeometry creation) ------------ #
# Purpose: To create geometries accompanying each year's masterfile
def masterfile_geometries(final_year, initial_year=2010, censusdata_path = censusdata_folder):
    """
    Convert the yearly California census tract shapefiles into GeoJSON files.

    Due to redistricting over the years, some census tracts that were part of some
    places in the past may have been relocated, which would make our data look
    incomplete when it is not. We therefore take the census tracts of every year
    from the Census Bureau Geographies website, so that each year's masterfile can
    later be linked to the tracts of that same year. This ensures that our app can
    (1) render all available census tracts for that year, and (2) render data for
    said tracts.

    For every year from initial_year through final_year (inclusive), the shapefile
    main_folder/tl_<year>_06_tract.shp is read, tidied, and written to
    main_folder/CA_<year>_census_tracts.json.

    NOTE(review): censusdata_path is accepted but never used.
    NOTE(review): the output is written to main_folder, whereas
    mastergeometries_by_year looks for its 'CA_' files in assets_folder - confirm
    whether the files are meant to be moved there.
    """
    for year in range(initial_year, final_year + 1):
        # California census tracts
        shapefile_path = f"{main_folder}/tl_{year}_06_tract.shp"
        tracts = gpd.read_file(shapefile_path)

        # Remove '10' from the column names (this was done because of 2010)
        tracts.columns = tracts.columns.str.replace('10', '')

        # NAMELSAD takes the place of the original NAME column
        tracts = tracts.drop(['NAME'], axis=1)
        tracts = tracts.rename(columns={'GEOID': 'GEO_ID', 'NAMELSAD': 'NAME'})

        tracts['GEO_ID'] = tracts['GEO_ID'].astype(int)
        # Keep the part of the latitude text after the '+' before converting it
        latitude_text = tracts['INTPTLAT'].str.split('+').str[1]
        tracts['INTPTLAT'] = latitude_text.astype(float)
        tracts['INTPTLON'] = tracts['INTPTLON'].astype(float)

        # YEAR goes first, all remaining columns keep their order
        tracts['YEAR'] = year
        leading_columns = ['YEAR']
        trailing_columns = [col for col in tracts.columns.to_list() if col not in leading_columns]
        tracts = tracts[leading_columns + trailing_columns]

        output_path = f"{main_folder}/CA_{year}_census_tracts.json"
        tracts.to_file(output_path, driver='GeoJSON', index=False)

In [30]:
# ------------ FUNCTION #11 (REQUIRED; mastergeometry specification by year) ------------ #
# Purpose: To specify the extracted mastergeometries to correspond to locations based in our masterfiles
def mastergeometries_by_year(category):
    """
    Attach census tract geometries to each yearly masterfile of a category.

    Only two possible inputs for the category property: 'contract_rent' or 'rent_burden'

    Every file '<category>_masterfile_<year>.csv' in masterfiles_folder is merged
    with the California tract file of the SAME year. The tract file is the first
    file (in sorted order) in assets_folder whose name contains both 'CA_' and
    that year. The result is written as GeoJSON to

    assets_folder + <category>_mastergeometry_<year>.json

    A masterfile that is empty, or for which no tract file of its year is found,
    is skipped (the latter with a message).
    """
    # Match the masterfile naming pattern exactly; a plain "category in file" test
    # would also pick up the '<category>' subfolder that masterfiles_json creates.
    masterfile_prefix = f'{category}_masterfile_'
    masterfiles_list = [
        f'{masterfiles_folder}{file}'
        for file in sorted(os.listdir(masterfiles_folder))
        if file.startswith(masterfile_prefix) and file.endswith('.csv')
    ]

    geodata_list = [
        f'{assets_folder}{file}'
        for file in sorted(os.listdir(assets_folder))
        if 'CA_' in file
    ]

    for file in masterfiles_list:
        df = pd.read_csv(file)
        if df.empty:
            continue
        year = int(df['YEAR'].iloc[0])

        # Pair by year rather than by list position, so that a missing year in
        # either folder cannot shift every later pairing by one.
        geodata = next(
            (path for path in geodata_list if str(year) in os.path.basename(path)),
            None,
        )
        if geodata is None:
            print(f"No California tract file found for {year}; skipping {file}.")
            continue

        gdf = gpd.read_file(geodata)
        merged_df = df.merge(gdf, on=['YEAR', 'GEO_ID', 'NAME'], how='left')
        merged_df = merged_df[['YEAR', 'FIPS', 'PLACE', 'GEO_ID', 'NAME', 'INTPTLAT', 'INTPTLON', 'geometry']]
        merged_df = gpd.GeoDataFrame(merged_df)
        download_path = f'{assets_folder}{category}_mastergeometry_{year}.json'
        merged_df.to_file(download_path, driver='GeoJSON')

In [31]:
# ------------ FUNCTION #11 (REQUIRED; mastergeometry specification by year and place) ------------ #
# Purpose: To specify the previously generated year mastergeometries by place.
# Reasoning: Clientside callbacks triggered via GitHub Pages will likely consume lots of resources
#            by using the year mastergeometries alone. As such, it may be preferential to decompose
#            each year mastergeometry by place. The result will be a massive collection of JSON files
#            whose file names will correspond to the unique combinations of year and place.

def mastergeometries_by_year_place(category):
    """
    Split each yearly mastergeometry of a category into one GeoJSON file per place.

    Only two possible inputs for the category property: 'contract_rent' or 'rent_burden'

    Every file in assets_folder whose name contains '<category>_mastergeometry' is
    read, and for each distinct PLACE in it a file is written to

    assets_folder + <category>/<year>/<category>_mastergeometry_<year>_<place>.json

    where <place> is the place name without spaces. The year is taken from the
    end of the source file name ('..._<year>.json'); a matching file whose name
    does not end in a number is skipped with a message.
    """
    category_path = f'{category}_mastergeometry'
    for file in sorted(os.listdir(assets_folder)):
        if category_path not in file:
            continue

        # Read the year from the file name itself. Pairing the files with a fixed
        # list of years would mislabel them as soon as one year is missing and
        # would ignore any file beyond the end of that list.
        year = os.path.splitext(file)[0].rsplit('_', 1)[-1]
        if not year.isdigit():
            print(f"Cannot read a year from '{file}'; skipping.")
            continue

        gdf = gpd.read_file(f'{assets_folder}{file}')
        folder_path = f'{assets_folder}{category}/{year}/'
        os.makedirs(folder_path, exist_ok=True)

        for place in list(gdf['PLACE'].unique()):
            place_gdf = gdf[gdf['PLACE'] == place]
            place_name = place.replace(" ", "")
            download_path = f'{folder_path}{category_path}_{year}_{place_name}.json'
            place_gdf.to_file(download_path, driver='GeoJSON')

In [31]:
# ------------ FUNCTION #12 (optional) ------------ #
# Purpose: To convert the previously generated year masterfiles into JSON files specified by year and place.
# Reasoning: Same reasoning as for the by-year-and-place mastergeometries function (mastergeometries_by_year_place).
def masterfiles_json(category, initial_year=2010, final_year=2023):
    """
    Export the yearly masterfiles of a category as one JSON file per year and place.

    Only two possible inputs for the category property: 'contract_rent' or 'rent_burden'

    For every year from initial_year through final_year (inclusive), the masterfile
    '<category>_masterfile_<year>.csv' is read, the INTPTLAT and INTPTLON columns
    of the mastergeometry of the same year are added, and display columns are
    derived:

    * 'contract_rent': 'Median', '75th' and '25th' as dollar strings, with fixed
      texts for missing values and for the values 3501 (and 2001 in 2010-2014).
    * 'rent_burden': percentages 'TotalRentBurden', 'RentBurden_<age group>' (with
      '<...>_str' text versions) and 'TotalSevereRentBurden'.

    The rows of every place are then written to

    masterfiles_folder + <category>/<year>/<category>_<year>_<place>.json

    where <place> is the place name without spaces.

    Parameters
    ----------
    category : str
        'contract_rent' or 'rent_burden'.
    initial_year, final_year : int, optional
        First and last year to export (both inclusive); 2010 and 2023 by default.
    """
    for year in range(initial_year, final_year + 1):
        file_path = f'{masterfiles_folder}{category}_masterfile_{year}.csv'
        df = pd.read_csv(file_path)
        map_path = f'{assets_folder}{category}_mastergeometry_{year}.json'
        gdf = gpd.read_file(map_path)

        # One coordinate row per tract: if a GEO_ID occurs several times in the
        # mastergeometry, merging on GEO_ID alone would multiply the matching rows.
        tract_points = gdf[['GEO_ID', 'INTPTLAT', 'INTPTLON']].drop_duplicates(subset='GEO_ID')
        df = pd.merge(df, tract_points, on='GEO_ID', how='left')

        if category == 'contract_rent':
            df['dummy'] = 1
            df['Median'] = df['B25058_001E']
            df['75th'] = df['B25059_001E']
            df['25th'] = df['B25057_001E']
            for col in ['Median', '75th', '25th']:
                df[col] = '$' + df[col].astype(str)
                # Remove only a trailing '.0'. The pattern is anchored and escaped
                # so that the result does not depend on whether str.replace
                # treats its pattern as a regular expression by default.
                df[col] = df[col].str.replace(r'\.0$', '', regex=True)
                df.loc[df[col] == '$3501', col] = 'Not available. Exceeds $3500!'
                df.loc[df[col] == '$nan', col] = 'Not Available!'
                if year in [2010, 2011, 2012, 2013, 2014]:
                    df.loc[df[col] == '$2001', col] = 'Not available. Exceeds $2000!'

        elif category == 'rent_burden':
            df['TotalRentBurden'] = ((df['B25070_007E'] + df['B25070_008E'] + df['B25070_009E'] + df['B25070_010E']) / df['B25070_001E'] * 100).round(2)
            df['RentBurden_15to24'] = ((df['B25072_006E'] + df['B25072_007E']) / df['B25072_002E'] * 100).round(2)
            df['RentBurden_25to34'] = ((df['B25072_013E'] + df['B25072_014E']) / df['B25072_009E'] * 100).round(2)
            df['RentBurden_35to64'] = ((df['B25072_020E'] + df['B25072_021E']) / df['B25072_016E'] * 100).round(2)
            df['RentBurden_65+'] = ((df['B25072_027E'] + df['B25072_028E']) / df['B25072_023E'] * 100).round(2)

            # Text versions of the age-group percentages for display
            for age_group in ['15to24', '25to34', '35to64', '65+']:
                text_col = f'RentBurden_{age_group}_str'
                df[text_col] = df[f'RentBurden_{age_group}'].astype(str) + '%'
                df.loc[df[text_col] == 'nan%', text_col] = 'Not Available'

            df['TotalSevereRentBurden'] = (df['B25070_010E'] / df['B25070_001E'] * 100).round(2)

        folder_path = f'{masterfiles_folder}{category}/{year}/'
        os.makedirs(folder_path, exist_ok=True)

        for place in list(df['PLACE'].unique()):
            place_df = df[df['PLACE'] == place]
            place_name = place.replace(" ", "")
            download_path = f'{folder_path}{category}_{year}_{place_name}.json'
            place_df.to_json(download_path, orient='records', lines=False)

## Applying the functions

### Contract Rents

In [117]:
# ------------ DATA EXTRACTION ------------ #
# Download 2010-2023 data for the three contract rent tables:
# B25057: Lower Quartile Contract Rent
# B25058: Median Contract Rent
# B25059: Upper Quartile Contract Rent
for acs_id in ('B25057', 'B25058', 'B25059'):
    ACS_mass_data_extraction(2010, 2023, acs_id)

In [None]:
# ------------ DATA COLLECTION ------------ #
contract_rents_list = ['B25057', 'B25058', 'B25059']

# Combine the cleaned yearly files of every place, table by table
for acs_id in contract_rents_list:
    mass_concatenate_ACS_files(acs_id)

# Regroup the combined place files by year, table by table
for acs_id in contract_rents_list:
    concatenate_by_year(acs_id)

# Merge the three tables into the contract rent masterfiles
masterfile_concatenate_by_year(contract_rents_list, 'contract_rent')

In [135]:
# ------------ ACCOMPANYING MASTER GEOMETRIES ------------ #
# Merge each contract rent masterfile with the California tract geometries and
# write one 'contract_rent_mastergeometry_<year>.json' GeoJSON file per year to
# the assets folder.
mastergeometries_by_year('contract_rent')

In [33]:
# ------------ MASTER GEOMETRIES BY YEAR & PLACE ------------ #
# Split each yearly contract rent mastergeometry into one GeoJSON file per place.
mastergeometries_by_year_place('contract_rent')

In [27]:
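# ------------ MASTERFILES TO JSON FILES BY YEAR & PLACE ------------ #
# Optional step, left disabled: uncomment the call below to export the contract
# rent masterfiles as JSON files by year and place.
#masterfiles_json('contract_rent')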

### Rent Burden

In [None]:
# ------------ DATA EXTRACTION ------------ #
# Download 2010-2023 data for the two rent burden tables:
# B25070: Gross Rent as a Percentage of Household Income in the Past 12 Months
# B25072: Age of Householder by Gross Rent as a Percentage of Household Income in the Past 12 Months
for acs_id in ('B25070', 'B25072'):
    ACS_mass_data_extraction(2010, 2023, acs_id)

In [None]:
# ------------ DATA COLLECTION ------------ #
rent_burden_list = ['B25070', 'B25072']

# Combine the cleaned yearly files of every place, table by table
for acs_id in rent_burden_list:
    mass_concatenate_ACS_files(acs_id)

# Regroup the combined place files by year, table by table
for acs_id in rent_burden_list:
    concatenate_by_year(acs_id)

# Merge the two tables into the rent burden masterfiles
masterfile_concatenate_by_year(rent_burden_list, 'rent_burden')

In [137]:
# ------------ ACCOMPANYING MASTER GEOMETRIES ------------ #
# Merge each rent burden masterfile with the California tract geometries and
# write one 'rent_burden_mastergeometry_<year>.json' GeoJSON file per year to
# the assets folder.
mastergeometries_by_year('rent_burden')

In [34]:
# ------------ MASTER GEOMETRIES BY YEAR & PLACE ------------ #
# Split each yearly rent burden mastergeometry into one GeoJSON file per place.
mastergeometries_by_year_place('rent_burden')

In [33]:
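# ------------ MASTERFILES TO JSON FILES BY YEAR & PLACE ------------ #
# Optional step, left disabled: uncomment the call below to export the rent
# burden masterfiles as JSON files by year and place.
#masterfiles_json('rent_burden')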