In [3]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
import glob
import os
import re

# Demographic Metric

### Race data

In [5]:
directory_path = r"C:\Users\jabba\Desktop\Code\machine_learning\AUC_mastercard_challenge\src\demographic"

# Specify the file pattern
file_pattern = os.path.join(directory_path, 'acs_race_*.csv')

# Use glob to find all files matching the pattern
file_list = glob.glob(file_pattern)

# Initialize an empty list to hold the DataFrames
dataframes = []

# Loop through each file, read it into a DataFrame, and append it to the list
for file in file_list:
    
    # Read the CSV file
    df = pd.read_csv(file)
    
    # Set Label Grouping as index
    df.set_index('Label (Grouping)', drop=True, inplace=True)
    
    # Delete Percent columns
    df = df.loc[:, ~df.columns.str.contains('Percent', case=False)]
    
    # Transpose
    df = df.T

    # Add a year column based on the file name
    year = os.path.basename(file).split('_')[-1].split('.')[0]
    df['Estimate Year'] = year

    # Append the modified DataFrame to the list
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
merged_race_df = pd.concat(dataframes, ignore_index=False)

# Move 'Estimate Year' to the front
column_order = ['Estimate Year'] + [col for col in merged_race_df.columns if col != 'Estimate Year']
merged_race_df = merged_race_df[column_order]

# Strip whitespace from column names
merged_race_df.columns = merged_race_df.columns.str.strip()

# Specify columns to keep
columns_to_keep = [
    'Estimate Year',
    'Total:',
    'White alone',
    'Black or African American alone',
#     'American Indian and Alaska Native alone',
    'Asian alone',
#     'Native Hawaiian and Other Pacific Islander alone'
]

# Check which columns to keep exist
existing_columns_to_keep = [col for col in columns_to_keep if col in merged_race_df.columns]

# Create a filtered DataFrame with only the specified columns
filtered_race = merged_race_df[existing_columns_to_keep]

# Extract the tract number using regex
filtered_race.index = filtered_race.index.str.extract(r'(\d+(\.\d+)?)')[0]

# # Rename index to Tract ID
filtered_race.index.name = 'Tract ID'

# # Optionally save the filtered DataFrame to a new CSV file
filtered_race.to_csv('filtered_race.csv', index=False)

# Reset index to turn the index into a column
filtered_race.reset_index(drop=False, inplace=True)

# Display the filtered DataFrame
filtered_race

Label (Grouping),Tract ID,Estimate Year,Total:,White alone,Black or African American alone,Asian alone
0,4052,2017,5125,1930,711,1519
1,4053.01,2017,3019,1552,757,275
2,4053.02,2017,2446,885,367,848
3,4054.01,2017,4014,1061,806,1362
4,4054.02,2017,3250,404,841,1117
...,...,...,...,...,...,...
67,4057,2022,3522,609,985,934
68,4058,2022,4182,630,850,1815
69,4059.01,2022,3757,434,643,1261
70,4059.02,2022,3138,407,234,1275


### Age and Sex data

In [9]:
# Directory path for age and sex data files
directory_path = r"C:\Users\jabba\Desktop\Code\machine_learning\AUC_mastercard_challenge\src\demographic"

# Specify the file pattern
file_pattern = os.path.join(directory_path, 'acs_age_sex_*.csv')

# Use glob to find all files matching the pattern
file_list = glob.glob(file_pattern)

# Initialize an empty list to hold the DataFrames
dataframes = []

# Loop through each file, read it into a DataFrame, and append it to the list
for file in file_list:
    
    # Read the CSV file
    df = pd.read_csv(file)
    
    # Set Label Grouping as index
    df.set_index('Label (Grouping)', drop=True, inplace=True)
    
    # Delete Percent columns
    df = df.loc[:, ~df.columns.str.contains('Percent', case=False)]

    # Rename the columns
#     df.rename(columns=new_column_names, inplace=True)    
    
    # Transpose
    df = df.T

    # Add a year column based on the file name
    year = os.path.basename(file).split('_')[-1].split('.')[0]
    df['Estimate Year'] = year

    # Append the modified DataFrame to the list
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
merged_age_sex_df = pd.concat(dataframes, ignore_index=False)

# Move 'Estimate Year' to the front
column_order = ['Estimate Year'] + [col for col in merged_age_sex_df.columns if col != 'Estimate Year']
merged_age_sex_df = merged_age_sex_df[column_order]

# Strip whitespace from column names
merged_age_sex_df.columns = merged_age_sex_df.columns.str.strip()

# Specify columns to keep
columns_to_keep = [
    'Estimate Year',
    'Total population',
    'Under 18 years',
    'Median age (years)',
    '18 years and over'
]

# Check which columns to keep exist
existing_columns_to_keep = [col for col in columns_to_keep if col in merged_age_sex_df.columns]

# Create a filtered DataFrame with only the specified columns
filtered_age_sex = merged_age_sex_df[existing_columns_to_keep]

# Create a "Sex" column based on the original index values before transposing
sex_values = []

# Re-create the index to get sex information before transposing
original_indices = []
for index in filtered_age_sex.index:
    original_indices.append(index)

# Create the "Sex" column
for index in original_indices:
    if "Male" in index:
        sex_values.append(1)
    elif "Female" in index:
        sex_values.append(0)
    else:
        sex_values.append(2)  # For combined categories

# Assign the list to the Sex column
filtered_age_sex['Sex'] = sex_values

# Extract numeric parts from the index using str.extract
filtered_age_sex.index = filtered_age_sex.index.str.extract(r'(\d+(\.\d+)?)')[0]

# # Rename index to Tract ID
filtered_age_sex.index.name = 'Tract ID'

# Reset index to turn the index into a column
filtered_age_sex.reset_index(drop=False, inplace=True)

# Optionally save the filtered DataFrame to a new CSV file
filtered_age_sex.to_csv('final_age_sex_data.csv', index=False)

# Display the filtered DataFrame
filtered_age_sex

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_age_sex['Sex'] = sex_values


Label (Grouping),Tract ID,Estimate Year,Total population,Under 18 years,Median age (years),18 years and over,Sex
0,4052,2017,5125,606,35.8,4519,2
1,4052,2017,2617,324,33.5,2293,1
2,4052,2017,2508,282,37.8,2226,0
3,4053.01,2017,3019,326,36.2,2693,2
4,4053.01,2017,1336,152,36.9,1184,1
...,...,...,...,...,...,...,...
211,4059.02,2022,1609,307,35.6,1302,1
212,4059.02,2022,1529,319,34.2,1210,0
213,4060,2022,4735,814,43.8,3921,2
214,4060,2022,2081,296,45.6,1785,1


### Merge to create demographic metric

In [376]:
# Aggregate race data by Tract ID and Estimate Year
aggregated_race_data = filtered_race.groupby(['Tract ID', 'Estimate Year'], as_index=False).agg({
    'Total:': 'sum',
    'White alone': 'sum',
    'Black or African American alone': 'sum',
    'Asian alone': 'sum'
})

merged_demo = pd.merge(filtered_race, filtered_age_sex, on=['Tract ID', 'Estimate Year'], how='inner')

merged_data.to_csv('merged_demographic_data.csv', index=False)

merged_demo

Label (Grouping),Tract ID,Estimate Year,Total:,White alone,Black or African American alone,Asian alone,Total population,Under 18 years,Median age (years),18 years and over,Sex
0,4052,2017,5125,1930,711,1519,5125,606,35.8,4519,2
1,4052,2017,5125,1930,711,1519,2617,324,33.5,2293,1
2,4052,2017,5125,1930,711,1519,2508,282,37.8,2226,0
3,4055,2017,4124,1152,585,2051,4124,626,34.7,3498,2
4,4055,2017,4124,1152,585,2051,1971,266,34.8,1705,1
5,4055,2017,4124,1152,585,2051,2153,360,34.4,1793,0
6,4056,2017,3171,1221,482,875,3171,576,36.5,2595,2
7,4056,2017,3171,1221,482,875,1797,413,35.4,1384,1
8,4056,2017,3171,1221,482,875,1374,163,38.9,1211,0
9,4057,2017,3760,814,1229,1179,3760,602,35.4,3158,2


---------------------------------------------
# Housing Metric

### Housing Occupancy data

In [34]:
directory_path = r"C:\Users\jabba\Desktop\Code\machine_learning\AUC_mastercard_challenge\src\housing_market"

# Specify the file pattern
file_pattern = os.path.join(directory_path, 'acs_housing_occupancy_*.csv')

# Use glob to find all files matching the pattern
file_list = glob.glob(file_pattern)

# Initialize an empty list to hold the DataFrames
dataframes = []

# Rows to remove
rows_to_remove = [
    "RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER", 
    "One race --",
    "Two or more races",
    "Hispanic or Latino origin",
    "White alone, not Hispanic or Latino",
    "AGE OF HOUSEHOLDER",
    "EDUCATIONAL ATTAINMENT OF HOUSEHOLDER",
    "YEAR HOUSEHOLDER MOVED INTO UNIT",
]

# Loop through each file, read it into a DataFrame, and append it to the list
for file in file_list:
    df = pd.read_csv(file)

    # Add a year column based on the file name
    year = os.path.basename(file).split('_')[-1].split('.')[0]
    df['Estimate Year'] = year

    # Remove specified rows after stripping whitespace
    df['Label (Grouping)'] = df['Label (Grouping)'].str.strip()  # Remove leading/trailing spaces
    df = df[~df['Label (Grouping)'].isin(rows_to_remove)]
    
    # Drop columns that contain the word 'Percent'
    df = df.loc[:, ~df.columns.str.contains('Percent', case=False)]
    
    # Prepare the renaming dictionary
    rename_dict = {
        'Label (Grouping)': 'Demographic Group'
    }

    # Define the tracts and create the renaming mappings
    tracts = [
        '4052', '4053.01', '4053.02', '4054.01', 
        '4054.02', '4055', '4056', '4057', '4058', 
        '4059.01', '4059.02', '4060'
    ]

    for tract in tracts:
        if year == '2022':
            # Renaming for 2022 data with semicolons
            rename_dict[f'Census Tract {tract}; Alameda County; California!!Occupied housing units!!Estimate'] = f'Census Tract {tract} Total occupied housing units'
            rename_dict[f'Census Tract {tract}; Alameda County; California!!Owner-occupied housing units!!Estimate'] = f'Census Tract {tract} owner-occupied units'
            rename_dict[f'Census Tract {tract}; Alameda County; California!!Renter-occupied housing units!!Estimate'] = f'Census Tract {tract} renter-occupied units'
        else:
            # Renaming for other years with commas
            rename_dict[f'Census Tract {tract}, Alameda County, California!!Occupied housing units!!Estimate'] = f'Census Tract {tract} Total occupied housing units'
            rename_dict[f'Census Tract {tract}, Alameda County, California!!Owner-occupied housing units!!Estimate'] = f'Census Tract {tract} owner-occupied units'
            rename_dict[f'Census Tract {tract}, Alameda County, California!!Renter-occupied housing units!!Estimate'] = f'Census Tract {tract} renter-occupied units'

    # Rename the columns
    df.rename(columns=rename_dict, inplace=True)

    # Append the modified DataFrame to the list
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
merged_occupancy_df = pd.concat(dataframes, ignore_index=True)

# Move 'Year' to the front
column_order = ['Estimate Year'] + [col for col in merged_occupancy_df.columns if col != 'Estimate Year']
merged_occupancy_df = merged_occupancy_df[column_order]

# Save the merged DataFrame to a new CSV file
# merged_occupancy_df.to_csv('filtered_merged_occupancy_df.csv', index=False)


filtered_merged_occupancy_df = merged_occupancy_df.dropna(how='all')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)  # Show full column width

filtered_merged_occupancy_df.head()


Unnamed: 0,Estimate Year,Demographic Group,Census Tract 4052 Total occupied housing units,Census Tract 4052 owner-occupied units,Census Tract 4052 renter-occupied units,Census Tract 4053.01 Total occupied housing units,Census Tract 4053.01 owner-occupied units,Census Tract 4053.01 renter-occupied units,Census Tract 4053.02 Total occupied housing units,Census Tract 4053.02 owner-occupied units,...,Census Tract 4058 renter-occupied units,Census Tract 4059.01 Total occupied housing units,Census Tract 4059.01 owner-occupied units,Census Tract 4059.01 renter-occupied units,Census Tract 4059.02 Total occupied housing units,Census Tract 4059.02 owner-occupied units,Census Tract 4059.02 renter-occupied units,Census Tract 4060 Total occupied housing units,Census Tract 4060 owner-occupied units,Census Tract 4060 renter-occupied units
0,2017,Occupied housing units,2407,642.0,1765,1548,143.0,1405,1448,77.0,...,865.0,1080,375.0,705.0,956.0,243.0,713.0,1405,136.0,1269
1,2017,White,1080,249.0,831,853,75.0,778,619,24.0,...,165.0,175,36.0,139.0,181.0,42.0,139.0,384,28.0,356
2,2017,Black or African American,419,24.0,395,384,5.0,379,228,0.0,...,271.0,205,109.0,96.0,196.0,47.0,149.0,234,0.0,234
3,2017,American Indian and Alaska Native,46,13.0,33,7,0.0,7,7,0.0,...,9.0,51,22.0,29.0,11.0,0.0,11.0,6,0.0,6
4,2017,Asian,594,291.0,303,154,40.0,114,463,53.0,...,321.0,367,163.0,204.0,409.0,131.0,278.0,570,98.0,472


### Median Home Price data

In [47]:
# Define the directory containing the median home price data
directory_path = r"C:\Users\jabba\Desktop\Code\machine_learning\AUC_mastercard_challenge\src\housing_market"

# Specify the file pattern for median home price files
file_pattern = os.path.join(directory_path, 'acs_median_home_price_*.csv')

# Use glob to find all files matching the pattern
file_list = glob.glob(file_pattern)

# Initialize an empty list to hold the DataFrames
dataframes = []

# Define the tracts for renaming
tracts = [
    '4052', '4053.01', '4053.02', '4054.01',
    '4054.02', '4055', '4056', '4057', '4058',
    '4059.01', '4059.02', '4060'
]

# Function to rename tract columns
def rename_tract_column(col_name):
    for tract in tracts:
        if tract in col_name:
            return f"Tract {tract} Estimate"
    return col_name

# Loop through each file, read it into a DataFrame, and append it to the list
for file in file_list:
    df = pd.read_csv(file)

    # Add a year column based on the file name
    year = os.path.basename(file).split('_')[-1].split('.')[0]
    df['Estimate Year'] = year

    # Rename columns, focusing on tract names
    df.columns = [rename_tract_column(col) for col in df.columns]

    # Append the modified DataFrame to the list
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
merged_housing_df = pd.concat(dataframes, ignore_index=True)

# Move 'Estimate Year' to the front
column_order = ['Estimate Year'] + [col for col in merged_housing_df.columns if col != 'Estimate Year']
merged_housing_df = merged_housing_df[column_order]

# Remove rows where all elements are NaN
filtered_merged_median_price_df = merged_housing_df.dropna(how='all')

# Rename the 'Label (Grouping)' column to 'Median Value (Dollars)'
filtered_merged_median_price_df.rename(columns={'Label (Grouping)': 'Median Value (Dollars)'}, inplace=True)

# Save the merged DataFrame to a new CSV file
#filtered_merged_median_price_df.to_csv('filtered_merged_median_price_df.csv', index=False)

# Display the first few rows of the filtered DataFrame
filtered_merged_median_price_df


Unnamed: 0,Estimate Year,Median Value (Dollars),Tract 4052 Estimate,Tract 4053.01 Estimate,Tract 4053.02 Estimate,Tract 4054.01 Estimate,Tract 4054.02 Estimate,Tract 4055 Estimate,Tract 4056 Estimate,Tract 4057 Estimate,Tract 4058 Estimate,Tract 4059.01 Estimate,Tract 4059.02 Estimate,Tract 4060 Estimate
0,2017,Median value (dollars),764100,647700,428300,459500,428600,509300,541300,455700,370000,369100,407600,434500
1,2018,Median value (dollars),839200,627100,456300,462000,466700,665400,562200,469100,438800,403000,512500,523300
2,2019,Median value (dollars),888900,682000,585900,498800,573700,687900,563200,484900,463300,434000,533900,571400
3,2020,Median value (dollars),886100,779600,491700,581300,648000,705000,644400,529800,547400,462300,599300,625000
4,2021,Median value (dollars),866500,878700,653800,614100,731100,713100,705200,671200,600400,546900,635100,643100
5,2022,Median value (dollars),1060300,793300,637800,652400,976000,754100,743000,665900,696600,649600,696300,745000


### merge to create housing metric

In [44]:
# Reshape the housing occupancy DataFrame using melt, focusing on relevant columns
occupancy_melted = filtered_merged_occupancy_df.melt(id_vars=['Estimate Year'], 
                                                       value_vars=[
                                                           col for col in filtered_merged_occupancy_df.columns 
                                                           if 'Total occupied housing units' in col or 
                                                              'owner-occupied' in col or 
                                                              'renter-occupied' in col
                                                       ],
                                                       var_name='Tract', 
                                                       value_name='Occupancy Count')

# Clean the 'Tract' column to match the format in the median home prices DataFrame
occupancy_melted['Tract'] = occupancy_melted['Tract'].str.replace(' Total occupied housing units', '', regex=False)
occupancy_melted['Tract'] = occupancy_melted['Tract'].str.replace(' owner-occupied units', '', regex=False)
occupancy_melted['Tract'] = occupancy_melted['Tract'].str.replace(' renter-occupied units', '', regex=False)

# Remove any leading or trailing spaces from the Tract names
occupancy_melted['Tract'] = occupancy_melted['Tract'].str.strip()

# Convert the 'Occupancy Count' to numeric, forcing errors to NaN
occupancy_melted['Occupancy Count'] = pd.to_numeric(occupancy_melted['Occupancy Count'].str.replace(',', ''), errors='coerce')

# Aggregate the occupancy counts by Estimate Year
occupancy_aggregated = occupancy_melted.groupby('Estimate Year').agg({
    'Occupancy Count': 'mean'  
}).reset_index()

# Merge the aggregated occupancy DataFrame with median home prices based on Estimate Year
merged_data = pd.merge(occupancy_aggregated, filtered_merged_median_price_df, how='inner', 
                        left_on='Estimate Year', 
                        right_on='Estimate Year')

# Rename columns for clarity
merged_data.rename(columns={
    'Median Value (Dollars)': 'Median Home Price'
}, inplace=True)

# Optionally, save the merged dataset for further analysis
merged_data.to_csv('merged_housing_market_data.csv', index=False)
merged_data

Unnamed: 0,Estimate Year,Occupancy Count,Median Home Price,Tract 4052 Estimate,Tract 4053.01 Estimate,Tract 4053.02 Estimate,Tract 4054.01 Estimate,Tract 4054.02 Estimate,Tract 4055 Estimate,Tract 4056 Estimate,Tract 4057 Estimate,Tract 4058 Estimate,Tract 4059.01 Estimate,Tract 4059.02 Estimate,Tract 4060 Estimate
0,2017,292.675439,Median value (dollars),764100,647700,428300,459500,428600,509300,541300,455700,370000,369100,407600,434500
1,2018,293.607456,Median value (dollars),839200,627100,456300,462000,466700,665400,562200,469100,438800,403000,512500,523300
2,2019,296.430556,Median value (dollars),888900,682000,585900,498800,573700,687900,563200,484900,463300,434000,533900,571400
3,2020,281.964583,Median value (dollars),886100,779600,491700,581300,648000,705000,644400,529800,547400,462300,599300,625000
4,2021,294.2625,Median value (dollars),866500,878700,653800,614100,731100,713100,705200,671200,600400,546900,635100,643100
5,2022,294.570833,Median value (dollars),1060300,793300,637800,652400,976000,754100,743000,665900,696600,649600,696300,745000


-------------------------------------
# Economic Stability

### Income Data

In [45]:
# Define the directory path
directory_path = r"C:\Users\jabba\Desktop\Code\machine_learning\AUC_mastercard_challenge\src\economic_stability"

# List of years to include in the concatenation
years = [2017, 2018, 2019, 2020, 2021, 2022]

# Initialize an empty list to hold DataFrames
dataframes = []

# Loop through the years and read the corresponding CSV files
for year in years:
    file_path = os.path.join(directory_path, f'income_stability_{year}.csv')
    
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # If the year is 2022, replace semicolons with commas in the column names
    if year == 2022:
        df.columns = df.columns.str.replace(';', ',', regex=False)

    # Add a year column based on the file name
    df['Estimate Year'] = year

    # Append the DataFrame to the list
    dataframes.append(df)
    
# Concatenate all DataFrames
combined_data = pd.concat(dataframes, ignore_index=True)

# Remove specified rows based on the values in a specific column (assuming the first column holds these values)
rows_to_drop = [
    "HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER",
    "HOUSEHOLD INCOME BY AGE OF HOUSEHOLDER", "FAMILIES", "FAMILY INCOME BY FAMILY SIZE", 
    "FAMILY INCOME BY NUMBER OF EARNERS", "NONFAMILY HOUSEHOLDS"
    
]
combined_data = combined_data[~combined_data.iloc[:, 0].isin(rows_to_drop)]

# Move 'Estimate Year' to the front
column_order = ['Estimate Year'] + [col for col in combined_data.columns if col != 'Estimate Year']
combined_data = combined_data[column_order]

# Remove columns that contain 'Percent' in their names
combined_data = combined_data.loc[:, ~combined_data.columns.str.contains("Percent")]

# Save the combined DataFrame to a new CSV file
output_file_path = os.path.join(directory_path, 'income_stability_combined.csv')
combined_data.to_csv(output_file_path, index=False)

# Display the first few rows of the combined DataFrame
combined_data.head(100)


Unnamed: 0,Estimate Year,Label (Grouping),"Census Tract 4052, Alameda County, California!!Number!!Estimate","Census Tract 4052, Alameda County, California!!Median income (dollars)!!Estimate","Census Tract 4053.01, Alameda County, California!!Number!!Estimate","Census Tract 4053.01, Alameda County, California!!Median income (dollars)!!Estimate","Census Tract 4053.02, Alameda County, California!!Number!!Estimate","Census Tract 4053.02, Alameda County, California!!Median income (dollars)!!Estimate","Census Tract 4054.01, Alameda County, California!!Number!!Estimate","Census Tract 4054.01, Alameda County, California!!Median income (dollars)!!Estimate",...,"Census Tract 4057, Alameda County, California!!Number!!Estimate","Census Tract 4057, Alameda County, California!!Median income (dollars)!!Estimate","Census Tract 4058, Alameda County, California!!Number!!Estimate","Census Tract 4058, Alameda County, California!!Median income (dollars)!!Estimate","Census Tract 4059.01, Alameda County, California!!Number!!Estimate","Census Tract 4059.01, Alameda County, California!!Median income (dollars)!!Estimate","Census Tract 4059.02, Alameda County, California!!Number!!Estimate","Census Tract 4059.02, Alameda County, California!!Median income (dollars)!!Estimate","Census Tract 4060, Alameda County, California!!Number!!Estimate","Census Tract 4060, Alameda County, California!!Median income (dollars)!!Estimate"
1,2017,Households,2407,77688,1548,62500,1448,41111,1688,46790,...,1353,50344,1333,38794,1080,36214,956,36000,1405,35457
2,2017,One race--,,,,,,,,,...,,,,,,,,,,
3,2017,White,1080,83258,853,90391,619,62431,450,54722,...,327,58750,262,92750,175,51926,181,58250,384,74938
4,2017,Black or African American,419,63813,384,38500,228,-,404,32123,...,562,34674,393,31587,205,32083,196,22500,234,27308
5,2017,American Indian and Alaska Native,46,59074,7,-,7,-,12,-,...,9,-,9,-,51,-,11,-,6,-
6,2017,Asian,594,69821,154,-,463,21893,493,-,...,306,68214,510,35714,367,35368,409,34125,570,18158
7,2017,Native Hawaiian and Other Pacific Islander,0,-,7,-,0,-,0,-,...,0,-,25,-,0,-,15,-,7,-
8,2017,Some other race,152,88661,52,-,59,-,205,47695,...,98,67500,70,-,272,36528,118,25500,129,-
9,2017,Two or more races,116,66419,91,-,72,36316,124,70789,...,51,-,64,-,10,-,26,40625,75,89712
10,2017,Hispanic or Latino origin (of any race),394,87946,105,-,139,35521,299,46182,...,170,50972,214,-,425,50230,215,35972,172,35500


### Employment

------------------------------------
# Inclusive Growth score

In [15]:
IG_data = pd.read_csv('Inclusive_Growth_Score_Data.csv')
IG_data

Unnamed: 0,N/A,Is an Opportunity Zone,Census Tract FIPS code,County,State,Year,Inclusive Growth Score,Growth,Inclusion,Place,...,"Female Above Poverty Tract, %",Gini Coefficient Score,Gini Coefficient Base,Gini Coefficient Tract,Early Education Enrollment Score,"Early Education Enrollment Base, %","Early Education Enrollment Tract, %",Health Insurance Coverage Score,"Health Insurance Coverage Base, %","Health Insurance Coverage Tract, %"
0,0,,6001405302,Alameda County,California,2017,44,51,36,40,...,71.2,6,41.3,52.8,77.0,25.5,37.6,27,91.2,86.7
1,1,,6001405302,Alameda County,California,2018,42,47,38,47,...,77.0,5,41.5,53.4,53.0,25.5,26.6,33,92.3,89.3
2,2,,6001405302,Alameda County,California,2019,42,45,38,40,...,80.6,3,41.6,55.8,34.0,25.3,19.6,33,93.3,90.7
3,3,,6001405302,Alameda County,California,2020,43,42,44,35,...,84.9,4,41.6,55.3,21.0,25.4,14.6,34,94.2,91.9
4,4,,6001405302,Alameda County,California,2021,44,44,45,38,...,86.4,6,41.3,53.6,,27.5,,34,95.0,92.8
5,5,,6001405302,Alameda County,California,2022,44,42,46,49,...,83.4,19,41.3,47.8,,27.4,,30,94.9,92.1
6,6,,6001405200,Alameda County,California,2017,63,60,66,70,...,92.9,53,41.3,40.9,92.0,25.5,51.4,72,91.2,94.6
7,7,,6001405200,Alameda County,California,2018,65,65,65,70,...,93.4,43,41.5,42.6,98.0,25.5,66.2,82,92.3,96.5
8,8,,6001405200,Alameda County,California,2019,64,66,63,69,...,94.0,34,41.6,44.2,99.0,25.3,76.5,79,93.3,96.7
9,9,,6001405200,Alameda County,California,2020,64,69,60,66,...,94.3,28,41.6,45.5,99.0,25.4,72.4,73,94.2,96.6
