In [1]:
import pandas as pd
import numpy as np
import time
import os

# CVAP

Code mostly from https://github.com/nonpartisan-redistricting-datahub/cvap/blob/main/disagg2022.ipynb by Spencer

## Step 1 Download Data
CVAP comes from the special tabulation from the Census Bereau. The 2011-2015 American Community Survey (ACS) 5-year estimates and this CVAP special tabulation created from those estimates use the same geography source. The reference date for all geographies in this product is January 1, 2015. The best/accurate would be year 2013 for this file. The 2015 file can be downloaded from (https://www.census.gov/programs-surveys/decennial-census/about/voting-rights/cvap.2015.html#list-tab-1518558936)

In [3]:
csv_path = './CVAP_2011-2015_ACS_csv_files/BlockGr.csv'

blockgr_df = pd.read_csv(csv_path, encoding='latin1')

blockgr_df.head()

Unnamed: 0,GEONAME,lntitle,geoid,lnnumber,CIT_EST,CIT_MOE,CVAP_EST,CVAP_MOE
0,"Block Group 1, Census Tract 201, Autauga Count...",Total,15000US010010201001,1,625,201,440,113
1,"Block Group 1, Census Tract 201, Autauga Count...",Not Hispanic or Latino,15000US010010201001,2,625,201,440,113
2,"Block Group 1, Census Tract 201, Autauga Count...",American Indian or Alaska Native Alone,15000US010010201001,3,0,11,0,11
3,"Block Group 1, Census Tract 201, Autauga Count...",Asian Alone,15000US010010201001,4,0,11,0,11
4,"Block Group 1, Census Tract 201, Autauga Count...",Black or African American Alone,15000US010010201001,5,90,95,65,68


In [4]:
blockgr_df['lntitle'].unique()

array(['Total', 'Not Hispanic or Latino',
       'American Indian or Alaska Native Alone', 'Asian Alone',
       'Black or African American Alone',
       'Native Hawaiian or Other Pacific Islander Alone', 'White Alone',
       'American Indian or Alaska Native and White', 'Asian and White',
       'Black or African American and White',
       'American Indian or Alaska Native and Black or African American',
       'Remainder of Two or More Race Responses', 'Hispanic or Latino'],
      dtype=object)

In [5]:
blockgr_df['GEONAME'][0]

'Block Group 1, Census Tract 201, Autauga County, Alabama'

## Step 2: Processing the 2021 Block Group CVAP data
CVAP data was retrieved at the block group level.  
The data was pivoted from narrow to wide data based on GEOIDs so that one row is one block group, and each field represents a particular race/ethnicity.  
The fields were renamed to fit character length requirements. 

In [6]:
blockgr_df.head(1)

Unnamed: 0,GEONAME,lntitle,geoid,lnnumber,CIT_EST,CIT_MOE,CVAP_EST,CVAP_MOE
0,"Block Group 1, Census Tract 201, Autauga Count...",Total,15000US010010201001,1,625,201,440,113


### Pivot the dataframe, reorder, and combine some fileds

In [7]:
rename_map = {
    "Total": "TOT15",
    "Not Hispanic or Latino": "NHS15",
    "American Indian or Alaska Native Alone": "AIA15",
    "Asian Alone": "ASN15",
    "Black or African American Alone": "BLK15",
    "Native Hawaiian or Other Pacific Islander Alone": "NHP15",
    "White Alone": "WHT15",
    "American Indian or Alaska Native and White": "AIW15",
    "Asian and White": "ASW15",
    "Black or African American and White": "BLW15",
    "American Indian or Alaska Native and Black or African American": "AIB15",
    "Remainder of Two or More Race Responses": "2OM15",
    "Hispanic or Latino": "HSP15",
}


pivot_df = blockgr_df.assign(
    field_name=lambda x: x["lntitle"].map(rename_map)
).pivot(
    index="geoid",
    columns="field_name",
    values=["CVAP_EST", "CIT_EST"]
)

pivot_df.columns = [
    f"{col[0]}_{col[1]}" for col in pivot_df.columns.to_flat_index()
]

pivot_df = pivot_df.reset_index()

field_mapping = {
    "CVAP_EST_TOT15": "CVAP_TOT15",
    "CVAP_EST_NHS15": "CVAP_NHS15",
    "CVAP_EST_AIA15": "CVAP_AIA15",
    "CVAP_EST_ASN15": "CVAP_ASN15",
    "CVAP_EST_BLK15": "CVAP_BLK15",
    "CVAP_EST_NHP15": "CVAP_NHP15",
    "CVAP_EST_WHT15": "CVAP_WHT15",
    "CVAP_EST_AIW15": "CVAP_AIW15",
    "CVAP_EST_ASW15": "CVAP_ASW15",
    "CVAP_EST_BLW15": "CVAP_BLW15",
    "CVAP_EST_AIB15": "CVAP_AIB15",
    "CVAP_EST_2OM15": "CVAP_2OM15",
    "CVAP_EST_HSP15": "CVAP_HSP15",
    "CIT_EST_TOT15": "C_TOT15",
    "CIT_EST_NHS15": "C_NHS15",
    "CIT_EST_AIA15": "C_AIA15",
    "CIT_EST_ASN15": "C_ASN15",
    "CIT_EST_BLK15": "C_BLK15",
    "CIT_EST_NHP15": "C_NHP15",
    "CIT_EST_WHT15": "C_WHT15",
    "CIT_EST_AIW15": "C_AIW15",
    "CIT_EST_ASW15": "C_ASW15",
    "CIT_EST_BLW15": "C_BLW15",
    "CIT_EST_AIB15": "C_AIB15",
    "CIT_EST_2OM15": "C_2OM15",
    "CIT_EST_HSP15": "C_HSP15",
}

pivot_df_renamed = pivot_df.rename(columns=field_mapping)

pivot_df_renamed = pivot_df_renamed.rename(columns={"geoid": "GEOID20"})

# Modify three categories to correspond with the Office of Management and Budget (OMB) racial categories
pivot_df_renamed["CVAP_AIA15"] += pivot_df_renamed["CVAP_AIB15"] + pivot_df_renamed["CVAP_AIW15"]
pivot_df_renamed["CVAP_BLK15"] += pivot_df_renamed["CVAP_AIB15"] + pivot_df_renamed["CVAP_BLW15"]
pivot_df_renamed["CVAP_ASN15"] += pivot_df_renamed["CVAP_ASW15"]

pivot_df_renamed["C_AIA15"] += pivot_df_renamed["C_AIB15"] + pivot_df_renamed["C_AIW15"]
pivot_df_renamed["C_BLK15"] += pivot_df_renamed["C_AIB15"] + pivot_df_renamed["C_BLW15"]
pivot_df_renamed["C_ASN15"] += pivot_df_renamed["C_ASW15"]

# Reorder the columns
desired_order = [
    "GEOID20",
    "CVAP_TOT15", "CVAP_NHS15", "CVAP_AIA15", "CVAP_ASN15", "CVAP_BLK15",
    "CVAP_NHP15", "CVAP_WHT15", "CVAP_AIW15", "CVAP_ASW15", "CVAP_BLW15",
    "CVAP_AIB15", "CVAP_2OM15", "CVAP_HSP15",
    "C_TOT15", "C_NHS15", "C_AIA15", "C_ASN15", "C_BLK15",
    "C_NHP15", "C_WHT15", "C_AIW15", "C_ASW15", "C_BLW15",
    "C_AIB15", "C_2OM15", "C_HSP15"
]

# Reorder columns in the DataFrame
pivot_df_ordered = pivot_df_renamed.reindex(columns=desired_order)

pivot_df_ordered

Unnamed: 0,GEOID20,CVAP_TOT15,CVAP_NHS15,CVAP_AIA15,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,...,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,C_ASW15,C_BLW15,C_AIB15,C_2OM15,C_HSP15
0,15000US010010201001,440,440,0,0,65,0,375,0,0,...,0,90,0,535,0,0,0,0,0,0
1,15000US010010201002,1060,1060,29,10,39,0,965,25,10,...,25,64,0,1155,25,15,4,0,15,0
2,15000US010010202001,750,750,0,10,535,0,205,0,10,...,55,615,0,385,0,55,0,0,0,0
3,15000US010010202002,915,895,10,0,469,0,415,10,0,...,15,534,0,485,10,0,4,0,0,15
4,15000US010010203001,1695,1695,60,10,405,10,1165,45,0,...,14,455,10,1520,45,4,0,0,80,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220328,15000US721537506011,740,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,980
220329,15000US721537506012,2270,25,0,0,0,0,25,0,0,...,0,0,0,25,0,0,0,0,0,2895
220330,15000US721537506013,930,40,0,0,0,0,40,0,0,...,0,0,0,40,0,0,0,0,0,1330
220331,15000US721537506021,1715,20,0,0,0,0,20,0,0,...,0,0,0,20,0,0,0,0,0,1975


#### Remove "15000US" for GEOID20 as it is country code

In [8]:
pivot_df_ordered["GEOID20"] = pivot_df_ordered["GEOID20"].str.replace("15000US", "", n=1)


In [9]:
pd.set_option('display.max_columns', None)

In [10]:
pivot_df_ordered.head()

Unnamed: 0,GEOID20,CVAP_TOT15,CVAP_NHS15,CVAP_AIA15,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,C_ASW15,C_BLW15,C_AIB15,C_2OM15,C_HSP15
0,10010201001,440,440,0,0,65,0,375,0,0,0,0,0,0,625,625,0,0,90,0,535,0,0,0,0,0,0
1,10010201002,1060,1060,29,10,39,0,965,25,10,4,0,15,0,1295,1295,29,25,64,0,1155,25,15,4,0,15,0
2,10010202001,750,750,0,10,535,0,205,0,10,0,0,0,0,1055,1055,0,55,615,0,385,0,55,0,0,0,0
3,10010202002,915,895,10,0,469,0,415,10,0,4,0,0,15,1065,1045,10,15,534,0,485,10,0,4,0,0,15
4,10010203001,1695,1695,60,10,405,10,1165,45,0,0,0,40,0,2145,2145,60,14,455,10,1520,45,4,0,0,80,0


#### Break the country CVAP data file into 50 states

In [11]:
import os
import pandas as pd

state_fips_mapping = {
    '01': 'al', '02': 'ak', '04': 'az', '05': 'ar', '06': 'ca', '08': 'co', '09': 'ct',
    '10': 'de', '12': 'fl', '13': 'ga', '15': 'hi', '16': 'id', '17': 'il', '18': 'in',
    '19': 'ia', '20': 'ks', '21': 'ky', '22': 'la', '23': 'me', '24': 'md', '25': 'ma',
    '26': 'mi', '27': 'mn', '28': 'ms', '29': 'mo', '30': 'mt', '31': 'ne', '32': 'nv',
    '33': 'nh', '34': 'nj', '35': 'nm', '36': 'ny', '37': 'nc', '38': 'nd', '39': 'oh',
    '40': 'ok', '41': 'or', '42': 'pa', '44': 'ri', '45': 'sc', '46': 'sd', '47': 'tn',
    '48': 'tx', '49': 'ut', '50': 'vt', '51': 'va', '53': 'wa', '54': 'wv', '55': 'wi', '56': 'wy'
}

pivot_df_ordered['state_fips'] = pivot_df_ordered['GEOID20'].str[0:2]

output_dir = './bg_cvap_2015'
os.makedirs(output_dir, exist_ok=True)

for fips, abbrev in state_fips_mapping.items():
    state_data = pivot_df_ordered[pivot_df_ordered['state_fips'] == fips]
    if not state_data.empty:
        state_file_path = os.path.join(output_dir, f'{abbrev}_cvap_2015_bg.csv')
        state_data.to_csv(state_file_path, index=False)

output_dir


'./bg_cvap_2015'

## Step 3: Disaggregating the 2015 Block Group CVAP data to 2010 Blocks

In [6]:
# Variable to set the year for the CVAP data
y = "15"

# Define the column names
update_dict = {
'P0040001': 'CVAP_TOT' + y,
 'P0040003': 'CVAP_NHS' + y,
 'P0040007': 'CVAP_AIA' + y,
 'P0040008': 'CVAP_ASN' + y,
 'P0040006': 'CVAP_BLK' + y,
 'P0040009': 'CVAP_NHP' + y,
 'P0040005': 'CVAP_WHT' + y,
 'P0040014': 'CVAP_AIW' + y,
 'P0040015': 'CVAP_ASW' + y,
 'P0040013': 'CVAP_BLW' + y,
 'P0040018': 'CVAP_AIB' + y,
 'P0040011': 'CVAP_2OM' + y,
 'P0040002': 'CVAP_HSP' + y,
 'P0020001': 'C_TOT' + y,
 'P0020003': 'C_NHS' + y,
 'P0020007': 'C_AIA' + y,
 'P0020008': 'C_ASN' + y,
 'P0020006': 'C_BLK' + y,
 'P0020009': 'C_NHP' + y,
 'P0020005': 'C_WHT' + y,
 'P0020014': 'C_AIW' + y,
 'P0020015': 'C_ASW' + y,
 'P0020013': 'C_BLW' + y,
 'P0020018': 'C_AIB' + y,
 'P0020011': 'C_2OM' + y,
 'P0020002': 'C_HSP' + y}

In [7]:
# List of total population columns
tot_cols = ['C_TOT15',
 'CVAP_TOT15']

# List of non-total population columns
non_tot_cols = [
    'CVAP_TOT15', 'CVAP_NHS15', 'CVAP_AIA15', 'CVAP_ASN15', 'CVAP_BLK15', 'CVAP_NHP15',
    'CVAP_WHT15', 'CVAP_AIW15', 'CVAP_ASW15', 'CVAP_BLW15', 'CVAP_AIB15', 'CVAP_2OM15',
    'CVAP_HSP15', 'C_TOT15', 'C_NHS15', 'C_AIA15', 'C_ASN15', 'C_BLK15', 'C_NHP15',
    'C_WHT15', 'C_AIW15', 'C_ASW15', 'C_BLW15', 'C_AIB15', 'C_2OM15', 'C_HSP15'
]


In [None]:
def get_state_bg_cvap(state):
    '''
    Add code to retrieve the bg data
    ''' 
    df = pd.read_csv(os.path.join('./bg_cvap_2015', state+'_cvap_2015_bg.csv'))
    return df
    
# def get_state_block_pl(state):
#     '''
#     Add code to retrieve block-level PL data here
#     '''
#     df = pd.read_csv(os.path.join('./block_pl', state+'_pl2010_b.csv'))
#     df['GEOID20'] = df['GEOID20'].apply(lambda x: str(x).zfill(15))
#     return df

# Create a GEOID20 column for each pl for merge and return the new df
def get_state_block_pl(state):
    '''
    Retrieves block-level PL data for a given state and generates the GEOID20 column.
    '''
    import os
    import pandas as pd
    
    # Read the block-level PL data
    file_path = os.path.join('./block_pl', f"{state}_pl2010_b.csv")
    df = pd.read_csv(file_path)
    
    # Convert all column names to uppercase for consistency
    df.columns = [col.upper() for col in df.columns]
    
    # Verify necessary columns exist
    required_columns = ["STATE", "COUNTY", "TRACT", "BLOCK"]
    for col in required_columns:
        if col not in df.columns:
            raise KeyError(f"Required column '{col}' not found in the file {file_path}. Available columns: {list(df.columns)}")
    
    # Create the GEOID20 column
    df['GEOID20'] = (
        df["STATE"].astype(str).str.zfill(2) + 
        df["COUNTY"].astype(str).str.zfill(3) +
        df["TRACT"].astype(str).str.zfill(6) +
        df["BLOCK"].astype(str).str.zfill(4)
    )
    
    # Verify GEOID20 creation
    print("Sample GEOID20 values:")
    print(df[['STATE', 'COUNTY', 'TRACT', 'BLOCK', 'GEOID20']].head())
    
    return df



In [18]:
pl_hi = get_state_block_pl('hi')
pl_hi.head()

Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     15       1  21800   1000  150010218001000
1     15       1  21800   1001  150010218001001
2     15       1  21800   1002  150010218001002
3     15       1  21800   1004  150010218001004
4     15       1  21800   1005  150010218001005


  df = pd.read_csv(file_path)


Unnamed: 0,FILEID,STUSAB,SUMLEV,GEOCOMP,CHARITER,LOGRECNO,REGION,DIVISION,STATE,COUNTY,COUNTYCC,COUSUB,COUSUBCC,PLACE,PLACECC,TRACT,BLKGRP,BLOCK,CONCIT,CONCITCC,AIANHH,AIANHHFP,AIANHHCC,AIHHTLI,AITSCE,AITS,AITSCC,TTRACT,TBLKGRP,ANRC,ANRCCC,CBSA,METDIV,C,NECTA,NECTADIV,CNECTA,CBSAPCI,NECTAPCI,CD,SLDU,SLDL,V,VTDI,SDELM,SDSEC,SDUNI,AREALAND,AREAWATR,NAME,FUNCSTAT,POP100,HU100,INTPTLAT,INTPTLON,LSADC,U,STATENS,COUNTYNS,COUSUBNS,PLACENS,CONCITNS,AIANHHNS,AITSNS,ANRCNS,MEMI,NMEMI,P0010001,P0010002,P0010003,P0010004,P0010005,P0010006,P0010007,P0010008,P0010009,P0010010,P0010011,P0010012,P0010013,P0010014,P0010015,P0010016,P0010017,P0010018,P0010019,P0010020,P0010021,P0010022,P0010023,P0010024,P0010025,P0010026,P0010027,P0010028,P0010029,P0010030,P0010031,P0010032,P0010033,P0010034,P0010035,P0010036,P0010037,P0010038,P0010039,P0010040,P0010041,P0010042,P0010043,P0010044,P0010045,P0010046,P0010047,P0010048,P0010049,P0010050,P0010051,P0010052,P0010053,P0010054,P0010055,P0010056,P0010057,P0010058,P0010059,P0010060,P0010061,P0010062,P0010063,P0010064,P0010065,P0010066,P0010067,P0010068,P0010069,P0010070,P0010071,P0020001,P0020002,P0020003,P0020004,P0020005,P0020006,P0020007,P0020008,P0020009,P0020010,P0020011,P0020012,P0020013,P0020014,P0020015,P0020016,P0020017,P0020018,P0020019,P0020020,P0020021,P0020022,P0020023,P0020024,P0020025,P0020026,P0020027,P0020028,P0020029,P0020030,P0020031,P0020032,P0020033,P0020034,P0020035,P0020036,P0020037,P0020038,P0020039,P0020040,P0020041,P0020042,P0020043,P0020044,P0020045,P0020046,P0020047,P0020048,P0020049,P0020050,P0020051,P0020052,P0020053,P0020054,P0020055,P0020056,P0020057,P0020058,P0020059,P0020060,P0020061,P0020062,P0020063,P0020064,P0020065,P0020066,P0020067,P0020068,P0020069,P0020070,P0020071,P0020072,P0020073,P0030001,P0030002,P0030003,P0030004,P0030005,P0030006,P0030007,P0030008,P0030009,P0030010,P0030011,P0030012,P0030013,P0030014,P0030015,P0030016,P0030017,P0030018,P0030019,P0030020,P0030021,P0030022,P0030023,P0030024,P0030025,P0030026,P0030027,P0030028,P0030029,P0030030,P0030031,P0030032,P0030033,P0030034,P0030035,P0030036,P0030037,P0030038,P0030039,P0030040,P0030041,P0030042,P0030043,P0030044,P0030045,P0030046,P0030047,P0030048,P0030049,P0030050,P0030051,P0030052,P0030053,P0030054,P0030055,P0030056,P0030057,P0030058,P0030059,P0030060,P0030061,P0030062,P0030063,P0030064,P0030065,P0030066,P0030067,P0030068,P0030069,P0030070,P0030071,P0040001,P0040002,P0040003,P0040004,P0040005,P0040006,P0040007,P0040008,P0040009,P0040010,P0040011,P0040012,P0040013,P0040014,P0040015,P0040016,P0040017,P0040018,P0040019,P0040020,P0040021,P0040022,P0040023,P0040024,P0040025,P0040026,P0040027,P0040028,P0040029,P0040030,P0040031,P0040032,P0040033,P0040034,P0040035,P0040036,P0040037,P0040038,P0040039,P0040040,P0040041,P0040042,P0040043,P0040044,P0040045,P0040046,P0040047,P0040048,P0040049,P0040050,P0040051,P0040052,P0040053,P0040054,P0040055,P0040056,P0040057,P0040058,P0040059,P0040060,P0040061,P0040062,P0040063,P0040064,P0040065,P0040066,P0040067,P0040068,P0040069,P0040070,P0040071,P0040072,P0040073,H0010001,H0010002,H0010003,GEOID20
0,PLST,HI,750,0,0,4498,4,9,15,1,H1,92520,Z5,13600,U1,21800,1,1000,99999,99,9999,99999,99,9,999,99999,99,999999,9,99999,99,25900,99999,999,99999,99999,999,N,N,2,3,1,101,P,99999,99999,30,40545,0,Block 1000,S,6,5,20.236725,-155.832155,BK,99999,1779782,365280,1935665,2414039,99999999,99999999,99999999,99999999,2,9,6,4,3,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,4,3,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,4,3,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,4,3,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,4,1,150010218001000
1,PLST,HI,750,0,0,4499,4,9,15,1,H1,92520,Z5,13600,U1,21800,1,1001,99999,99,9999,99999,99,9,999,99999,99,999999,9,99999,99,25900,99999,999,99999,99999,999,N,N,2,3,1,101,P,99999,99999,30,24732,0,Block 1001,S,35,12,20.239444,-155.832459,BK,99999,1779782,365280,1935665,2414039,99999999,99999999,99999999,99999999,2,9,35,27,14,0,2,7,4,0,8,6,0,0,0,1,0,0,0,0,0,0,1,0,4,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35,7,28,21,13,0,2,6,0,0,7,5,0,0,0,0,0,0,0,0,0,0,1,0,4,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,23,11,0,2,7,3,0,5,3,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,5,23,18,10,0,2,6,0,0,5,3,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,12,0,150010218001001
2,PLST,HI,750,0,0,4500,4,9,15,1,H1,92520,Z5,13600,U1,21800,1,1002,99999,99,9999,99999,99,9,999,99999,99,999999,9,99999,99,25900,99999,999,99999,99999,999,N,N,2,3,1,101,P,99999,99999,30,103361,0,Block 1002,S,14,7,20.240185,-155.833026,BK,99999,1779782,365280,1935665,2414039,99999999,99999999,99999999,99999999,2,9,14,14,8,0,0,5,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,1,13,13,8,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,12,6,0,0,5,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,11,11,6,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,6,1,150010218001002
3,PLST,HI,750,0,0,4501,4,9,15,1,H1,92520,Z5,13600,U1,21800,1,1004,99999,99,9999,99999,99,9,999,99999,99,999999,9,99999,99,25900,99999,999,99999,99999,999,N,N,2,3,1,101,P,99999,99999,30,209879,6948,Block 1004,S,61,26,20.240347,-155.836331,BK,99999,1779782,365280,1935665,2414039,99999999,99999999,99999999,99999999,2,9,61,48,38,0,0,10,0,0,13,7,0,1,1,0,0,0,0,0,0,0,0,0,5,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,61,0,61,48,38,0,0,10,0,0,13,7,0,1,1,0,0,0,0,0,0,0,0,0,5,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,42,34,0,0,8,0,0,7,5,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,0,49,42,34,0,0,8,0,0,7,5,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,23,3,150010218001004
4,PLST,HI,750,0,0,4502,4,9,15,1,H1,92520,Z5,13600,U1,21800,1,1005,99999,99,9999,99999,99,9,999,99999,99,999999,9,99999,99,25900,99999,999,99999,99999,999,N,N,2,3,1,101,P,99999,99999,30,391974,0,Block 1005,S,147,45,20.23486,-155.836163,BK,99999,1779782,365280,1935665,2414039,99999999,99999999,99999999,99999999,2,9,147,71,14,0,0,27,30,0,76,51,0,0,19,4,1,0,0,0,0,0,0,0,27,0,0,24,2,0,0,0,0,0,0,20,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,147,26,121,65,10,0,0,25,30,0,56,39,0,0,12,4,0,0,0,0,0,0,0,0,23,0,0,16,2,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,114,57,13,0,0,26,18,0,57,38,0,0,14,3,1,0,0,0,0,0,0,0,20,0,0,19,2,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,114,17,97,52,10,0,0,24,18,0,45,31,0,0,11,3,0,0,0,0,0,0,0,0,17,0,0,14,2,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,41,4,150010218001005


In [20]:
def data_rounding(x):
    x = x.sort_values(ascending = False, kind='mergesort')
    floor = np.floor(x)
    diff = x - floor
    to_add = int(round(sum(x),0) - sum(floor))
    floor[diff.nlargest(to_add).index] += 1
    return floor.sort_index(ascending = True).astype(int)

### This is the unmodified funtion with rounding 

In [21]:
def run_disagg_changed_original(state):
    '''Runs a disaggregation to the block-level from CVAP data'''
    
    # Set the state abbreviation to lower case
    state = state.lower()
    
    # Get the block-group CVAP data and block-level PL data
    state_bg_cvap = get_state_bg_cvap(state)
    state_block_pl = get_state_block_pl(state)
    
    # PL-Data: Rename the columns to their proxies using the above
    state_block_pl.rename(columns = update_dict, inplace = True)


    state_block_pl["BLKGRP"] = state_block_pl["GEOID20"].astype(str).str.zfill(15).str[0:12]
    print('state_block_pl.head():')
    print(state_block_pl.head())
    # PL-Data: Create a count variable for number of blocks in block group
    state_block_pl["COUNT"] = 1

    # PL-Data: Aggregate the block groups together, clean indices
    print('len(list(state_block_pl.columns))')
    print(len(list(state_block_pl.columns)))

    for i in state_block_pl.columns:
        if state_block_pl[i].dtype == 'object':
            if i not in ['BLKGRP','GEOID20']:
                state_block_pl.drop(columns = i, inplace=True)

    print('len(list(state_block_pl.columns))')            
    print(len(list(state_block_pl.columns)))

    state_bg_pl = state_block_pl.groupby(["BLKGRP"]).sum()
    state_bg_pl.reset_index(inplace = True, drop = False)
    
    # CVAP-Data: Clean the GEOID20, call it BLKGRP
    state_bg_cvap["GEOID20"] = state_bg_cvap["GEOID20"].astype(str).str.zfill(12)
    state_bg_cvap.rename(columns = {"GEOID20":"BLKGRP"}, inplace = True)

    # Merge the two files together
    merged_data = pd.merge(state_block_pl, state_bg_pl, on = "BLKGRP", how = "left", indicator = "ind_1", suffixes = ["_block","_bg"] )
    merged_data_final = pd.merge(merged_data, state_bg_cvap, on = "BLKGRP", how = "left", indicator = "ind_2")
    merged_data_final = merged_data_final[merged_data_final['ind_2']=='both']
    
#    display(state_bg_cvap[state_bg_cvap['BLKGRP']=='091703431011'])
    # Mapping for total columns
    col_mapping = {"C_TOT15":"P0010001", 'CVAP_TOT15':"P0030001"}
    
    # Iterate over the total columns first
    for val in tot_cols:
        merged_data_final[val+"_DISAGG"] = np.where(merged_data_final[val]==0,0,
                                                    np.where(merged_data_final[val+"_bg"]!=0, (merged_data_final[val+"_block"]/merged_data_final[val+"_bg"]) * merged_data_final[val], 
                                                        np.where(merged_data_final[col_mapping[val]+"_bg"]==0, (1/merged_data_final["COUNT_bg"])*merged_data_final[val],(merged_data_final[col_mapping[val]+"_block"]/merged_data_final[col_mapping[val]+"_bg"]) * merged_data_final[val]) 
                                                            )
                                                    )
                                                                 
    # Iterate over the remaining columns after
    for val in non_tot_cols:
        merged_data_final[val+"_DISAGG"] = np.where(merged_data_final[val]==0,0,
                                                    np.where(merged_data_final[val+"_bg"]!=0, (merged_data_final[val+"_block"]/merged_data_final[val+"_bg"]) * merged_data_final[val], 
                                                        np.where("VAP" in val, (merged_data_final["CVAP_TOT15_DISAGG"]/merged_data_final["CVAP_TOT15"]) * merged_data_final[val],
                                                                    (merged_data_final["C_TOT15_DISAGG"]/merged_data_final["C_TOT15"]) * merged_data_final[val]
                                                                )
                                                            )
                                                        )
    disagg_cols=[i for i in list(merged_data_final.columns) if "_DISAGG" in i]
    merged_data_final.sort_values("GEOID20_block", ascending = False, inplace = True)

    for col in disagg_cols:
        try:
            assert(len(merged_data_final[merged_data_final[col].isna()][['GEOID20_block','BLKGRP']])==0)
        except:
            display(merged_data_final[merged_data_final[col].isna()][['GEOID20_block','BLKGRP']])
        merged_data_final[col+"_rounded"] = merged_data_final.groupby('BLKGRP')[[col]].transform(data_rounding)
        if (~(merged_data_final.groupby("BLKGRP")[col+"_rounded"].transform(sum) == merged_data_final[col.replace("_DISAGG","")])).any():
            print("ISSUE")
    print('rounding complete')        
    merged_data_final_export = merged_data_final[["GEOID20_block"]+[i for i in merged_data_final.columns if "_rounded" in i]]

    print('After rounding')
    display(merged_data_final_export.head())
    merged_data_final_export = merged_data_final
    for i in merged_data_final_export.columns:
        if '_rounded' in i:
            print(i)
            new_name = i.replace('_DISAGG_rounded','')
            print(new_name)
            merged_data_final_export[new_name] = merged_data_final_export[i]
            merged_data_final_export[new_name] = merged_data_final_export[new_name].fillna(0)
            merged_data_final_export[new_name] = merged_data_final_export[new_name].apply(lambda x: int(x))

    print('After making to integers')
    display(merged_data_final_export.head())
    merged_data_final_export = merged_data_final

    # Prepare the export DataFrame
    merged_data_final_export = merged_data_final[["GEOID20_block"] + disagg_cols]

    # Ensure the disaggregated columns are integers
    for col in disagg_cols:
        merged_data_final_export[col] = merged_data_final_export[col].fillna(0).apply(lambda x: int(x) if not pd.isna(x) else 0)

    print('After processing DISAGG columns')
    display(merged_data_final_export.head())
    merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
    merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]

    column_order = ['GEOID20','BLKGRP15','C_TOT15','C_NHS15','C_AIA15','C_ASN15','C_BLK15','C_NHP15','C_WHT15','C_AIW15','C_ASW15','C_BLW15','C_AIB15','C_2OM15','C_HSP15',
                'CVAP_TOT15','CVAP_NHS15','CVAP_AIA15','CVAP_ASN15','CVAP_BLK15','CVAP_NHP15','CVAP_WHT15','CVAP_AIW15','CVAP_ASW15','CVAP_BLW15','CVAP_AIB15','CVAP_2OM15','CVAP_HSP15',]

    os.makedirs(f"./2015_cvap_disagg/{state}/", exist_ok=True)

    merged_data_final_export = merged_data_final_export[column_order]
    print('Exported DF')
    merged_data_final_export.sort_values(by = 'GEOID20',inplace=True,ascending=True)
    display(merged_data_final_export.head())
    # Export to CSV
    merged_data_final_export.to_csv("./2015_cvap_disagg/"+state+"/"+state+"_2015_cvap_block_original.csv", index = False)    



#### This is the modified function with rounding and crosswalk removed

In [5]:
def run_disagg_changed_no_rounding(state):
    '''Runs a disaggregation to the block-level from CVAP data'''
    
    # Set the state abbreviation to lower case
    state = state.lower()
    
    # Get the block-group CVAP data and block-level PL data
    state_bg_cvap = get_state_bg_cvap(state)
    state_block_pl = get_state_block_pl(state)
    
    # PL-Data: Rename the columns to their proxies using the above
    state_block_pl.rename(columns = update_dict, inplace = True)


    state_block_pl["BLKGRP"] = state_block_pl["GEOID20"].astype(str).str.zfill(15).str[0:12]

    # PL-Data: Create a count variable for number of blocks in block group
    state_block_pl["COUNT"] = 1

    # PL-Data: Aggregate the block groups together, clean indices


    for i in state_block_pl.columns:
        if state_block_pl[i].dtype == 'object':
            if i not in ['BLKGRP','GEOID20']:
                state_block_pl.drop(columns = i, inplace=True)


    state_bg_pl = state_block_pl.groupby(["BLKGRP"]).sum()
    state_bg_pl.reset_index(inplace = True, drop = False)
    
    # CVAP-Data: Clean the GEOID20, call it BLKGRP
    state_bg_cvap["GEOID20"] = state_bg_cvap["GEOID20"].astype(str).str.zfill(12)
    state_bg_cvap.rename(columns = {"GEOID20":"BLKGRP"}, inplace = True)

    # Merge the two files together
    merged_data = pd.merge(state_block_pl, state_bg_pl, on = "BLKGRP", how = "left", indicator = "ind_1", suffixes = ["_block","_bg"] )
    merged_data_final = pd.merge(merged_data, state_bg_cvap, on = "BLKGRP", how = "left", indicator = "ind_2")
    merged_data_final = merged_data_final[merged_data_final['ind_2']=='both']
    
    # Mapping for total columns
    col_mapping = {"C_TOT15":"P0010001", 'CVAP_TOT15':"P0030001"}
    
    # Iterate over the total columns first
    for val in tot_cols:
        merged_data_final[val+"_DISAGG"] = np.where(merged_data_final[val]==0,0,
                                                    np.where(merged_data_final[val+"_bg"]!=0, (merged_data_final[val+"_block"]/merged_data_final[val+"_bg"]) * merged_data_final[val], 
                                                        np.where(merged_data_final[col_mapping[val]+"_bg"]==0, (1/merged_data_final["COUNT_bg"])*merged_data_final[val],(merged_data_final[col_mapping[val]+"_block"]/merged_data_final[col_mapping[val]+"_bg"]) * merged_data_final[val]) 
                                                            )
                                                    )
                                                                 
    # Iterate over the remaining columns after
    for val in non_tot_cols:
        merged_data_final[val+"_DISAGG"] = np.where(merged_data_final[val]==0,0,
                                                    np.where(merged_data_final[val+"_bg"]!=0, (merged_data_final[val+"_block"]/merged_data_final[val+"_bg"]) * merged_data_final[val], 
                                                        np.where("VAP" in val, (merged_data_final["CVAP_TOT15_DISAGG"]/merged_data_final["CVAP_TOT15"]) * merged_data_final[val],
                                                                    (merged_data_final["C_TOT15_DISAGG"]/merged_data_final["C_TOT15"]) * merged_data_final[val]
                                                                )
                                                            )
                                                        )
    disagg_cols=[i for i in list(merged_data_final.columns) if "_DISAGG" in i]
    merged_data_final.sort_values("GEOID20_block", ascending = False, inplace = True)

    # Prepare the export DataFrame
    merged_data_final_export = merged_data_final[["GEOID20_block"] + disagg_cols]

    merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
    merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
    
    merged_data_final_export.rename(columns=lambda x: x.replace('_DISAGG', ''), inplace=True)

    column_order = ['GEOID20','BLKGRP15','C_TOT15','C_NHS15','C_AIA15','C_ASN15','C_BLK15','C_NHP15','C_WHT15','C_AIW15','C_ASW15','C_BLW15','C_AIB15','C_2OM15','C_HSP15',
                'CVAP_TOT15','CVAP_NHS15','CVAP_AIA15','CVAP_ASN15','CVAP_BLK15','CVAP_NHP15','CVAP_WHT15','CVAP_AIW15','CVAP_ASW15','CVAP_BLW15','CVAP_AIB15','CVAP_2OM15','CVAP_HSP15']

    os.makedirs(f"./2015_cvap_disagg/{state}/", exist_ok=True)

    merged_data_final_export = merged_data_final_export[column_order]
    print('Exported DF')
    merged_data_final_export.sort_values(by = 'GEOID20',inplace=True,ascending=True)
    display(merged_data_final_export.head())
    # Export to CSV
    merged_data_final_export.to_csv("./2015_cvap_disagg/"+state+"/"+state+"_2015_cvap_block_no_rounding.csv", index = False)    



In [9]:
# Iterate over the abbreviations
for abbrev in [
    'al', 'ak', 'az', 'ar', 'ca', 'co', 'ct', 'de', 'fl', 'ga', 'hi', 'id', 'il', 'in', 'ia', 
    'ks', 'ky', 'la', 'me', 'md', 'ma', 'mi', 'mn', 'ms', 'mo', 'mt', 'ne', 'nv', 'nh', 'nj', 
    'nm', 'ny', 'nc', 'nd', 'oh', 'ok', 'or', 'pa', 'ri', 'sc', 'sd', 'tn', 'tx', 'ut', 'vt', 
    'va', 'wa', 'wv', 'wi', 'wy'
]:
    # Start timer
    start_process_time = time.time()
    
    # Run disagg
    run_disagg_changed_no_rounding(abbrev)
    # run_disagg_changed(abbrev)
    
    # End timer
    end_process_time = time.time()
    
    # Write to a .txt file
    print(abbrev + " took " +  str(round(end_process_time - start_process_time,3)) + " seconds ")
    with open('log_no_rounding.txt', 'a') as t:
        t.write(abbrev + " took " +  str(round(end_process_time - start_process_time,3)) + " seconds " + "\n")
    t.close()

  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0      1       1  21100   2000  010010211002000
1      1       1  21100   2001  010010211002001
2      1       1  21100   2003  010010211002003
3      1       1  21100   2004  010010211002004
4      1       1  21100   2005  010010211002005


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1754,10010201001000,10010201001,54.620344,56.066176,0.0,0.0,4.615385,0.0,50.385274,0.0,...,0.0,4.727273,0.0,35.714286,0.0,0.0,0.0,0.0,0.0,0.0
1755,10010201001001,10010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1756,10010201001002,10010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1358,10010201001003,10010201001,67.15616,65.257353,0.0,0.0,4.615385,0.0,60.462329,0.0,...,0.0,2.363636,0.0,41.666667,0.0,0.0,0.0,0.0,0.0,0.0
1359,10010201001004,10010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


al took 15.217 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0      2      13    100   1363  020130001001363
1      2      13    100   1366  020130001001366
2      2      13    100   1369  020130001001369
3      2      13    100   1370  020130001001370
4      2      13    100   1371  020130001001371
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
217,20130001001000,20130001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218,20130001001001,20130001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
219,20130001001002,20130001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
220,20130001001003,20130001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
221,20130001001004,20130001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ak took 1.677 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0      4       1  970502   1478  040019705021478
1      4       1  970502   1479  040019705021479
2      4       1  970502   1480  040019705021480
3      4       1  970502   1481  040019705021481
4      4       1  970502   1482  040019705021482


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
4103,40019426001000,40019426001,24.251969,24.772118,25.177112,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4104,40019426001001,40019426001,29.76378,30.402145,30.899183,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4105,40019426001002,40019426001,9.92126,10.134048,10.299728,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4019,40019426001003,40019426001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4020,40019426001004,40019426001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


az took 13.98 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0      5       1  480300   1008  050014803001008
1      5       1  480300   2034  050014803002034
2      5       1  480300   2035  050014803002035
3      5       1  480300   2036  050014803002036
4      5       1  480300   2037  050014803002037
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
214,50014801001000,50014801001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
215,50014801001001,50014801001,107.938931,107.554417,0.0,0.0,0.0,0.0,114.599686,0.0,...,0.0,0.0,0.0,86.857143,0.0,0.0,0.0,0.0,0.0,0.0
216,50014801001002,50014801001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
217,50014801001003,50014801001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218,50014801001004,50014801001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ar took 10.713 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0      6       1  427700   3018  060014277003018
1      6       1  428301   4046  060014283014046
2      6       1  428400   1020  060014284001020
3      6       1  428500   1009  060014285001009
4      6       1  428600   2004  060014286002004


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
22030,60014001001000,60014001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22031,60014001001001,60014001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22032,60014001001002,60014001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22033,60014001001003,60014001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22034,60014001001004,60014001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ca took 46.072 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0      8       1   9751   1014  080010097511014
1      8       1   9751   1016  080010097511016
2      8       1   9751   1017  080010097511017
3      8       1   9751   1018  080010097511018
4      8       1   9751   1019  080010097511019
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
8665,80010078011000,80010078011,83.660517,62.760291,8.142857,0.0,53.482759,0.0,15.178571,0.0,...,0.0,52.025316,0.0,12.60274,0.0,0.0,0.0,3.0,0.0,11.191336
8564,80010078011001,80010078011,93.638376,82.51816,0.0,0.0,61.586207,0.0,26.785714,0.0,...,0.0,45.088608,0.0,20.479452,0.0,0.0,0.5,0.0,0.0,12.274368
8565,80010078011002,80010078011,189.579336,102.276029,5.428571,0.0,72.931034,0.0,34.821429,0.0,...,0.0,50.291139,0.0,27.568493,0.0,0.0,0.5,3.0,0.0,34.65704
8566,80010078011003,80010078011,29.933579,23.244552,0.0,0.0,0.0,0.0,17.857143,0.0,...,0.0,0.0,0.0,13.390411,0.0,0.0,0.0,0.0,0.0,5.054152
8567,80010078011004,80010078011,11.512915,4.64891,0.0,0.0,0.0,0.0,3.571429,0.0,...,0.0,0.0,0.0,3.150685,0.0,0.0,0.0,0.0,0.0,1.444043


co took 10.4 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0      9       1  200200   1000  090012002001000
1      9       1  200200   1001  090012002001001
2      9       1  200200   1002  090012002001002
3      9       1  200200   1003  090012002001003
4      9       1  200200   1004  090012002001004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
5053,90010101011000,90010101011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5054,90010101011001,90010101011,313.38843,305.344828,0.0,42.96875,0.0,0.0,273.709924,0.0,...,42.5,0.0,0.0,211.46,0.0,0.0,0.0,0.0,0.0,8.421053
5055,90010101011002,90010101011,2.176309,2.212644,0.0,0.0,0.0,0.0,2.198473,0.0,...,0.0,0.0,0.0,2.18,0.0,0.0,0.0,0.0,0.0,0.0
5056,90010101011003,90010101011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5057,90010101011004,90010101011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ct took 2.931 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     10       1  40203   1000  100010402031000
1     10       1  40203   1001  100010402031001
2     10       1  40203   1002  100010402031002
3     10       1  40203   1003  100010402031003
4     10       1  40203   1004  100010402031004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
4467,100010401001000,100010401001,100.301479,88.33595,0.0,3.75,8.130252,0.0,71.704495,0.0,...,2.083333,3.666667,0.0,51.121718,0.0,0.0,0.0,0.0,0.0,11.27451
4468,100010401001001,100010401001,382.969283,378.025903,0.0,0.0,4.516807,0.0,378.816199,0.0,...,0.0,3.666667,0.0,303.890215,0.0,0.0,0.0,0.0,0.0,2.254902
4469,100010401001002,100010401001,26.052332,25.981162,0.0,0.0,0.0,0.0,27.0583,0.0,...,0.0,0.0,0.0,17.040573,0.0,0.0,0.0,0.0,0.0,0.0
4470,100010401001003,100010401001,118.538111,118.214286,0.0,0.0,0.0,0.0,123.115265,0.0,...,0.0,0.0,0.0,90.883055,0.0,0.0,0.0,0.0,0.0,0.0
4471,100010401001004,100010401001,69.03868,64.952904,0.0,0.0,0.0,0.0,67.64575,0.0,...,0.0,0.0,0.0,59.642005,0.0,0.0,0.0,0.0,0.0,4.509804


de took 0.818 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     12       1   1908   1000  120010019081000
1     12       1   1908   1001  120010019081001
2     12       1   1908   1002  120010019081002
3     12       1   1908   1003  120010019081003
4     12       1   1908   1004  120010019081004


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
4054,120010002001000,120010002001,18.253521,15.47619,0.0,0.0,10.619469,0.0,2.631579,0.0,...,0.0,10.916031,0.0,2.580071,0.0,0.0,0.0,0.0,0.0,0.0
4055,120010002001001,120010002001,13.119718,11.123512,0.0,0.0,7.964602,0.0,2.631579,0.0,...,0.0,7.938931,0.0,2.580071,0.0,0.0,0.0,0.0,0.0,0.0
4056,120010002001002,120010002001,26.239437,21.279762,0.0,0.0,12.831858,0.0,7.894737,0.0,...,0.0,8.435115,0.0,5.160142,0.0,0.0,0.0,0.0,0.0,2.083333
4057,120010002001003,120010002001,29.661972,23.214286,0.0,0.0,14.60177,0.0,7.368421,0.0,...,0.0,11.412214,0.0,6.708185,0.0,0.0,0.0,0.0,0.0,8.333333
4058,120010002001004,120010002001,15.971831,13.541667,0.0,0.0,12.389381,0.0,0.0,0.0,...,0.0,12.40458,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


fl took 28.322 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     13       1  950200   1029  130019502001029
1     13       1  950200   1031  130019502001031
2     13       1  950200   1053  130019502001053
3     13       1  950200   1056  130019502001056
4     13       1  950200   1057  130019502001057


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1940,130019501001000,130019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1941,130019501001001,130019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1942,130019501001002,130019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
317,130019501001003,130019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
318,130019501001004,130019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ga took 15.076 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     15       1  21800   1000  150010218001000
1     15       1  21800   1001  150010218001001
2     15       1  21800   1002  150010218001002
3     15       1  21800   1004  150010218001004
4     15       1  21800   1005  150010218001005
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2553,150010201001000,150010201001,0.927187,0.907223,0.0,0.0,0.0,0.0,1.153846,0.0,...,0.0,0.0,0.0,1.141026,0.0,0.0,0.0,0.0,0.0,0.0
2554,150010201001001,150010201001,114.044002,103.423371,0.0,18.858603,0.0,4.466019,68.076923,0.117647,...,18.761726,0.0,3.759398,63.897436,0.2,0.540541,0.0,0.0,8.450704,7.322835
2555,150010201001002,150010201001,14.834992,14.515561,0.0,0.698467,0.0,2.23301,10.384615,0.0,...,0.750469,0.0,1.879699,10.269231,0.0,0.540541,0.0,0.0,1.408451,0.0
2556,150010201001003,150010201001,10.199057,7.25778,0.0,2.793867,0.0,0.0,4.615385,0.0,...,3.001876,0.0,0.0,4.564103,0.0,0.0,0.0,0.0,0.0,3.661417
2565,150010201001004,150010201001,0.927187,0.907223,0.0,0.0,0.0,0.0,1.153846,0.0,...,0.0,0.0,0.0,1.141026,0.0,0.0,0.0,0.0,0.0,0.0


hi took 1.185 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     16       1  10201   1032  160010102011032
1     16       1  10201   1033  160010102011033
2     16       1  10201   1059  160010102011059
3     16       1  10201   1060  160010102011060
4     16       1  10201   1061  160010102011061
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
3332,160010001001000,160010001001,28.888889,23.755102,0.0,0.0,0.0,0.0,19.069507,0.0,...,0.0,0.0,0.0,18.40311,0.0,0.0,0.0,0.0,0.0,3.333333
3333,160010001001001,160010001001,14.444444,12.867347,0.0,0.0,0.0,0.0,13.772422,0.0,...,0.0,0.0,0.0,10.825359,0.0,0.0,0.0,0.0,0.0,0.0
3334,160010001001002,160010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3335,160010001001003,160010001001,43.333333,37.612245,0.0,0.0,5.0,0.0,38.139013,0.0,...,0.0,0.0,0.0,34.641148,0.0,0.0,0.0,0.0,0.0,1.666667
3336,160010001001004,160010001001,42.222222,37.612245,0.0,0.0,0.0,0.0,31.782511,0.0,...,0.0,0.0,0.0,28.145933,0.0,0.0,0.0,0.0,0.0,0.0


id took 6.198 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     17       1  10200   4009  170010102004009
1     17       1  10200   4010  170010102004010
2     17       1  10200   4011  170010102004011
3     17       1  10200   4012  170010102004012
4     17       1  10200   4013  170010102004013


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2085,170010001001000,170010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2086,170010001001001,170010001001,163.54024,157.837838,0.0,0.0,0.0,0.0,151.882998,0.0,...,0.0,0.0,0.0,130.985401,0.0,0.0,0.0,0.0,0.0,5.0
2087,170010001001002,170010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2088,170010001001003,170010001001,5.41524,5.405405,0.0,0.0,0.0,0.0,5.667276,0.0,...,0.0,0.0,0.0,3.540146,0.0,0.0,0.0,0.0,0.0,0.0
2089,170010001001004,170010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


il took 24.623 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     18       1  30600   1001  180010306001001
1     18       1  30600   1003  180010306001003
2     18       1  30600   1008  180010306001008
3     18       1  30600   1010  180010306001010
4     18       1  30600   1011  180010306001011


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1185,180010301001000,180010301001,19.26699,14.757331,0.0,0.0,0.0,0.0,14.556575,0.0,...,0.0,0.0,0.0,12.332016,0.0,0.0,0.0,0.0,0.0,2.272727
1186,180010301001001,180010301001,22.121359,19.676441,0.0,0.0,0.0,0.0,19.408767,0.0,...,0.0,0.0,0.0,16.185771,0.0,0.0,0.0,0.0,0.0,3.409091
1187,180010301001002,180010301001,7.135922,7.0273,0.0,0.0,0.0,0.0,6.931702,0.0,...,0.0,0.0,0.0,6.936759,0.0,0.0,0.0,0.0,0.0,0.0
1188,180010301001003,180010301001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1189,180010301001004,180010301001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


in took 12.813 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     19       1  960100   3002  190019601003002
1     19       1  960100   3003  190019601003003
2     19       1  960100   3004  190019601003004
3     19       1  960100   3005  190019601003005
4     19       1  960100   3006  190019601003006
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
355,190019601001000,190019601001,4.885257,4.922096,0.0,0.0,0.0,0.0,4.903364,0.0,...,0.0,0.0,0.0,3.682771,0.0,0.0,0.0,0.0,0.0,0.0
356,190019601001001,190019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
357,190019601001002,190019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
358,190019601001003,190019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
359,190019601001004,190019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ia took 10.638 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     20       1  952700   1005  200019527001005
1     20       1  952700   1006  200019527001006
2     20       1  952700   1007  200019527001007
3     20       1  952700   1008  200019527001008
4     20       1  952700   1009  200019527001009
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2147,200019526001000,200019526001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2148,200019526001001,200019526001,4.246643,4.188164,0.0,0.0,0.0,0.0,4.335443,0.0,...,0.0,0.0,0.0,2.120596,0.0,0.0,0.0,0.0,0.0,0.0
2149,200019526001002,200019526001,5.308304,5.235205,0.0,0.0,0.0,0.0,5.419304,0.0,...,0.0,0.0,0.0,4.241192,0.0,0.0,0.0,0.0,0.0,0.0
2150,200019526001003,200019526001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2151,200019526001004,200019526001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ks took 13.701 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     21       1  970200   1000  210019702001000
1     21       1  970200   1001  210019702001001
2     21       1  970200   1002  210019702001002
3     21       1  970200   1003  210019702001003
4     21       1  970200   1004  210019702001004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
104,210019701001000,210019701001,43.764988,43.966584,0.0,0.0,0.0,0.0,42.622028,0.0,...,0.0,0.0,0.0,36.369637,0.0,0.0,0.934505,0.0,0.0,0.266667
105,210019701001001,210019701001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
106,210019701001002,210019701001,42.889688,43.966584,0.0,0.0,0.0,0.0,41.75219,0.0,...,0.0,0.0,0.0,30.627063,0.0,0.0,0.790735,0.0,0.0,0.0
107,210019701001003,210019701001,36.76259,37.685644,0.0,0.0,0.0,0.0,36.533166,0.0,...,0.0,0.0,0.0,28.712871,0.0,0.0,0.71885,0.0,0.0,0.0
108,210019701001004,210019701001,10.503597,10.767327,0.0,0.0,0.0,0.0,10.438048,0.0,...,0.0,0.0,0.0,3.828383,0.0,0.0,0.095847,0.0,0.0,0.0


ky took 7.654 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     22       1  960500   3063  220019605003063
1     22       1  960600   1003  220019606001003
2     22       1  960500   3007  220019605003007
3     22       1  960500   3008  220019605003008
4     22       1  960500   3009  220019605003009
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
774,220019601001000,220019601001,40.189125,40.153349,0.0,0.0,13.473684,0.0,24.026403,0.0,...,0.0,18.429319,0.0,18.372414,0.0,0.0,0.0,0.0,0.0,0.0
775,220019601001001,220019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
776,220019601001002,220019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
777,220019601001003,220019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
778,220019601001004,220019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


la took 11.954 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     23       1  10100   2012  230010101002012
1     23       1  10100   2027  230010101002027
2     23       1  10100   2028  230010101002028
3     23       1  10200   2000  230010102002000
4     23       1  10200   2001  230010102002001
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
208,230010101001000,230010101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
209,230010101001001,230010101001,0.88661,0.867925,0.0,0.0,0.0,0.0,0.0,6.153846,...,0.0,0.0,0.0,0.0,3.846154,0.0,0.0,0.0,0.0,0.0
210,230010101001002,230010101001,95.75392,93.735849,20.0,0.0,0.0,0.0,79.455446,0.0,...,0.0,0.0,0.0,80.335463,0.0,0.0,0.0,0.0,0.0,0.0
211,230010101001003,230010101001,26.598311,24.301887,0.0,0.0,0.0,0.0,20.049505,0.0,...,0.0,0.0,0.0,18.769968,0.0,0.0,0.0,0.0,0.0,4.0
212,230010101001004,230010101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


me took 3.136 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     24       1    100   1110  240010001001110
1     24       1    100   1111  240010001001111
2     24       1    100   1112  240010001001112
3     24       1    100   1113  240010001001113
4     24       1    100   1115  240010001001115
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
18,240010001001000,240010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,240010001001001,240010001001,26.307506,25.593939,0.0,0.0,0.0,0.0,24.644172,0.0,...,0.0,0.0,0.0,19.704579,0.0,0.0,0.0,0.0,0.0,0.0
20,240010001001002,240010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21,240010001001003,240010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22,240010001001004,240010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


md took 7.54 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     25       1  12200   1000  250010122001000
1     25       1  12200   1001  250010122001001
2     25       1  12200   1002  250010122001002
3     25       1  12200   1003  250010122001003
4     25       1  12200   1004  250010122001004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
12745,250010101001000,250010101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12746,250010101001001,250010101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8981,250010101001002,250010101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8982,250010101001003,250010101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8983,250010101001004,250010101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ma took 9.754 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     26       1    100   1092  260010001001092
1     26       1    100   1093  260010001001093
2     26       1    100   1094  260010001001094
3     26       1    100   1095  260010001001095
4     26       1    100   1096  260010001001096


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
32,260010001001000,260010001001,16.403509,16.482372,0.0,0.0,0.0,0.0,16.615509,0.0,...,0.0,0.0,0.0,15.888502,0.0,0.0,0.0,0.0,0.0,0.0
33,260010001001001,260010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34,260010001001002,260010001001,7.719298,7.75641,0.0,0.0,0.0,0.0,7.819063,0.0,...,0.0,0.0,0.0,7.944251,0.0,0.0,0.0,0.0,0.0,0.0
35,260010001001003,260010001001,0.964912,0.969551,0.0,0.0,0.0,0.0,0.977383,0.0,...,0.0,0.0,0.0,0.993031,0.0,0.0,0.0,0.0,0.0,0.0
36,260010001001004,260010001001,4.824561,4.847756,0.0,0.0,0.0,0.0,4.886914,0.0,...,0.0,0.0,0.0,4.965157,0.0,0.0,0.0,0.0,0.0,0.0


mi took 22.332 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     27       1  770100   3151  270017701003151
1     27       1  770300   1002  270017703001002
2     27       1  770300   1003  270017703001003
3     27       1  770300   1004  270017703001004
4     27       1  770300   1007  270017703001007


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2291,270017701001000,270017701001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2292,270017701001001,270017701001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2293,270017701001002,270017701001,0.871849,0.86225,0.0,0.0,0.0,0.0,0.833333,0.0,...,0.004779,0.0,0.0,0.869033,0.0,0.0,0.0,0.0,0.0,0.0
2294,270017701001003,270017701001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2295,270017701001004,270017701001,1.743697,1.724501,0.0,0.0,0.0,0.0,1.666667,0.0,...,0.009558,0.0,0.0,1.738066,0.0,0.0,0.0,0.0,0.0,0.0


mn took 14.981 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     28       1    600   2028  280010006002028
1     28       1    600   3016  280010006003016
2     28       1    600   3022  280010006003022
3     28       1    600   3023  280010006003023
4     28       1    600   3024  280010006003024
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1667,280010001001000,280010001001,3.471392,7.177419,0.0,0.0,5.588235,0.0,2.622623,0.0,...,0.0,2.443064,0.0,2.982005,0.0,0.0,0.0,0.0,0.0,0.0
1668,280010001001001,280010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1669,280010001001002,280010001001,1.157131,2.392473,0.0,0.0,0.0,0.0,1.966967,0.0,...,0.0,0.0,0.0,2.236504,0.0,0.0,0.0,0.0,0.0,0.0
1670,280010001001003,280010001001,1.542841,3.189964,0.0,0.0,2.235294,0.0,1.311311,0.0,...,0.0,2.443064,0.0,1.491003,0.0,0.0,0.0,0.0,0.0,0.0
1671,280010001001004,280010001001,6.942784,14.354839,0.0,0.0,0.0,0.0,11.801802,0.0,...,0.0,0.0,0.0,10.437018,0.0,0.0,0.0,0.0,0.0,0.0


ms took 8.249 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     29       1  950100   1000  290019501001000
1     29       1  950100   1001  290019501001001
2     29       1  950100   1002  290019501001002
3     29       1  950100   1003  290019501001003
4     29       1  950100   1004  290019501001004


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
0,290019501001000,290019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,290019501001001,290019501001,1.846473,1.84083,0.033195,0.0,0.0,0.0,1.801113,0.027663,...,0.0,0.0,0.0,1.784314,0.039024,0.0,0.0,0.0,0.0,0.0
2,290019501001002,290019501001,12.925311,12.885813,0.232365,0.0,0.0,0.0,12.607789,0.193638,...,0.0,0.0,0.0,9.813725,0.214634,0.0,0.0,0.0,0.0,0.0
3,290019501001003,290019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,290019501001004,290019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


mo took 18.912 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     30       1    200   1719  300010002001719
1     30       1    200   1720  300010002001720
2     30       1    200   1721  300010002001721
3     30       1    200   1722  300010002001722
4     30       1    200   1723  300010002001723
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
3450,300010001001000,300010001001,6.292683,6.479592,0.0,0.0,0.039024,0.0,6.619965,0.0,...,0.0,0.031621,0.0,4.579832,0.0,0.0,0.0,0.0,0.0,0.0
3451,300010001001001,300010001001,7.341463,7.559524,0.0,0.0,0.045528,0.0,7.723292,0.0,...,0.0,0.055336,0.0,8.014706,0.0,0.0,0.0,0.0,0.0,0.0
3452,300010001001002,300010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3453,300010001001003,300010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3454,300010001001004,300010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


mt took 5.763 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     31       1  965400   2112  310019654002112
1     31       1  965400   2122  310019654002122
2     31       1  965400   2124  310019654002124
3     31       1  965400   2125  310019654002125
4     31       1  965400   2126  310019654002126
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1020,310019654001000,310019654001,1.883373,1.874003,0.0,0.0,0.0,0.0,1.88755,0.0,...,0.0,0.0,0.0,1.989071,0.0,0.0,0.0,0.0,0.0,0.0
1021,310019654001001,310019654001,7.533491,7.496013,0.0,0.0,0.0,0.0,7.550201,0.0,...,0.0,0.0,0.0,5.967213,0.0,0.0,0.0,0.0,0.0,0.0
1022,310019654001002,310019654001,1.883373,1.874003,0.0,0.0,0.0,0.0,1.88755,0.0,...,0.0,0.0,0.0,1.989071,0.0,0.0,0.0,0.0,0.0,0.0
1023,310019654001003,310019654001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1024,310019654001004,310019654001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ne took 9.417 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     32       1  950302   3005  320019503023005
1     32       1  950302   3006  320019503023006
2     32       1  950302   3007  320019503023007
3     32       1  950302   3008  320019503023008
4     32       1  950302   3009  320019503023009
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1629,320019501001000,320019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1630,320019501001001,320019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1631,320019501001002,320019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632,320019501001003,320019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1633,320019501001004,320019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


nv took 4.225 seconds 
Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     33       1  966500   1000  330019665001000
1     33       1  966500   1001  330019665001001
2     33       1  966500   1002  330019665001002
3     33       1  966500   1003  330019665001003
4     33       1  966500   1004  330019665001004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1022,330019651001000,330019651001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1023,330019651001001,330019651001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1024,330019651001002,330019651001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1025,330019651001003,330019651001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1026,330019651001004,330019651001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


nh took 1.851 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     34       1  10200   1026  340010102001026
1     34       1  10200   1027  340010102001027
2     34       1  10200   1028  340010102001028
3     34       1  10200   1029  340010102001029
4     34       1  10200   1030  340010102001030
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
957,340010001001000,340010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
958,340010001001001,340010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
998,340010001001002,340010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
959,340010001001003,340010001001,20.0,18.840948,0.0,18.511111,0.0,0.0,0.0,0.0,...,9.440559,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.641414
960,340010001001004,340010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


nj took 11.032 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     35       1   4720   1010  350010047201010
1     35       1   4720   1011  350010047201011
2     35       1   4746   1020  350010047461020
3     35       1   4746   1024  350010047461024
4     35       1   4746   1025  350010047461025
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
12511,350010001071000,350010001071,10.365573,9.100892,0.0,0.0,0.731707,0.0,9.109105,0.0,...,0.0,0.882353,0.0,8.477666,0.0,0.0,0.0,0.0,0.0,1.113861
12512,350010001071001,350010001071,271.099597,228.280714,0.0,5.945946,2.195122,0.0,229.38383,0.0,...,6.206897,1.764706,0.0,211.093892,0.0,0.0,0.0,0.0,0.0,33.415842
12513,350010001071002,350010001071,54.219919,47.779684,0.0,1.621622,0.0,0.0,47.201727,0.0,...,0.689655,0.0,0.0,37.301732,0.0,0.0,0.0,0.0,0.0,5.569307
12514,350010001071003,350010001071,44.651698,34.886754,0.0,2.162162,0.731707,0.0,33.124019,0.0,...,2.758621,0.882353,0.0,27.128532,0.0,0.0,0.0,0.0,0.0,6.683168
12517,350010001071004,350010001071,66.180196,61.431023,0.0,0.0,0.0,0.0,64.591837,0.0,...,0.0,0.0,0.0,55.104831,0.0,0.0,0.0,0.0,0.0,2.227723


nm took 9.899 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     36       1   2200   2000  360010022002000
1     36       1   2300   2000  360010023002000
2     36       1   2300   2009  360010023002009
3     36       1   2300   2010  360010023002010
4     36       1   2300   2012  360010023002012


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
432,360010001001000,360010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
433,360010001001001,360010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
434,360010001001002,360010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
435,360010001001003,360010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
436,360010001001004,360010001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ny took 21.786 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     37       1  21801   2057  370010218012057
1     37       1  21801   2058  370010218012058
2     37       1  21801   2059  370010218012059
3     37       1  21801   2060  370010218012060
4     37       1  21801   2061  370010218012061


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
4224,370010201001000,370010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4225,370010201001001,370010201001,33.694853,21.371681,0.0,0.0,3.642534,0.0,16.721311,0.0,...,0.0,3.965517,0.0,11.475309,0.0,0.0,0.0,0.0,0.0,1.333333
4226,370010201001002,370010201001,31.966912,33.451327,0.0,0.0,9.366516,0.0,16.721311,4.166667,...,0.0,7.931034,0.0,10.432099,5.0,0.0,0.0,0.0,0.0,0.133333
4227,370010201001003,370010201001,35.422794,33.451327,0.0,0.0,5.723982,0.0,26.127049,0.0,...,0.0,2.37931,0.0,17.734568,0.0,0.0,0.0,0.0,0.0,0.4
4228,370010201001004,370010201001,51.838235,45.530973,0.0,0.0,1.561086,0.0,38.668033,0.0,...,0.0,1.586207,0.0,33.382716,0.0,0.0,0.0,0.0,0.0,0.666667


nc took 17.911 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     38       1  965600   2000  380019656002000
1     38       1  965600   2001  380019656002001
2     38       1  965600   2002  380019656002002
3     38       1  965600   2003  380019656002003
4     38       1  965600   2004  380019656002004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
403,380019656001000,380019656001,5.123762,5.0625,0.0,0.0,0.0,0.0,4.987113,0.0,...,0.0,0.0,0.0,3.379845,0.0,0.0,0.0,0.0,0.0,0.0
404,380019656001001,380019656001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
405,380019656001002,380019656001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
321,380019656001003,380019656001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
322,380019656001004,380019656001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


nd took 6.636 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     39       1  770100   1010  390017701001010
1     39       1  770100   1011  390017701001011
2     39       1  770100   1012  390017701001012
3     39       1  770100   1013  390017701001013
4     39       1  770100   1019  390017701001019


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
135,390017701001000,390017701001,15.99393,16.128539,0.0,0.0,0.0,0.0,10.748621,0.0,...,0.0,0.0,0.0,8.748707,0.0,0.0,0.0,0.0,0.0,0.0
136,390017701001001,390017701001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
137,390017701001002,390017701001,16.93475,17.077276,0.0,0.0,0.0,0.0,17.588652,0.0,...,0.0,0.0,0.0,14.581179,0.0,0.0,0.0,0.0,0.0,0.0
138,390017701001003,390017701001,1.881639,1.897475,0.0,0.0,0.0,0.0,1.954295,0.0,...,0.0,0.0,0.0,1.944157,0.0,0.0,0.0,0.0,0.0,0.0
139,390017701001004,390017701001,12.230653,12.333588,0.0,0.0,0.0,0.0,12.702916,0.0,...,0.0,0.0,0.0,7.776629,0.0,0.0,0.0,0.0,0.0,0.0


oh took 22.375 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     40       1  377000   1003  400013770001003
1     40       1  377000   1004  400013770001004
2     40       1  377000   1005  400013770001005
3     40       1  377000   1006  400013770001006
4     40       1  377000   1055  400013770001055


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1797,400013766001000,400013766001,50.201281,49.413408,13.37386,0.0,0.0,0.0,30.467791,4.137931,...,0.0,0.0,0.0,27.240326,3.62069,0.0,0.0,0.0,0.0,0.363636
1798,400013766001001,400013766001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1799,400013766001002,400013766001,19.569991,19.594972,1.337386,0.0,0.0,0.0,17.860429,1.83908,...,0.0,0.0,0.0,16.344196,0.603448,0.0,0.0,0.0,0.0,0.0
1800,400013766001003,400013766001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1801,400013766001004,400013766001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ok took 14.649 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     41       1  950200   1006  410019502001006
1     41       1  950200   1007  410019502001007
2     41       1  950200   1008  410019502001008
3     41       1  950200   1009  410019502001009
4     41       1  950200   1010  410019502001010
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2992,410019501001000,410019501001,1.828839,1.875761,0.0,0.0,0.03517,0.0,1.829733,0.0,...,0.0,0.039947,0.0,1.798867,0.0,0.0,0.0,0.0,0.0,0.0
2993,410019501001001,410019501001,0.91442,0.937881,0.0,0.0,0.017585,0.0,0.914867,0.0,...,0.0,0.019973,0.0,0.899433,0.0,0.0,0.0,0.0,0.0,0.0
2994,410019501001002,410019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2995,410019501001003,410019501001,42.063306,43.142509,0.0,0.0,0.80891,0.0,42.083863,0.0,...,0.0,0.699068,0.0,31.48017,0.0,0.0,0.0,0.0,0.0,0.0
2996,410019501001004,410019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


or took 12.158 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     42       1  30800   3003  420010308003003
1     42       1  30800   3004  420010308003004
2     42       1  30800   3005  420010308003005
3     42       1  30800   3006  420010308003006
4     42       1  30800   3019  420010308003019


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2404,420010301011000,420010301011,5.837209,5.858626,0.0,0.0,0.0,0.0,5.905032,0.0,...,0.0,0.0,0.0,5.192918,0.0,0.0,0.0,0.0,0.0,0.0
2405,420010301011001,420010301011,29.186047,29.293131,0.0,0.0,0.0,0.0,27.556818,0.0,...,0.0,0.0,0.0,21.810254,0.0,0.0,0.0,0.0,0.0,0.0
2406,420010301011002,420010301011,14.593023,14.646565,0.0,0.0,2.222222,0.0,12.794237,0.0,...,0.0,0.8,0.0,12.463002,0.0,0.0,0.0,0.0,0.0,0.0
2407,420010301011003,420010301011,74.910853,74.209265,0.0,0.0,0.0,0.0,74.797078,0.0,...,0.0,0.0,0.0,54.006342,0.0,0.0,0.0,0.0,0.0,1.122449
2408,420010301011004,420010301011,26.267442,26.363818,0.0,0.0,0.0,0.0,26.572646,0.0,...,0.0,0.0,0.0,20.77167,0.0,0.0,0.0,0.0,0.0,0.0


pa took 24.626 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     44       1  30100   1000  440010301001000
1     44       1  30100   1001  440010301001001
2     44       1  30100   1002  440010301001002
3     44       1  30100   1003  440010301001003
4     44       1  30100   1004  440010301001004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
0,440010301001000,440010301001,22.58378,18.973829,0.0,0.0,0.0,0.0,18.632987,0.0,...,0.0,0.0,0.0,8.378651,0.0,0.0,0.0,0.0,0.0,3.636364
1,440010301001001,440010301001,16.692359,16.976584,0.0,0.0,0.0,0.0,16.67162,0.0,...,0.0,0.0,0.0,14.662638,0.0,0.0,0.0,0.0,0.0,0.0
2,440010301001002,440010301001,7.855228,7.988981,0.0,0.0,0.0,0.0,5.884101,0.0,...,0.0,0.0,0.0,3.141994,0.0,0.0,0.0,0.0,0.571429,0.0
3,440010301001003,440010301001,27.493298,27.961433,0.0,0.0,0.0,0.0,27.459138,0.0,...,0.0,0.0,0.0,24.08862,0.0,0.0,0.0,0.0,0.0,0.0
4,440010301001004,440010301001,17.674263,17.975207,0.0,0.0,0.0,0.0,13.729569,0.0,...,0.0,0.0,0.0,8.378651,0.0,0.0,0.0,0.0,0.571429,0.0


ri took 0.965 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     45       1  950400   2032  450019504002032
1     45       1  950400   2033  450019504002033
2     45       1  950400   2034  450019504002034
3     45       1  950400   2037  450019504002037
4     45       1  950400   2038  450019504002038
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
530,450019501001000,450019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
531,450019501001001,450019501001,12.059238,11.819788,0.0,0.0,0.0,0.0,11.196682,0.0,...,0.0,0.0,0.0,7.185261,0.0,0.0,0.0,0.0,0.0,0.0
532,450019501001002,450019501001,1.607898,1.575972,0.0,0.0,0.0,0.0,1.492891,0.0,...,0.0,0.0,0.0,1.596725,0.0,0.0,0.0,0.0,0.0,0.0
533,450019501001003,450019501001,1.607898,1.575972,0.0,0.0,0.0,0.0,1.492891,0.0,...,0.0,0.0,0.0,1.596725,0.0,0.0,0.0,0.0,0.0,0.0
534,450019501001004,450019501001,16.078984,15.759717,0.0,0.0,0.0,0.0,14.92891,0.0,...,0.0,0.0,0.0,11.177073,0.0,0.0,0.0,0.0,0.0,0.0


sc took 9.414 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     46       3  973600   3105  460039736003105
1     46       3  973600   3108  460039736003108
2     46       3  973600   3109  460039736003109
3     46       3  973600   3110  460039736003110
4     46       3  973600   3111  460039736003111
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
585,460039736001000,460039736001,0.858051,0.881057,0.0,0.0,0.0,0.0,0.884354,0.0,...,0.0,0.0,0.0,0.856269,0.0,0.0,0.0,0.0,0.0,0.0
586,460039736001001,460039736001,4.290254,4.405286,0.0,0.0,0.0,0.0,4.421769,0.0,...,0.0,0.0,0.0,2.568807,0.0,0.0,0.0,0.0,0.0,0.0
587,460039736001002,460039736001,3.432203,3.524229,0.0,0.0,0.0,0.0,3.537415,0.0,...,0.0,0.0,0.0,2.568807,0.0,0.0,0.0,0.0,0.0,0.0
588,460039736001003,460039736001,1.716102,1.762115,0.0,0.0,0.0,0.0,1.768707,0.0,...,0.0,0.0,0.0,1.712538,0.0,0.0,0.0,0.0,0.0,0.0
589,460039736001004,460039736001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


sd took 4.061 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     47       1  20901   2092  470010209012092
1     47       1  20901   2093  470010209012093
2     47       1  20901   2094  470010209012094
3     47       1  20901   2095  470010209012095
4     47       1  20901   2112  470010209012112
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2903,470010201001000,470010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2904,470010201001001,470010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2905,470010201001002,470010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2906,470010201001003,470010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2907,470010201001004,470010201001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


tn took 13.374 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     48       1  950700   1000  480019507001000
1     48       1  950700   1013  480019507001013
2     48       1  950700   1014  480019507001014
3     48       1  950700   3114  480019507003114
4     48       1  950800   1000  480019508001000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
2179,480019501001000,480019501001,41.701807,32.9653,0.0,0.0,6.030151,0.0,27.50969,0.0,...,0.0,3.320611,0.0,23.608748,0.0,0.0,0.0,0.0,0.0,1.612903
2087,480019501001001,480019501001,113.076054,112.906151,0.0,0.0,0.0,0.0,105.993217,0.0,...,0.0,0.0,0.0,89.550425,0.0,0.0,0.0,0.0,0.0,0.645161
2088,480019501001002,480019501001,2.405873,2.472397,0.0,0.0,0.0,0.0,2.427326,0.0,...,0.0,0.0,0.0,0.814095,0.0,0.0,0.0,0.0,0.0,0.0
2089,480019501001003,480019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2180,480019501001004,480019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


tx took 62.645 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     49       1  100100   1730  490011001001730
1     49       1  100100   1737  490011001001737
2     49       1  100100   1738  490011001001738
3     49       1  100100   2036  490011001002036
4     49       1  100100   2037  490011001002037
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
257,490011001001000,490011001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
258,490011001001001,490011001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
259,490011001001002,490011001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
260,490011001001003,490011001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
261,490011001001004,490011001001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


ut took 5.116 seconds 
Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     50       1  960400   3000  500019604003000
1     50       1  960400   3001  500019604003001
2     50       1  960400   3002  500019604003002
3     50       1  960400   3003  500019604003003
4     50       1  960400   3004  500019604003004
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1827,500019601001000,500019601001,130.987292,130.122911,1.047898,0.0,0.523949,0.0,127.386935,0.727273,...,0.0,0.0,0.0,94.529032,0.5,0.0,0.0,0.0,0.0,0.0
1828,500019601001001,500019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1829,500019601001002,500019601001,99.706745,97.836775,0.797654,0.0,0.398827,0.0,97.98995,0.0,...,0.0,0.0,0.0,74.477419,0.0,0.0,0.0,0.0,0.0,0.0
1830,500019601001003,500019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1831,500019601001004,500019601001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


vt took 1.124 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY  TRACT  BLOCK          GEOID20
0     51       1  90100   1000  510010901001000
1     51       1  90100   1001  510010901001001
2     51       1  90100   1002  510010901001002
3     51       1  90100   1004  510010901001004
4     51       1  90100   1005  510010901001005


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
0,510010901001000,510010901001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,510010901001001,510010901001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,510010901001002,510010901001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
392,510010901001003,510010901001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,510010901001004,510010901001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


va took 12.612 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     53       1  950100   1113  530019501001113
1     53       1  950100   3001  530019501003001
2     53       1  950100   3010  530019501003010
3     53       1  950100   3011  530019501003011
4     53       1  950100   3012  530019501003012
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
336,530019501001000,530019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
337,530019501001001,530019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
338,530019501001002,530019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
339,530019501001003,530019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
340,530019501001004,530019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


wa took 8.911 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     54       1  965800   2036  540019658002036
1     54       1  965800   2037  540019658002037
2     54       1  965800   2038  540019658002038
3     54       1  965800   2039  540019658002039
4     54       1  965800   2041  540019658002041
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
1535,540019655001000,540019655001,11.332471,11.376623,0.0,0.0,0.0,0.0,11.648936,0.0,...,0.0,0.0,0.0,12.645985,0.0,0.0,0.0,0.0,0.0,0.0
1536,540019655001001,540019655001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1537,540019655001002,540019655001,1.888745,1.896104,0.0,0.0,0.0,0.0,1.941489,0.0,...,0.0,0.0,0.0,2.29927,0.0,0.0,0.0,0.0,0.0,0.0
1602,540019655001003,540019655001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1591,540019655001004,540019655001,90.659767,91.012987,0.0,0.0,0.0,0.0,90.279255,0.0,...,0.0,0.0,0.0,83.923358,0.0,0.0,0.0,0.0,0.0,0.0


wv took 5.702 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     55       1  950100   1000  550019501001000
1     55       1  950100   1001  550019501001001
2     55       1  950100   1002  550019501001002
3     55       1  950100   1003  550019501001003
4     55       1  950100   1004  550019501001004


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Exported DF


Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
0,550019501001000,550019501001,5.203406,5.288462,0.0,0.0,0.0,0.0,5.317073,0.0,...,0.0,0.0,0.0,5.3233,0.0,0.0,0.0,0.0,0.0,0.0
1,550019501001001,550019501001,7.284768,7.403846,0.0,0.0,0.0,0.0,7.443902,0.0,...,0.0,0.0,0.0,6.38796,0.0,0.0,0.0,0.0,0.0,0.0
2,550019501001002,550019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,550019501001003,550019501001,11.447493,11.634615,0.0,0.0,0.0,0.0,11.697561,0.0,...,0.0,0.0,0.0,8.51728,0.0,0.0,0.0,0.0,0.0,0.0
4,550019501001004,550019501001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


wi took 12.409 seconds 


  df = pd.read_csv(file_path)


Sample GEOID20 values:
   STATE  COUNTY   TRACT  BLOCK          GEOID20
0     56       1  963900   2202  560019639002202
1     56       1  963900   2203  560019639002203
2     56       1  963900   2204  560019639002204
3     56       1  963900   2208  560019639002208
4     56       1  963900   2209  560019639002209
Exported DF


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['GEOID20'] = merged_data_final_export['GEOID20_block'].fillna(merged_data_final_export['GEOID20_block'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_final_export['BLKGRP15'] = merged_data_final_export['GEOID20_block'].astype(str).str.zfill(15).str[0:12]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_data_fin

Unnamed: 0,GEOID20,BLKGRP15,C_TOT15,C_NHS15,C_AIA15,C_ASN15,C_BLK15,C_NHP15,C_WHT15,C_AIW15,...,CVAP_ASN15,CVAP_BLK15,CVAP_NHP15,CVAP_WHT15,CVAP_AIW15,CVAP_ASW15,CVAP_BLW15,CVAP_AIB15,CVAP_2OM15,CVAP_HSP15
979,560019627001000,560019627001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
980,560019627001001,560019627001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
981,560019627001002,560019627001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
982,560019627001003,560019627001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
983,560019627001004,560019627001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


wy took 3.248 seconds 


## Step 4: Process and use the 2010_to_2020 crosswalk

#### Process the crosswalk file so that each row is in ['block_fips_2020', 'block_fips_2010', 'weight'] format

In [None]:
import os
import pandas as pd

def preprocess_crosswalk_grouped(file_path):

    normalized_data = []

    with open(file_path, 'r') as f:
        for line in f:
            # Split the line into fields
            fields = line.strip().split(',')

            # The first field is the 2020 block
            block_2020 = fields[0]

            # Remaining fields come in pairs of [2010_block, weight]
            for i in range(1, len(fields), 2):
                try:
                    block_2010 = fields[i]
                    weight = float(fields[i + 1])
                    normalized_data.append((block_2020, block_2010, weight))
                except IndexError:
                    # Skip incomplete rows
                    print(f"Skipped incomplete row: {fields}")
                    continue

    df = pd.DataFrame(normalized_data, columns=['block_fips_2020', 'block_fips_2010', 'weight'])
    return df

def process_all_crosswalks_grouped(directory, output_directory):

    os.makedirs(output_directory, exist_ok=True)

    # Iterate through all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            file_path = os.path.join(directory, filename)
            print(f"Processing file: {file_path}")

            # Preprocess the file
            try:
                crosswalk_df = preprocess_crosswalk_grouped(file_path)

                # Save the normalized data
                output_file = os.path.join(output_directory, filename)
                crosswalk_df.to_csv(output_file, index=False)
                print(f"Processed file saved to: {output_file}")
            except Exception as e:
                print(f"Error processing file {filename}: {e}")

input_directory = "./block10block20_crosswalks"
output_directory = "./processed_crosswalks"

process_all_crosswalks_grouped(input_directory, output_directory)


Processing file: ./block10block20_crosswalks/block1020_crosswalk_08.csv
Processed file saved to: ./processed_crosswalks/block1020_crosswalk_08.csv
Processing file: ./block10block20_crosswalks/block1020_crosswalk_34.csv
Processed file saved to: ./processed_crosswalks/block1020_crosswalk_34.csv
Processing file: ./block10block20_crosswalks/block1020_crosswalk_20.csv
Processed file saved to: ./processed_crosswalks/block1020_crosswalk_20.csv
Processing file: ./block10block20_crosswalks/block1020_crosswalk_21.csv
Processed file saved to: ./processed_crosswalks/block1020_crosswalk_21.csv
Processing file: ./block10block20_crosswalks/block1020_crosswalk_35.csv
Processed file saved to: ./processed_crosswalks/block1020_crosswalk_35.csv
Processing file: ./block10block20_crosswalks/block1020_crosswalk_09.csv
Processed file saved to: ./processed_crosswalks/block1020_crosswalk_09.csv
Processing file: ./block10block20_crosswalks/block1020_crosswalk_23.csv
Processed file saved to: ./processed_crosswalk

#### Was planning to use: for each GEOID20, find its corresponding GEOID10 row in the CVAP data, apply the weights and sum them. But some states have more than 100,000 GEOID20, looking up and storing takes too much time.

In [None]:
def translate_2010_to_2020(cvap_data, crosswalk):

    crosswalk['block_fips_2010'] = crosswalk['block_fips_2010'].apply(lambda x: str(x).zfill(15))
    crosswalk['block_fips_2020'] = crosswalk['block_fips_2020'].apply(lambda x: str(x).zfill(15))
    cvap_data['GEOID10'] = cvap_data['GEOID20'].apply(lambda x: str(x).zfill(15))

    data_columns = [
        col for col in cvap_data.columns
        if col not in ['GEOID20', 'GEOID10', 'BLKGRP15']
    ]

    # Initialize a results dictionary
    results = {col: [] for col in ['GEOID20'] + data_columns}

    # Create a dictionary for quick lookups of CVAP data by GEOID10
    cvap_dict = cvap_data.set_index('GEOID10').to_dict(orient='index')

    # Iterate through unique GEOID20 values in the crosswalk
    for geoid_2020 in crosswalk['block_fips_2020'].unique():
        relevant_rows = crosswalk[crosswalk['block_fips_2020'] == geoid_2020]

        aggregated_row = {col: 0 for col in data_columns}
        aggregated_row['GEOID20'] = geoid_2020

        for _, row in relevant_rows.iterrows():
            block_2010 = row['block_fips_2010']
            weight = row['weight']

            if block_2010 in cvap_dict:
                cvap_row = cvap_dict[block_2010]
            else:
                cvap_row = {col: 0 for col in data_columns}

            for col in data_columns:
                aggregated_row[col] += cvap_row[col] * weight

        for col in ['GEOID20'] + data_columns:
            results[col].append(aggregated_row[col])

    translated_data = pd.DataFrame(results)

    translated_data[data_columns] = translated_data[data_columns].round(2)

    return translated_data

Processing state: AL


KeyboardInterrupt: 

#### Alternatively, use merge:   
 
The merge operation aligns the block_fips_2010 column in the crosswalk with the GEOID10 column in the CVAP dataset.  
The weight column from the crosswalk is retained during the merge operation.  
After merging, the data is grouped by block_fips_2020, which represents the 2020 Census block IDs.  
Aggregation is performed on the redistributed CVAP values within each 2020 block group.  


In [None]:
def translate_2010_to_2020_optimized(cvap_data, crosswalk):
    # Ensure block identifiers are strings of 15 digits
    crosswalk['block_fips_2010'] = crosswalk['block_fips_2010'].astype(str).str.zfill(15)
    crosswalk['block_fips_2020'] = crosswalk['block_fips_2020'].astype(str).str.zfill(15)
    cvap_data['GEOID10'] = cvap_data['GEOID20'].astype(str).str.zfill(15)

    # Columns to process (exclude GEOID20, GEOID10, BLKGRP15)
    data_columns = [
        col for col in cvap_data.columns if col not in ['GEOID20', 'GEOID10', 'BLKGRP15']
    ]

    # Merge crosswalk with CVAP data
    merged = crosswalk.merge(
        cvap_data,
        left_on='block_fips_2010',
        right_on='GEOID10',
        how='left'
    )

    # Fill missing values in merged CVAP columns with 0
    merged[data_columns] = merged[data_columns].fillna(0)

    # Apply weights
    for col in data_columns:
        merged[col] = merged[col] * merged['weight']

    # Group by block_fips_2020 and sum the weighted values
    aggregated = merged.groupby('block_fips_2020', as_index=False)[data_columns].sum()

    # Rename block_fips_2020 to GEOID20 for output
    aggregated.rename(columns={'block_fips_2020': 'GEOID20'}, inplace=True)

    # Round numeric columns to the nearest hundredth
    aggregated[data_columns] = aggregated[data_columns].round(2)

    return aggregated

def process_all_states_fixed(cvap_dir, crosswalk_dir, output_dir, state_fips_mapping):

    for fips_code, state_abbrev in state_fips_mapping.items():
        print(f"Processing state: {state_abbrev.upper()}")

        cvap_file = os.path.join(cvap_dir, state_abbrev, f"{state_abbrev}_2015_cvap_block_no_rounding.csv")
        crosswalk_file = os.path.join(crosswalk_dir, f"block1020_crosswalk_{fips_code}.csv")
        output_file = os.path.join(output_dir, state_abbrev, f"{state_abbrev}_2020_cvap_block.csv")

        if not os.path.exists(cvap_file):
            print(f"CVAP data file missing for {state_abbrev.upper()}: {cvap_file}")
            continue
        if not os.path.exists(crosswalk_file):
            print(f"Crosswalk file missing for {state_abbrev.upper()}: {crosswalk_file}")
            continue

        cvap_data = pd.read_csv(cvap_file)
        crosswalk = pd.read_csv(crosswalk_file)

        try:
            translated_data = translate_2010_to_2020_optimized(cvap_data, crosswalk)

            translated_data = translated_data[[col for col in translated_data.columns if col != 'BLKGRP15']]

            os.makedirs(os.path.dirname(output_file), exist_ok=True)
            translated_data.to_csv(output_file, index=False)
            print(f"Translated CVAP data saved to {output_file}")

        except Exception as e:
            print(f"Error processing {state_abbrev.upper()}: {e}")

state_fips_mapping = {
    '01': 'al', '02': 'ak', '04': 'az', '05': 'ar', '06': 'ca', '08': 'co', '09': 'ct',
    '10': 'de', '12': 'fl', '13': 'ga', '15': 'hi', '16': 'id', '17': 'il', '18': 'in',
    '19': 'ia', '20': 'ks', '21': 'ky', '22': 'la', '23': 'me', '24': 'md', '25': 'ma',
    '26': 'mi', '27': 'mn', '28': 'ms', '29': 'mo', '30': 'mt', '31': 'ne', '32': 'nv',
    '33': 'nh', '34': 'nj', '35': 'nm', '36': 'ny', '37': 'nc', '38': 'nd', '39': 'oh',
    '40': 'ok', '41': 'or', '42': 'pa', '44': 'ri', '45': 'sc', '46': 'sd', '47': 'tn',
    '48': 'tx', '49': 'ut', '50': 'vt', '51': 'va', '53': 'wa', '54': 'wv', '55': 'wi', '56': 'wy'
}

cvap_dir = "./2015_cvap_disagg"
crosswalk_dir = "./processed_crosswalks"
output_dir = "./2015_cvap_diagg_2020"

# Process all states
process_all_states_fixed(cvap_dir, crosswalk_dir, output_dir, state_fips_mapping)

Processing state: AL
Translated CVAP data saved to ./2015_cvap_diagg_2020/al/al_2020_cvap_block.csv
Processing state: AK
Translated CVAP data saved to ./2015_cvap_diagg_2020/ak/ak_2020_cvap_block.csv
Processing state: AZ
Translated CVAP data saved to ./2015_cvap_diagg_2020/az/az_2020_cvap_block.csv
Processing state: AR
Translated CVAP data saved to ./2015_cvap_diagg_2020/ar/ar_2020_cvap_block.csv
Processing state: CA
Translated CVAP data saved to ./2015_cvap_diagg_2020/ca/ca_2020_cvap_block.csv
Processing state: CO
Translated CVAP data saved to ./2015_cvap_diagg_2020/co/co_2020_cvap_block.csv
Processing state: CT
Translated CVAP data saved to ./2015_cvap_diagg_2020/ct/ct_2020_cvap_block.csv
Processing state: DE
Translated CVAP data saved to ./2015_cvap_diagg_2020/de/de_2020_cvap_block.csv
Processing state: FL
Translated CVAP data saved to ./2015_cvap_diagg_2020/fl/fl_2020_cvap_block.csv
Processing state: GA
Translated CVAP data saved to ./2015_cvap_diagg_2020/ga/ga_2020_cvap_block.csv


## Round the results to integers

In [None]:
# Define the rounding function
def data_rounding(x):
    x = x.sort_values(ascending=False, kind='mergesort')
    floor = np.floor(x)
    diff = x - floor
    to_add = int(round(sum(x), 0) - sum(floor))
    floor[diff.nlargest(to_add).index] += 1
    return floor.sort_index(ascending=True).astype(int)

# Define paths
input_base_path = './2015_cvap_diagg_2020/'
output_base_path = './2015_cvap_disagg_2020_rounding/'

# Ensure the output directory exists
os.makedirs(output_base_path, exist_ok=True)

# Iterate over each state folder
for state in os.listdir(input_base_path):
    state_input_path = os.path.join(input_base_path, state)
    state_output_path = os.path.join(output_base_path, state)

    os.makedirs(state_output_path, exist_ok=True)

    for file in os.listdir(state_input_path):
        if file.endswith('.csv'):
            file_path = os.path.join(state_input_path, file)
            print(f"Processing file: {file_path}")

            data = pd.read_csv(file_path)

            # Round all columns besides GEOID
            columns_to_round = [col for col in data.columns if col not in ['GEOID20']]

            # Extract block groups
            data['BLKGRP'] = data['GEOID20'].astype(str).str[:12]

            # Perform rounding at the block group level
            rounded_data = []

            for col in columns_to_round:
                rounded_group = data.groupby('BLKGRP')[col].apply(data_rounding)

                # Flatten the group result into a DataFrame
                rounded_col = rounded_group.reset_index(level=0, drop=True)
                rounded_data.append(rounded_col)

            # Combine the rounded results with the original data
            for i, col in enumerate(columns_to_round):
                data[col] = rounded_data[i]

            # Drop temporary block group column
            data.drop(columns=['BLKGRP'], inplace=True)

            # Save the rounded data
            output_file_path = os.path.join(state_output_path, file)
            data.to_csv(output_file_path, index=False)
            print(f"Rounded data saved to {output_file_path}")


Processing file: ./2015_cvap_diagg_2020/vt/vt_2020_cvap_block.csv
Rounded data saved to ./2015_cvap_disagg_2020_rounding/vt/vt_2020_cvap_block.csv
Processing file: ./2015_cvap_diagg_2020/va/va_2020_cvap_block.csv
Rounded data saved to ./2015_cvap_disagg_2020_rounding/va/va_2020_cvap_block.csv
Processing file: ./2015_cvap_diagg_2020/sd/sd_2020_cvap_block.csv
Rounded data saved to ./2015_cvap_disagg_2020_rounding/sd/sd_2020_cvap_block.csv
Processing file: ./2015_cvap_diagg_2020/sc/sc_2020_cvap_block.csv
Rounded data saved to ./2015_cvap_disagg_2020_rounding/sc/sc_2020_cvap_block.csv
Processing file: ./2015_cvap_diagg_2020/ut/ut_2020_cvap_block.csv
Rounded data saved to ./2015_cvap_disagg_2020_rounding/ut/ut_2020_cvap_block.csv
Processing file: ./2015_cvap_diagg_2020/ga/ga_2020_cvap_block.csv
Rounded data saved to ./2015_cvap_disagg_2020_rounding/ga/ga_2020_cvap_block.csv
Processing file: ./2015_cvap_diagg_2020/ms/ms_2020_cvap_block.csv
Rounded data saved to ./2015_cvap_disagg_2020_roundi