# National 2020 AIANHH (American Indian Area / Alaska Native Area / Hawaiian Home Land) Block Assignment File and Tribal Block Group Indicator for 2020 Blocks

## Background:
- We received a request for a national dataset identifying blocks that are in Tribal Block Groups and in AIANHH areas.
## Approach:
- Read in block-level PL data and query out necessary fields.
- Retrieve AIANHH name data using fields in PL file and AIANHH shapefiles.
- Retrieve county name data using fields in PL file and county shapefiles.
- Flag each block as inside or outside a Tribal Block Group and an AIANHH area, based on whether the corresponding PL fields hold non-null values.
- Concatenate the state datasets into a national dataset and clean it before exporting.

## Sources
- Block Level PL data for each state (ex. [Washington block PL 94-171 2020](https://redistrictingdatahub.org/dataset/washington-block-pl-94171-2020/))
- AIANHH shapefile for each state (where applicable) (ex. [Washington AIANNH boundaries (2020)](https://redistrictingdatahub.org/dataset/washington-aiannh-boundaries-2020/))
- County shapefile for each state (ex. [Washington County boundaries (2020)](https://redistrictingdatahub.org/dataset/washington-county-boundaries-2020/))

### Note: Please email info@redistrictingdatahub.org with any questions.

In [None]:
import pandas as pd
import geopandas as gp
import os
import numpy as np
from s3_paths import *
# Folder listed and read by the state-loading cell below (one block-level PL file per entry).
# NOTE(review): `wd` is not defined in this notebook; presumably it comes from the
# `s3_paths` star import above -- confirm.
datafold = os.path.join(wd,'pl')

In [None]:
def assign_fullname(state_ab):
    """Return the full state name for a lowercase two-letter state abbreviation.

    Args:
        state_ab: Lowercase postal abbreviation, e.g. ``'wa'``.

    Returns:
        The state's full name, or an empty string when the abbreviation is not
        one of the 50 states listed below.
    """
    # Explicit pairs rather than two parallel lists: with parallel lists a
    # single missing name (Vermont was absent) silently shifts every later
    # state onto the wrong abbreviation.
    names_by_abbrev = {
        'al': 'Alabama', 'ak': 'Alaska', 'az': 'Arizona', 'ar': 'Arkansas',
        'ca': 'California', 'co': 'Colorado', 'ct': 'Connecticut',
        'de': 'Delaware', 'fl': 'Florida', 'ga': 'Georgia', 'hi': 'Hawaii',
        'id': 'Idaho', 'il': 'Illinois', 'in': 'Indiana', 'ia': 'Iowa',
        'ks': 'Kansas', 'ky': 'Kentucky', 'la': 'Louisiana', 'me': 'Maine',
        'md': 'Maryland', 'ma': 'Massachusetts', 'mi': 'Michigan',
        'mn': 'Minnesota', 'ms': 'Mississippi', 'mo': 'Missouri',
        'mt': 'Montana', 'ne': 'Nebraska', 'nv': 'Nevada',
        'nh': 'New Hampshire', 'nj': 'New Jersey', 'nm': 'New Mexico',
        'ny': 'New York', 'nc': 'North Carolina', 'nd': 'North Dakota',
        'oh': 'Ohio', 'ok': 'Oklahoma', 'or': 'Oregon', 'pa': 'Pennsylvania',
        'ri': 'Rhode Island', 'sc': 'South Carolina', 'sd': 'South Dakota',
        'tn': 'Tennessee', 'tx': 'Texas', 'ut': 'Utah', 'vt': 'Vermont',
        'va': 'Virginia', 'wa': 'Washington', 'wv': 'West Virginia',
        'wi': 'Wisconsin', 'wy': 'Wyoming',
    }
    return names_by_abbrev.get(state_ab, '')

Import block level data for each state

In [None]:
# Fields kept from each state's block-level PL file (same for every state).
cols_keep = ['GEOCODE','AIANHHNS','AIHHTLI','AIANHH','AIANHHFP','AIANHHCC','AITS','AITSFP','AITSCC','AITSNS','TTRACT','TBLKGRP']

# State key (file-name prefix before the first underscore) -> block-level DataFrame.
data_dict = {}
for i in os.listdir(datafold):
    # Hidden entries (e.g. .DS_Store, .ipynb_checkpoints) are not PL files.
    if i.startswith('.'):
        continue
    sa = i.split('_')[0]

    # Read the kept fields as text: letting pandas infer numeric dtypes strips
    # leading zeros from the codes and can turn them into floats.
    df = pd.read_csv(os.path.join(datafold, i), usecols=cols_keep, dtype=str)
    # Restore the cols_keep order; .copy() gives an independent frame so the
    # column assignments below do not act on a slice.
    df = df[cols_keep].copy()

    # Left-pad every field with zeros to the longest value seen in that column,
    # so each column has a uniform width within the state.
    for col in cols_keep:
        df[col] = df[col].astype(str)
        lengths = df[col].str.len()
        width = int(lengths.max()) if len(lengths) else 0
        df[col] = df[col].str.zfill(width)

    # 15-character block GEOID and the pieces sliced out of it.
    df['GEOID20'] = df['GEOCODE'].str.zfill(15)
    df['STATEFP20'] = df['GEOID20'].str[0:2]
    df['COUNTYFP20'] = df['GEOID20'].str[2:5]
    df['TRACTCE20'] = df['GEOID20'].str[5:11]
    df['BLOCKCE20'] = df['GEOID20'].str[11:15]

    # Identifier later looked up against GEOID20 in the AIANNH boundary file.
    df['AIA_GEOID'] = df['AIANHH'] + df['AIHHTLI']
    data_dict[sa] = df

Assign AIANHH names and flag each block as inside or outside an AIANHH area and a Tribal Block Group

In [None]:
# Shallow copy: the new dict holds the same DataFrame objects as data_dict, so
# the columns added below also appear on the frames in data_dict.
data_dict_copy = data_dict.copy()
for k, v in data_dict_copy.items():
    # County names for this state, keyed by county FIPS code.
    county = gp.read_file(f'zip+{county_base+k}_cnty_2020_bound.zip')
    county_dict = dict(zip(county['COUNTYFP20'], county['NAMELSAD20']))
    v['COUNTY'] = v['COUNTYFP20'].apply(lambda x: county_dict.get(str(x)))

    # Only the boundary-file read is treated as optional. A failed read is
    # taken to mean the state has no AIANNH boundary file; any error in the
    # assignments further down is a real problem and is allowed to surface
    # instead of silently marking the whole state as 'NO'.
    try:
        aia = gp.read_file(f'zip+{aia_base+k}_aiannh_2020_bound.zip')
    except Exception as err:
        print(f'{k}: AIANNH boundary file not read ({err}); marking all blocks as outside AIANHH areas')
        aia = None

    if aia is None:
        v['IN_TBLKGRP'] = 'NO'
        v['IN_AIANHH'] = 'NO'
        v['AIA_NAME'] = 'N/A'
        continue

    # AIANNH area names keyed by the boundary file's GEOID20.
    aia_dict = dict(zip(aia['GEOID20'], aia['NAMELSAD20']))
    # '9' and '99999' are the all-nines values the PL fields carry for blocks
    # that are not in a Tribal Block Group / not in an AIANHH area.
    v['IN_TBLKGRP'] = v['TBLKGRP'].apply(lambda x: 'YES' if x != '9' else 'NO')
    v['IN_AIANHH'] = v['AIA_GEOID'].apply(lambda x: 'YES' if x != '99999' else 'NO')
    v['AIA_NAME'] = v['AIA_GEOID'].apply(lambda x: aia_dict.get(x, 'N/A'))

Create state-level files

In [None]:
# Column order of every state-level output file.
order = ['GEOID20','STATE', 'STATEFP20','COUNTY', 'COUNTYFP20', 'TRACTCE20', 'BLOCKCE20',
         'IN_AIANHH','IN_TBLKGRP',
         'AIA_NAME','AIA_GEOID','AIANHHNS',
         'AIHHTLI', 'AIANHH', 'AIANHHFP', 'AIANHHCC',
         'TTRACT', 'TBLKGRP',
         'AITS', 'AITSFP', 'AITSCC', 'AITSNS']

# Output folder, defined once outside the loop so that `fold` exists for the
# later cells even when no state was loaded.
fold = os.path.join(wd,'STATE_BAFS')
os.makedirs(fold, exist_ok=True)

for k, v in data_dict_copy.items():
    # Adds the STATE column to the shared DataFrame before selecting the output columns.
    v['STATE'] = assign_fullname(k)
    v = v[order]
    # One CSV per state, named '<state key>_aia_baf.csv'.
    path = os.path.join(fold, k+'_aia_baf.csv')
    v.to_csv(path, index=False)

In [None]:
# Reload the per-state files written by the export cell.
dfs = []
for i in os.listdir(fold):
    # Only the '<state key>_aia_baf.csv' files belong here. The national CSV is
    # written into this same folder, so without this filter a rerun would
    # concatenate the previous national output back in and duplicate every row.
    if not i.endswith('_aia_baf.csv'):
        continue
    # Read everything as text so the zero-padded codes keep their leading zeros;
    # 'N/A' names are still parsed as missing values.
    df = pd.read_csv(os.path.join(fold, i), dtype=str)
    dfs.append(df)

Concat state files into national file and assign 'NULL' to applicable blocks

In [None]:
# Stack the state tables into one national table with a fresh 0..n-1 index.
nat = pd.concat(dfs)
nat.rename(columns = {'AIA_NAME':'AIANHH_NAME','AIA_GEOID':'AIANHH_GEOID'},inplace=True)
nat.reset_index(inplace=True,drop=True)

# Block identifiers: restore fixed-width, zero-padded text in case the CSV
# reload parsed them as integers.
id_widths = {'GEOID20': 15, 'STATEFP20': 2, 'COUNTYFP20': 3, 'TRACTCE20': 6, 'BLOCKCE20': 4}
for col, width in id_widths.items():
    nat[col] = nat[col].astype(str).str.zfill(width)

# All-nines value that marks "not applicable" in each tribal field. The length
# of each value is also that field's width.
null_codes = {
    'AIANHH_GEOID': '99999',
    'AIANHHNS': '99999999',
    'AIHHTLI': '9',
    'AIANHH': '9999',
    'AIANHHFP': '99999',
    'AIANHHCC': '99',
    'TTRACT': '999999',
    'TBLKGRP': '9',
    'AITS': '999',
    'AITSFP': '99999',
    'AITSCC': '99',
    'AITSNS': '99999999',
}
for col, sentinel in null_codes.items():
    present = nat[col].notna()
    # Pad to the field width so codes that lost leading zeros on reload stay
    # consistent with AIANHH_GEOID; then swap the sentinel for 'NULL'.
    codes = nat[col].astype(str).str.zfill(len(sentinel))
    codes = codes.where(codes != sentinel, 'NULL')
    # Missing values are left untouched rather than turned into padded text.
    nat[col] = codes.where(present, nat[col])

# 'N/A' names are parsed as missing values on reload; label them 'NULL' as well.
nat['AIANHH_NAME'] = nat['AIANHH_NAME'].fillna('NULL')

nat.to_csv(os.path.join(fold,'national_block_assignment_aianhh.csv'),index=False)

display(nat.head())
print(len(nat))