This script matches the zip codes found in a column named "Zip Code" of each .csv file against the crosswalk lookup generated by zcta_ej.ipynb, and appends the result as a "Percent EJ" column. Files must be placed under data/to_match or in a subfolder of that folder.

In [13]:
import pandas, pyprojroot, warnings, numpy

def _clean_zip(value):
    """Return the 5-digit zip code in *value* as an int, or None if unparseable.

    Accepts zip+4 entries ("52805-1234") and float-formatted values
    ("52805.0"), which is how pandas renders a zip column that contains
    blanks.
    """
    text = str(value).split('-', 1)[0].strip()  # cleans any zip+4 entries
    try:
        return int(text)
    except ValueError:
        pass
    try:
        number = float(text)
    except ValueError:
        return None
    # NaN and inf are not integer-valued, so they fall through to None.
    return int(number) if number.is_integer() else None


def match(subfolder='', zipcode_col="Zip Code"):
    """Append a 'Percent EJ' column to every .csv in data/to_match/<subfolder>.

    Each file's zip codes are looked up in the 'percent_ej' column of
    data/outputs/ZCTA_EJ.csv (indexed by its 'zip' column). The result is
    written under data/matched/, mirroring the subfolder and file name.

    Parameters
    ----------
    subfolder : str
        Subfolder of data/to_match to process; '' processes the root folder.
    zipcode_col : str
        Title of the column holding zip codes in the input files.

    Raises
    ------
    KeyError
        If an input file has no `zipcode_col` column, or the lookup file has
        no 'zip' / 'percent_ej' column.
    """
    # set paths
    loc = pyprojroot.here('./data/to_match/' + subfolder)
    zip_ej = pyprojroot.here('./data/outputs/ZCTA_EJ.csv')
    lookup = pandas.read_csv(zip_ej).set_index('zip')
    # Select the column once so a malformed lookup file fails loudly here
    # instead of being reported as a missing zip code on every row.
    percent_ej = lookup['percent_ej']

    # loop through all csv's in folder (a Path is not iterable by itself)
    for file in sorted(loc.iterdir()):
        if not (file.is_file() and file.suffix.lower() == '.csv'):
            continue

        # pull zip codes and build the list of EJ proportions for them
        df = pandas.read_csv(file).dropna(axis='columns', how='all')
        ej_prob = []
        for raw_zip in df[zipcode_col]:
            z = _clean_zip(raw_zip)
            if z is None:
                # unparseable or blank entry: leave the match empty
                ej_prob.append(numpy.nan)
                continue
            try:
                ej_prob.append(percent_ej.loc[z])
            except KeyError:
                ej_prob.append(numpy.nan)
                print("Could not find zip code " + str(z))

        # append the column and create the output file
        df.insert(len(df.columns), 'Percent EJ', ej_prob)
        with warnings.catch_warnings():
            # pyprojroot may warn about paths that do not exist yet
            warnings.simplefilter("ignore")
            outpath = pyprojroot.here('./data/matched') / subfolder / file.name
        outpath.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(outpath)
        print(file.name + " matched!")
        print()

What is the file you would like to match? Please include the extension (.csv)
EPA-HQ-OAR-2013-0479-8769_attachment_2.csv
Could not find zip code 52805
Could not find zip code 52406
Could not find zip code 52272
Matching complete!


Calling the match function below with no arguments will match all .csv files in the root <code>/data/to_match</code> folder. Alternatively, a subfolder within that folder can be passed as a string argument to match all .csv files in that subfolder. For example, <code>match('test')</code> will match all .csv files under <code>/data/to_match/test</code>. All matched files will be placed under <code>/data/matched/</code>.

In [None]:
# Match every .csv directly under data/to_match (default: no subfolder).
match()