In [128]:
# 2) Now read the rest with those names (no automatic header)
df = pd.read_csv(
    path,
    sep="\t",
    encoding="latin1",
    engine="python",
    header=None,
    names=header_cols,
    skiprows=1,
    dtype=str,
    quoting=csv.QUOTE_NONE,   # be strict about tabs
    on_bad_lines="skip"       # skip malformed data rows
)

In [129]:
# Optional: drop any phantom trailing column that slipped in anyway
df = df.loc[:, ~df.columns.astype(str).str.fullmatch(r"Unnamed:.*|^$")]

print(len(df.columns), "columns")
print(df.columns[:8].tolist())
print(df.head(1).T.head(6))  # sanity: '1. FORM TYPE' should be 'R' or 'A'; '2. REPORTING YEAR' should be 2022/2023

281 columns
['1. FORM TYPE', '2. REPORTING YEAR', '3. TRADE SECRET INDICATOR', '4. SANITIZED INDICATOR', '5. TITLE OF CERTIFYING OFFICIAL', '6. NAME OF CERTIFYING OFFICIAL', '7. CERTIFYING OFFICIAL S SIGNATURE INDICATOR', '8. DATE SIGNED']
                                                R
                                             2022
1. FORM TYPE                                   NO
2. REPORTING YEAR                              NO
3. TRADE SECRET INDICATOR        VP-MANUFACTURING
4. SANITIZED INDICATOR               JOHN NUNDAHL
5. TITLE OF CERTIFYING OFFICIAL        ELECTRONIC
6. NAME OF CERTIFYING OFFICIAL         2024-04-25


In [130]:
import csv, io, re
import pandas as pd
from pathlib import Path

path = "/Users/michaelwalker/RDM_Datalab/rdm-datalab-pipelines/data_raw/us_series/US_1A_2022.txt"  # your 1A file

# --- 1) Read header exactly as-is (strip BOM, kill stamp/empty tail) ---
with open(path, "r", encoding="latin1", newline="") as f:
    raw_header = f.readline().lstrip("\ufeff").rstrip("\r\n")
header_cols = raw_header.split("\t")
# Some drops include a trailing run-stamp like '20250917' or a blank last field
if header_cols and (header_cols[-1] == "" or header_cols[-1].isdigit()):
    header_cols = header_cols[:-1]
N = len(header_cols)

# --- 2) Robust row normalizer: enforce exactly N fields per row ---
def normalize_row(fields, N):
    """Return a list of exactly N fields: pad if short; glue extras into the last field."""
    if len(fields) < N:
        return fields + [""]*(N - len(fields))
    if len(fields) > N:
        fields[N-1] = fields[N-1] + "\t" + "\t".join(fields[N:])
        return fields[:N]
    return fields

# --- 3) Stream the file with csv (tab-delim, standard quoting), normalize widths ---
rows = []
with open(path, "r", encoding="latin1", newline="") as fh:
    next(fh)  # skip header line (we already captured it)
    reader = csv.reader(fh, delimiter="\t", quotechar='"', doublequote=True, escapechar="\\")
    for r in reader:
        rows.append(normalize_row(r, N))

df = pd.DataFrame(rows, columns=header_cols)

# --- 4) Clean phantom columns, if any slipped in ---
df = df.loc[:, ~df.columns.astype(str).str.fullmatch(r"Unnamed:.*|^$")]

# --- 5) Sanity checks (these should pass) ---
print(len(df.columns), "columns (expected N=", N, ")")
print(df.columns[:8].tolist())
print(df.head(1)[["1. FORM TYPE", "2. REPORTING YEAR"]])

# Expect form type to be R or A; year like 2022/2023:
print("Form type ok:", df["1. FORM TYPE"].str.upper().isin(["R","A"]).mean())
print("Year ok:", df["2. REPORTING YEAR"].str.fullmatch(r"\d{4}").mean())

281 columns (expected N= 281 )
['1. FORM TYPE', '2. REPORTING YEAR', '3. TRADE SECRET INDICATOR', '4. SANITIZED INDICATOR', '5. TITLE OF CERTIFYING OFFICIAL', '6. NAME OF CERTIFYING OFFICIAL', '7. CERTIFYING OFFICIAL S SIGNATURE INDICATOR', '8. DATE SIGNED']
  1. FORM TYPE 2. REPORTING YEAR
0            R              2022
Form type ok: 1.0
Year ok: 1.0


In [131]:
print(df.iloc[0])

1. FORM TYPE                                                R
2. REPORTING YEAR                                        2022
3. TRADE SECRET INDICATOR                                  NO
4. SANITIZED INDICATOR                                     NO
5. TITLE OF CERTIFYING OFFICIAL              VP-MANUFACTURING
                                                   ...       
277. ON-SITE RECYCLING PROCESSES METHOD 3                    
278. ON-SITE RECYCLING PROCESSES METHOD 4                    
279. ON-SITE RECYCLING PROCESSES METHOD 5                    
280. ON-SITE RECYCLING PROCESSES METHOD 6                    
281. ON-SITE RECYCLING PROCESSES METHOD 7               \t \t
Name: 0, Length: 281, dtype: object


In [132]:
df.head(10)


Unnamed: 0,1. FORM TYPE,2. REPORTING YEAR,3. TRADE SECRET INDICATOR,4. SANITIZED INDICATOR,5. TITLE OF CERTIFYING OFFICIAL,6. NAME OF CERTIFYING OFFICIAL,7. CERTIFYING OFFICIAL S SIGNATURE INDICATOR,8. DATE SIGNED,9. TRIFD,10. FACILITY NAME,...,272. ON-SITE ENERGY RECOVERY METHOD 2,273. ON-SITE ENERGY RECOVERY METHOD 3,274. ON-SITE ENERGY RECOVERY METHOD 4,275. ON-SITE RECYCLING PROCESSES METHOD 1,276. ON-SITE RECYCLING PROCESSES METHOD 2,277. ON-SITE RECYCLING PROCESSES METHOD 3,278. ON-SITE RECYCLING PROCESSES METHOD 4,279. ON-SITE RECYCLING PROCESSES METHOD 5,280. ON-SITE RECYCLING PROCESSES METHOD 6,281. ON-SITE RECYCLING PROCESSES METHOD 7
0,R,2022,NO,NO,VP-MANUFACTURING,JOHN NUNDAHL,ELECTRONIC,2024-04-25,54307FRTHW1919S,GEORGIA-PACIFIC BROADWAY LLC,...,,,,,,,,,,\t \t
1,R,2022,NO,NO,EHS MANAGER,NEEL PATEL,ELECTRONIC,2024-05-09,08861NGLRT1200A,ENGLERT INC,...,,,,,,,,,,\t \t
2,R,2022,NO,NO,PLANT MANAGER,TIM MENKE,ELECTRONIC,2024-06-13,46135LNSTRPUTNA,BUZZI UNICEM USA-GREENCASTLE PLANT,...,,,,,,,,,,\t \t
3,R,2022,NO,NO,CHIEF ENGINEER,STEPHEN STAUDINGER,ELECTRONIC,2024-06-26,53207MLWKF1532E,MILWAUKEE FORGE LLC,...,,,,,,,,,,\t \t
4,R,2022,YES,YES,PLANT MANAGER,RENE NERON,ORIGINAL,2024-04-25,36505MTCHMHWY43,ARKEMA INC,...,,,,H20,,,,,,\t \t
5,R,2022,NO,NO,EHS ENGINEER,MARK MEURETTE,ELECTRONIC,2024-07-15,54401MXXXX144RO,3M CO - WAUSAU DOWNTOWN,...,,,,,,,,,,\t \t
6,R,2022,NO,NO,EHS DIRECTOR,JUSTIN TETLOW,ELECTRONIC,2024-07-22,20794WMTBR2112M,WM. T. BURNETT & CO.,...,,,,,,,,,,\t \t
7,R,2022,NO,NO,HSE SPECIALIST,YANETH HUERTA,ELECTRONIC,2024-08-01,77015MRFRG1377I,AFGLOBAL CORP,...,,,,,,,,,,\t \t
8,R,2022,NO,NO,VICE PRESIDENT OF MANUFACTURING,RODNEY DILLON,ELECTRONIC,2024-09-03,78410KCHRFSUNTI,FLINT HILLS RESOURCES CORPUS CHRISTI LLC - WES...,...,U03,,,,,,,,,\t \t
9,R,2022,NO,NO,CORPORATE ENGINEERING & EH&S MANAGER,PAUL DONNDELINGER,ELECTRONIC,2023-03-21,02860CLYNC50EST,COOLEY INC,...,,,,,,,,,,\t \t


In [133]:
# 2. Verify it’s really a DataFrame and see first few column names
print("Rows:", len(df))
print("Columns:", len(df.columns))
print(df.columns[:282].tolist())  # sample 20 columns

Rows: 80003
Columns: 281
['1. FORM TYPE', '2. REPORTING YEAR', '3. TRADE SECRET INDICATOR', '4. SANITIZED INDICATOR', '5. TITLE OF CERTIFYING OFFICIAL', '6. NAME OF CERTIFYING OFFICIAL', '7. CERTIFYING OFFICIAL S SIGNATURE INDICATOR', '8. DATE SIGNED', '9. TRIFD', '10. FACILITY NAME', '11. FACILITY STREET', '12. FACILITY CITY', '13. FACILITY COUNTY', '14. FACILITY STATE', '15. FACILITY ZIP CODE', '16. BIA CODE', '17. TRIBE NAME', '18. MAILING NAME', '19. MAILING STREET', '20. MAILING CITY', '21. MAILING STATE', '22. MAILING PROVINCE', '23. MAILING ZIP CODE', '24. ENTIRE FACILITY IND', '25. PARTIAL FACILITY IND', '26. FEDERAL FACILITY IND', '27. GOCO FACILITY IND', '28. ASSIGNED FED FACILITY FLAG', '29. ASSIGNED PARTIAL FACILITY FLAG', '30. PUBLIC CONTACT NAME', '31. PUBLIC CONTACT PHONE', '32. PUBLIC CONTACT PHONE EXT', '33. PUBLIC CONTACT EMAIL', '34. PRIMARY SIC CODE', '35. SIC CODE 2', '36. SIC CODE 3', '37. SIC CODE 4', '38. SIC CODE 5', '39. SIC CODE 6', '40. NAICS ORIGIN', '41. P

In [134]:
df["naics2_sector_cd"] = df["41. PRIMARY NAICS CODE"].astype(str).str.extract(r"(\d+)", expand=False).str[:2]
df["state_cd"]  = df["14. FACILITY STATE"].astype(str).str.zfill(2)   # if present
df["cnty_nm"] = df["13. FACILITY COUNTY"].astype(str).str.zfill(3)  # if present

In [135]:
# now df.columns is a valid Index object
rel_cols = [c for c in df.columns if any(k in c.lower()
                for k in ["release", "transfer", "to air", "to water", "to land", "off-site"])]


print("Found", len(rel_cols), "possible release/transfer columns:")
for c in rel_cols:
    print(" •", c)


Found 85 possible release/transfer columns:
 • 110. FUGITIVE AIR EMISSIONS - TOTAL RELEASE POUNDS
 • 111. FUGITIVE AIR EMISSIONS - TOTAL RELEASE RANGE CODE
 • 114. STACK AIR EMISSIONS - RELEASE POUNDS
 • 115. STACK AIR EMISSIONS - RELEASE RANGE CODE
 • 120. DISCHARGES TO STREAM A - RELEASE POUNDS
 • 121. DISCHARGES TO STREAM A - RELEASE RANGE CODE
 • 126. DISCHARGES TO STREAM B - RELEASE POUNDS
 • 127. DISCHARGES TO STREAM B - RELEASE RANGE CODE
 • 132. DISCHARGES TO STREAM C - RELEASE POUNDS
 • 133. DISCHARGES TO STREAM C - RELEASE RANGE CODE
 • 138. DISCHARGES TO STREAM D - RELEASE POUNDS
 • 139. DISCHARGES TO STREAM D - RELEASE RANGE CODE
 • 144. DISCHARGES TO STREAM E - RELEASE POUNDS
 • 145. DISCHARGES TO STREAM E - RELEASE RANGE CODE
 • 150. DISCHARGES TO STREAM F - RELEASE POUNDS
 • 151. DISCHARGES TO STREAM F - RELEASE RANGE CODE
 • 156. DISCHARGES TO STREAM G - RELEASE POUNDS
 • 157. DISCHARGES TO STREAM G - RELEASE RANGE CODE
 • 162. DISCHARGES TO STREAM H - RELEASE POUNDS
 •

In [92]:
#TOTAL TRANSFERRED OFF SITE FOR DISPOSAL
#TOTAL ON-SITE RELEASES

In [136]:
# Prefer EPA-provided totals:
col_on  = next((c for c in df.columns if "TOTAL ON-SITE RELEASES" in c.upper()), None)
col_off = next((c for c in df.columns if "TOTAL TRANSFERRED OFF SITE FOR DISPOSAL" in c.upper()), None)

if col_on and col_off:
    df["tri_ttl_rls_lbs_amt"] = (
        pd.to_numeric(df[col_on], errors="coerce").fillna(0) +
        pd.to_numeric(df[col_off], errors="coerce").fillna(0)
    )
else:
    # fallback: sum components carefully (excluding recycling/energy/treatment/POTW)
    pass  # (use the component-summing snippet we discussed earlier)


In [94]:
df

Unnamed: 0,1. FORM TYPE,2. REPORTING YEAR,3. TRADE SECRET INDICATOR,4. SANITIZED INDICATOR,5. TITLE OF CERTIFYING OFFICIAL,6. NAME OF CERTIFYING OFFICIAL,7. CERTIFYING OFFICIAL S SIGNATURE INDICATOR,8. DATE SIGNED,9. TRIFD,10. FACILITY NAME,...,276. ON-SITE RECYCLING PROCESSES METHOD 2,277. ON-SITE RECYCLING PROCESSES METHOD 3,278. ON-SITE RECYCLING PROCESSES METHOD 4,279. ON-SITE RECYCLING PROCESSES METHOD 5,280. ON-SITE RECYCLING PROCESSES METHOD 6,281. ON-SITE RECYCLING PROCESSES METHOD 7,naics2_sector_cd,state_cd,cnty_nm,tri_ttl_rls_lbs_amt
0,R,2022,NO,NO,VP-MANUFACTURING,JOHN NUNDAHL,ELECTRONIC,2024-04-25,54307FRTHW1919S,GEORGIA-PACIFIC BROADWAY LLC,...,,,,,,\t \t,32,WI,BROWN,27.890
1,R,2022,NO,NO,EHS MANAGER,NEEL PATEL,ELECTRONIC,2024-05-09,08861NGLRT1200A,ENGLERT INC,...,,,,,,\t \t,33,NJ,MIDDLESEX,58.090
2,R,2022,NO,NO,PLANT MANAGER,TIM MENKE,ELECTRONIC,2024-06-13,46135LNSTRPUTNA,BUZZI UNICEM USA-GREENCASTLE PLANT,...,,,,,,\t \t,32,IN,PUTNAM,33.000
3,R,2022,NO,NO,CHIEF ENGINEER,STEPHEN STAUDINGER,ELECTRONIC,2024-06-26,53207MLWKF1532E,MILWAUKEE FORGE LLC,...,,,,,,\t \t,33,WI,MILWAUKEE,1739.460
4,R,2022,YES,YES,PLANT MANAGER,RENE NERON,ORIGINAL,2024-04-25,36505MTCHMHWY43,ARKEMA INC,...,,,,,,\t \t,32,AL,MOBILE,260.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79998,R,2022,NO,NO,SENIOR PLANT MANAGER,IVAN BIRRELL,ELECTRONIC,2023-06-29,42029WSTLK2468I,WESTLAKE VINYLS INC,...,,,,,,\t \t,32,KY,MARSHALL,6045.807
79999,R,2022,NO,NO,DIRECTOR EHS PROGRAMS,JOHN FISCHER,ELECTRONIC,2023-06-29,47670PSNRGHWY64,GIBSON GENERATING STATION,...,,,,,,\t \t,22,IN,GIBSON,170782.000
80000,R,2022,NO,NO,GENERAL MANAGER,JOHN BARTA,ELECTRONIC,2023-06-29,48211SLCTY1923F,EQ DETROIT INC,...,,,,,,\t \t,56,MI,WAYNE,235410.200
80001,R,2022,NO,NO,ASSISTANT MANAGER,FREDERICK BRANHAM,ELECTRONIC,2023-06-29,3741WVLKSW81VLK,VOLKSWAGEN GROUP OF AMERICA CHATTANOOGA OPERAT...,...,,,,,,\t \t,33,TN,HAMILTON,1163.100


In [137]:
tri_g = (df.dropna(subset=["state_cd","cnty_nm","naics2_sector_cd"])
           .groupby(["state_cd","cnty_nm","naics2_sector_cd"], as_index=False)["tri_ttl_rls_lbs_amt"]
           .sum())
           

In [138]:
len(tri_g)

6027

In [139]:
mask = tri_g["state_cd"] == "CA"

In [140]:
tri_g

Unnamed: 0,state_cd,cnty_nm,naics2_sector_cd,tri_ttl_rls_lbs_amt
0,AK,ALEUTIANS EAST BOROUGH,31,196277.000000
1,AK,ALEUTIANS WEST CENSUS ARE,31,2816.000000
2,AK,ALEUTIANS WEST CENSUS ARE,42,256.122754
3,AK,ANCHORAGE MUNICIPALITY,32,255.000000
4,AK,ANCHORAGE MUNICIPALITY,42,12038.382830
...,...,...,...,...
6022,WY,UINTA,32,10052.549000
6023,WY,UINTA,33,1934.600000
6024,WY,UINTA,42,0.110000
6025,WY,WASHAKIE,33,245632.930000


In [141]:
tri_g[mask]

Unnamed: 0,state_cd,cnty_nm,naics2_sector_cd,tri_ttl_rls_lbs_amt
375,CA,ALAMEDA,31,1.283200e+04
376,CA,ALAMEDA,32,1.593656e+05
377,CA,ALAMEDA,33,2.155065e+06
378,CA,ALAMEDA,54,1.100000e+00
379,CA,ALAMEDA,56,5.410000e+00
...,...,...,...,...
529,CA,YOLO,32,0.000000e+00
530,CA,YOLO,33,7.692200e+04
531,CA,YOLO,42,1.900000e+01
532,CA,YUBA,21,1.220000e-01


In [142]:
#df["NAICS CODE 2"]
print(tri_g['naics2_sector_cd'].value_counts())

naics2_sector_cd
32    1924
33    1574
31     934
42     445
22     361
92     295
21     243
56     188
54      16
45      12
81      10
11       5
61       5
71       4
23       3
48       3
49       3
51       2
Name: count, dtype: int64


In [143]:
tri_g

Unnamed: 0,state_cd,cnty_nm,naics2_sector_cd,tri_ttl_rls_lbs_amt
0,AK,ALEUTIANS EAST BOROUGH,31,196277.000000
1,AK,ALEUTIANS WEST CENSUS ARE,31,2816.000000
2,AK,ALEUTIANS WEST CENSUS ARE,42,256.122754
3,AK,ANCHORAGE MUNICIPALITY,32,255.000000
4,AK,ANCHORAGE MUNICIPALITY,42,12038.382830
...,...,...,...,...
6022,WY,UINTA,32,10052.549000
6023,WY,UINTA,33,1934.600000
6024,WY,UINTA,42,0.110000
6025,WY,WASHAKIE,33,245632.930000


In [144]:
county_ref = df = pd.read_csv("/Users/michaelwalker/RDM_Datalab/rdm-datalab-pipelines/data_raw/external/simplemaps/simplemaps_uscounties_basicv1.91/uscounties.csv",dtype=str)

In [145]:
county_ref.dtypes

county          object
county_ascii    object
county_full     object
county_fips     object
state_id        object
state_name      object
lat             object
lng             object
population      object
dtype: object

In [146]:
# --- County FIPS gap remediation (non-PR/VI counties) -------------------
COUNTY_SUFFIX_TERMS = [
    "county", "parish", "borough", "boro", "municipio", "municipality",
    "census area", "census are", "censu", "census district",
    "city and borough", "city", "island",
]
TERRITORY_SKIP = {"PR", "VI"}

def normalize_county_name(series: pd.Series) -> pd.Series:
    cleaned = (series.fillna("")
                     .str.lower()
                     .str.replace(r"[^a-z0-9\s]", " ", regex=True))
    for term in COUNTY_SUFFIX_TERMS:
        pattern = r"\b" + term.replace(" ", r"\s+") + r"\b"
        cleaned = cleaned.str.replace(pattern, " ", regex=True)
    cleaned = (cleaned
               .str.replace(r"\bst\b", "saint", regex=True)
               .str.replace(r"\band\b", "and", regex=True)
               .str.replace(r"\s+", "", regex=True)
               .str.strip())
    return cleaned

name_cols = ["county", "county_ascii", "county_full"]
county_lookup = (
    county_ref.assign(state_code=county_ref["state_id"].str.upper())
              .assign(**{f"{col}_norm": normalize_county_name(county_ref[col])
                         for col in name_cols})
              .melt(
                  id_vars=["state_code", "county_fips"],
                  value_vars=[f"{col}_norm" for col in name_cols],
                  value_name="county_name_norm",
              )
              .dropna(subset=["county_name_norm"])
              .drop_duplicates(["state_code", "county_name_norm"])
              .assign(county_fips_5=lambda df: df["county_fips"].str.zfill(5))
)

manual_overrides = pd.DataFrame([
    ("CT", "fairfield",   "09001"),
    ("CT", "hartford",    "09003"),
    ("CT", "litchfield",  "09005"),
    ("CT", "middlesex",   "09007"),
    ("CT", "newhaven",    "09009"),
    ("CT", "newlondon",   "09011"),
    ("CT", "tolland",     "09013"),
    ("CT", "windham",     "09015"),
    ("AK", "valdezcordova","02261"),
], columns=["state_code", "county_name_norm", "county_fips_5"])

county_lookup = (
    pd.concat([county_lookup, manual_overrides], ignore_index=True)
      .drop_duplicates(["state_code", "county_name_norm"], keep="last")
)

tri_normed = tri_g.assign(
    state_code=tri_g["state_cd"].str.upper(),
    county_name_norm=normalize_county_name(tri_g["cnty_nm"]),
)
tri_enriched = (tri_normed
    .merge(county_lookup, on=["state_code", "county_name_norm"], how="left")
    .assign(
        state_cnty_fips_cd=lambda df: df["county_fips_5"],
        state_fips_cd=lambda df: df["county_fips_5"].str[:2],
        county_fips_cd=lambda df: df["county_fips_5"].str[2:],
    )
)

resolvable_mask = ~tri_enriched["state_code"].isin(TERRITORY_SKIP)
resolved = tri_enriched["state_cnty_fips_cd"].notna() & resolvable_mask
print(f"Resolved {resolved.sum():,} of {resolvable_mask.sum():,} resolvable facilities "
      f"({(resolved.sum() / resolvable_mask.sum()):.2%}).")

remaining = tri_enriched.loc[
    resolvable_mask & tri_enriched["state_cnty_fips_cd"].isna(),
    ["state_cd", "cnty_nm"]
].drop_duplicates()
if not remaining.empty:
    print("Remaining non-PR/VI mismatches (check GU/MP/AS coverage in Simplemaps):")
    display(remaining.head(10))

    
assert tri_enriched["state_cnty_fips_cd"].dropna().str.len().eq(5).all()
assert tri_enriched["county_fips_cd"].dropna().str.len().eq(3).all()


Resolved 5,935 of 5,942 resolvable facilities (99.88%).
Remaining non-PR/VI mismatches (check GU/MP/AS coverage in Simplemaps):


Unnamed: 0,state_cd,cnty_nm
324,AS,EASTERN
1091,GU,GUAM
2871,MP,SAIPAN


In [151]:
tri_final = tri_enriched[["state_cd", "cnty_nm", "state_cnty_fips_cd","naics2_sector_cd", "tri_ttl_rls_lbs_amt"]]
tri_final['tri_ttl_rls_lbs_amt'] = pd.to_numeric(tri_final['tri_ttl_rls_lbs_amt']).round(2)
tri_final

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tri_final['tri_ttl_rls_lbs_amt'] = pd.to_numeric(tri_final['tri_ttl_rls_lbs_amt']).round(2)


Unnamed: 0,state_cd,cnty_nm,state_cnty_fips_cd,naics2_sector_cd,tri_ttl_rls_lbs_amt
0,AK,ALEUTIANS EAST BOROUGH,02013,31,196277.00
1,AK,ALEUTIANS WEST CENSUS ARE,02016,31,2816.00
2,AK,ALEUTIANS WEST CENSUS ARE,02016,42,256.12
3,AK,ANCHORAGE MUNICIPALITY,02020,32,255.00
4,AK,ANCHORAGE MUNICIPALITY,02020,42,12038.38
...,...,...,...,...,...
6022,WY,UINTA,56041,32,10052.55
6023,WY,UINTA,56041,33,1934.60
6024,WY,UINTA,56041,42,0.11
6025,WY,WASHAKIE,56043,33,245632.93


In [154]:
tri_final.to_csv("/Users/michaelwalker/RDM_Datalab/rdm-datalab-pipelines/data_clean/tri/tri_epa.csv", index=False)