In [1]:
# imports
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
import os
from sklearn.linear_model import LinearRegression

def process_usda_data(csv_path, county_shp, template_tif, output_dir, commodity_type,
                      start_year=2000, end_year=2020, area_crs="EPSG:5070"):
    """Regress county-level values over time and write one density raster per year.

    For every county (keyed by 5-character GEOID) the yearly ``Value`` totals
    from the CSV are fitted with a linear regression on ``Year``; the fitted
    line is evaluated for each year in ``start_year..end_year`` (inclusive),
    divided by the county area in km², clipped at zero and burned onto the
    grid of the template raster.  Each year is written to
    ``<output_dir>/<commodity_type>_<year>.tif`` as a single-band float32
    raster using the template's metadata.

    Parameters
    ----------
    csv_path : str
        CSV with at least the columns ``Year``, ``Value``, ``State ANSI`` and
        ``County ANSI``; an optional ``Geo Level`` column is used to keep only
        rows whose level is ``COUNTY``.
    county_shp : str
        County polygon file readable by geopandas with ``STATEFP`` and
        ``COUNTYFP`` attributes.  A missing CRS is assumed to be EPSG:4326.
    template_tif : str
        Raster whose CRS, transform, size and metadata define the output grid.
    output_dir : str
        Directory for the output rasters; created if it does not exist.
    commodity_type : str
        Prefix used in the output file names.
    start_year, end_year : int, optional
        Inclusive range of years to predict and rasterize (default 2000-2020).
    area_crs : optional
        CRS used only to measure county areas.  It must be an equal-area
        projection with metre units; the default is EPSG:5070.

    Raises
    ------
    ValueError
        If ``end_year`` is smaller than ``start_year``.
    """
    if end_year < start_year:
        raise ValueError("end_year must be greater than or equal to start_year")

    def _fips(codes, width):
        # Normalise an ANSI/FIPS code column to a zero-padded string.  The
        # ".0" suffix is only removed at the end of the value, so a code that
        # merely contains ".0" somewhere else is left untouched.
        return (
            codes.fillna("")
            .astype(str)
            .str.strip()
            .str.replace(r"\.0$", "", regex=True)
            .str.zfill(width)
        )

    # load raster template: its grid and metadata are reused for every output
    with rasterio.open(template_tif) as src:
        meta = src.meta.copy()
        meta.update(dtype="float32", count=1)
        transform = src.transform
        crs = src.crs
        out_shape = (src.height, src.width)

    # read county polygons and bring them onto the template's CRS
    gdf = gpd.read_file(county_shp)
    if gdf.crs is None:
        gdf = gdf.set_crs(epsg=4326)
    gdf = gdf.to_crs(crs)
    gdf["GEOID"] = (gdf["STATEFP"] + gdf["COUNTYFP"]).astype(str).str.zfill(5)
    # Areas must come from an equal-area projection: Web Mercator (EPSG:3857)
    # inflates areas away from the equator and would understate densities.
    gdf["area_km2"] = gdf.geometry.to_crs(area_crs).area / 1e6

    # read the USDA table; strip header whitespace before touching any column
    df = pd.read_csv(csv_path, dtype=str)
    df.columns = df.columns.str.strip()

    # remove invalid / national rows
    if "Geo Level" in df.columns:
        df = df[df["Geo Level"].str.upper() == "COUNTY"]

    # NOTE(review): these tokens look like USDA suppression/placeholder codes.
    # Mapping them to 0 (kept from the original behaviour) feeds them into the
    # regression as real zeros - confirm this is intended.
    placeholder_codes = {"(D)": "0", "(Z)": "0", "(NA)": "0", "(X)": "0", "-": "0"}
    value_text = (
        df["Value"]
        .astype(str)
        .str.replace(",", "", regex=False)
        .str.replace(" ", "", regex=False)
        .replace(placeholder_codes)
    )

    # Coerce instead of astype(float): unexpected tokens become NaN and are
    # dropped rather than aborting the whole run.  Year is made numeric
    # explicitly so the regression never depends on implicit string casting.
    df = df.assign(
        Value=pd.to_numeric(value_text, errors="coerce"),
        Year=pd.to_numeric(df["Year"].astype(str).str.strip(), errors="coerce"),
        GEOID=_fips(df["State ANSI"], 2) + _fips(df["County ANSI"], 3),
    ).dropna(subset=["Year", "Value"])

    # aggregate multiple entries per county/year, then pivot to GEOID x Year
    pivot = (
        df.assign(Year=df["Year"].astype(int))
        .groupby(["GEOID", "Year"])["Value"]
        .sum()
        .unstack("Year")
    )

    # years to predict
    pred_years = np.arange(start_year, end_year + 1)
    pred_x = pred_years.reshape(-1, 1).astype(float)

    # regression per county (rows default to 0.0 when there is nothing to fit)
    fitted = np.zeros((len(pivot), len(pred_years)), dtype=float)
    for i, (_, row) in enumerate(pivot.iterrows()):
        observed = row.dropna()
        if len(observed) >= 2:
            x = observed.index.to_numpy(dtype=float).reshape(-1, 1)
            model = LinearRegression().fit(x, observed.to_numpy(dtype=float))
            fitted[i] = model.predict(pred_x)
        elif len(observed) == 1:
            # a single observation cannot define a trend: hold it constant
            fitted[i] = float(observed.iloc[0])
    pred_table = pd.DataFrame(fitted, index=pivot.index, columns=pred_years)

    # Align predictions with the county polygons once (counties without data
    # become NaN) instead of merging inside the per-year loop.
    county_vals = pred_table.reindex(gdf["GEOID"].to_numpy())
    area_km2 = gdf["area_km2"].to_numpy(dtype=float)
    geoms = list(gdf.geometry)
    usable = (gdf.geometry.notna() & ~gdf.geometry.is_empty).to_numpy()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # rasterize each year
    for year in pred_years:
        # commodity density per km2 for every county polygon
        with np.errstate(divide="ignore", invalid="ignore"):
            density = county_vals[year].to_numpy(dtype=float) / area_km2

        # missing counties / zero-area polygons -> 0, and no negatives from
        # the extrapolated regression line
        density = np.where(np.isfinite(density), density, 0.0)
        density = np.clip(density, 0.0, None)

        # prepare shapes for rasterization, skipping null or empty geometries
        shapes = [
            (geom, float(val))
            for geom, val, ok in zip(geoms, density, usable)
            if ok
        ]

        if shapes:
            raster = rasterize(
                shapes=shapes,
                out_shape=out_shape,
                transform=transform,
                fill=0,
                # make sure raster values are floats
                dtype="float32",
            )
        else:
            # rasterize() rejects an empty shape list; emit an all-zero grid
            raster = np.zeros(out_shape, dtype="float32")

        # output as <commodity_type>_<year>.tif inside output_dir
        out_path = os.path.join(output_dir, f"{commodity_type}_{year}.tif")
        with rasterio.open(out_path, "w", **meta) as dst:
            dst.write(raster, 1)

        print(f"Saved {out_path}")


In [2]:
# USDA poultry: regress the county values and write one raster per year
commodity_type = "poultry"
output_directory = "usda_poultry_rasters_regressed"

# input table, county polygons and raster template
csv_data_path = "input_files/all_poultry_data.csv"
us_shp_files_file = "us_shp_files/tl_2024_us_county.shp"
template_tif_file = "USA_1km_raster.tif"

# make sure the output folder exists before processing
os.makedirs(output_directory, exist_ok=True)

process_usda_data(
    csv_path=csv_data_path,
    county_shp=us_shp_files_file,
    template_tif=template_tif_file,
    output_dir=output_directory,
    commodity_type=commodity_type,
)

Saved usda_poultry_rasters_regressed\poultry_2000.tif
Saved usda_poultry_rasters_regressed\poultry_2001.tif
Saved usda_poultry_rasters_regressed\poultry_2002.tif
Saved usda_poultry_rasters_regressed\poultry_2003.tif
Saved usda_poultry_rasters_regressed\poultry_2004.tif
Saved usda_poultry_rasters_regressed\poultry_2005.tif
Saved usda_poultry_rasters_regressed\poultry_2006.tif
Saved usda_poultry_rasters_regressed\poultry_2007.tif
Saved usda_poultry_rasters_regressed\poultry_2008.tif
Saved usda_poultry_rasters_regressed\poultry_2009.tif
Saved usda_poultry_rasters_regressed\poultry_2010.tif
Saved usda_poultry_rasters_regressed\poultry_2011.tif
Saved usda_poultry_rasters_regressed\poultry_2012.tif
Saved usda_poultry_rasters_regressed\poultry_2013.tif
Saved usda_poultry_rasters_regressed\poultry_2014.tif
Saved usda_poultry_rasters_regressed\poultry_2015.tif
Saved usda_poultry_rasters_regressed\poultry_2016.tif
Saved usda_poultry_rasters_regressed\poultry_2017.tif
Saved usda_poultry_rasters_r

In [6]:
# USDA livestock: regress the county values and write one raster per year
commodity_type = "livestock"
output_directory = "usda_livestock_rasters_regressed"

# input table, county polygons and raster template
csv_data_path = "input_files/livestock_all.csv"
us_shp_files_file = "us_shp_files/tl_2024_us_county.shp"
template_tif_file = "USA_1km_raster.tif"

# make sure the output folder exists before processing
os.makedirs(output_directory, exist_ok=True)

process_usda_data(
    csv_path=csv_data_path,
    county_shp=us_shp_files_file,
    template_tif=template_tif_file,
    output_dir=output_directory,
    commodity_type=commodity_type,
)

Saved usda_livestock_rasters_regressed\livestock_2000.tif
Saved usda_livestock_rasters_regressed\livestock_2001.tif
Saved usda_livestock_rasters_regressed\livestock_2002.tif
Saved usda_livestock_rasters_regressed\livestock_2003.tif
Saved usda_livestock_rasters_regressed\livestock_2004.tif
Saved usda_livestock_rasters_regressed\livestock_2005.tif
Saved usda_livestock_rasters_regressed\livestock_2006.tif
Saved usda_livestock_rasters_regressed\livestock_2007.tif
Saved usda_livestock_rasters_regressed\livestock_2008.tif
Saved usda_livestock_rasters_regressed\livestock_2009.tif
Saved usda_livestock_rasters_regressed\livestock_2010.tif
Saved usda_livestock_rasters_regressed\livestock_2011.tif
Saved usda_livestock_rasters_regressed\livestock_2012.tif
Saved usda_livestock_rasters_regressed\livestock_2013.tif
Saved usda_livestock_rasters_regressed\livestock_2014.tif
Saved usda_livestock_rasters_regressed\livestock_2015.tif
Saved usda_livestock_rasters_regressed\livestock_2016.tif
Saved usda_liv

In [3]:
# USDA cattle: regress the county values and write one raster per year
commodity_type = "cattle_usda"
output_directory = "usda_cattle_usda_rasters_regressed"

# input table, county polygons and raster template
csv_data_path = "input_files/cattle_usda.csv"
us_shp_files_file = "us_shp_files/tl_2024_us_county.shp"
template_tif_file = "USA_1km_raster.tif"

# make sure the output folder exists before processing
os.makedirs(output_directory, exist_ok=True)

process_usda_data(
    csv_path=csv_data_path,
    county_shp=us_shp_files_file,
    template_tif=template_tif_file,
    output_dir=output_directory,
    commodity_type=commodity_type,
)

Saved usda_cattle_usda_rasters_regressed\cattle_usda_2000.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2001.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2002.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2003.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2004.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2005.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2006.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2007.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2008.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2009.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2010.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2011.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2012.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2013.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2014.tif
Saved usda_cattle_usda_rasters_regressed\cattle_usda_2015.tif
Saved us