In [21]:
# imports
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
import os
from sklearn.linear_model import LinearRegression

# Run configuration: which livestock series is processed, where its inputs
# live, and where the per-year rasters are written.
livestock_type = "dairy"  # prefix of every output file name

# inputs
csv_path = "dairy_data.csv"  # tabular values keyed by state ANSI code and year
state_shp = "us_shp_files/tl_2020_us_state.shp"  # state boundary polygons
template_tif = "USA_1km_raster.tif"  # raster whose grid the outputs copy

# outputs (the directory is created up front so the write loop never fails on it)
output_dir = "usda_dairy_rasters_regressed"
os.makedirs(output_dir, exist_ok=True)

# Load the raster template (USA_1km_raster.tif).
# Only its grid definition is kept: CRS, affine transform and pixel
# dimensions. The pixel data itself is never read.
with rasterio.open(template_tif) as src:
    # Output rasters reuse the template metadata, overridden to a single
    # float32 band.
    meta = {**src.meta, "dtype": "float32", "count": 1}
    crs = src.crs
    transform = src.transform
    height, width = src.height, src.width
    # rasterize() expects the array shape as (rows, cols)
    out_shape = (height, width)

# Read the state boundary shapefile and prepare it for rasterization.
gdf = gpd.read_file(state_shp)
if gdf.crs is None:
    # Fallback for a shapefile that carries no CRS information.
    # NOTE(review): assumes the coordinates are lon/lat WGS84 - confirm.
    gdf.set_crs(epsg=4326, inplace=True)

# Reproject onto the template raster's CRS so polygons line up with its grid.
gdf = gdf.to_crs(crs)

# Two-character, zero-padded state FIPS code; used later as the join key.
gdf["STATEFP"] = gdf["STATEFP"].astype(str).str.zfill(2)

# Polygon area in km^2, measured in an equal-area projection
# (EPSG:5070, NAD83 / Conus Albers). Web Mercator (EPSG:3857) must not be
# used here: it is not area-preserving and inflates areas increasingly with
# latitude, which would bias every density derived from this column.
gdf["area_km2"] = gdf.geometry.to_crs("EPSG:5070").area / 1e6

# Clean the tabular data and reshape it into a GEOID x Year matrix.
# Everything is read as text so ANSI codes keep their leading zeros; the
# numeric columns are converted explicitly below.
df = pd.read_csv(csv_path, dtype=str)
df.columns = df.columns.str.strip()

# Build GEOID: the two-digit state code. A trailing float suffix ("6.0") is
# removed with an anchored pattern so only the end of the code is touched.
df["State ANSI"] = (
    df["State ANSI"]
    .fillna("")
    .astype(str)
    .str.strip()
    .str.replace(r"\.0+$", "", regex=True)
    .str.zfill(2)
)
df["GEOID"] = df["State ANSI"]

# Make sure both datasets use the same zero-padded string format.
# State GEOIDs are two characters wide (five would be the county width).
gdf["GEOID"] = gdf["GEOID"].astype(str).str.zfill(2)

# Remove invalid / national rows. The copy keeps the assignments below from
# writing into a slice of the unfiltered frame.
if "Geo Level" in df.columns:
    df = df[df["Geo Level"].str.upper().str.strip() == "STATE"].copy()

# Clean the Value column. Thousands separators are stripped first, otherwise
# a value such as "1,234" would be coerced to NaN and silently dropped.
# NOTE(review): presumably a USDA NASS Quick Stats export, where Value is
# comma-formatted and suppressed cells hold markers like "(D)" - confirm.
df["Value"] = pd.to_numeric(
    df["Value"].str.replace(",", "", regex=False).str.strip(),
    errors="coerce",
)

# Year was read as text; convert it so the pivot columns are real integers
# that can be used directly as regression inputs.
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")

# Drop rows whose year or value could not be parsed.
df = df.dropna(subset=["Year", "Value"]).astype({"Year": int})

# keep relevant columns
df = df[["Year", "GEOID", "Value"]]

# aggregate in case of multiple entries for a state in the same year
df = df.groupby(["GEOID", "Year"], as_index=False)["Value"].sum()

# pivot to a GEOID (rows) x Year (columns) matrix; missing cells are NaN
pivot = df.pivot(index="GEOID", columns="Year", values="Value")

# define known years and years to output (2000 through 2020 inclusive)
known_years = pivot.columns.values
pred_years = np.arange(2000, 2021)

# Per-GEOID linear regression of value against year, evaluated on every year
# in pred_years. The result is a float GEOID x pred_years table.
pred_x = pred_years.reshape(-1, 1)  # loop-invariant feature matrix
pred_table = pd.DataFrame(index=pivot.index, columns=pred_years, dtype=float)
for geoid, row in pivot.iterrows():
    y = row.dropna()
    if len(y) >= 2:
        # Convert the year labels to numbers explicitly: they may be strings
        # when the source table was read as text, and the model should not
        # depend on implicit string-to-float coercion.
        x = pd.to_numeric(y.index).to_numpy(dtype=float).reshape(-1, 1)
        model = LinearRegression().fit(x, y.to_numpy(dtype=float))
        preds = model.predict(pred_x)
        pred_table.loc[geoid] = preds
    elif len(y) == 1:
        # A single observation cannot define a trend; hold it constant.
        pred_table.loc[geoid] = float(y.iloc[0])
    else:
        # No observations at all for this GEOID.
        pred_table.loc[geoid] = 0.0

# Burn one raster per predicted year onto the template grid.
for year in pred_years:
    # Predicted values for this year, one row per GEOID; gaps become 0.
    yearly = pred_table[year].astype(float).fillna(0).reset_index()
    merged = gdf.merge(yearly, how="left", left_on="STATEFP", right_on="GEOID")

    # Value per km^2. Negative results are clamped to zero.
    merged["density"] = (
        (merged[year] / merged["area_km2"]).astype(float).clip(lower=0)
    )

    # (geometry, value) pairs for rasterize(); polygons that found no match
    # in the prediction table carry NaN and are burned as 0.
    shapes = zip(merged.geometry, merged["density"].fillna(0))

    # Pixels outside every polygon keep the fill value 0; float32 matches
    # the dtype declared in the output metadata.
    raster = rasterize(
        shapes,
        out_shape=out_shape,
        transform=transform,
        fill=0,
        dtype="float32",
    )

    # Written as <livestock_type>_<year>.tif inside output_dir.
    out_path = os.path.join(output_dir, f"{livestock_type}_{year}.tif")
    with rasterio.open(out_path, "w", **meta) as dst:
        dst.write(raster, 1)

    print(f"Saved {out_path}")


Saved usda_dairy_rasters_regressed\dairy_2000.tif
Saved usda_dairy_rasters_regressed\dairy_2001.tif
Saved usda_dairy_rasters_regressed\dairy_2002.tif
Saved usda_dairy_rasters_regressed\dairy_2003.tif
Saved usda_dairy_rasters_regressed\dairy_2004.tif
Saved usda_dairy_rasters_regressed\dairy_2005.tif
Saved usda_dairy_rasters_regressed\dairy_2006.tif
Saved usda_dairy_rasters_regressed\dairy_2007.tif
Saved usda_dairy_rasters_regressed\dairy_2008.tif
Saved usda_dairy_rasters_regressed\dairy_2009.tif
Saved usda_dairy_rasters_regressed\dairy_2010.tif
Saved usda_dairy_rasters_regressed\dairy_2011.tif
Saved usda_dairy_rasters_regressed\dairy_2012.tif
Saved usda_dairy_rasters_regressed\dairy_2013.tif
Saved usda_dairy_rasters_regressed\dairy_2014.tif
Saved usda_dairy_rasters_regressed\dairy_2015.tif
Saved usda_dairy_rasters_regressed\dairy_2016.tif
Saved usda_dairy_rasters_regressed\dairy_2017.tif
Saved usda_dairy_rasters_regressed\dairy_2018.tif
Saved usda_dairy_rasters_regressed\dairy_2019.tif
