In [1]:
import geopandas as gp
import pandas as pd

In [2]:
# Load the county boundary table from the local shapefile attribute file.
county_boundaries_path = "./cb_2022_us_county_20m.dbf"
county_lines = gp.read_file(county_boundaries_path)

In [3]:
# Load the wheat yield table; its state_name / county_name columns are used
# below to decide which counties to keep.
yield_csv_path = "../yield_data/wheat_yield_data_final.csv"
c = pd.read_csv(yield_csv_path)

In [4]:
# Build the distinct upper-cased "STATE-COUNTY" keys present in the yield data.
state_county_labels = c["state_name"] + "-" + c["county_name"]
selected_counties = state_county_labels.str.upper().unique()

In [5]:
# Upper-case the county and state names so they can be matched against the
# upper-cased "STATE-COUNTY" keys.
for column in ("NAME", "STATE_NAME"):
    county_lines[column] = county_lines[column].str.upper()

In [6]:
# Boolean mask: True for boundary rows whose "STATE-COUNTY" key is one of the
# selected counties.
boundary_keys = county_lines["STATE_NAME"] + "-" + county_lines["NAME"]
mask = boundary_keys.isin(selected_counties)

In [7]:
# Keep only the boundary rows flagged by the mask.
selected_county_lines = county_lines.loc[mask]

In [8]:
# Reprojected copy of the selected counties; this is the geometry used to clip
# the cropland rasters further down.
# NOTE(review): EPSG 5070 is presumably the CRS of the CDL rasters - confirm.
target_epsg = "5070"
selected_county_lines_5 = selected_county_lines.to_crs(epsg=target_epsg)

In [9]:
# Save the selected counties as GeoJSON. Note that this writes
# selected_county_lines, not the reprojected selected_county_lines_5.
counties_geojson_path = "./counties.geojson"
selected_county_lines.to_file(counties_geojson_path, driver="GeoJSON")

In [125]:
import os
import shutil
import urllib
import urllib.request
import zipfile

import numpy as np
import rasterio
import rasterio.mask
from shapely.geometry import MultiPoint, Point

In [17]:
# For each CDL year: download the national 30 m cropland raster, clip it to the
# selected counties and write a 1-bit mask that is set where the pixel class
# equals WHEAT_CLASS.
# NOTE(review): 24 is presumably the CDL code for winter wheat - confirm against
# the CDL legend.
WHEAT_CLASS = 24
for year in [2008, 2022]:
  archive_stem = str(year) + "_30m_cdls"
  url = "https://www.nass.usda.gov/Research_and_Science/Cropland/Release/datasets/" + archive_stem + ".zip"
  # The destination must be defined before the download call that uses it.
  dest = "temp.zip"
  urllib.request.urlretrieve(url, dest)
  # Extract the raster with the standard library instead of shelling out to
  # `unzip`; a missing archive member now raises instead of silently leaving an
  # empty ./temp.tif behind.
  with zipfile.ZipFile(dest) as archive:
    with archive.open(archive_stem + ".tif") as member:
      with open("./temp.tif", "wb") as extracted:
        shutil.copyfileobj(member, extracted)
  with rasterio.open("./temp.tif") as tf:
    # Clip the raster to the county geometries (crop=True shrinks the extent).
    outimg, transform = rasterio.mask.mask(
        tf, selected_county_lines_5["geometry"], crop=True)
    # Start from the source profile and adjust it to the cropped extent.
    cp = tf.profile.copy()
    cp.update({
        "height": outimg.shape[1],
        "width": outimg.shape[2],
        "transform": transform,
    })
    with rasterio.open("./" + str(year) + "_wheat_mask.tif", 'w', nbits=1, **cp) as dst:
        dst.write(outimg == WHEAT_CLASS)


In [45]:
# Assumption: a field that grew wheat in both 2008 and 2022 grew wheat for the
# entire period from 2008 to 2022, so the final mask is the pixel-wise AND of
# the two yearly masks.
with rasterio.open("./2008_wheat_mask.tif", nbits=1) as mask_08, \
     rasterio.open("./2022_wheat_mask.tif", nbits=1) as mask_22:
  # Take the output profile from the 2022 mask itself rather than relying on a
  # variable left over from the cell that created the yearly masks, so this
  # cell also works when the yearly masks already exist on disk.
  final_profile = dict(mask_22.profile)
  final_profile.update(nbits=1)
  with rasterio.open("./final_wheat_mask.tif", 'w', **final_profile) as dst:
    dst.write(mask_08.read() & mask_22.read())


In [45]:
# Open the final wheat mask (the handle stays open for the sampling cells
# below) and reload the saved counties in the CRS of that mask.
final_mask_path = "./final_wheat_mask.tif"
wheat_mask = rasterio.open(final_mask_path, nbits=1)
saved_counties = gp.read_file("./counties.geojson", driver="GeoJSON")
selected_county_lines = saved_counties.to_crs(wheat_mask.crs)


In [91]:
# Seed the generator so the random selection of field coordinates is
# reproducible across kernel restarts.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

In [156]:
def getCoordinatesInShape(tf, shape, n=20):
    """Pick up to ``n`` locations inside ``shape`` where the raster equals 1.

    The raster ``tf`` is clipped to ``shape`` with ``rasterio.mask.mask``; every
    pixel of the first band whose value is 1 is a candidate. When there are
    more than ``n`` candidates, ``n`` of them are drawn without replacement
    using the module-level ``rng``.

    Args:
        tf: Open raster dataset passed to ``rasterio.mask.mask``.
        shape: Geometry used to clip the raster.
        n: Maximum number of points to return.

    Returns:
        A ``MultiPoint`` of the coordinates that ``rasterio.transform.xy``
        yields for the chosen pixels (empty when no pixel equals 1).
    """
    clipped, clipped_transform = rasterio.mask.mask(tf, [shape], crop=True)
    # argwhere yields (row, col) index pairs for the first band.
    candidates = np.argwhere(clipped[0] == 1)
    if len(candidates) > n:
        candidates = rng.choice(candidates, n, replace=False)
    locations = []
    for row, col in candidates:
        locations.append(rasterio.transform.xy(clipped_transform, row, col))
    return MultiPoint(locations)

In [157]:
def _sample_points_for_county(county_row):
    """Sample wheat-mask coordinates inside one county's geometry."""
    return getCoordinatesInShape(wheat_mask, county_row['geometry'])


# One MultiPoint of sampled coordinates per county row.
selected_county_lines["points"] = selected_county_lines.apply(_sample_points_for_county, axis=1)

In [168]:
# Make the sampled points the active geometry and drop the county polygons.
points_frame = selected_county_lines.set_geometry("points")
counties_rand_fields = points_frame.drop(columns='geometry')
# The points were derived from the wheat mask, so they share the counties' CRS.
counties_rand_fields.crs = selected_county_lines.crs
counties_rand_fields = counties_rand_fields.to_crs("4326")
# Write only the counties for which at least one point was found.
has_points = ~counties_rand_fields.is_empty
counties_rand_fields[has_points].to_file("./counties_rand_fields.geojson", driver="GeoJSON")