1. [USDA CropScape data](https://nassgeodata.gmu.edu/CropScape/)
2. Select county AOI
3. Download county for 2017, **disable** frequency and mask, set projection to UTM 15
4. Extract and rename to `CDL_2017_<FIPS>.tif`, where `<FIPS>` is the five-digit county FIPS code (`20001` for Allen)

In [1]:
%%capture
%%script false  # disable to run cell

# Install required libraries
!pip install rasterio numpy geopandas rasterstats

In [2]:
# Load required libraries
import csv
import rasterio
from rasterio.features import shapes
from shapely.geometry import shape
import rasterstats
import numpy as np
import geopandas as gpd
from os.path import exists

In [3]:
# Function to mask any raster by any value
def mask_raster(src_path, value_to_keep, dst_path):
    '''Entire crop raster -> Masked raster

    Reads every band of the raster at ``src_path``, masks each cell whose
    value is not in ``value_to_keep`` (a single value or a collection of
    values, as accepted by ``np.isin``), and writes the masked array to
    ``dst_path`` using the source raster's metadata.
    '''
    with rasterio.open(src_path) as source:
        bands = source.read()
        out_meta = source.meta

    # True marks the cells to hide, i.e. everything that is NOT a kept value.
    hidden = ~np.isin(bands, value_to_keep)
    masked_bands = np.ma.masked_array(bands, mask=hidden)

    # NOTE(review): what actually lands on disk for the masked cells is
    # decided by rasterio's write() handling of masked arrays - confirm it
    # matches the nodata value the downstream zonal statistics expect.
    with rasterio.open(dst_path, 'w+', **out_meta) as target:
        target.write(masked_bands)

In [4]:
# Function to convert crop raster to GeoDataFrame
def crop_to_gdf(src_path, fips, crop):
    '''Masked crop raster -> GeoDataFrame

    Polygonizes band 1 of the raster at ``src_path`` and returns one row per
    connected region whose cell value equals ``crop``.

    Parameters:
        src_path: path of the raster to polygonize.
        fips: unused here; kept so existing callers keep working.
        crop: cell value identifying the crop of interest.

    Returns:
        GeoDataFrame in the raster's CRS with a ``geometry`` column and an
        ``area`` column (polygon area in CRS units squared).

    Raises:
        AttributeError: if the raster contains no cell equal to ``crop``.
            AttributeError is kept (rather than a more fitting type) because
            callers already catch it to detect "crop not in this county".
    '''
    with rasterio.open(src_path) as src:
        raster_data = src.read(1)
        # Grab georeferencing while the dataset is still open instead of
        # reaching into a closed dataset afterwards.
        crs = src.crs
        transform = src.transform

    # Only polygonize the crop's own cells; every other region would be
    # thrown away by the value filter below anyway.
    crop_cells = raster_data == crop
    if not crop_cells.any():
        raise AttributeError(f'No cells with crop id {crop} in {src_path}.')

    geometries = [
        shape(geom)
        for geom, val in shapes(raster_data, mask=crop_cells, transform=transform)
        if val == crop  # select only crop polygons
    ]

    gdf = gpd.GeoDataFrame(geometry=geometries, crs=crs)
    gdf['area'] = gdf.geometry.area

    return gdf

In [5]:
def process_crop(src_path, fips, crop, dest, min_area_m2=2023, buffer_m=60.96):
    '''Masked crop raster -> buffered crop polygons written to ``dest``

    Polygonizes the crop via ``crop_to_gdf``, drops small polygons, buffers
    the remaining ones and saves them with ``GeoDataFrame.to_file``.

    Parameters:
        src_path: path of the masked crop raster.
        fips: county FIPS code, passed through to ``crop_to_gdf``.
        crop: cell value identifying the crop of interest.
        dest: output path for the vector file.
        min_area_m2: polygons with an area of at most this many CRS units
            squared are discarded. The default of 2023 is 0.5 acres in m^2.
        buffer_m: buffer distance in CRS units. The default of 60.96 m is
            200 ft (about 0.04 mile).

    Raises:
        AttributeError: if no polygon is larger than ``min_area_m2`` (and
            whatever ``crop_to_gdf`` raises when the crop is absent).
            TODO: pick a better error type; callers currently catch this one.

    Note: the ``area`` column written to ``dest`` is the polygon area from
    before buffering; it is not recomputed here.
    '''
    gdf = crop_to_gdf(src_path, fips, crop)

    # Remove small areas. Take a real copy so the geometry assignment below
    # modifies this frame rather than a view of the unfiltered one.
    gdf = gdf.loc[gdf.geometry.area > min_area_m2].copy()

    if gdf.empty:
        raise AttributeError('No large enough polygons.')

    # Buffer
    gdf['geometry'] = gdf.geometry.buffer(buffer_m)

    gdf.to_file(dest)

To run the full code:
- Set `fipses` and `crops` variables to lists of what you want to calculate stats for
- Set `prefix` variable to the file path that holds your data folders (relative or absolute)
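  - CDL and TOFI files should be in a `/sourcedata` subfolder
  - Intermediate masked rasters and shapefiles are written to a `/data` subfolder; `output.csv` is written to the current working directory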
- Run `main()`. The last code cell will run it and profile runtime

In [6]:
# Counties to process, as county FIPS codes.
# Short list for quick test runs:
# fipses = [20001, 20003, 20007]
# Full run: every odd code from 20001 through 20113.
fipses = range(20001, 20114, 2)

# Crop ids to process; each id is the raster cell value kept when masking the
# CDL raster. Larger selections kept here for reference:
# crops = [1, 5, 6, 23, 24, 26, 225, 226, 236, 237, 238, 240, 241, 254]
# crops = [1, 5, 24, 26]
crops = [1]

# Folder that holds the `sourcedata` and `data` subfolders, written with a
# trailing path separator.
# prefix = '.\\'
prefix = 'D:\\windbreak-croploss\\'


def main():
    '''Compute crop area and nearby tree area per (crop, county) pair.

    For every crop id in ``crops`` and county in ``fipses`` this:
      1. masks the TOFI raster to value 1 and the CDL raster to the crop id
         (skipped when the masked file already exists),
      2. builds the buffered crop polygons with ``process_crop`` (skipped
         when the shapefile already exists),
      3. sums the masked tree raster inside those polygons,
      4. records the crop area and the tree area.

    Inputs are read from ``<prefix>sourcedata``, intermediates are written to
    ``<prefix>data`` and the results go to ``output.csv`` in the current
    working directory. Pairs for which ``process_crop`` raises AttributeError
    (crop absent, or no polygon large enough) are reported and skipped.
    '''
    # Local import: the file's import cell only brings in os.path.exists.
    import os

    source_dir = os.path.join(prefix, 'sourcedata')
    work_dir = os.path.join(prefix, 'data')
    # The masked rasters and shapefiles cannot be written if this is missing.
    os.makedirs(work_dir, exist_ok=True)

    data = []

    for crop in crops:
        for fips in fipses:
            print(fips, crop)  # for debug

            crop_file = os.path.join(source_dir, f'CDL_2017_{fips}.tif')
            tree_file = os.path.join(source_dir, f'TOFI_2017_{fips}.tif')
            crop_mask_file = os.path.join(work_dir, f'masked_{crop}_CDL_2017_{fips}.tif')
            tree_mask_file = os.path.join(work_dir, f'masked_TOFI_2017_{fips}.tif')
            shp_file = os.path.join(work_dir, f'crop_{crop}_{fips}.shp')

            # skip the masking if it's already been done
            if not exists(tree_mask_file):
                mask_raster(tree_file, 1, tree_mask_file)

            if not exists(shp_file):
                if not exists(crop_mask_file):
                    mask_raster(crop_file, crop, crop_mask_file)
                # Only this step signals "nothing to do for this pair" via
                # AttributeError; keeping the try this narrow stops unrelated
                # AttributeErrors elsewhere from being silently skipped.
                try:
                    process_crop(crop_mask_file, fips, crop, shp_file)
                except AttributeError:  # crop type not in county
                    print(f'County {fips} has no crop of id {crop}.')
                    continue

            # Count tree pixels
            # TODO: there must be a faster method than zonal_stats
            # over 80% of total processing time is in here
            # NOTE(review): buffered polygons may overlap, in which case tree
            # pixels in the overlap are counted once per polygon - confirm
            # that this is intended.
            stats = rasterstats.zonal_stats(vectors=shp_file,
                                            raster=tree_mask_file,
                                            stats=['sum'],
                                            nodata=-9999,
                                            all_touched=True)

            # 'sum' is None for a polygon that covers no valid pixels.
            tree_px = sum(x['sum'] or 0 for x in stats)

            # calc crop area
            # crop resolution is 30m, so 'area' field is in m^2
            # tree resolution is 2m, so each tree pixel covers 4 m^2 and we
            # quadruple the pixel count to get area
            gdf = gpd.read_file(shp_file)
            crop_area = gdf['area'].sum()

            # assemble data output
            data.append({'fips': fips,
                         'crop_id': crop,
                         'crop_area_m2': crop_area,
                         'tree_area_m2': tree_px * 4})

    # write data to csv
    header = ['fips', 'crop_id', 'crop_area_m2', 'tree_area_m2']
    with open('output.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(data)

In [7]:
import cProfile

# Run the whole pipeline under the profiler; cProfile prints its per-function
# timing table once main() returns. The statement is passed as a string, so
# main must be defined in the notebook's global namespace.
# cProfile.run also accepts a `sort` argument (e.g. sort='cumulative') if a
# different ordering of the table is wanted.
cProfile.run('main()')

20001 1
[{'sum': 42063.0}, {'sum': 8946.0}, {'sum': 11943.0}, {'sum': 0.0}, {'sum': 43644.0}, {'sum': 13019.0}, {'sum': 5527.0}, {'sum': 174.0}, {'sum': 5042.0}, {'sum': 30641.0}, {'sum': 9023.0}, {'sum': 50029.0}, {'sum': 5488.0}, {'sum': 1212.0}, {'sum': 47024.0}, {'sum': 6346.0}, {'sum': 23830.0}, {'sum': 0.0}, {'sum': 114933.0}, {'sum': 32525.0}, {'sum': 22622.0}, {'sum': 10250.0}, {'sum': 83086.0}, {'sum': 10283.0}, {'sum': 198116.0}, {'sum': 5612.0}, {'sum': 671.0}, {'sum': 67875.0}, {'sum': 95745.0}, {'sum': 122.0}, {'sum': 22410.0}, {'sum': 9149.0}, {'sum': 57370.0}, {'sum': 9284.0}, {'sum': 18648.0}, {'sum': 6987.0}, {'sum': 0.0}, {'sum': 12277.0}, {'sum': 5891.0}, {'sum': 16372.0}, {'sum': 30925.0}, {'sum': 54746.0}, {'sum': 4037.0}, {'sum': 13292.0}, {'sum': 25652.0}, {'sum': 25514.0}, {'sum': 33016.0}, {'sum': 5765.0}, {'sum': 57870.0}, {'sum': 16330.0}, {'sum': 57220.0}, {'sum': 2185.0}, {'sum': 13987.0}, {'sum': 17615.0}, {'sum': 18473.0}, {'sum': 0.0}, {'sum': 16174.0}, 

20003 1
[{'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0

20007 1
[{'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0}, {'sum': 0.0

       14    0.000    0.000    0.000    0.000 base.py:5919(_index_as_unique)
        6    0.000    0.000    0.000    0.000 base.py:5931(_maybe_promote)
        6    0.000    0.000    0.000    0.000 base.py:6018(_should_compare)
        6    0.000    0.000    0.000    0.000 base.py:6036(_is_comparable_dtype)
    12176    0.013    0.000    0.135    0.000 base.py:620(has_z)
     5545    0.006    0.000    0.071    0.000 base.py:626(is_empty)
       73    0.000    0.000    0.000    0.000 base.py:6298(_maybe_cast_indexer)
        6    0.000    0.000    0.000    0.000 base.py:6307(_maybe_cast_listlike_indexer)
        6    0.000    0.000    0.000    0.000 base.py:6313(_validate_indexer)
        2    0.000    0.000    0.000    0.000 base.py:6535(delete)
       25    0.000    0.000    0.000    0.000 base.py:654(_simple_new)
        2    0.000    0.000    0.001    0.000 base.py:6575(insert)
        2    0.000    0.000    0.001    0.000 base.py:6632(drop)
       13    0.000    0.000    0.001    0