1. [USDA CropScape data](https://nassgeodata.gmu.edu/CropScape/)
2. Select county AOI
3. Download county for 2017, **disable** frequency and mask, set projection to UTM 15
4. Extract and rename to `CDL_2017_<FIPS>.tif`, where `<FIPS>` is the county FIPS code (`200001` for Allen)

In [1]:
%%capture
%%script false  # disable to run cell

# Install required libraries
!pip install rasterio numpy geopandas rasterstats

In [2]:
# Load required libraries
import csv
import rasterio
from rasterio.features import shapes
from shapely.geometry import shape
import rasterstats
import numpy as np
import geopandas as gpd

In [3]:
# Function to mask any raster by any value
def mask_raster(src_path, value_to_keep, dst_path):
    '''Entire crop raster -> Masked raster'''
    with rasterio.open(src_path) as src:
        raster_data = src.read()
        mask = np.isin(raster_data, value_to_keep, invert=True)
        masked_data = np.ma.masked_array(raster_data, mask=mask)
        meta = src.meta
    
    with rasterio.open(dst_path, 'w+', **meta) as dst:
        dst.write(masked_data)

In [4]:
# Function to convert crop raster to GeoDataFrame
def crop_to_gdf(src_path, fips, crop):
    '''Masked crop raster -> GeoDataFrame'''
    with rasterio.open(src_path) as src:
        raster_data = src.read(1)
        shapes_ = list(shapes(raster_data, mask=None, transform=src.transform))
    
    polygons = []
    for geom, val in shapes_:
        if val == crop:  # select only crop polygons
            polygons.append({'geometry': shape(geom)})
    
    gdf = gpd.GeoDataFrame(polygons)
    gdf = gdf.set_crs(src.crs)  # set CRS
    
    gdf['area'] = gdf.geometry.area
    
    return gdf

In [5]:
def process_crop(src_path, fips, crop):
    gdf = crop_to_gdf(src_path, fips, crop)
    
    # Remove small areas
    selection = gdf.area > 2023  # 2023m2 is  0.5 acres
    gdf = gdf.loc[selection]
    
    # Buffer
    gdf['geometry'] = gdf.buffer(60.96)  # 0.4 mile = 60.96m
    
    output_shapefile = f'data/crop_{crop}_{fips}.shp'
    gdf.to_file(output_shapefile)

### TODO: Rename all county TOFI files to `TOFI_2017_<FIPS>.tif`

In [11]:
fipses = [20001]
# crops = [1, 5, 6, 23, 24, 26, 225, 226, 236, 237, 238, 240, 241, 254]
crops = [1, 5, 24, 26]

def main():
    data = []
    
    for crop in crops:
        # TODO: extract crop raster from state file
            for fips in fipses:
                try:
                    crop_file = f'sourcedata/CDL_2017_{fips}.tif'
                    tree_file = f'sourcedata/TOFI_2017_{fips}.tif'
                    crop_mask_file = f'data/masked_{crop}_CDL_2017_{fips}.tif'
                    tree_mask_file = f'data/masked_TOFI_2017_{fips}.tif'
                    shp_file = f'data/crop_{crop}_{fips}.shp'
                    
                    mask_raster(crop_file, crop, crop_mask_file)
                    mask_raster(tree_file, 1, tree_mask_file)
                    
                    process_crop(crop_mask_file, fips, crop)
                    
                    # Count tree pixels
                    # TODO: there must be a faster method than zonal_stats
                    stats = rasterstats.zonal_stats(vectors=shp_file,
                                                    raster=tree_mask_file,
                                                    stats=['sum'],
                                                    nodata=-9999,
                                                    all_touched=True)
                    
                    tree_px = sum([x['sum'] for x in stats])
                    
                    # calc crop area
                    # crop resolution is 30m, so 'area' is in m^2
                    # tree resolution is 2m
                    
                    gdf = gpd.read_file(shp_file)
                    crop_area = gdf['area'].sum()
                    
                    # gdf.plot()
                    
                    # assemble data output
                    data.append({'fips': fips, 'crop_id': crop, 'crop_area_m2': crop_area, 'tree_area_m2': tree_px*4})
                
                except AttributeError:  # crop type not in county
                    print(f'County {fips} has no crop of id {crop}.')
                    pass
    
    # write data to csv
    header = ['fips', 'crop_id', 'crop_area_m2', 'tree_area_m2']
    with open('output.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(data)

In [12]:
import cProfile

cProfile.run('main()')

         16223483 function calls (15992081 primitive calls) in 185.420 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        4    0.021    0.005    6.149    1.537 3598799413.py:1(process_crop)
        4    0.001    0.000    0.001    0.000 724066848.py:31(<listcomp>)
        1    0.905    0.905  185.425  185.425 724066848.py:5(main)
        8   59.109    7.389   71.544    8.943 815958551.py:2(mask_raster)
        4    0.022    0.005    1.647    0.412 92394483.py:2(crop_to_gdf)
        8    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(append)
    11110    0.011    0.000    0.162    0.000 <__array_function__ internals>:2(array_equal)
        4    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(atleast_1d)
        4    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(bincount)
        8    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(concatenate)
   