### Code for generating the CSV files that hold the training and validation extents used for training the U-Net models

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from common import Tile, ard, train_val_split

In [2]:
# File-name templates for the per-year input image and its label raster.
# The literal "{year}" token is substituted later, once per entry in train_years.
train_paths = dict(
    image="leafon{year}.tif",
    target="NLCD_{year}_Land_Cover.tif",
)
# Years (kept as strings so they can be spliced straight into the templates).
train_years = [str(yr) for yr in (2001, 2004, 2006, 2008, 2011, 2013, 2016, 2019)]

In [3]:
# Tiles from which chips are drawn. Each pair is passed positionally to Tile;
# presumably (h, v) grid indices -- confirm against the Tile definition in common.
ard_tiles = [
    Tile(*grid_index)
    for grid_index in ((3, 10), (4, 1), (13, 13), (20, 8), (24, 13))
]

In [4]:
# Build the list of chip origins (upper-left corner, in the tiles' map
# coordinates) by laying a regular grid of chips over every selected tile.
gsd = 30 # ground sampling distance (map units per pixel; presumably metres -- confirm)
chip_size = (256, 256) # row, col
chips = []
for tile in tqdm(ard_tiles):
    # Look up this tile's footprint by its h/v index; the first match is used.
    tile_rows = ard[(ard['h'] == tile.h) & (ard['v'] == tile.v)]
    left, bottom, right, top = tile_rows['geometry'].bounds.iloc[0]

    # Tile extent expressed in whole pixels.
    n_rows = int((top - bottom) / gsd)
    n_cols = int((right - left) / gsd)

    # Step rows by the chip height (chip_size[0]) and columns by the chip
    # width (chip_size[1]). The original had these two indices swapped, which
    # is harmless for square chips but wrong for any non-square chip_size.
    # NOTE: when the tile size is not a multiple of the chip size, the last
    # chip in each direction starts inside the tile but extends past its edge.
    for row in range(0, n_rows, chip_size[0]):
        for col in range(0, n_cols, chip_size[1]):
            chips.append({"xgeo": (col * gsd) + left, "ygeo": top - (row * gsd)})

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 512.91it/s]


In [5]:
## Partition the chips into training and validation index sets.
## 0.80 is the split fraction; 42 is presumably the random seed -- confirm
## against train_val_split in common.
split_result = train_val_split(chips, .80, 42)
train_indx, val_indx = split_result

In [6]:
# Convert the chip list to an array once so both index sets can select from it.
chips_array = np.array(chips)
train_chips = chips_array[train_indx]
val_chips = chips_array[val_indx]
# Report how many chips landed in each split.
print(len(train_chips), len(val_chips))

1600 400


In [7]:
# One DataFrame per year for each split: the chip coordinates are the same
# every year, only the image/target file names change.
train_dfs = []
val_dfs = []

for year in tqdm(train_years):
    # Resolve the file names for this year once; both splits share them.
    image_name = train_paths['image'].replace('{year}', year)
    target_name = train_paths['target'].replace('{year}', year)

    train_data = pd.DataFrame(list(train_chips)).assign(image=image_name, target=target_name)
    val_data = pd.DataFrame(list(val_chips)).assign(image=image_name, target=target_name)

    train_dfs.append(train_data)
    val_dfs.append(val_data)

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 492.80it/s]


In [8]:
# Stack the per-year frames, then shuffle every row with a fixed seed and
# renumber the index so the output order is reproducible.
train_all_years = pd.concat(train_dfs)
val_all_years = pd.concat(val_dfs)

train_csv = train_all_years.sample(frac=1, random_state=42).reset_index(drop=True)
val_csv = val_all_years.sample(frac=1, random_state=42).reset_index(drop=True)

In [9]:
# Row counts of the final training and validation tables.
(train_csv.shape[0], val_csv.shape[0])

(12800, 3200)

In [10]:
# Write both tables to disk. The DataFrame index is written as the first
# (unnamed) column because to_csv is called with its default settings.
for frame, out_path in (
    (train_csv, './samples/train_unet_chips_256.csv'),
    (val_csv, './samples/val_unet_chips_256.csv'),
):
    frame.to_csv(out_path)