In [1]:
import json
import logging
import os
from pathlib import Path

import numpy as np

In [2]:
def load_geojson(filename):
    with open(filename, 'r') as f:
        return json.load(f)

def save_geojson(obj, path):
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as f:
        f.write(json.dumps(obj))

In [3]:
# this file was prepared in the datasets-identify tutorial
ground_truth_file = os.path.join('src', 'ground-truth.geojson')
ground_truth = load_geojson(ground_truth_file)

In [4]:
crop_ground_truth = [f for f in ground_truth
                     if f['properties']['CLASS1'] == 'F']
print('{} out of {} features are field crops.'
      .format(len(crop_ground_truth),len(ground_truth)))

991 out of 7429 features are field crops.


In [5]:
field_type_names = {
    1: 'cotton',
    2: 'safflower',
    3: 'flax',
    4: 'hops',
    5: 'sugar beets',
    6: 'corn',
    7: 'grain sorghum',
    8: 'sudan',
    9: 'castor beans',
    10: 'beans',
    11: 'misc field',
    12: 'sunflowers',
    13: 'hybrid sorghum/sudan',
    14: 'millet',
    15: 'sugar cane'
}

In [6]:
cat_crop_ground_truth = [f for f in crop_ground_truth
                         if f['properties']['SUBCLASS1'] != '**']
print('{} out of {} crop field features are categorized.'
      .format(len(cat_crop_ground_truth),len(crop_ground_truth)))

946 out of 991 crop field features are categorized.


In [7]:
# determine the subclasses in this set and counts
subclasses_list = [field_geojson['properties']['SUBCLASS1']
                   for field_geojson in cat_crop_ground_truth]

subclasses = dict([int(x), subclasses_list.count(x)]
                  for x in set(subclasses_list))
print('subclasses and counts')
print(json.dumps(subclasses, indent=4))

subclasses and counts
{
    "4": 8,
    "12": 14,
    "10": 26,
    "13": 4,
    "6": 776,
    "2": 65,
    "11": 1,
    "8": 35,
    "7": 17
}


In [8]:
# number of samples for each subclass
num_samples = 5

In [9]:
# filter the subclasses to those with adequate number of features
filt_subclasses = [subclass
                   for (subclass, count) in subclasses.items()
                   if count > num_samples]
print('filtered subclasses: {}'.format(filt_subclasses))

filtered subclasses: [4, 12, 10, 6, 2, 8, 7]


In [10]:
print(f'filtered subclass names: {[field_type_names[int(x)] for x in filt_subclasses]}')

filtered subclass names: ['hops', 'sunflowers', 'beans', 'corn', 'safflower', 'sudan', 'grain sorghum']


In [11]:
# lets focus on only 3 subclasses for now, comment to use all subclasses
# filt_subclasses = filt_subclasses[:3]
filt_subclasses = ['12', '6', '10']
print(f'filtered subclasses: {filt_subclasses}')

filtered subclasses: ['12', '6', '10']


In [12]:
# create a list of sample features
# first filter to features within a subclass, then randomly pick a sample of those features

np.random.seed(0) # make random sampling repeatable

def get_subclass(crop_geojson):
    return crop_geojson['properties']['SUBCLASS1']

sample_features = []
for subclass in filt_subclasses:
    subclass_features = [f for f in crop_ground_truth if get_subclass(f) == subclass]
    sample_features.extend(np.random.choice(subclass_features, num_samples, replace=False))
print('{} sample field features'.format(len(sample_features)))

15 sample field features


In [13]:
def save_field_geojson(field_geojson, root):
    filename = get_filename(field_geojson, root=root)
    save_geojson(field_geojson, filename)
    return filename

def get_filename(field_geojson, root):
    return Path(root, f'{field_geojson["id"]}.json')

def save_features(features, root):
    for field_geojson in features:
        geojson_filename = save_field_geojson(field_geojson, root=root)
        print('wrote to {}'.format(geojson_filename))
        
save_features(sample_features, 'data/run2/fields')

wrote to data/run2/fields/28416.json
wrote to data/run2/fields/25645.json
wrote to data/run2/fields/24947.json
wrote to data/run2/fields/29488.json
wrote to data/run2/fields/20839.json
wrote to data/run2/fields/36547.json
wrote to data/run2/fields/38244.json
wrote to data/run2/fields/20984.json
wrote to data/run2/fields/37595.json
wrote to data/run2/fields/36458.json
wrote to data/run2/fields/8147.json
wrote to data/run2/fields/2503.json
wrote to data/run2/fields/9485.json
wrote to data/run2/fields/140.json
wrote to data/run2/fields/3846.json


In [14]:
def featurecollection(features):
    return {
      "type": "FeatureCollection",
      "features": features
    }

def save_featurecollection(features, filename):
    filename = Path(filename)
    fc = featurecollection(features)
    save_geojson(fc, filename)
    print(f'wrote to {filename}')
    
save_featurecollection(sample_features, 'data/fc_run2/src/fc.geojson')

wrote to data/fc_run2/src/fc.geojson


In [15]:
save_featurecollection(ground_truth, 'data/run2/src/gtruth.geojson')

wrote to data/run2/src/gtruth.geojson
