In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%cd ..

/Users/rubenbroekx/Documents/Projects/radix-co2-reduction


# Tillage Classification - CNN

Tillage classification using a 1-dimensional CNN.

In [3]:
import json

from glob import glob
from collections import Counter
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

## Load Data

Load in all data used for the classification.

In [4]:
# Root data folder
DATA = Path.home() / 'data/agoro/2021-05-01'

In [5]:
# Fetch field-IDs
all_ids = glob(str(DATA / '*/ndti.png'))
all_ids = [int(i.split('/')[-2]) for i in all_ids]

In [6]:
# Load in all data
meta_data = {}
for i in all_ids:
    with open(DATA / f"{i}/meta.json", 'r') as f:
        meta_data[i] = json.load(f)
        
# Extract tillage types
tillage_types = {k:v['tillage'] for k,v in meta_data.items()}

# Get total number of fields
print(f"Total number of data samples:", len(tillage_types))

# Show classes
print(f"Number of samples by class:")
for k,v in Counter(tillage_types.values()).items():
    print(f" - {k}: {v}")

Total number of data samples: 478
Number of samples by class:
 - Conv.-Till: 237
 - No-Till: 241


In [53]:
with open(DATA / '104403/samples/sentinel2.json', 'r') as f:
    sample = json.load(f)

In [49]:
import numpy as np
from typing import List
from src.radix_co2_reduction.tillage_detection.index import b,g,r,nir,swir1,swir2
from src.radix_co2_reduction.tillage_detection.process import normalise_band

def _normalise(x:int, ls:bool) -> float:
    """Normalise the band's value."""
    return min(max(x/(10_000 if ls else 20_000), 0), 1)
    
BANDS = ['B','G','R','NIR','SWIR1','SWIR2']

def _to_vector(s) -> List[np.ndarray]:
    """Transform the sample to a list of spectral vectors for each of its pixels."""
    vectors = []
    dates = sorted(s.keys())
    n_pixels = len(s[dates[0]][BANDS[0]])
    for p_idx in range(n_pixels):
        v = []
        for d in dates:
            if s[d][BANDS[0]][p_idx] is None: continue
            v.append([_normalise(s[d][b][p_idx]) for b in BANDS])
        vectors.append(np.asarray(v, dtype=np.float32))
    return vectors
    
vectors =_to_vector(sample)

TypeError: _normalise() missing 1 required positional argument: 'ls'

In [37]:
vectors[0][-1]

array([0.1192, 0.1346, 0.1506, 0.3072, 0.4148, 0.3277], dtype=float32)

In [27]:
BANDS

['B', 'G', 'R', 'NIR', 'SWIR1', 'SWIR2']

In [45]:
r_ls = [x for x in sample2['2020-05-25']['R'] if x]

In [46]:
sum(r_ls)/len(r_ls)

1172.6161616161617

In [57]:
sample3.keys()

dict_keys(['2019-11-07', '2019-12-25', '2020-02-27', '2020-03-30', '2020-05-01'])

In [58]:
sample2.keys()

dict_keys(['2020-01-02', '2020-02-19', '2020-03-06', '2020-05-09', '2020-05-25'])

In [56]:
sample.keys()

dict_keys(['2019-11-05', '2019-12-05', '2019-12-10', '2019-12-25', '2020-01-09', '2020-02-18', '2020-03-04', '2020-04-08', '2020-04-18', '2020-05-23'])

In [55]:
len(sample)

10

In [47]:
r_s = [x for x in sample['2020-05-23']['R'] if x]

In [48]:
sum(r_s)/len(r_s)

2113.070707070707

In [19]:
# Load in the cloud filter
cloud_filter = CloudFilter(
    model_path = Path.cwd() / 'models'
)

Loaded existing cloud-filter!




In [20]:
# Load in the dataset (containing field-samples)
dataset = Dataset(
    ids=all_ids,
    data_path=DATA,
    cloud_filter=cloud_filter,
    datasets=('landsat7', 'landsat8'),
)
ids, x, y = dataset.get_data()

Loading in data: 100%|██████████| 333/333 [00:01<00:00, 332.88it/s]


In [21]:
clf = RandomForestClassifier()
scores = cross_val_score(
    clf,
    x,
    y,
    cv=5,
)
print(f"Score: {100*scores.mean():.2f}%")
print(f"Standard deviation: {100*scores.std():.2f}%")

Score: 78.11%
Standard deviation: 6.38%


In [22]:
# Print out misclassified field IDs
frac = round(1./5. * (len(y)-1))
c = 0
for i in range(5):
    x_train, x_test = x[:i*frac] + x[(i+1)*frac:], x[i*frac:(i+1)*frac]
    y_train, y_test = y[:i*frac] + y[(i+1)*frac:], y[i*frac:(i+1)*frac]
    ids_test = dataset.ids[i*frac:(i+1)*frac]
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    for a,b,i in zip(pred, y_test, ids_test):
        if a != b: 
            print(f"Field {i} - Target: {b}, predicted: {a}")
            c += 1
print(f"\nTotal of {c} misclassified samples ({100*c/len(y):.2f}%)")

Field 94148 - Target: Conv.-Till, predicted: No-Till
Field 94294 - Target: No-Till, predicted: Conv.-Till
Field 103458 - Target: No-Till, predicted: Conv.-Till
Field 103810 - Target: No-Till, predicted: Conv.-Till
Field 93912 - Target: No-Till, predicted: Conv.-Till
Field 102818 - Target: No-Till, predicted: Conv.-Till
Field 103915 - Target: Conv.-Till, predicted: No-Till
Field 102969 - Target: No-Till, predicted: Conv.-Till
Field 94076 - Target: Conv.-Till, predicted: No-Till
Field 102999 - Target: Conv.-Till, predicted: No-Till
Field 103725 - Target: No-Till, predicted: Conv.-Till
Field 103046 - Target: No-Till, predicted: Conv.-Till
Field 93731 - Target: Conv.-Till, predicted: No-Till
Field 102953 - Target: Conv.-Till, predicted: No-Till
Field 93781 - Target: No-Till, predicted: Conv.-Till
Field 102843 - Target: Conv.-Till, predicted: No-Till
Field 103593 - Target: Conv.-Till, predicted: No-Till
Field 103438 - Target: Conv.-Till, predicted: No-Till
Field 94801 - Target: No-Till, pre

In [24]:
import pickle

# Store the classifier as pickled object
with open(Path.cwd() / 'models/till_clf_rf.pickle', 'wb') as f:
    pickle.dump(clf, f)