# Run model on training/validation/test eopatches

```
#
# Copyright (c) Sinergise, 2019 -- 2021.
#
# This file belongs to subproject "field-delineation" of project NIVA (www.niva4cap.eu).
# All rights reserved.
#
# This source code is licensed under the MIT license found in the LICENSE
# file in the root directory of this source tree.
#
```

This notebook runs a trained model on the eopatches.

In [1]:
# Reload imported modules automatically before each cell runs, so edits made to
# the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from pathlib import Path

import pandas as pd
import geopandas as gpd
from tqdm.notebook import tqdm

from eolearn.core import FeatureType

from fd.utils import prepare_filesystem
from fd.prediction import PredictionConfig, run_prediction_on_eopatch
from fd.prediction import load_model, load_metadata

## Workflow configuration set-up

In [3]:
# Local directory holding the input files read by this notebook (e.g. the grid definition).
INPUT_DATA_DIR = Path('..', '..', 'input-data')

In [4]:
# Tag appended to the names of the predicted features written to each eopatch.
model_version = 'folds_avg_10e'

prediction_config = PredictionConfig(
    # Storage: bucket and AWS credentials (credentials intentionally left blank here).
    bucket_name='bucket-name',
    aws_access_key_id='',
    aws_secret_access_key='',
    aws_region='eu-central-1',

    # Input eopatches and the normalisation metadata file.
    eopatches_folder='data/Castilla/2020-04/eopatches',
    metadata_path='metadata/Castilla/2020-04/patchlet-info.csv',
    normalise='to_medianstd',

    # Model to run. NOTE(review): `temp_model_path` is an absolute,
    # machine-specific directory -- adjust it for your environment.
    model_path='models/Castilla/2020-04',
    model_name='resunet-a_avg_2021-01-04-17-13-50',
    model_version=model_version,
    temp_model_path='/home/ubuntu/niva-cyl-models/',

    # Model input/output dimensions and batching.
    height=1122,
    width=1122,
    n_channels=4,
    n_classes=2,
    batch_size=16,

    # Features that receive the predictions.
    feature_extent=(FeatureType.DATA, f'EXTENT_PREDICTED_{model_version}'),
    feature_boundary=(FeatureType.DATA, f'BOUNDARY_PREDICTED_{model_version}'),
    feature_distance=(FeatureType.DATA, f'DISTANCE_PREDICTED_{model_version}'),
)

In [5]:
# Filesystem handle used below to load the metadata and model and to list eopatch files.
filesystem = prepare_filesystem(prediction_config)

### Check the meta-data used for normalisation

A file with the normalisation factors is used by the workflow to determine how to normalise the input images based on their acquisition datetime.

In [6]:
# Load the normalisation factors once so they can be passed to every prediction call below.
# NOTE(review): presumably read from `prediction_config.metadata_path` -- confirm in `fd.prediction`.
normalisation_factors = load_metadata(filesystem, prediction_config)

In [7]:
# Inspect the loaded table; the output below is indexed by month (e.g. 2020-03).
normalisation_factors

Unnamed: 0_level_0,chunk,eopatch,patchlet,chunk_pos,timestamp,mean_b0,mean_b1,mean_b2,mean_b3,std_b0,...,norm_meanstd_mean_b3,norm_meanstd_median_b0,norm_meanstd_median_b1,norm_meanstd_median_b2,norm_meanstd_median_b3,norm_meanstd_std_b0,norm_meanstd_std_b1,norm_meanstd_std_b2,norm_meanstd_std_b3,fold
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03,patchlets_field_delineation_99.npz,30TWN_2_7,data/Castilla/2020-04/patchlets/30TWN_2_7_1,49,2020-03-29,8480.957047,8022.577744,8426.146729,7814.784149,2509.563721,...,2746.590043,1017.891939,995.769063,933.1703,2726.109618,175.795333,232.388964,382.406135,649.973699,5
2020-04,patchlets_field_delineation_99.npz,30TWN_2_7,data/Castilla/2020-04/patchlets/30TWN_2_7_1,49,2020-04-28,5006.373322,4819.498505,5079.535522,5397.941116,2712.429383,...,2625.291505,978.491214,962.87429,917.062855,2573.705866,180.552451,231.539699,391.28097,630.743793,5
2020-05,patchlets_field_delineation_99.npz,30TWN_2_7,data/Castilla/2020-04/patchlets/30TWN_2_7_1,49,2020-05-11,1560.881699,1743.572723,2137.17955,4692.755554,521.665082,...,3063.496882,1004.421605,1016.390513,867.046771,3020.553016,174.398337,232.913813,407.72596,600.311697,5


Load the list of eopatches from the grid definition file.

In [8]:
# Grid definition file: the preview below shows one row per eopatch (id, name, geometry).
grid_definition_path = INPUT_DATA_DIR / 'cyl-grid-definition.gpkg'
grid_definition = gpd.read_file(grid_definition_path)
grid_definition.head()

Unnamed: 0,id,name,geometry
0,2302137,30TVM_2_5,"POLYGON ((-3.96458 41.90790, -3.96594 41.99795..."
1,2301391,30TUN_7_8,"POLYGON ((-4.58294 42.53146, -4.58522 42.62148..."
2,2301392,30TUN_7_9,"POLYGON ((-4.58067 42.44143, -4.58294 42.53146..."
3,2301398,30TUN_8_5,"POLYGON ((-4.46755 42.80315, -4.46969 42.89318..."
4,2301399,30TUN_8_6,"POLYGON ((-4.46543 42.71313, -4.46755 42.80315..."


In [9]:
# Names of all eopatches in the grid; these are the items processed below.
eopatches_list = grid_definition['name'].values

### Load model

Test loading the model, copying it locally from S3 if the local directory doesn't exist. This is done automatically in the workflow.

In [10]:
# Load the model once; it is passed explicitly to the prediction run below
# instead of being reloaded for every eopatch.
model = load_model(filesystem=filesystem, config=prediction_config)

  tensor_proto.tensor_content = nparray.tostring()


## Run prediction sequentially on all patches

This workflow could be run on CPU on multiple processes as well.

In [11]:
def process_eopatches(fn, eopatches, **kwargs):
    """Run ``fn`` on each eopatch, one after another, in the current process.

    Every item of ``eopatches`` is handed to ``fn`` as its first positional
    argument, together with ``kwargs``. Progress is shown with ``tqdm``.

    Returns:
        A list with one result per eopatch, in iteration order.
    """
    return [fn(eopatch, **kwargs) for eopatch in tqdm(eopatches)]

def multiprocess_eopatches(fn, eopatches, max_workers, **kwargs):
    """Run ``fn`` on every eopatch using a pool of worker processes.

    Relies on ``ProcessPoolExecutor`` (``concurrent.futures``) and ``partial``
    (``functools``) being imported at the top of the notebook.

    Args:
        fn: Callable invoked as ``fn(eopatch, **kwargs)``. It is sent to the
            worker processes, so ``fn`` and every value in ``kwargs`` must be
            picklable.
        eopatches: Iterable of eopatch identifiers. It is materialised into a
            list so that iterators and generators, which have no ``len()``,
            are accepted as well.
        max_workers: Number of worker processes in the pool.
        **kwargs: Keyword arguments bound to ``fn`` for every call.

    Returns:
        A list with one result per eopatch, in the same order as ``eopatches``.
    """
    # The progress bar needs a total, so the input must be sized.
    eopatches = list(eopatches)
    partial_fn = partial(fn, **kwargs)
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        return list(tqdm(executor.map(partial_fn, eopatches), total=len(eopatches)))
        
def prefect_processing():
    """Placeholder for a Prefect-based runner; intentionally does nothing.

    The processing strategy is kept outside the prediction module on purpose,
    so that the per-eopatch function can be driven by Prefect, a single
    process, multiple processes, or anything else.
    """
    return None

If the model or the normalisation factors are not passed, they are loaded from the bucket/disk for each eopatch, which allows the workflow to be run on separate instances/processes.

In [None]:
# Sequential run: the model and normalisation factors loaded above are passed in
# explicitly, so they are reused for every eopatch.
status = process_eopatches(
    run_prediction_on_eopatch,
    eopatches_list,
    config=prediction_config,
    model=model,
    normalisation_factors=normalisation_factors,
)

In [13]:
# Collect the per-eopatch results into a table; the preview below shows the
# columns `name` and `status`.
status_df = pd.DataFrame(status)

In [14]:
# Preview the first few results.
status_df.head()

Unnamed: 0,name,status
0,30TVM_2_5,Success
1,30TUN_7_8,Success
2,30TUN_7_9,Success
3,30TUN_8_5,Success
4,30TUN_8_6,Success


In [15]:
# (number of eopatches processed, number whose status is 'Success')
len(status_df), int(status_df['status'].eq('Success').sum())

(1083, 1076)

In [16]:
# Eopatches for which the prediction did not succeed, with the reported message.
status_df.loc[status_df['status'].ne('Success')]

Unnamed: 0,name,status
1047,29TPG_7_0,There are no features of type FeatureType.DATA...
1063,30TWM_7_3,There are no features of type FeatureType.DATA...
1067,30TWN_0_4,There are no features of type FeatureType.DATA...
1073,30TXM_0_7,There are no features of type FeatureType.DATA...
1076,30TWM_3_3,There are no features of type FeatureType.DATA...
1077,29TQE_1_3,There are no features of type FeatureType.DATA...
1078,29TPG_6_2,There are no features of type FeatureType.DATA...


### Check if files have been written

In [17]:
# File names expected in each eopatch's data folder after a successful prediction.
pred_files = [f'{kind}_PREDICTED_{model_version}.npy'
              for kind in ('BOUNDARY', 'DISTANCE', 'EXTENT')]

In [18]:
# Print the name of every eopatch that lacks at least one prediction file;
# listing errors are reported next to the eopatch name instead of stopping the loop.
for eopatch in tqdm(eopatches_list):
    try:
        data_dir = f'{prediction_config.eopatches_folder}/{eopatch}/data/'
        written = filesystem.listdir(data_dir)
        if any(pf not in written for pf in pred_files):
            print(eopatch)
    except Exception as exc:
        print(f'{eopatch}: {exc}')
    

HBox(children=(FloatProgress(value=0.0, max=1083.0), HTML(value='')))

29TPG_7_0
30TWM_7_3
30TWN_0_4
30TXM_0_7
30TWM_3_3
29TQE_1_3
29TPG_6_2

