## GeometricAnnotationErrors - Point Shifting 


### Part 0: Loading Data

In [None]:
""" Environment Configuration """

from config import INPUT_DATA_DIR, TENSOR_DIR, RESULTS_DIR

# Folder the .npy tensors and tile-offset CSVs are read from.
data_path = TENSOR_DIR
# Folder the rasters and label shapefiles are read from.
source_path = INPUT_DATA_DIR
# Root folder handed to doc.InitTest when the test folder is created.
out_root_dir = RESULTS_DIR

# Pre-Load Determiner.
preload = False
# - True, the candidates are loaded and stored.
# - False, the candidates are loaded from jupyter persistent storage


""" Point Shifting Annotator Configuration """

# Weight applied to the line distance between candidate points
length_weight_value = 0.02

# Buffer in meters to apply to candidates
# NOTE(review): the annotator cell passes weight_buffer=4 and L=0.02 as
# literals instead of reading these names -- confirm which values are intended.
weight_buffer = 2

# Buffer distance applied to new annotations before they are rasterized
buff_dist = 4

# Option to normalize over K^2 (True) or by K (False).
normalize_full = False

# Number of EM iterations
em_target = 6


""" Test Documentation Parameters """

# doc.InitTest reads these names when the test folder is created, so they must
# exist before that cell runs. The values mirror the literals used for the
# baseline optimizer (learning_rate) and for the annotator constructor
# (pairs, off_dist, interval, min_p).
learning_rate = 0.1
pairs = 15
off_dist = 1.5
interval = 10
min_probability = 1e-02



In [None]:
# Std Imports
import os, sys, time, csv
from datetime import datetime as dt 

# Module Imports
import tensorflow as tf
import numpy as np
import rasterio as rio
import geopandas as gpd
import matplotlib.pyplot as plt
import shapely.geometry as shp
from tensorflow.keras.optimizers import Adam

# Lib imports
import lib.Doc_Tools as doc
import lib.GeoTools as gt
import lib.Tiling as tile
import lib.K_Tools as kt
from lib import *

# Seed environment so TensorFlow and NumPy random draws are repeatable
tf.random.set_seed(2001)
np.random.seed(2001)

### Create Folder for test documentation
# NOTE(review): learning_rate, pairs, interval, off_dist and min_probability
# are not assigned in this cell; they must already exist in the notebook
# namespace or this call raises NameError -- confirm where they are defined.
test_idx, test_dir = doc.InitTest(out_root_dir,
                                  em_target=em_target,
                                  LR=learning_rate,
                                  pairs=pairs,
                                  interval=interval,
                                  off_distance=off_dist,
                                  min_p=min_probability)

# Open Raster Imagery
# The rasters stay open: train_raster is read again when probability maps are
# generated and when new annotations are rasterized.
train_raster = rio.open(os.path.join(source_path, 'train_raster.tif'))
test_raster  = rio.open(os.path.join(source_path, 'test_raster.tif'))
# Open shapefiles, ensure correct CRS
refined_labels  = gpd.read_file(os.path.join(source_path, 'refined_labels.shp'))
refined_labels.to_crs(test_raster.crs, inplace=True)
# Bound to `imperfect_labels`, the name every later cell reads (the IoU
# evaluation below, candidate preloading, and get_candidates in the EM loop).
imperfect_labels = gpd.read_file(os.path.join(source_path, 'imperfect_labels.shp'))
imperfect_labels.to_crs(train_raster.crs, inplace=True)

### Loading Tensors and tile offsets
X_train = np.load(os.path.join(data_path, 'X_train.npy'))
Y_train = np.load(os.path.join(data_path, 'Y_train.npy'))
X_val = np.load(os.path.join(data_path, 'X_val.npy'))
Y_val = np.load(os.path.join(data_path, 'Y_val.npy'))
X_test = np.load(os.path.join(data_path, 'X_test.npy'))
Y_test = np.load(os.path.join(data_path, 'Y_test.npy'))
# Only the offset file paths are kept; the EM loop passes them to
# tile.ResampleTiles when label tensors are rebuilt.
train_offsets_fp = os.path.join(data_path, 'train_offsets.csv')
val_offsets_fp = os.path.join(data_path, 'val_offsets.csv')
print("Successfully loaded tensors.")


### Evaluate original Shapefile Precision
# NOTE(review): gt_labels is not assigned anywhere in the visible notebook
# (refined_labels is loaded above but never used) -- confirm whether
# gt_labels should be refined_labels or comes from `from lib import *`.
source_iou = gt.gdf_iou(gt_labels, imperfect_labels)
# Baseline for the per-step "Step Improvement" delta printed in the EM loop.
prev_iou = source_iou

### Part 1: Baseline Model Training and Evaluation


In [None]:
### Baseline Model Training
""" UNET Config """
learning_rate = 0.1
batch_size = 32
epochs = 50

# Prepare Baseline Folder (no error if it is already there from an earlier run)
base_folder = os.path.join(test_dir, 'baseline')
os.makedirs(base_folder, exist_ok=True)

# Prepare Callbacks, including weight output
base_callbacks = kt.SetCallbacks(weights_out=os.path.join(base_folder, 'BaselineWeights.h5'),
                                 tensorboard_path=os.path.join(base_folder, 'tensorboard'))

# Create Optimizer
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
optimizer = Adam(learning_rate=learning_rate, epsilon=1e-8, decay=1e-5)

# Select and Build Model
model = kt.Get_Model('UNET')
model.compile(optimizer=optimizer,
              loss=kt.dice_coef_loss,
              metrics=[kt.dice_coef, 'accuracy', kt.f1_score])

# Train Model
baseline_results = model.fit(X_train, Y_train,
                             validation_data=(X_val, Y_val),
                             shuffle=True,
                             batch_size=batch_size,
                             epochs=epochs,
                             callbacks=base_callbacks)

# Save History plot and csv
doc.plot_history(baseline_results, test_dir=base_folder, config_idx='base')

# Number of epochs recorded in the training history; stored as the 'Base'
# entry of the EM results table.
epochs_used = len(baseline_results.history['accuracy'])

""" Evaluate Baseline Model Preformance """
print("Baseline Preformance:")
# These three reports become the 'Base' row of the EM results.
train_rpt = kt.ModelReport(X_train, Y_train, model, 'Training')
val_rpt = kt.ModelReport(X_val, Y_val, model, 'Validation')
test_rpt = kt.ModelReport(X_test, Y_test, model, 'Testing')

print(dt.now().strftime('\n\n%a at %I:%M:%S%p'))


### Part 2: EM Iteration

`Warning`: By proceeding, the EM Iteration will use the model configured above with the parameters already set. Tune the baseline model above as many times as needed before proceeding.

#### Section 1: Configure EM Test with Annotator

In [None]:
# Path for a segmentation class map; left unset here and not read by any other
# cell visible in this notebook.
seg_class_map_fp = None
# Re-declares the pre-load flag from the configuration cell with the same value.
preload = False

In [None]:
# Build the point-shifting annotator. Only normalize_full comes from the
# configuration cell; the remaining settings are literals.
annotator = Dynamic_Preloading_Annotator(
    pairs=15,
    off_dist=1.5,
    interval=10,
    min_p=1e-02,
    L=0.02,
    weight_buffer=4,
    normalize_full=normalize_full,
)

# Candidate data is computed once from the current model's probability map
# (nothing is written to disk because pmap_fp is None) and reused by every
# EM step through update_gdf_from_preload.
print("PRELOADING")
initial_pmap = kt.Get_Pmap(source_raster=train_raster, pmodel=model, pmap_fp=None)
all_data = annotator.preload_candidates(imperfect_labels, initial_pmap)

# Per-step results; index 0 holds the baseline model and the source labels.
em_dict = dict(
    Name=['Base'],
    Test_Data=[test_rpt],
    Train_Data=[train_rpt],
    Val_Data=[val_rpt],
    Line_IoU=[np.round(source_iou * 100, 2)],
    Epochs=[epochs_used],
)

# Counter for the EM loop; kept outside the loop cell so a re-run continues
# from the last completed step.
EM_iterator = 0

# Best testing F1 / annotation IoU seen so far and the step that produced them.
top_f1 = top_f1_idx = 0
top_iou = top_iou_idx = 0


In [None]:
# Buffer distance passed to gt.gdf_buffer before new annotations are
# rasterized in the EM loop; re-declares the configuration-cell value so it can
# be changed between runs. (Units are not stated here -- presumably meters,
# TODO confirm.)
buff_dist = 4

#### Run EM Iteration

In [None]:
print("Beginning Iteration, Target steps:", em_target)

# Each pass: predict a class map with the current model, shift the annotation
# toward it, rebuild the label tensors from the new annotation, retrain a
# model on them, then record and print the results.
while EM_iterator < em_target:

    # 0. Initialization
    # ---------------------------

    # Create folder for em step (kept if it already exists from an earlier run)
    emfolder = os.path.join(test_dir, 'step_{:02}'.format(EM_iterator))
    os.makedirs(emfolder, exist_ok=True)

    print("\nEM Step {:02} begun.".format(EM_iterator))
    print(dt.now().strftime('%a at %I:%M:%S%p'))


    # 1. Update Annotations
    # ---------------------------

    # Get predicted class map; at step 0 `model` is still the baseline model,
    # so the file is named accordingly.
    if EM_iterator == 0:
        pmap_fp = os.path.join(emfolder, 'pmap_baseline.tif')
    else:
        pmap_fp = os.path.join(emfolder, 'pmap_{:02}.tif'.format(EM_iterator))
    predicted_class_map = kt.Get_Pmap(train_raster, model, pmap_fp)
    print("\nGenerated Predicted Class and Intermediate Feature Maps from previous model. (Step {:02})".format(EM_iterator))
    print(dt.now().strftime('%a at %I:%M:%S%p'))


    # Update annotation from the preloaded candidate data
    annotation_fp = os.path.join(emfolder, 'annotation_{:02}.shp'.format(EM_iterator))
    new_annotation = annotator.update_gdf_from_preload(all_data, class_map=predicted_class_map, out_path=annotation_fp)

    print("\nCreated New Annotation. (Step {:02})".format(EM_iterator))
    print(dt.now().strftime('%a at %I:%M:%S%p'))

    # Generate and save all considered point groups
    candidate_fp = os.path.join(emfolder, 'candidates_{:02}.shp'.format(EM_iterator))
    annotator.get_candidates(imperfect_labels, class_map=predicted_class_map, out_path=candidate_fp)

    # Save iou for this annotation.
    # NOTE(review): gt_labels is not assigned in the visible notebook --
    # confirm where it is defined.
    anno_iou = gt.gdf_iou(gt_labels, new_annotation)


    # 2. Create new Label Tensors
    # ---------------------------

    # 2.1 Rasterize New Labels
    buff_anno = gt.gdf_buffer(new_annotation, buff_dist=buff_dist, flatten=True)
    anno_raster_fp = os.path.join(emfolder, 'rasterized_annotation_{:02}.tif'.format(EM_iterator))
    anno_raster = gt.GDF_Rasterize(buff_anno, train_raster, out_path=anno_raster_fp)

    # Read Y_train, Y_val tensors from rasterized label
    Y_train = tile.ResampleTiles(anno_raster, train_offsets_fp)
    Y_val = tile.ResampleTiles(anno_raster, val_offsets_fp)

    # Upsample label tensors to match shape
    Y_train = tile.AugmentImages(Y_train, h_flip=False, v_flip=True, rotate=True)
    Y_val = tile.AugmentImages(Y_val, h_flip=False, v_flip=True, rotate=True)

    print("\nCreated Y_train {} and Y_val {}. (Step {:02})".format(Y_train.shape, Y_val.shape, EM_iterator))
    print(dt.now().strftime('%a at %I:%M:%S%p'))


    # 3. Re-Train U-Net
    # ---------------------------

    # Load Callbacks
    callbacks = kt.SetCallbacks(weights_out=os.path.join(emfolder, 'unet_weights_{:02}.h5'.format(EM_iterator)),
                                tensorboard_path=os.path.join(emfolder, 'tensorboard_{:02}'.format(EM_iterator)))

    # Re-compile and Train Model
    model = kt.Get_Model('UNET')
    # A fresh optimizer for every step: an optimizer instance keeps its
    # iteration counter (which drives `decay`) and per-variable state, so
    # reusing the one created for the baseline would carry that state over to
    # the model built above.
    optimizer = Adam(learning_rate=learning_rate, epsilon=1e-8, decay=1e-5)
    model.compile(optimizer=optimizer,
                  loss=kt.dice_coef_loss,
                  metrics=[kt.dice_coef, 'accuracy', kt.f1_score])
    training_history = model.fit(X_train, Y_train,
                                 validation_data=(X_val, Y_val),
                                 shuffle=True,
                                 batch_size=batch_size,
                                 epochs=epochs,
                                 callbacks=callbacks,
                                 verbose=0)
    print("Completed model Training. (Step {:02})".format(EM_iterator))
    print(dt.now().strftime('%a at %I:%M:%S%p'))


    # 4. Evaluate Model
    # ---------------------------

    # Save History Plots and CSV
    doc.plot_history(training_history, test_dir=emfolder, config_idx=EM_iterator)

    # The three reports are also written to this step's history markdown file.
    hist_markdown_fp = os.path.join(emfolder, 'history_{:02}.md'.format(EM_iterator))
    with open(hist_markdown_fp, 'w+') as hist_md:
        test_rpt = kt.ModelReport(X_test, Y_test, model, "Testing", index=(EM_iterator+1), report_md=hist_md)
        train_rpt = kt.ModelReport(X_train, Y_train, model, "Training", index=(EM_iterator+1), report_md=hist_md)
        val_rpt = kt.ModelReport(X_val, Y_val, model, "Validation", index=(EM_iterator+1), report_md=hist_md)

    # Record performance (all lists grow in lockstep, one entry per step)
    em_dict['Name'].append('Step {:02}'.format(EM_iterator))
    em_dict['Line_IoU'].append(np.round((anno_iou*100), 2))
    em_dict['Epochs'].append(len(training_history.history['accuracy']))
    em_dict['Test_Data'].append(test_rpt)
    em_dict['Train_Data'].append(train_rpt)
    em_dict['Val_Data'].append(val_rpt)


    # Update top F1 and Annotation IoU
    # top_f1 is kept as a fraction, top_iou as a rounded percentage.
    if test_rpt['F1_Score'] > top_f1:
        top_f1 = test_rpt['F1_Score']
        top_f1_idx = EM_iterator
        print("\nNew Top F1: {:.2f}".format(top_f1*100))
    if np.round((anno_iou*100), 2) > top_iou:
        top_iou = np.round((anno_iou*100), 2)
        top_iou_idx = EM_iterator
        print("\nNew Top IoU: {:.2f}".format(top_iou))

    # Print step data
    print("\nEM Step ({:02}) Complete on {}".format(EM_iterator, dt.now().strftime('%a at %I:%M:%S%p')))
    print('- Annotation IoU:     {:.2f}'.format(anno_iou*100))
    print('  - Source Improvement: {:+.2f}'.format((anno_iou-source_iou)*100))
    print('  - Step Improvement:   {:+.2f}'.format((anno_iou-prev_iou)*100))
    print('- Model Performance: (trained on new labels)')
    print('  - Training:')
    kt.PrintReport(train_rpt)
    print('  - Validation:')
    kt.PrintReport(val_rpt)
    print('  - Testing:')
    kt.PrintReport(test_rpt)
    print("----------------------------------\n\n")

    # Increase iterator and save previous precision for step_delta
    EM_iterator += 1
    prev_iou = anno_iou

# Increase EM target for optional subsequent runs
# (re-running this cell then performs exactly one more step; the report cell
# also relies on em_target matching the number of recorded rows).
em_target += 1

### Create Plots

In [None]:
# Reshape the per-step reports for plotting: for each data split, collect every
# report field into one NumPy array ordered by EM step.
model_dict = {'Test_Data': {}, 'Train_Data': {}, 'Val_Data': {}}
for split_name, split_metrics in model_dict.items():
    for step_report in em_dict[split_name]:
        for metric, value in step_report.items():
            if metric in split_metrics:
                split_metrics[metric] = np.append(split_metrics[metric], value)
            else:
                split_metrics[metric] = np.array([value])


# 2x2 grid sharing the x axis: three F1 panels plus the annotation IoU panel.
fig, axs = plt.subplots(2, 2, sharex=True, figsize=(16,10))

# (axis, em_dict split, panel title, color, marker) for each F1 panel.
f1_panels = (
    (axs[0,0], 'Test_Data', 'Testing F1', 'r', 's'),
    (axs[0,1], 'Train_Data', 'Training F1', 'g', '^'),
    (axs[1,0], 'Val_Data', 'Validation F1', 'c', '^'),
)
for panel_ax, panel_split, panel_title, panel_color, panel_marker in f1_panels:
    # F1 scores are stored as fractions; plot them as percentages.
    doc.plot_axis(ax=panel_ax,
                  data=model_dict[panel_split]['F1_Score']*100,
                  name=panel_title,
                  color_char=panel_color,
                  symbol_char=panel_marker,
                  y_off=2,
                  label_delta=False)

# Line IoU is already stored as a percentage in em_dict.
doc.plot_axis(ax=axs[1,1],
              data=em_dict['Line_IoU'],
              name='Line IoU',
              color_char='m',
              x_label='EM Step',
              label_delta=False)

# Title the figure, write it into the test folder, then display it.
fig.suptitle("EM Test {:02}".format(test_idx))
fig_path = os.path.join(test_dir, 'test_{:02}_plot.png'.format(test_idx))
fig.savefig(fig_path)
fig.show()


### Save Info to Markdown

Saves:
- Annotator Config
- Model Config
- EM performance table
- Model performance table

In [None]:
### Write test data to markdown
markdown_fp = os.path.join(test_dir, 'em_test_info_{:02}.md'.format(test_idx))

# Percent-scaled copies for the summary. top_f1 and source_iou are left
# untouched (as fractions) so re-running this cell, or running further EM
# steps afterwards, does not scale them by 100 a second time.
top_f1_pct = np.round((top_f1*100), 2)
source_f1 = np.round((em_dict['Test_Data'][0]['F1_Score']*100), 2)
source_iou_pct = np.round(source_iou*100, 2)

# One table row per recorded entry ('Base' plus every completed EM step).
row_count = len(em_dict['Name'])


def _em_value(key, idx):
    """Return em_dict[key][idx], or 'n/a' if that value was never recorded."""
    recorded = em_dict.get(key, [])
    return recorded[idx] if idx < len(recorded) else 'n/a'


# The context manager closes the file even if writing a row fails.
with open(markdown_fp, 'w') as md:

    # Header / Notes
    md.write("# Geometric Annotation Errors - EM Test {:02}\n".format(test_idx))
    md.write(dt.now().strftime('### %a at %I:%M:%S%p\n'))
    md.write("\n---\n\n")

    # Results Section
    md.write("## **Results**:\n\n")
    # Write best scores (top_iou is already a rounded percentage)
    md.write("### Top Values:\n")
    md.write(f" - Testing F1 Score: **{top_f1_pct}** (`{top_f1_pct - source_f1:+.2f}`) - Step {top_f1_idx}\n")
    md.write(f" - Annotation IoU: **{top_iou}** (`{top_iou - source_iou_pct:+.2f}`) - Step {top_iou_idx}\n\n")

    # Write results by EM step
    md.write("### EM Iteration:\n")
    md.write("Step | Anno IoU | F1 | Epochs | LR | Train | Update\n")
    md.write("---- | -------- | -- | ------ | -- | ----- | ------\n")
    for idx in range(row_count):
        iou_cell = "{}".format(em_dict['Line_IoU'][idx])
        if idx > 0:
            # Every step after the base row also shows its change vs. the source labels.
            iou_cell += " (`{:+.2f}`)".format(em_dict['Line_IoU'][idx] - em_dict['Line_IoU'][0])
        # LR / Training_Time / Update_Time are not recorded by the EM loop in
        # this notebook; they are looked up defensively so the table is still
        # written instead of raising KeyError.
        row_cells = [
            em_dict['Name'][idx],
            iou_cell,
            em_dict['Test_Data'][idx]['F1_Score'],
            em_dict['Epochs'][idx],
            _em_value('LR', idx),
            _em_value('Training_Time', idx),
            _em_value('Update_Time', idx),
        ]
        md.write(" | ".join("{}".format(cell) for cell in row_cells) + "\n")
    md.write("\n\n</br>\n\n")

    md.write("### Model Performance:\n\n")
    md.write("Step | Test F1 | Test (FP, FN) | Train F1 | Train (FP, FN) | Val F1 | Val (FP, FN) \n")
    md.write("---- | ------- | ------------- | -------- | -------------- | ------ | ------------ \n")
    for idx in range(row_count):
        # Collect this row's cells: step name, then F1 and (FP, FN) per split
        row_cells = [em_dict['Name'][idx]]
        for key in ['Test_Data', 'Train_Data', 'Val_Data']:
            report = em_dict[key][idx]
            row_cells.append("{:.2f}".format(report['F1_Score']*100))
            row_cells.append("({:.2e}, {:.2e})".format(report['False_Positives'], report['False_Negatives']))
        # Each row ends with a newline so the markdown table renders one row per step.
        md.write(" | ".join(row_cells) + "\n")

print(f"Results written to {markdown_fp}.")