### Model evaluation
Evaluates the final models on the test sets of each study area, calculating the model accuracy metrics presented in the paper.

#### Input:
* *data_path_sa*: Matching pairs of KH-9 image tiles and crater label tiles (256 $\times$ 256 pixels), split into training, validation and test sets for each study area (HDF5 files)
* *model_path_sa*: Best models for each study area after fine-tuning

#### Parameters:
* *crater_ids*: Integers that represent craters in the labelled image tiles
* *crater_classes*: Names of the crater classes
* *min_crater_area*: Minimum area (in pixels) of a crater in the labelled image tiles (smaller craters are removed)
* *batch_size*: Batch size used during model prediction

#### Outputs:
* *pixel_eval_path*: Pixel evaluation results
* *crater_eval_path*: Crater evaluation results

#### Created paper content:
* **Table 1**: Accuracy metrics (F1-score, precision, recall) for each study area and crater class
* **Combined F1-score**: Combined F1-score taking into account all craters from both study areas

In [9]:
import numpy as np
import torch

from torch.utils.data import DataLoader, Dataset
from torchvision.datapoints import Mask
from evaluation import (
    evaluate_crater_accuracy,
    evaluate_pixel_accuracy,
)
from utils import create_dir, load_config, load_data, apply_min_max_scaling

In [10]:
# Load the project configuration once; the cells below read it via config.get(...)
# (study areas, data/model/output path templates, crater classes and ids,
# minimum crater area and batch size).
config = load_config("../config.yaml")

# Model accuracy on test set
Evaluate model accuracy at the pixel level and, after post-processing the segmentation results, at the crater level. Metrics are computed for each study area individually and for all study areas combined.

In [11]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each image with its label mask.

    Args:
        images: Indexable collection of images; its length defines the
            dataset size.
        masks: Indexable collection of label masks, aligned with ``images``
            by index.
        transform: Optional callable invoked as
            ``transform(image, Mask(mask))`` that returns the transformed
            ``(image, mask)`` pair. When falsy, samples are returned as-is.
    """

    def __init__(self, images, masks, transform=None):
        self.images, self.masks = images, masks
        self.transform = transform

    def __len__(self):
        # The number of samples is taken from the images alone.
        return len(self.images)

    def __getitem__(self, idx):
        image, mask = self.images[idx], self.masks[idx]

        if not self.transform:
            return image, mask

        # Wrapping the label in Mask tells the transform functions to treat
        # it as a label rather than as image data.
        image, mask = self.transform(image, Mask(mask))
        return image, mask
    
def create_dataset(x, y, transform=None):
    """Wrap image and label arrays in a ``CustomDataset`` of tensors.

    Args:
        x: Image array with four axes. It is converted to a float tensor and
            its last axis is moved to position 1 (presumably channels-last
            to channels-first -- confirm against the data files).
        y: Label array whose last axis is reduced with ``argmax``, yielding
            one integer class index per position (presumably one-hot
            encoded labels -- confirm against the data files).
        transform: Optional transform forwarded to ``CustomDataset``.

    Returns:
        A ``CustomDataset`` holding the converted image and label tensors.
    """
    images = torch.FloatTensor(x)
    images = images.permute(0, 3, 1, 2)

    class_indices = y.argmax(axis=-1)
    labels = torch.LongTensor(class_indices)

    return CustomDataset(images, labels, transform=transform)

def create_dataset_loader(x, y, batch_size, transform, shuffle=True):
    """Build a ``DataLoader`` over the dataset created from ``x`` and ``y``.

    Args:
        x: Image array, forwarded to ``create_dataset``.
        y: Label array, forwarded to ``create_dataset``.
        batch_size: Number of samples per batch.
        transform: Transform forwarded to ``create_dataset`` (may be None).
        shuffle: Whether the loader shuffles the samples. Pass False when the
            order of the predictions must match the order of ``y``.

    Returns:
        A ``torch.utils.data.DataLoader`` over the created dataset.
    """
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return DataLoader(create_dataset(x, y, transform=transform), **loader_options)
    
def pred_val_data(model, data_loader):
    """Run ``model`` over every batch of ``data_loader`` and collect its outputs.

    The model is switched to evaluation mode and inference runs without
    gradient tracking, on CUDA when available and on the CPU otherwise. The
    second element of each batch (the labels) is ignored.

    Args:
        model: Model to evaluate; it is moved to the inference device.
        data_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        A NumPy array with the raw model outputs of all batches concatenated
        along axis 0 and then transposed with ``(0, 2, 3, 1)``, i.e. the
        model's axis 1 becomes the last axis.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # The batches are moved to `device` below, so the model has to live there
    # as well; this is a no-op when it was already loaded onto that device.
    model.to(device)
    model.eval()

    pred_list = []
    with torch.no_grad():
        for images, _ in data_loader:
            outputs = model(images.to(device))
            pred_list.append(outputs.cpu().numpy())

    if not pred_list:
        raise ValueError("data_loader yielded no batches; nothing to predict on")

    # Concatenate the per-batch predictions along the batch dimension.
    pred = np.concatenate(pred_list, axis=0)
    return pred.transpose((0, 2, 3, 1))
    
def evaluate_model_accuracy(
    data_path,
    model_path,
    crater_classes,
    crater_ids,
    min_crater_area,
    batch_size=1,
    threshold=0.5,
    return_pred=False,
):
    """Predict on the test split of one data file and score the predictions.

    The ``"x_test"`` / ``"y_test"`` arrays are read from ``data_path``, the
    images are passed through ``apply_min_max_scaling`` and the model stored
    at ``model_path`` predicts on them in their original order.

    Args:
        data_path: Path handed to ``load_data``.
        model_path: Path of the saved model. It is read with ``torch.load``,
            which unpickles the file, so only load models from a trusted
            source.
        crater_classes: Forwarded to ``evaluate_crater_accuracy``.
        crater_ids: Forwarded to both evaluation functions.
        min_crater_area: Forwarded to ``evaluate_crater_accuracy``.
        batch_size: Batch size used during prediction.
        threshold: Forwarded to ``evaluate_crater_accuracy``.
        return_pred: When True, skip the evaluation and return the raw
            predictions together with the test labels.

    Returns:
        ``(pred, y)`` when ``return_pred`` is True, otherwise
        ``(res_pixel, res_crater, cm)`` as produced by
        ``evaluate_pixel_accuracy`` and ``evaluate_crater_accuracy``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    x_test, y_test = load_data(data_path, "x_test", "y_test")
    x_scaled = apply_min_max_scaling(x_test)

    # No transform and no shuffling: the predictions must stay aligned with
    # y_test, which is compared against them below.
    loader = create_dataset_loader(
        x_scaled,
        y_test,
        batch_size=batch_size,
        transform=None,
        shuffle=False,
    )

    # Load the trained model onto the inference device and predict on the
    # test data.
    trained_model = torch.load(model_path, map_location=device)
    pred = pred_val_data(trained_model, loader)

    if not return_pred:
        pixel_scores = evaluate_pixel_accuracy(pred, y_test, crater_ids=crater_ids)
        crater_scores, cm = evaluate_crater_accuracy(
            pred,
            y_test,
            crater_classes=crater_classes,
            crater_ids=crater_ids,
            min_crater_area=min_crater_area,
            threshold=threshold,
            plot_cm=False,
        )
        return pixel_scores, crater_scores, cm

    return pred, y_test

In [12]:
# Evaluate each study area with its own fine-tuned model and write the
# pixel-level and crater-level accuracy tables to CSV.
for study_area in config.get("study_areas"):
    print(study_area)
    crater_classes = config.get("crater_classes")

    res_pixel, res_crater, cm = evaluate_model_accuracy(
        config.get("data_path_sa").format(study_area=study_area),
        model_path=config.get("model_path_sa").format(study_area=study_area),
        crater_classes=crater_classes,
        crater_ids=config.get("crater_ids"),
        min_crater_area=config.get("min_crater_area"),
        batch_size=config.get("batch_size"),
    )

    # Pixel accuracy: label the columns, then store the table rounded to
    # two decimals.
    res_pixel.columns = ["background", *crater_classes, "boundary", "craters"]
    res_pixels_path = config.get("pixel_eval_path").format(study_area=study_area)
    create_dir(res_pixels_path, is_file=True)
    np.round(res_pixel, 2).to_csv(res_pixels_path)

    # Crater accuracy: column 0 is dropped before the remaining columns are
    # labelled with the crater classes plus the overall "craters" column.
    res_crater = res_crater.drop(columns=0)
    res_crater.columns = [*crater_classes, "craters"]
    res_crater_path = config.get("crater_eval_path").format(study_area=study_area)
    create_dir(res_crater_path, is_file=True)
    np.round(res_crater, 2).to_csv(res_crater_path)

quang_tri
Directory already exists: ../outputs/tables/quang_tri
Directory already exists: ../outputs/tables/quang_tri
tri_border_area


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Directory already exists: ../outputs/tables/tri_border_area
Directory already exists: ../outputs/tables/tri_border_area


### Both study areas combined
This is done to provide a combined F1-score across both study areas.

In [13]:
# Arguments shared by both study areas. return_pred=True makes
# evaluate_model_accuracy hand back the raw predictions and test labels
# instead of the accuracy tables.
shared_eval_kwargs = dict(
    crater_classes=config.get("crater_classes"),
    crater_ids=config.get("crater_ids"),
    min_crater_area=config.get("min_crater_area"),
    batch_size=config.get("batch_size"),
    return_pred=True,
)

# Each study area is predicted with its own model, Quang Tri first.
(pred_qt, y_qt), (pred_tba, y_tba) = [
    evaluate_model_accuracy(
        config.get("data_path_sa").format(study_area=area_name),
        model_path=config.get("model_path_sa").format(study_area=area_name),
        **shared_eval_kwargs,
    )
    for area_name in ("quang_tri", "tri_border_area")
]

# Stack predictions and labels of both areas along the first axis.
pred_comb = np.vstack([pred_qt, pred_tba])
y_comb = np.vstack([y_qt, y_tba])

In [14]:
# Score the stacked predictions of both study areas together.
crater_classes = config.get("crater_classes")
crater_ids = config.get("crater_ids")

# Pixel-level metrics
res_pixel = evaluate_pixel_accuracy(pred_comb, y_comb, crater_ids=crater_ids)
res_pixel.columns = ["background", *crater_classes, "boundary", "craters"]

# Crater-level metrics; column 0 is dropped before the remaining columns are
# labelled with the crater classes plus the overall "craters" column.
res_crater, cm = evaluate_crater_accuracy(
    pred_comb,
    y_comb,
    crater_classes=crater_classes,
    crater_ids=crater_ids,
    min_crater_area=config.get("min_crater_area"),
    threshold=0.5,
    plot_cm=False,
)
res_crater = res_crater.drop(columns=0)
res_crater.columns = [*crater_classes, "craters"]


In [15]:
# Display the pixel-level accuracy table for both study areas combined.
res_pixel

Unnamed: 0,background,pattern,rim,group,crescent,bowl,boundary,craters
Precision,0.998,0.613,0.599,0.446,0.295,0.357,0.435,0.697
Recall,0.998,0.614,0.555,0.071,0.342,0.185,0.431,0.569
F1-Score,0.998,0.614,0.576,0.122,0.317,0.244,0.433,0.626
N,52109340.0,53206.0,73001.0,22127.0,14522.0,28024.0,128577.0,190880.0


In [16]:
# Display the crater-level accuracy table for both study areas combined.
res_crater

Unnamed: 0,pattern,rim,group,crescent,bowl,craters
Precision,0.667,0.58,0.333,0.247,0.291,0.67
Recall,0.706,0.491,0.038,0.32,0.143,0.561
F1-Score,0.686,0.532,0.068,0.279,0.192,0.611
N,890.0,503.0,262.0,147.0,224.0,2026.0
