# Model Metrics workflow notebook

In [None]:
import sys, os
from pathlib import Path
from pyprojroot import here
# Spider up the directory tree to find the project root (the folder holding ".here").
root = here(project_files=[".here"])
# Put the project root on sys.path so the `src.*` imports in later cells resolve.
# Reuse the `root` found above instead of a second here() lookup with different
# search criteria, and skip the append when the entry is already present so that
# re-running this cell does not pile up duplicate sys.path entries.
if str(root) not in sys.path:
    sys.path.append(str(root))

# %load_ext autoreload
# %autoreload 2

## Step 1: Get config

In [2]:
from src.models.config_setup import get_default_config

# Load the template experiment configuration that lives inside the repository.
config_fp = os.path.join(
    root, "src", "models", "configurations", "worldfloods_template.json"
)
config = get_default_config(config_fp)

# Provisional weights location for the "linear" model. Both names are assigned
# again from the loaded config in the "Load a pre-trained model" section.
model_type = "linear"
path_to_models = (
    f"gs://ml4cc_data_lake/0_DEV/2_Mart/2_MLModelMart/worldfloods-demo/{model_type}/model.pt"
)

# Show the data-loading section of the configuration.
config["data_params"]

Loaded Config for experiment:  worldfloods_demo_test
{   'data_params': {   'bands': 'all',
                       'batch_size': 32,
                       'bucket_id': 'ml4floods',
                       'image_count': 3,
                       'input_folder': 'S2',
                       'loader_type': 'local',
                       'path_to_splits': 'worldfloods/public',
                       'target_folder': 'gt',
                       'test_transformation': {   'normalize': True,
                                                  'num_classes': 3,
                                                  'totensor': True,
                                                  'use_channels': 'all'},
                       'train_transformation': {   'normalize': True,
                                                   'num_classes': 3,
                                                   'totensor': True,
                                                   'use_channels': 'all'},
              

{'loader_type': 'local',
 'image_count': 3,
 'bucket_id': 'ml4floods',
 'path_to_splits': 'worldfloods/public',
 'input_folder': 'S2',
 'target_folder': 'gt',
 'batch_size': 32,
 'window_size': [256, 256],
 'bands': 'all',
 'train_transformation': {'num_classes': 3,
  'totensor': True,
  'use_channels': 'all',
  'normalize': True},
 'test_transformation': {'num_classes': 3,
  'totensor': True,
  'use_channels': 'all',
  'normalize': True}}

In [3]:
# Re-display the data parameters loaded above. At this point they are still the
# template values (e.g. loader_type is 'local'); the bucket overrides are only
# applied in the next section.
config["data_params"]

{'loader_type': 'local',
 'image_count': 3,
 'bucket_id': 'ml4floods',
 'path_to_splits': 'worldfloods/public',
 'input_folder': 'S2',
 'target_folder': 'gt',
 'batch_size': 32,
 'window_size': [256, 256],
 'bands': 'all',
 'train_transformation': {'num_classes': 3,
  'totensor': True,
  'use_channels': 'all',
  'normalize': True},
 'test_transformation': {'num_classes': 3,
  'totensor': True,
  'use_channels': 'all',
  'normalize': True}}

## Step 2: Load dataloader

In [4]:
from src.models import dataset_setup

# Switch the data parameters from the template's local loader to reading
# directly from the Google bucket.
bucket_overrides = {
    "loader_type": "bucket",
    "bucket_id": "ml4floods",
    "path_to_splits": "worldfloods/public/",
}
for param_name, param_value in bucket_overrides.items():
    config["data_params"][param_name] = param_value

# train_transform, test_transform = dataset_setup.get_transformations(config["data_params"])

In [5]:
# Build the dataset object described by config["data_params"]. The call itself
# prints which storage backend is used and the tile count per split.
# `test_dataloader()` is called on the result further down.
data_module = dataset_setup.get_dataset(config["data_params"])

Using remote bucket storate dataset for this run
train 196648  tiles
val 1284  tiles
test 11  tiles


In [11]:
# Dataloader for the test split; this is the iterable handed to
# metrics.compute_metrics in the evaluation cell.
dl = data_module.test_dataloader()

In [7]:
# from src.data.worldfloods.dataset import WorldFloodsDataset
# from src.data.utils import get_files_in_bucket_directory
# import torch

# data_params = config["data_params"]
# bucket_id = data_params["bucket_id"]
# input_folder = data_params["input_folder"]
# path_to_splits = data_params["path_to_splits"]
# test_dir = os.path.join(path_to_splits,"test",input_folder)

# test_files = get_files_in_bucket_directory(
#     bucket_id, test_dir, ".tif"
# )

# test_files = [f"gs://{bucket_id}/{x}" for x in test_files]

# test_dataset = WorldFloodsDataset(image_files=test_files,
#                                   image_prefix=input_folder,
#                                   gt_prefix=data_params['target_folder'],
#                                   lock_read=True,
#                                   transforms=test_transform)

# print(f"The dataset has: {len(test_dataset)} images")
# dl = torch.utils.data.DataLoader(test_dataset, batch_size=1,num_workers=0)

## Step 3: Load a pre-trained model

In [8]:
# Take the model type from the loaded config instead of a hard-coded value and
# rebuild the weights path from it. This overwrites the `model_type` and
# `path_to_models` values assigned in the "Get config" cell.
model_type = config.model_params.hyperparameters.model_type
print('Model type: ', model_type)
path_to_models = f"gs://ml4cc_data_lake/0_DEV/2_Mart/2_MLModelMart/worldfloods-demo/{model_type}/model.pt"

Model type:  linear


In [9]:
from pytorch_lightning.utilities.cloud_io import load
from src.models.model_setup import get_model

# Instantiate the architecture described by the config, then fill it with the
# pre-trained weights stored at `path_to_models`.
model = get_model(config.model_params)
# map_location="cpu" makes deserialization independent of the device the
# checkpoint was saved from, so the notebook also runs on hosts without CUDA.
# load_state_dict copies the values into the model's existing parameters, so
# the resulting model is the same as without the argument.
# NOTE(review): whether the stored checkpoint actually contains GPU tensors is
# not visible from this notebook.
model.load_state_dict(load(path_to_models, map_location="cpu"))

13 3


<All keys matched successfully>

In [10]:
from src.models.model_setup import get_model_inference_function

# Wrap the loaded model in the callable that the metrics code uses for prediction.
# NOTE(review): normalization is switched off here, presumably because the test
# transformation in the config already has 'normalize': True. Confirm.
inference_function = get_model_inference_function(
    model,
    config,
    apply_normalization=False,
)

In [None]:
import torch
import numpy as np
import pandas as pd
from src.models.utils import metrics

# Limit torch to a single thread; otherwise reading the test dataset from the
# remote bucket fails.
torch.set_num_threads(1)

# Shorthand for the hyperparameter section used several times below.
hyperparameters = config.model_params.hyperparameters
# Water thresholds swept during evaluation: 0.00, 0.05, ..., 0.95.
water_thresholds = np.arange(0, 1, .05)

mets = metrics.compute_metrics(
    dl,
    inference_function,
    hyperparameters.num_classes,
    label_names=hyperparameters.label_names,
    thresholds_water=water_thresholds,
    plot=False,
)
metrics.plot_metrics(mets, hyperparameters.label_names)

 55%|█████▍    | 6/11 [02:05<03:01, 36.23s/it]

In [None]:
# Pass the confusion matrices collected in `mets` to calculate_iou together
# with the label names from the config.
# NOTE(review): transpose(0,1,2) is the identity permutation for a 3-D array, so
# it leaves the axis order unchanged (and raises if the array is not 3-D).
# Confirm whether a different axis order was intended, or drop the call.
metrics.calculate_iou(np.array(mets["confusions"]).transpose(0,1,2),
                      config.model_params.hyperparameters.label_names)

In [None]:
# mets = metrics.compute_metrics(dl, inference_func2, opt2.num_class, label_names, thresholds_water=np.arange(0, 1, .05), plot=True)
# metrics.plot_metrics(mets, label_names)