# Imports

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

import copy
import functools
import gc
import itertools
import logging
import operator
import os
import pathlib
import re
import socket
import sys
import time
from collections import Counter
from dataclasses import asdict, dataclass, field
from enum import Enum
from functools import partial
from pathlib import Path
from pprint import PrettyPrinter, pprint
from typing import *

In [3]:
%autoreload 2

import humanize
import matplotlib
import numpy as np
import pandas as pd
import scipy as sp
import tensorflow as tf
import yaml
from matplotlib import cm, patches, pyplot as plt
from numpy import ndarray
from numpy.random import RandomState
from progressbar import progressbar as pbar
from pymicro.file import file_utils
from sklearn import metrics, metrics as met, model_selection, preprocessing
from skimage import measure as skimage_measure
import tabulate
from tensorflow import keras
from tensorflow.keras import (
    callbacks as keras_callbacks,
    layers,
    losses,
    metrics as keras_metrics,
    optimizers,
    utils,
)
from tqdm import tqdm
from yaml import YAMLObject

In [7]:
%autoreload 2

from tomo2seg import (
    analyse as tomo2seg_analyse,
    callbacks as tomo2seg_callbacks,
    data as tomo2seg_data,
    datasets as tomo2seg_datasets,
    hosts,
    losses as tomo2seg_losses,
    schedule as tomo2seg_schedule,
    slack,
    slackme,
    utils as tomo2seg_utils,
    viz as tomo2seg_viz,
    volume_sequence,
)
from tomo2seg.data import EstimationVolume, Volume
from tomo2seg.logger import add_file_handler, dict2str, logger
from tomo2seg.model import Model as Tomo2SegModel

In [5]:
# this registers a custom exception handler for the whole current notebook
get_ipython().set_custom_exc((Exception,), slackme.custom_exc)

In [13]:
logger.setLevel(logging.DEBUG)


# Host

In [6]:
this_host = hosts.hosts[socket.gethostname()]

# Args

In [12]:
# [manual-input]

@dataclass
class Args:
    this_nb_name: str
    volume_name: str
    volume_version: str
    partition: str  # its alias...
    
    random_state_seed: int = 42
    runid: int = field(default_factory=lambda: int(time.time()))

args = Args(
    this_nb_name = "compare-models-00.ipynb",
    volume_name=tomo2seg_datasets.VOLUME_COMPOSITE_V1[0],
    volume_version=tomo2seg_datasets.VOLUME_COMPOSITE_V1[1],
    partition="test",
)

In [14]:
logger.debug(f"args\n{dict2str(asdict(args))}")

DEBUG::tomo2seg::{<ipython-input-14-d1573204856a>:<module>:001}::[2020-12-16::23:18:13.451]
args
{   'partition': 'test',
    'random_state_seed': 42,
    'runid': 1608157068,
    'this_nb_name': 'compare-models-00.ipynb',
    'volume_name': 'PA66GF30',
    'volume_version': 'v1'}



# estimation volumes

In [22]:
volume_fullname = tomo2seg_data.Volume.name_pieces2fullname(name=args.volume_name, version=args.volume_version)

logger.debug(f"{volume_fullname=}")

DEBUG::tomo2seg::{<ipython-input-22-7c2dbe905f3c>:<module>:003}::[2020-12-16::23:32:42.834]
volume_fullname='PA66GF30.v1'



In [116]:
datadir_paths = [
    tomo2seg_data.data_dir / name
    for name in os.listdir(tomo2seg_data.data_dir)
]

estimation_volumes = []

for path in datadir_paths:
    
    try:
        ev = tomo2seg_data.EstimationVolume.from_fullname(path.name)
    
    except ValueError as ex:
        
        if "not an estimation volume" not in ex.args[0]:
            raise ex
            
        continue
    
    if ev.volume_fullname == volume_fullname and ev.partition.alias == args.partition:
        estimation_volumes.append(ev)
        
logger.info(f"{len(estimation_volumes)=}")

all_estimation_volumes = copy.deepcopy(estimation_volumes)

INFO::tomo2seg::{data.py:from_fullname:494}::[2020-12-17::09:21:12.905]
Creating volume object to get partition dimensions.

DEBUG::tomo2seg::{data.py:metadata:180}::[2020-12-17::09:21:12.908]
Loading metadata from `/home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.metadata.yml`.

INFO::tomo2seg::{data.py:from_fullname:494}::[2020-12-17::09:21:12.920]
Creating volume object to get partition dimensions.

DEBUG::tomo2seg::{data.py:metadata:180}::[2020-12-17::09:21:12.921]
Loading metadata from `/home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.metadata.yml`.

INFO::tomo2seg::{data.py:from_fullname:494}::[2020-12-17::09:21:12.931]
Creating volume object to get partition dimensions.

DEBUG::tomo2seg::{data.py:metadata:180}::[2020-12-17::09:21:12.932]
Loading metadata from `/home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.metadata.yml`.

ERROR::tomo2seg::{data.py:from_fullname:457}::[2020-12-17::09:21:12.942]
not enough values 

# models

In [36]:
for ev in sorted(estimation_volumes, key=lambda x: x.model_name):
    print(f'"{ev.model_name}",')

"unet2d-sep.vanilla03-f16.fold000.1606-575-226",
"unet2d.crop112-f16.fold000.1607-533-765",
"unet2d.crop48-f16.fold000.1607-530-580",
"unet2d.vanilla02-f08.fold000.1606-431-664",
"unet2d.vanilla02-f16.fold000.1606-461-820",
"unet2d.vanilla03-f16.fold000.1606-505-109",
"unet2halfd-sep.crop112-f16.fold000.1607-789-290",
"unet2halfd-sep.vanilla03-f16.fold000.1606-729-672",
"unet2halfd.crop112-f16.fold000.1607-788-628",
"unet2halfd.vanilla03-f16.fold000.1606-683-705",
"unet3d.crop112-f12.fold000.1607-466-349",
"unet3d.crop304-f16.fold000.1607-790-699",
"unet3d.crop96-f08.fold000.1607-109-265",
"unet3d.crop96-f08.fold000.1607-109-265",
"unet3d.vanilla03-f08.fold000.1606-842-005",
"unet3d.vanilla03-f16.fold000.1606-750-939",


In [117]:
# [manual-input]
models_to_compare = [
    "unet2d-sep.vanilla03-f16.fold000.1606-575-226",
    "unet2d.vanilla03-f16.fold000.1606-505-109",
    "unet2halfd-sep.vanilla03-f16.fold000.1606-729-672",
    "unet2halfd.vanilla03-f16.fold000.1606-683-705",
    "unet3d.vanilla03-f08.fold000.1606-842-005",
    "unet3d.vanilla03-f16.fold000.1606-750-939",
]

estimation_volumes = [
    ev
    for ev in estimation_volumes 
    if ev.model_name in models_to_compare
]

In [40]:
len(estimation_volumes)

6

In [89]:
def get_nparams(model_name):
    t2s_model = Tomo2SegModel.build_from_model_name(ev.model_name)
    summary = t2s_model.summary_path.read_text()
    trainable_params_line = summary.split("\n")[-4]
    trainable_params_str = trainable_params_line.split(" ")[-1]
    return int("".join(trainable_params_str.split(",")))

In [111]:
def get_records(estimation_volumes, metrics=("jaccard", "f1", )):
    
    records = []

    metric_cols = {}

    for ev in estimation_volumes:

        record = {}

        record["model"] = ev.model_name
        record["nparams"] = get_nparams(ev.model_name)

        ev_classif_report = pd.read_csv(ev.classification_report_table_exact_csv_path).set_index("class/average")

        for m in metrics:

            mcols = metric_cols[m] = []

            for row in ev_classif_report.index:
                col = f"{m}.{row}"
                mcols.append(col)
                record[col] = ev_classif_report.loc[row][m]

        records.append(record)
        
    return records
    

In [112]:
df = pd.DataFrame.from_records(get_records(estimation_volumes)).set_index("model")    

In [113]:
df.sort_index()

Unnamed: 0_level_0,nparams,jaccard.matrix,jaccard.fiber,jaccard.porosity,jaccard.macro,jaccard.micro,f1.matrix,f1.fiber,f1.porosity,f1.macro,f1.micro
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
unet2d-sep.vanilla03-f16.fold000.1606-575-226,13340067,0.987656,0.952714,0.660743,0.867037,,0.99379,0.975784,0.79572,0.921765,0.989755
unet2d.vanilla03-f16.fold000.1606-505-109,13340067,0.988335,0.955137,0.686747,0.876739,,0.994133,0.977054,0.814286,0.928491,0.990322
unet2halfd-sep.vanilla03-f16.fold000.1606-729-672,13340067,0.987035,0.9509,0.650163,0.862699,,0.993475,0.974832,0.787998,0.918768,0.989245
unet2halfd.vanilla03-f16.fold000.1606-683-705,13340067,0.98791,0.953669,0.669623,0.870401,,0.993918,0.976285,0.802125,0.92411,0.989969
unet3d.vanilla03-f08.fold000.1606-842-005,13340067,0.986341,0.949016,0.586432,0.840596,,0.993123,0.973841,0.73931,0.902091,0.988651
unet3d.vanilla03-f16.fold000.1606-750-939,13340067,0.986356,0.949471,0.591205,0.842344,,0.993131,0.974081,0.743091,0.903434,0.988669


In [114]:
df.to_csv(sys.stdout)

model,nparams,jaccard.matrix,jaccard.fiber,jaccard.porosity,jaccard.macro,jaccard.micro,f1.matrix,f1.fiber,f1.porosity,f1.macro,f1.micro
unet2halfd-sep.vanilla03-f16.fold000.1606-729-672,13340067,0.9870350222354484,0.950899642588346,0.6501625971163105,0.8626990873133683,,0.9934752142667492,0.9748319409467366,0.7879982230266055,0.9187684594133638,0.989244820019724
unet3d.vanilla03-f16.fold000.1606-750-939,13340067,0.9863557720260464,0.9494708327165444,0.5912049598174871,0.8423438548533593,,0.9931310250831668,0.974080572822398,0.7430908962039672,0.9034341647031772,0.9886687475345168
unet3d.vanilla03-f08.fold000.1606-842-005,13340067,0.986340535475514,0.9490160010922576,0.5864322167416625,0.8405962511031447,,0.9931233017297229,0.9738411593957308,0.7393095154687699,0.902091325531408,0.9886514990138068
unet2d-sep.vanilla03-f16.fold000.1606-575-226,13340067,0.9876558877412529,0.9527135905417172,0.660742905588907,0.8670374612906256,,0.9937896130135612,0.975784257513584,0.7957196786634528,0.92

In [115]:
df.sort_values("jaccard.macro")[metric_cols["jaccard"]]

Unnamed: 0_level_0,jaccard.matrix,jaccard.fiber,jaccard.porosity,jaccard.macro,jaccard.micro
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
unet3d.vanilla03-f08.fold000.1606-842-005,0.986341,0.949016,0.586432,0.840596,
unet3d.vanilla03-f16.fold000.1606-750-939,0.986356,0.949471,0.591205,0.842344,
unet2halfd-sep.vanilla03-f16.fold000.1606-729-672,0.987035,0.9509,0.650163,0.862699,
unet2d-sep.vanilla03-f16.fold000.1606-575-226,0.987656,0.952714,0.660743,0.867037,
unet2halfd.vanilla03-f16.fold000.1606-683-705,0.98791,0.953669,0.669623,0.870401,
unet2d.vanilla03-f16.fold000.1606-505-109,0.988335,0.955137,0.686747,0.876739,


In [95]:
df.sort_values("f1.macro")[metric_cols["f1"]]

Unnamed: 0_level_0,f1.matrix,f1.fiber,f1.porosity,f1.macro,f1.micro
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
unet3d.vanilla03-f08.fold000.1606-842-005,0.993123,0.973841,0.73931,0.902091,0.988651
unet3d.vanilla03-f16.fold000.1606-750-939,0.993131,0.974081,0.743091,0.903434,0.988669
unet2halfd-sep.vanilla03-f16.fold000.1606-729-672,0.993475,0.974832,0.787998,0.918768,0.989245
unet2d-sep.vanilla03-f16.fold000.1606-575-226,0.99379,0.975784,0.79572,0.921765,0.989755
unet2halfd.vanilla03-f16.fold000.1606-683-705,0.993918,0.976285,0.802125,0.92411,0.989969
unet2d.vanilla03-f16.fold000.1606-505-109,0.994133,0.977054,0.814286,0.928491,0.990322


# different crop sizes

In [119]:
# [manual-input]
models_to_compare = [
    "unet2d.crop48-f16.fold000.1607-530-580",
    "unet2d.crop112-f16.fold000.1607-533-765",
    "unet2d.vanilla03-f16.fold000.1606-505-109",

    "unet2halfd-sep.crop112-f16.fold000.1607-789-290",
    "unet2halfd-sep.vanilla03-f16.fold000.1606-729-672",

    "unet2halfd.crop112-f16.fold000.1607-788-628",
    "unet2halfd.vanilla03-f16.fold000.1606-683-705",
    
    "unet3d.crop96-f08.fold000.1607-109-265",
    "unet3d.crop112-f12.fold000.1607-466-349",
    "unet3d.crop304-f16.fold000.1607-790-699",
    "unet3d.vanilla03-f08.fold000.1606-842-005",
    "unet3d.vanilla03-f16.fold000.1606-750-939",
]

estimation_volumes_crops = [
    ev
    for ev in all_estimation_volumes 
    if ev.model_name in models_to_compare
] 

df_crops = pd.DataFrame.from_records(get_records(estimation_volumes_crops)).set_index("model")    

In [124]:
df_crops.sort_index().loc[[
    "unet3d.crop304-f16.fold000.1607-790-699",
    "unet3d.vanilla03-f16.fold000.1606-750-939",
    "unet2d.vanilla03-f16.fold000.1606-505-109",
]]

Unnamed: 0_level_0,nparams,jaccard.matrix,jaccard.fiber,jaccard.porosity,jaccard.macro,jaccard.micro,f1.matrix,f1.fiber,f1.porosity,f1.macro,f1.micro
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
unet3d.crop304-f16.fold000.1607-790-699,13340067,0.98764,0.952945,0.658634,0.866406,,0.993782,0.975906,0.794188,0.921292,0.989741
unet3d.vanilla03-f16.fold000.1606-750-939,13340067,0.986356,0.949471,0.591205,0.842344,,0.993131,0.974081,0.743091,0.903434,0.988669
unet2d.vanilla03-f16.fold000.1606-505-109,13340067,0.988335,0.955137,0.686747,0.876739,,0.994133,0.977054,0.814286,0.928491,0.990322


In [123]:
df_crops.to_csv(sys.stdout)

model,nparams,jaccard.matrix,jaccard.fiber,jaccard.porosity,jaccard.macro,jaccard.micro,f1.matrix,f1.fiber,f1.porosity,f1.macro,f1.micro
unet2d.crop112-f16.fold000.1607-533-765,13340067,0.9876047829580394,0.9527990281306836,0.6509558367538262,0.8637865492808497,,0.9937637415907636,0.9758290683324952,0.7885805571077673,0.9193911223436754,0.9897086242603549
unet2halfd-sep.vanilla03-f16.fold000.1606-729-672,13340067,0.9870350222354484,0.950899642588346,0.6501625971163105,0.8626990873133683,,0.9934752142667492,0.9748319409467366,0.7879982230266055,0.9187684594133638,0.989244820019724
unet3d.crop304-f16.fold000.1607-790-699,13340067,0.987640285337652,0.9529449198629022,0.658633673842883,0.866406293014479,,0.9937817145519124,0.975905577439224,0.7941882336403997,0.9212918418771786,0.9897406410256412
unet3d.vanilla03-f16.fold000.1606-750-939,13340067,0.9863557720260464,0.9494708327165444,0.5912049598174871,0.8423438548533593,,0.9931310250831668,0.974080572822398,0.7430908962039672,0.9034341647

# training histories

In [109]:
def get_model_history(model_name):
    t2s_model = Tomo2SegModel.build_from_model_name(model_name)
    return pd.read_csv(t2s_model.history_path).set_index("epoch")

In [110]:
get_model_history(ev.model_name)

Unnamed: 0_level_0,loss,val_loss,lr,train.batch_size,train.epoch_size,train.crop_shape,val.batch_size,val.epoch_size,val.crop_shape,seconds
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.353225,0.688173,0.000100,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",395.463511
1,0.147847,0.670125,0.000129,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",64.422751
2,0.077742,0.627553,0.000167,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",64.768077
3,0.048210,0.553276,0.000215,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",64.652122
4,0.038719,0.459207,0.000278,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",65.125528
...,...,...,...,...,...,...,...,...,...,...
195,0.014503,0.016770,0.000100,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",60.588666
196,0.014672,0.016732,0.000100,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",61.497682
197,0.014722,0.016769,0.000100,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",61.133355
198,0.015059,0.016695,0.000100,16,10,"(144, 144, 5)",16,1200,"(144, 144, 5)",65.960171
