# Analyze Classification Results

This notebook is meant to be run after evaluating a trained classifier model. Copy this notebook to the specific directory where the classifier model logs were saved:

```
CameraTraps/
    classification/
        BASE_LOGDIR/
            classification_ds.csv
            label_index.json
            LOGDIR/
                analyze_classification_results.ipynb  # COPY THIS NOTEBOOK TO HERE

                # files created by train_classifier.py
                ckpt_XX.pt
                events.out.tfevents...
                params.json

                # files created by evaluate_classifier.py
                confusion_matrices.npz
                label_stats.csv
                outputs_{split}.csv.gz
                overall_metrics.csv
```

In [None]:
# Load the autoreload extension and use mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Print the working directory: the cells below read files via relative paths
# (e.g. '../label_index.json'), so the notebook must be run from LOGDIR.
!pwd

## Imports and Constants

In [None]:
import os
import json
from typing import Optional, Sequence

from IPython.display import Image
import matplotlib.figure
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
import sklearn.metrics
from tqdm.auto import tqdm

from visualization import plot_utils


# Dataset splits to look for. Cells below skip any split that is missing from
# confusion_matrices.npz or that has no outputs_{split}.csv.gz file.
SPLITS = ['train', 'val', 'test']

## Plot Confusion Matrices

In [None]:
# label_index.json is keyed by the stringified class index ("0", "1", ...);
# build the list of label names ordered by that index.
with open('../label_index.json', 'r') as f:
    idx_to_label = json.load(f)
label_names = [
    idx_to_label[str(class_idx)] for class_idx in range(len(idx_to_label))
]

# in the case of analyzing MegaClassifier's outputs,
# we may need to add an extra 'other' category
# label_names_no_other = list(label_names)
# label_names.append('other')

# one confusion matrix per split, stored under the split's name
cms = np.load('confusion_matrices.npz')
for split in SPLITS:
    if split in cms:
        print(split)
        fig = plot_utils.plot_confusion_matrix(
            cms[split],
            classes=label_names,
            normalize=True,
            fmt='{:.1f}')
        fig.set_facecolor('white')
        display(fig)
    else:
        print(f'Split {split} not found in confusion matrices npz file.')

## Load model outputs

In [None]:
# Map each available split to its model-output dataframe. A 'pred' column is
# added holding the name of the label column with the highest score per row.
output_df = {}
for split in SPLITS:
    outputs_path = f'outputs_{split}.csv.gz'
    if not os.path.exists(outputs_path):
        # this split was not evaluated; leave it out of output_df
        continue
    output_df[split] = pd.read_csv(outputs_path)
    split_scores = output_df[split][label_names]
    output_df[split]['pred'] = split_scores.idxmax(axis=1)

In [None]:
# Per-label statistics, indexed by (split, label).
label_stats = pd.read_csv('label_stats.csv').set_index(['split', 'label'])

# Append a 'mean' row to each loaded split: the column-wise mean over that
# split's labels.
for split in output_df:
    split_mean = label_stats.loc[split, :].mean()
    label_stats.loc[(split, 'mean'), :] = split_mean

    # for MegaClassifier, we might want to take the mean excluding the "other" category
    # label_stats.loc[(split, 'mean (excluding other)'), :] = (
    #     label_stats.loc[(split, label_names_no_other), :].mean()
    # )

# pivot the 'split' index level into columns so splits sit side by side
display(label_stats.unstack('split'))

In [None]:
# Test-split label statistics alongside the number of test examples per
# ground-truth label. Rows without a matching label in the test outputs
# (e.g. the 'mean' row) get NaN for 'count'.
test_label_counts = output_df['test'].groupby('label').size()
test_perf = label_stats.loc['test', :].copy()
test_perf['count'] = test_label_counts
display(test_perf)

## Plot classifier calibration on test set

In [None]:
# One calibration subplot per label, laid out row by row in a 3-column grid.
ncols = 3
nrows = (len(label_names) + ncols - 1) // ncols  # ceil(len / ncols)
fig = matplotlib.figure.Figure(
    figsize=(ncols * 5, nrows * 5), tight_layout=True, facecolor='white')
axs = fig.subplots(nrows, ncols, squeeze=False)

test_outputs = output_df['test']
for ax, label_name in zip(axs.flat, label_names):
    # Restrict to test examples *predicted* as this label: true_scores is
    # whether the prediction was correct, pred_scores is the score assigned
    # to the predicted label.
    mask = test_outputs['pred'] == label_name
    predicted_as_label = test_outputs.loc[mask]
    plot_utils.plot_calibration_curve(
        true_scores=(predicted_as_label['label'] == predicted_as_label['pred']),
        pred_scores=predicted_as_label[label_name],
        num_bins=20, name=label_name, ax=ax)
    ax.legend()

# hide unused axes
for ax in axs.flat[len(label_names):]:
    ax.set_axis_off()

display(fig)

Plot the confusion matrix for the test set, keeping only predictions whose confidence is at least 0.99.

In [None]:
# Minimum confidence a prediction needs to be included in this confusion matrix.
CONF_THRESHOLD = 0.99

# NOTE: test_df aliases output_df['test'], so the 'pred_conf' column added
# below is also visible through output_df['test'].
test_df = output_df['test']

# Confidence of each prediction = the score stored in the column named by that
# row's 'pred' value. DataFrame.lookup() was deprecated in pandas 1.2 and
# removed in pandas 2.0, so use the factorize + reindex + NumPy indexing
# replacement instead. This is positional over rows, so unlike the old
# lookup(row_labels=range(len(test_df)), ...) call it does not require the
# dataframe to have a default 0..n-1 index.
pred_codes, pred_cols = pd.factorize(test_df['pred'])
score_matrix = test_df.reindex(columns=pred_cols).to_numpy()
test_df['pred_conf'] = score_matrix[np.arange(len(test_df)), pred_codes]

# Keep only confident predictions; rows below the threshold are dropped
# entirely rather than being assigned to some fallback class.
mask = test_df['pred_conf'] >= CONF_THRESHOLD
cm = sklearn.metrics.confusion_matrix(
    y_true=test_df.loc[mask, 'label'],
    y_pred=test_df.loc[mask, 'pred'],
    labels=label_names)

fig = plot_utils.plot_confusion_matrix(cm, classes=label_names, normalize=True, fmt='{:.1f}')
fig.set_facecolor('white')
display(fig)

## Plot images

In [None]:
def plot_img_grid(paths: Sequence[str], ncols: int, size: float) -> matplotlib.figure.Figure:
    """Plot a grid of square images.

    Images fill the grid row by row. Grid cells past the last image are left
    blank. All axes are hidden and there is no spacing between cells.

    Args:
        paths: list of str, paths to image crops
        ncols: int, number of columns for output figure
        size: float, size of each row/column in units of 1/DPI inch (DPI is
            the constant defined below), i.e. each grid cell is size / DPI
            inches wide and tall

    Returns: matplotlib Figure

    Raises:
        ValueError: if paths is empty
    """
    # NOTE(review): DPI only converts `size` to inches; the Figure itself is
    # not created with dpi=DPI, so the on-screen pixel size of each cell
    # depends on matplotlib's default figure dpi. Confirm this is intended.
    DPI = 113
    if len(paths) == 0:
        # fail with a clear message instead of an opaque matplotlib error
        # about a grid with 0 rows
        raise ValueError('paths must contain at least one image path')

    nrows = int(np.ceil(len(paths) / ncols))
    fig = matplotlib.figure.Figure(figsize=(ncols * size / DPI, nrows * size / DPI))
    axs = fig.subplots(nrows, ncols, squeeze=False)

    # Wrap the sized sequence (not the enumerate iterator) so that tqdm knows
    # the total and can show a real progress bar.
    for i, path in enumerate(tqdm(paths)):
        r, c = i // ncols, i % ncols
        ax = axs[r, c]
        img = mpimg.imread(path)
        ax.imshow(img)

    # apply to every cell, including the blank ones past the last image
    for r in range(nrows):
        for c in range(ncols):
            axs[r, c].set_axis_off()
            axs[r, c].set_aspect('equal')
    fig.subplots_adjust(wspace=0, hspace=0)
    return fig

def plot_images_groupby_pred(df: pd.DataFrame, ncols: int, size: float,
                             cropped_images_dir: str = '/ssd/crops_sq',
                             count: Optional[int] = None,
                             random_state: Optional[int] = None) -> None:
    """Creates one figure for each prediction.

    Groups the rows of df by their 'pred' column. For each group, prints the
    predicted label and displays a grid of that group's image crops.

    Args:
        df: pd.DataFrame, classifier output dataframe, all examples belong
            to the same label; must have 'pred' and 'path' columns
        ncols: int, number of columns for output figure
        size: float, size of each row/column, forwarded unchanged to
            plot_img_grid as its `size` argument
        cropped_images_dir: str, path to cropped images; each value in the
            'path' column is joined onto this directory
        count: optional int, limit on number of images to show for each
            prediction; larger groups are randomly sampled down to this size
        random_state: optional int, seed for the random sampling, so the same
            images are shown on every run; None (default) samples differently
            each time
    """
    # NOTE(review): the default cropped_images_dir is a machine-specific
    # absolute path; callers on other machines need to pass their own.
    for name, pred_df in df.groupby('pred'):
        print(name)
        if count is not None and len(pred_df) > count:
            print(f'Original count: {len(pred_df)}. Sampling {count}')
            pred_df = pred_df.sample(count, random_state=random_state)
        image_crop_paths = pred_df['path'].map(lambda x: os.path.join(cropped_images_dir, x))
        fig = plot_img_grid(image_crop_paths, ncols=ncols, size=size)
        display(fig)

In [None]:
# plot all pronghorn images from test set, and show top predicted label
label = 'pronghorn'
df = output_df['test']
plot_images_groupby_pred(df.loc[df['label'] == label], ncols=8, size=224, count=40)