In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. the project's `src` package) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [79]:
import sys
sys.path.append("../..")
from pathlib import Path

import pandas as pd
import numpy as np
import seaborn as sns
import torch.nn.functional as F
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from src.data import label_to_string, string_to_label, DataPaths
from src.image import get_image_with_id, get_image_from_class, plot_rgby

##### Load the training-log result files

In [8]:
# Directory holding the per-epoch training-log CSVs of the run under analysis.
# Alternative run directory (disabled):
# ROOT_PATH = Path("results/gapnet_resnet34_20181212-140515/training_logs")
ROOT_PATH = Path("results/gapnet_resnet34_20181213-193008/training_logs")

In [9]:
# Collect the log CSVs ordered by epoch number. The epoch is the second
# "_"-separated token of the file stem (e.g. "epoch_0_train" -> 0).
# The stem itself is the secondary sort key so that the order of files
# belonging to the same epoch does not depend on the order glob yields them.
csv_paths = sorted(
    ROOT_PATH.glob("*.csv"),
    key=lambda path: (int(path.stem.split("_")[1]), path.stem),
)

In [10]:
# Show which log files were discovered.
csv_paths

[PosixPath('results/gapnet_resnet34_20181213-193008/training_logs/epoch_0_train.csv'),
 PosixPath('results/gapnet_resnet34_20181213-193008/training_logs/epoch_0_val.csv')]

In [11]:
def string_to_num(x):
    """Parse the bracketed string form of a 1-D numeric array into an ndarray.

    Args:
        x: Text such as ``"[0. 1. 0.]"``; values are whitespace separated and
            may be wrapped across several lines.

    Returns:
        A 1-D float ``numpy.ndarray`` with the parsed values.
    """
    # Line breaks are turned into spaces (not deleted) so that two numbers
    # separated only by a newline are not fused into a single token.
    return np.fromstring(x.strip("[]").replace("\n", " "), sep=" ")
def load_combined_df(paths):
    """Read every log CSV in ``paths`` and return them as one DataFrame.

    For each file:
      * ``session`` is set to the file stem;
      * ``label``, ``prediction`` and ``prediction_probs`` are parsed from
        their string form with ``string_to_num``;
      * ``label_class`` / ``prediction_class`` hold the indices whose value
        equals 1 in ``label`` / ``prediction``;
      * ``label_names`` / ``prediction_names`` hold those indices looked up
        in ``label_to_string``.

    The frames are concatenated as-is, so each file's own row index is kept.
    """
    def _positive_indices(vector):
        # Positions of the entries that are equal to 1.
        return np.where(vector == 1)[0]

    def _names_for(indices):
        return [label_to_string[idx] for idx in indices]

    frames = []
    for csv_path in paths:
        frame = pd.read_csv(csv_path)
        frame['session'] = csv_path.stem

        # Ground-truth columns.
        frame['label'] = frame['label'].map(string_to_num)
        frame['label_class'] = frame['label'].map(_positive_indices)
        frame['label_names'] = frame['label_class'].map(_names_for)

        # Model-output columns.
        frame['prediction'] = frame['prediction'].map(string_to_num)
        frame['prediction_probs'] = frame['prediction_probs'].map(string_to_num)
        frame['prediction_class'] = frame['prediction'].map(_positive_indices)
        frame['prediction_names'] = frame['prediction_class'].map(_names_for)

        frames.append(frame)

    return pd.concat(frames)

In [12]:
# Parse all discovered log CSVs into a single frame; the `session` column
# records which file each row came from.
combined_df = load_combined_df(csv_paths)

In [29]:
# Rows of the 'epoch_0_val' session, ordered by ascending FocalLoss.
epoch_0_val_df = (
    combined_df
    .loc[combined_df['session'] == 'epoch_0_val']
    .sort_values(by='FocalLoss', ascending=True)
)

In [35]:
# Summary statistics of the numeric columns (the per-sample loss values).
epoch_0_val_df.describe()

Unnamed: 0,FocalLoss,SoftF1Loss
count,10553.0,10553.0
mean,3.433499,0.564987
std,2.650196,0.249131
min,0.020953,0.010307
25%,1.348639,0.37157
50%,2.840354,0.550553
75%,4.883572,0.77173
max,22.056818,0.999961


In [89]:
from sklearn.metrics import f1_score, precision_recall_fscore_support, precision_score

In [109]:
# Stack the per-row label / prediction vectors into 2-D arrays
# (one row per sample) for column-wise scoring.
all_labels, all_predictions = (
    np.stack(epoch_0_val_df[column].values)
    for column in ("label", "prediction")
)

In [139]:
# Per-class precision / recall / F1 for the epoch-0 validation predictions.
# Each column of the stacked label / prediction matrices is scored as an
# independent binary problem and only the positive-class entry is kept.
f1_score_df_data = []
for class_idx in range(all_labels.shape[1]):
    # labels=[0, 1] pins every returned array to exactly two entries
    # (negative, positive), so the unpacking below cannot fail when a class
    # has no positive samples or predictions in this split.
    # The loop variables deliberately avoid the name `f1_score`, which would
    # overwrite the sklearn function imported under that name.
    (_, class_precision), (_, class_recall), (_, class_f1), (_, class_support) = (
        precision_recall_fscore_support(
            all_labels[:, class_idx],
            all_predictions[:, class_idx],
            labels=[0, 1],
        )
    )
    f1_score_df_data.append({
        "precision": class_precision,
        "recall": class_recall,
        "f1_score": class_f1,
        "n_labels": class_support,  # number of true positive-class samples
        "class_name": label_to_string[class_idx],
    })

In [148]:
# Build the per-class metrics table with a fixed column order.
# `columns=` on the constructor selects and orders the dict keys directly;
# it replaces `DataFrame.reindex_axis`, which was deprecated and is no longer
# available in current pandas releases.
f1_score_df = pd.DataFrame(
    f1_score_df_data,
    columns=['class_name', 'n_labels', 'precision', 'recall', 'f1_score'],
)

In [149]:
# Display the per-class metrics table.
f1_score_df

Unnamed: 0,class_name,n_labels,precision,recall,f1_score
0,Nucleoplasm,4096,0.73999,0.730957,0.735446
1,Nuclear membrane,307,0.825243,0.276873,0.414634
2,Nucleoli,1087,0.721284,0.392824,0.508636
3,Nucleoli fibrillar center,333,0.5,0.045045,0.082645
4,Nuclear speckles,513,0.520492,0.495127,0.507493
5,Nuclear bodies,594,0.418605,0.030303,0.056515
6,Endoplasmic reticulum,373,0.5,0.018767,0.036176
7,Golgi apparatus,941,0.725557,0.449522,0.555118
8,Peroxisomes,22,0.0,0.0,0.0
9,Endosomes,20,0.0,0.0,0.0
