In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import joblib
import torch
import tqdm
import numpy as np
import seaborn as sns

import torchmetrics


from data import KelpNCDataset, Channel
import shared
import torch_deeplabv3 as dlv3

In [None]:
# Dataset over the training images and their masks.
# NOTE(review): `ds` is not referenced again in the visible cells — confirm it is still needed.
ds = KelpNCDataset(
    img_nc_path="data_ncf/train_imgs_fe.nc",
    mask_nc_path="data_ncf/train_masks.ncf",
)

# get_dataset returns three items; only the last one is kept here
# (presumably the held-out test split — confirm against torch_deeplabv3).
_, _, ds_test = dlv3.get_dataset(
    use_channels=None,
    random_seed=shared.GLOBAL_SEED,
)

In [None]:
# Ground-truth masks of ds_test; later cells reduce them over the dims "i" and "j".
y_test = ds_test.masks
y_test

In [None]:
# The same masks as a torch tensor, for use with torchmetrics.
y_test_t = torch.from_numpy(y_test.to_numpy())
y_test_t.shape

In [None]:
# Per-sample fraction of mask pixels that are set: the sum over the two
# spatial dims ("i", "j") divided by the number of pixels per sample.
_, h, w = y_test.shape
y_test_kf = (y_test.sum(dim=("i", "j")) / (h * w)).compute()
y_test_kf

In [None]:
# Sample positions ordered by descending kelp fraction (argsort of the negated values).
np.argsort(-y_test_kf)

In [None]:
# Load the stored 3-tuple: scores, predictions and the channels used.
# Presumably one entry per ensemble member along the first axis — confirm against the script that wrote the file.
# NOTE(review): joblib.load unpickles the file; only load artifacts from a trusted source.
scores, y_hat_aa, used_ch  = joblib.load("ens_dlv3/dev/pred_dlv3_test.joblib")
y_hat_aa.shape

In [None]:
# Spot check: Dice between the prediction slice y_hat_aa[1, 0] and the first test mask
# (presumably model 1, sample 0 — confirm the axis order of y_hat_aa).
torchmetrics.functional.dice(y_hat_aa[1, 0], y_test_t[0])

In [None]:
# One row per entry in `scores`, one column per Channel member.
# A cell holds that row's score if the channel is listed in its used_ch entry, NaN otherwise.
score_matrix = np.full((len(scores), len(Channel)), np.nan)
for row, (model_score, model_channels) in enumerate(zip(scores, used_ch)):
    score_matrix[row, model_channels] = model_score

# Label the columns with the channel names and keep only the valid channels.
ch_scores = pd.DataFrame(
    score_matrix,
    columns=[member.name for member in Channel],
).iloc[:, shared.VALID_CHANNELS]
ch_scores

In [None]:
# Heatmap of the score-per-channel table (rows: ch_scores rows, columns: channels).
fig, ax = plt.subplots()
sns.heatmap(ch_scores, ax=ax)

In [None]:
# Distribution of the loaded scores.
plt.hist(scores)

In [None]:
# Average the predictions over the first axis; cast to float first so the mean is defined.
y_hat_aa_mean = y_hat_aa.float().mean(dim=0)
y_hat_aa_mean.shape

In [None]:
# Contour of the averaged prediction at index 50, drawn at level 1.
plt.contour(y_hat_aa_mean[50], origin="lower", levels=[1])

In [None]:
# Averaged prediction at index 49 shown as an image.
plt.imshow(y_hat_aa_mean[49], origin="lower")

In [None]:
# Overlay for one sample: outline of the true mask on top of the averaged prediction.
fig, ax = plt.subplots()
i = 41
ax.contour(y_test[i], levels=[0])  # true
ax.imshow(y_hat_aa_mean[i], origin="lower", cmap="pink_r")  # pred

In [None]:
# Load a second set of stored predictions (indexed per sample in the overlay cell that follows).
# NOTE(review): joblib.load unpickles the file; only load artifacts from a trusted source.
y_hat_clf = joblib.load("pred_clf_test_agg_aa.joblib")
y_hat_clf.shape

In [None]:
# Same overlay as for the averaged prediction, but using y_hat_clf for the same sample.
fig, ax = plt.subplots()
i = 41
ax.contour(y_test[i], levels=[0])  # true
ax.imshow(y_hat_clf[i], origin="lower", cmap="pink_r")  # pred

In [None]:
# Zero-based row position of our own entry (rank 38 on the leaderboard).
lb_me = 38 - 1

# Read the leaderboard and name its single column "score".
# header=None keeps the first line as data: with the read_csv defaults the
# first score would be consumed as the column name and lb["score"] would not exist.
lb = pd.read_csv("leaderboard.txt", header=None, names=["score"])

# Tolerate a textual header line if one has been added to the file:
# non-numeric entries become NaN and are dropped, then positions are renumbered from 0.
lb["score"] = pd.to_numeric(lb["score"], errors="coerce")
lb = lb.dropna(subset=["score"]).reset_index(drop=True)
lb

In [None]:
# Horizontal leaderboard chart with our own entry highlighted in red.
fig, ax = plt.subplots(figsize=(5, 8))

# Zero-based row positions of the ranks that get a tick label (own rank included).
lb_ticks = np.array([1, 5, 10, 25, lb_me + 1, 50, 100]) - 1

# Positions are negated so that rank 1 ends up at the top of the chart.
ax.barh(-lb.index, lb["score"], height=1, color="lightgrey")
ax.barh(-lb_me, lb.loc[lb_me, "score"], height=1, color="red")

# Score axis is labelled along the top edge instead of the bottom.
ax.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)
ax.set_yticks(-lb_ticks)
ax.set_yticklabels(lb_ticks + 1)
ax.set_xlabel("Dice score")
ax.set_ylabel("Rank")
ax.margins(0)

In [None]:
# Vertical variant of the leaderboard chart with our own entry highlighted in red.
fig, ax = plt.subplots()

# Zero-based row positions of the ranks that get a tick label (own rank included).
lb_ticks = np.array([1, 5, 10, 25, lb_me + 1, 50, 100]) - 1

ax.bar(lb.index, lb["score"], width=1, color="lightgrey")
ax.bar(lb_me, lb.loc[lb_me, "score"], width=1, color="red")

# Tick labels show the one-based rank.
ax.set_xticks(lb_ticks)
ax.set_xticklabels(lb_ticks + 1)
ax.set_ylabel("Dice score")
ax.set_xlabel("Rank")
ax.margins(x=0)

In [None]:
# Load the stored two-stage predictions and convert them to a torch tensor.
# NOTE(review): this rebinds `y_hat_aa`, which earlier held the predictions loaded from
# "ens_dlv3/dev/pred_dlv3_test.joblib"; cells above give different results if re-run after this one.
# NOTE(review): joblib.load unpickles the file; only load artifacts from a trusted source.
y_hat_aa = torch.from_numpy(joblib.load("pred_2staged_test.joblib"))
y_hat_aa.shape

In [None]:
# Ground-truth test masks as a torch tensor (same source as y_test).
y_true = torch.from_numpy(ds_test.masks.to_numpy())
y_true.shape

In [None]:


# Overall Dice of the two-stage predictions against the true masks.
# Both y_hat_aa and y_true are already torch tensors at this point, so they are
# passed as-is; torch.from_numpy only accepts numpy arrays and raises TypeError
# when handed a tensor.
score = torchmetrics.functional.dice(
    preds=y_hat_aa,
    target=y_true,
)
score

In [None]:
def get_sample_dice(y_true, y_pred):
    """Dice coefficient between two masks of the same shape.

    Computes ``2*TP / (2*TP + FP + FN)``. The denominator equals
    ``y_true.sum() + y_pred.sum()``, so the counts are taken from plain sums;
    this avoids the ``1 - mask`` temporaries and also works for bool tensors,
    for which torch does not support subtraction.

    Args:
        y_true: torch tensor holding the reference mask (0/1 values or bool).
        y_pred: torch tensor holding the predicted mask, same shape as ``y_true``.

    Returns:
        A 0-dim tensor with the Dice score. When the score is undefined
        (NaN, i.e. both masks are empty) the sentinel ``torch.tensor(1.1)``
        is returned so such samples can be told apart from real scores in [0, 1].
    """
    tp = (y_true * y_pred).sum()
    # 2*tp + fp + fn == sum(y_true) + sum(y_pred)
    denominator = y_true.sum() + y_pred.sum()
    dice = (2 * tp) / denominator
    # torch.isnan keeps the check inside torch instead of routing the tensor through numpy.
    if torch.isnan(dice):
        return torch.tensor(1.1)
    return dice

In [None]:
# Per-sample Dice for every test sample, stacked into one 1-D tensor.
# Arguments are passed by keyword so they match get_sample_dice(y_true, y_pred);
# the previous positional call had them the other way round, which was harmless
# only because Dice is symmetric in its two inputs.
sample_scores = torch.stack([
    get_sample_dice(y_true=y_true[i], y_pred=y_hat_aa[i])
    for i in tqdm.trange(len(y_hat_aa))
])
sample_scores

In [None]:
# Distribution of the per-sample Dice scores in 20 equal-width bins on [0, 1].
# NOTE(review): get_sample_dice returns 1.1 for undefined scores; those values lie above the
# last bin edge and presumably do not show up in this histogram — confirm that is intended.
sns.histplot(sample_scores, stat="probability", bins=np.linspace(0, 1, 21))

In [None]:
# Positions of the samples whose Dice score is exactly 0.
np.where(sample_scores == 0)

In [None]:
import matplotlib.colors

In [None]:
# Joint distribution of true kelp fraction (x, power-transformed) and per-sample Dice (y).
r = 1 / 5  # exponent applied to the kelp fraction to spread out small values
bins = 25
# NOTE(review): bins_kf and bins_score are defined but not used by the plotting calls below.
bins_kf = np.linspace(0, 0.2 ** r, bins)
bins_score = np.linspace(0, 1, bins)
stat = "probability"

g = sns.JointGrid(x=y_test_kf ** r, y=sample_scores, ratio=3, height=6)
g.plot_joint(
    plt.hist2d,
    bins=bins,
    norm=matplotlib.colors.LogNorm(),
    cmap="Blues",
)
g.plot_marginals(sns.histplot, bins=bins, stat=stat)
g.set_axis_labels("True kelp fraction", "Dice score")

# Dashed reference line at a Dice score of 1 on the joint axes and the y marginal.
ref_line_style = {"color": "grey", "linestyle": "--"}
for axes in (g.ax_joint, g.ax_marg_y):
    axes.axhline(1, **ref_line_style)

# x ticks sit at the transformed positions but are labelled with the untransformed fractions.
kf_ticks = np.array([0, 1e-3, 0.025, 0.05, 0.1, 0.2])
g.ax_joint.set_xticks(kf_ticks ** r)
g.ax_joint.set_xticklabels(kf_ticks)

# y ticks at fixed Dice values.
score_ticks = np.array([0, 0.2, 0.4, 0.6, 0.8, 1])
g.ax_joint.set_yticks(score_ticks)
g.ax_joint.set_yticklabels(score_ticks)

g.savefig("plots/jointplot_kf_dice.svg")

In [None]:
# Samples with a true kelp fraction of 0 and a Dice score of exactly 0.
# get_sample_dice returns 1.1 when both masks are empty, so a 0 here means
# something was predicted although the true mask is empty.
no_kelp_but_predicted,  = np.where((sample_scores == 0) & (y_test_kf  == 0))
no_kelp_but_predicted

In [None]:
# Samples that contain kelp (fraction > 0) but score a Dice of exactly 0,
# i.e. the prediction has no overlap with the true mask.
kelp_missed,  = np.where((sample_scores == 0) & (y_test_kf > 0))
kelp_missed

In [None]:
# Samples with a true kelp fraction of 0 and a positive score. With an empty true mask
# the only positive value get_sample_dice can return is its 1.1 sentinel (both masks empty).
no_kelp_present_no_kelp_predicted,  = np.where((sample_scores > 0) & (y_test_kf  == 0))
no_kelp_present_no_kelp_predicted

In [None]:
# Samples that contain kelp and whose prediction overlaps the true mask (Dice > 0).
kelp_present_kelp_predicted,  = np.where((sample_scores > 0) & (y_test_kf  > 0))
kelp_present_kelp_predicted

In [None]:
# Sample-level confusion counts, taken from the sizes of the four index groups.
tp = len(kelp_present_kelp_predicted)
tn = len(no_kelp_present_no_kelp_predicted)
fp = len(no_kelp_but_predicted)
fn = len(kelp_missed)

# Total number of samples across the four groups.
fp + fn + tp + tn

In [None]:
# Share of all grouped samples counted as tp (kelp present, prediction overlaps).
tp / (fp + fn + tp + tn)

In [None]:
# Share of all grouped samples counted as tn (no kelp present, positive score).
tn / (fp + fn + tp + tn)

In [None]:
# Share of all grouped samples counted as fp (no kelp present, Dice of 0).
fp / (fp + fn + tp + tn)

In [None]:
# Share of all grouped samples counted as fn (kelp present, Dice of 0).
fn / (fp + fn + tp + tn)

In [None]:
# Among the samples without kelp (tn + fp): fraction counted as tn.
tn / (tn + fp)

In [None]:
# Among the samples without kelp (tn + fp): fraction counted as fp.
fp / (tn + fp)

In [None]:
# True kelp fractions of the samples in kelp_missed.
y_test_kf[kelp_missed]

In [None]:
# "sample" values (presumably the sample identifiers — confirm) of the samples in no_kelp_but_predicted.
y_test_kf.sample[no_kelp_but_predicted]

In [None]:
# Wrap the per-sample scores in a DataArray that reuses the "sample" coordinate of
# y_test_kf, so scores can be selected by sample label.
sample_scores_da = xr.DataArray(sample_scores, dims="sample", coords={"sample": y_test_kf.sample})
sample_scores_da

In [None]:
# Score of the sample labelled "TV825104".
sample_scores_da.sel(sample="TV825104")

In [None]:
# Samples whose score lies strictly between 0.3 and 0.4.
sample_scores_da[(sample_scores_da < 0.4) & (sample_scores_da > 0.3)]

In [None]:
import torch_deeplabv3 as dlv3
import torch_simple_clf as clf

In [None]:
def get_num_params(model):
    """Return the total element count of all parameters of ``model`` that have ``requires_grad`` set."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


# Trainable-parameter counts of freshly constructed models, printed in scientific notation.
# NOTE(review): the constructor arguments (3, 512) and (3, 128, .2) are not explained in this notebook — confirm their meaning in the model modules.
print(f"{get_num_params(dlv3.DeepLabV3(3, 512)):.2e}")
print(f"{get_num_params(clf.BinaryClfCNN(3, 128, .2)):.2e}")