## Metric


In [None]:
import torch

from learners.metrics import MultiIoUMetric


# Smoke-test the project's MultiIoUMetric with its default constructor arguments.
metric = MultiIoUMetric()

# Call the metric once with two identical label tensors.
# NOTE(review): the (prediction, target) argument order is assumed — confirm against
# the MultiIoUMetric definition.
metric(torch.tensor([1, 0, 1]), torch.tensor([1, 0, 1]))

# Display whatever compute() returns as the cell output.
metric.compute()

## Utils


In [None]:
from utils.diff_dict import diff_dict


# Exercise diff_dict on two nested dicts that differ in several ways:
#   - a key present on only one side ("b" on the left, "c" on the right),
#   - an extra nested key ("q" under "a" -> "a"),
#   - a tuple and lists whose lengths and values differ ("p", "g", "d" -> "g"),
#   - an identical empty dict ("config").
# The return value is shown as the cell output.
diff_dict(
    {
        "config": {},
        "a": {"a": {"p": 1}},
        "b": 2,
        "p": (12, 34),
        "d": {"e": 4, "f": 5, "g": [6, 7, 8]},
        "g": [6, 7, 8],
    },
    {
        "config": {},
        "a": {"a": {"p": 1, "q": 0}},
        "c": 3,
        "d": {"e": 4, "f": 5, "g": [6, 9, 8, {"a": 1}]},
        "g": [6, 9],
        "p": (12, 33, 34),
    },
)

In [None]:
import os
import datetime


# Print the last-modification time of every entry in logs/SL as an ISO 8601 string.
# fromtimestamp() is called without a tz argument, so the times are naive local times.
for item in os.listdir("logs/SL"):
    mtime = os.path.getmtime(os.path.join("logs/SL", item))
    print(datetime.datetime.fromtimestamp(mtime).isoformat())

In [None]:
def make_batch_sample_indices(
    population_size: int, sample_size: int, batch_size: int
) -> list[list[int]]:
    """Randomly pick items from a population and group the picks by batch.

    The population is the index range ``0 .. population_size - 1``, split into
    consecutive batches of ``batch_size`` items (the last batch may be shorter).
    ``sample_size`` distinct indices are drawn without replacement, and each is
    reported as its position inside the batch it falls in.

    Args:
        population_size: Total number of items across all batches.
        sample_size: Number of distinct items to draw.
        batch_size: Number of items per batch.

    Returns:
        One list per batch (``ceil(population_size / batch_size)`` lists). Each
        holds the sorted in-batch positions of the sampled items and is empty
        when nothing was drawn from that batch.

    Raises:
        ValueError: If ``sample_size`` is negative or larger than
            ``population_size`` (raised by ``random.sample``).
        ZeroDivisionError: If ``batch_size`` is 0.
    """
    import random

    # Ceiling division. The previous ``population_size // batch_size + 1`` produced
    # a spurious, always-empty trailing batch whenever the population was an exact
    # multiple of the batch size.
    num_batches = -(-population_size // batch_size)
    batch_samples: list[list[int]] = [[] for _ in range(num_batches)]

    # Sorting first keeps the positions inside every batch in ascending order.
    for sampled in sorted(random.sample(range(population_size), sample_size)):
        batch_index, index_in_batch = divmod(sampled, batch_size)
        batch_samples[batch_index].append(index_in_batch)
    return batch_samples


make_batch_sample_indices(100, 10, 20)

In [None]:
def make_batch_sample_indices_multi(
    iterations_batches: list[tuple[int, int]], total_samples: int
) -> list[list[int]]:
    """Distribute a sample budget over several groups and sample each group.

    Each ``(iterations, batch_size)`` pair describes a group with a population
    of ``iterations * batch_size`` items. ``total_samples`` is split across the
    groups in proportion to their populations. Rounding differences are resolved
    by randomly adding or removing single samples. Every group is then sampled
    with ``make_batch_sample_indices`` and the per-batch lists are concatenated
    in group order.

    Args:
        iterations_batches: ``(iterations, batch_size)`` for every group.
        total_samples: Total number of items to draw across all groups.

    Returns:
        The concatenated per-batch index lists of all groups.

    Raises:
        ValueError: If ``total_samples`` is negative or exceeds the combined
            population of all groups.
    """
    import random

    populations = [iterations * batch for iterations, batch in iterations_batches]
    sum_populations = sum(populations)

    if not 0 <= total_samples <= sum_populations:
        raise ValueError(
            f"total_samples must be between 0 and {sum_populations}, "
            f"got {total_samples}"
        )

    if sum_populations == 0:
        # Nothing to sample from; this also avoids dividing by zero below.
        samples = [0] * len(populations)
    else:
        samples = [round(p * total_samples / sum_populations) for p in populations]

    # Rounding can leave the sum slightly off target. Nudge random groups by one
    # until it matches, but only groups that stay within 0..population. A count
    # outside that range would make random.sample fail.
    while True:
        missing = total_samples - sum(samples)
        if missing == 0:
            break
        step = 1 if missing > 0 else -1
        candidates = [
            index
            for index, (count, population) in enumerate(zip(samples, populations))
            if 0 <= count + step <= population
        ]
        samples[random.choice(candidates)] += step

    batch_samples = []
    zipped = zip(iterations_batches, populations, samples)
    for (_, batch), population, sample in zipped:
        batch_samples += make_batch_sample_indices(
            population,
            sample,
            batch,
        )

    return batch_samples


make_batch_sample_indices_multi([(5, 3), (4, 2), (10, 1)], 20)

## WandB


In [1]:
import wandb

from config.constants import WANDB_SETTINGS
from utils.wandb import wandb_login

In [None]:
def wandb_log_dataset_ref(dataset_path: str, dataset_name: str, dummy: bool = False):
    """Log a local dataset directory to W&B as a reference artifact.

    Starts a short helper run, registers ``dataset_path`` as a ``file://``
    reference in a ``dataset`` artifact named ``dataset_name``, logs it, and
    finishes the run.

    Args:
        dataset_path: Path of the dataset, used verbatim after ``file://``.
        dataset_name: Artifact name; it also appears in the run name.
        dummy: When true, log to ``WANDB_SETTINGS["dummy_project"]`` instead of
            ``WANDB_SETTINGS["project"]``.
    """
    wandb_login()
    # Using the run as a context manager guarantees it is finished even when
    # building or logging the artifact raises. Previously a failure left the
    # run open in the kernel.
    with wandb.init(
        tags=["helper"],
        project=WANDB_SETTINGS["dummy_project" if dummy else "project"],
        name=f"log dataset {dataset_name}",
    ) as run:
        dataset_artifact = wandb.Artifact(dataset_name, type="dataset")
        dataset_artifact.add_reference(f"file://{dataset_path}")
        run.log_artifact(dataset_artifact)


# wandb_log_dataset_ref("D:/Penelitian/FWS/data/REFUGE-train", "REFUGE-train", True)
# wandb_log_dataset_ref("D:/Penelitian/FWS/data/REFUGE-val", "REFUGE-val", True)
# wandb_log_dataset_ref("D:/Penelitian/FWS/data/REFUGE-test", "REFUGE-test", True)

In [None]:
# ac = wandb.Api().artifact_collection(
#     "run_table", "pandegaaz/few-shot-weakly-seg-old/run-svgff5kf-metrics"
# )

# ac.delete()

# for art in ac.artifacts():
#     print(art.name, art.id)

# art: wandb.Artifact = ac.artifacts()[0]  # type: ignore

# print(art.name, art.aliases)

# art.download("ppp/qqq")



In [None]:
# import os
# from utils.wandb import prepare_study_ckpt_artifact_name, prepare_ckpt_artifact_alias, wandb_log_file

# wandb_login()
# wandb.init(
#     tags=["helper"],
#     project=WANDB_SETTINGS["project"],
#     name="log ckpts",
# )

# ckpts = sorted(filter(lambda x: x.endswith(".ckpt"), os.listdir(os.getcwd())))
# for ckpt in ckpts:
#     study_id = ckpt.split("=")[-1].split(".")[0]
#     artifact_name = prepare_study_ckpt_artifact_name(study_id)
#     new_ckpt = ckpt.split(" study")[0].replace("F=", "fold=") + ".ckpt"
#     artifact_alias = prepare_ckpt_artifact_alias(new_ckpt)
#     artifact_path = os.path.join(os.getcwd(), new_ckpt)
#     os.rename(ckpt, new_ckpt)
#     wandb_log_file(
#         wandb.run,
#         artifact_name,
#         artifact_path,
#         "study-checkpoint",
#         [artifact_alias],
#     )
#     print(ckpt, artifact_name, artifact_alias, new_ckpt, sep=" | ")

# wandb.finish()

In [10]:
# from utils.wandb import wandb_path

# runs = wandb.Api().runs(
#     wandb_path(False),
#     filters={"config.dataset": "RIM-ONE-3-train", "config.study": {"$ne": "8kcKT"}},
# )

# for i, run in enumerate(runs):
#     print(i, run.name)
#     run.config["study"] = "8kcKT"
#     run.update()

# runs = wandb.Api().runs(
#     wandb_path(False),
#     filters={"display_name":"2024-09-13 05-37 eRF"},
# )

# for i, run in enumerate(runs):
#     print(i, run.name, run.group)
#     run.group = "WS multi-step"
#     run.update()

0 2024-09-13 05-37 eRF WS


## Aiven


In [None]:
import os
import requests
import time
from typing import Literal

from dotenv import load_dotenv


def turn_aiven_db(state: Literal["on", "off"]):
    """Power the Aiven Postgres service on or off through the Aiven REST API.

    The API token is read from the ``AIVEN_API_TOKEN`` environment variable
    after loading a ``.env`` file. When turning the service on, the function
    blocks and polls every 10 seconds until the service reports ``RUNNING``.

    Args:
        state: ``"on"`` to power the service up, ``"off"`` to power it down.

    Raises:
        ValueError: If ``state`` is not ``"on"``/``"off"``, if the API token is
            missing, or if the API answers with a non-200 status code.
    """
    # Reject anything else up front. Previously an unknown value powered the
    # service off and then polled forever waiting for it to be RUNNING.
    if state not in ("on", "off"):
        raise ValueError(f"state must be 'on' or 'off', got {state!r}")

    url = (
        "https://api.aiven.io/v1/project/few-shot-weakly-seg/service/optuna-postgres-db"
    )
    load_dotenv()
    aiven_token = os.getenv("AIVEN_API_TOKEN")
    if not aiven_token:
        # Avoid sending the literal header "aivenv1 None".
        raise ValueError("AIVEN_API_TOKEN is not set")
    auth_headers = {"Authorization": f"aivenv1 {aiven_token}"}

    # requests has no default timeout; without one a stalled connection would
    # hang the cell indefinitely.
    request_timeout = 30

    res = requests.put(
        url,
        json={"powered": state == "on"},
        params={"allow_unclean_poweroff": "false"},
        headers=auth_headers,
        timeout=request_timeout,
    )
    if res.status_code != 200:
        raise ValueError(
            f"Failed to turn db {state}, response {res.status_code}: {res.text}"
        )

    if state == "off":
        print("Successfully turned off db")
        return

    print("Waiting for db to turn on")
    while True:
        res = requests.get(url, headers=auth_headers, timeout=request_timeout)
        if res.status_code != 200:
            raise ValueError(
                f"Failed to get db status, response {res.status_code}: {res.text}"
            )
        if res.json()["service"]["state"] == "RUNNING":
            break
        time.sleep(10)
    print("Successfully turned on db")


In [None]:
# turn_aiven_db("off")

## Lightning Issue

https://github.com/Lightning-AI/pytorch-lightning/issues/20095


In [None]:
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.tensorboard.writer import SummaryWriter


In [None]:
class SimpleModel(LightningModule):
    """Minimal LightningModule used to reproduce a graph-logging problem.

    The network is a single 3->3 convolution followed by a bilinear resize back
    to the input's spatial size. The model graph is exported and logged when
    fitting starts (see ``log_graph``).
    """

    def __init__(self):
        """Create the convolution layer and the cross-entropy loss."""
        super().__init__()
        self.conv = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.loss = torch.nn.CrossEntropyLoss()

    def forward(self, x):
        """Apply the convolution, then resize the result to ``x``'s spatial size."""
        out = self.conv(x)
        # The target size is taken from the input's dimensions after the first two
        # (batch and channel). The original author marks this call as the source
        # of the problem being reproduced.
        out = nn.functional.interpolate(
            out, x.size()[2:], mode="bilinear"
        )  # main error
        return out

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with lr=0.02."""
        return torch.optim.Adam(self.parameters(), lr=0.02)

    def on_fit_start(self):
        """Log the model graph once when fitting starts."""
        super().on_fit_start()
        self.log_graph()

    def training_step(self, batch, batch_idx):
        """Run the model on one ``(x, y)`` batch and return the cross-entropy loss."""
        x, y = batch
        pred = self(x)
        loss = self.loss(pred, y)
        return loss

    def log_graph(self, inp=None):
        """Export/log the model graph through three different routes.

        Args:
            inp: Example input used for tracing. When omitted, a random tensor
                of shape (8, 3, 64, 64) on the module's device is used.
        """
        if inp is None:
            inp = torch.randn(8, 3, 64, 64, device=self.device)

        # Route 1: ONNX export to "model.onnx" (export_params=False).
        self.to_onnx("model.onnx", inp, export_params=False)

        # Route 2: the Lightning TensorBoard logger, if that is the attached logger.
        if isinstance(self.logger, TensorBoardLogger):
            self.logger.log_graph(self, inp)

        # Route 3: a plain SummaryWriter writing to "tensorboard/manual".
        tensorboard_writer = SummaryWriter("tensorboard/manual")
        tensorboard_writer.add_graph(self, inp)
        tensorboard_writer.close()


# Build the model and a small random dataset: 20 inputs of shape (3, 64, 64) with
# integer label maps of shape (64, 64) drawn from {0, 1, 2}, served in batches of 8.
model = SimpleModel()
train_dataset = TensorDataset(
    torch.randn(20, 3, 64, 64), torch.randint(0, 3, (20, 64, 64))
)
train_loader = DataLoader(train_dataset, batch_size=8)

# model.log_graph(torch.randn(8, 3, 64, 64, device=model.device))

# One-epoch trainer that requests the "gpu" accelerator, disables checkpointing, and
# logs to tensorboard/auto with graph logging enabled on the TensorBoard logger.
trainer = Trainer(
    deterministic="warn",
    accelerator="gpu",
    max_epochs=1,
    logger=TensorBoardLogger("tensorboard", name="auto", log_graph=True),
    enable_checkpointing=False,
)
# trainer.fit(model, train_loader)

# model.log_graph(torch.randn(8, 3, 64, 64, device=model.device))


## Study Time


In [None]:
def get_study_time(train_size: int, val_size: int, num_folds: int) -> float:
    """Estimate the time needed for one study.

    The estimate charges 4 units per 100 training samples and 2 units per 100
    validation samples, each multiplied by the number of folds, plus a constant
    overhead of 1. The unit is not stated in this notebook.

    Args:
        train_size: Number of training samples.
        val_size: Number of validation samples.
        num_folds: Number of folds.

    Returns:
        The estimated study time.
    """
    train_cost = train_size / 100 * 4 * num_folds
    val_cost = val_size / 100 * 2 * num_folds
    return train_cost + val_cost + 1


print(get_study_time(45, 5, 3) * 10)
print(get_study_time(80, 20, 3) * 10)
print(get_study_time(320, 80, 3) * 10)
print(get_study_time(400, 400, 1) * 10)
print(get_study_time(45 + 80 + 320, 5 + 20 + 80, 3) * 10)

## Weight Averaging


In [9]:
import torch

from copy import deepcopy

In [10]:
# Load the checkpoints of fold 0 and fold 1 of trial 40 from study 3UQpU.
ckpt0 = torch.load(
    "logs/PS/best study 3UQpU/val_score=0.8294 epoch=11 fold=0 trial=40.ckpt"
)
ckpt1 = torch.load(
    "logs/PS/best study 3UQpU/val_score=0.7772 epoch=17 fold=1 trial=40.ckpt"
)

# Start from a deep copy of the fold-0 checkpoint so ckpt0 itself is not modified.
ckpt = deepcopy(ckpt0)
# Element-wise mean of the two state dicts, restricted to keys that start with
# "net."; every other entry keeps the fold-0 value.
# NOTE(review): assumes both checkpoints contain the same "net." keys with matching
# shapes — confirm before saving the merged file.
weight = {
    k: (v + ckpt1["state_dict"][k]) / 2
    for k, v in ckpt0["state_dict"].items()
    if k.startswith("net.")
}
ckpt["state_dict"].update(weight)
# torch.save(ckpt, "logs/PS/best study 3UQpU/merged.ckpt")

## Data Consistency


In [32]:
import os

import pandas as pd

In [46]:
# Load every CSV in the working directory, drop the metric columns, and truncate
# all frames to the epochs that every run reached (capped at 200).
dfs = []

smallest_max_epoch = 200
# os.listdir returns entries in arbitrary order; sorting makes the position of each
# run in `dfs` (and therefore `dfs[0]` used below) reproducible across re-runs.
for item in sorted(os.listdir(os.getcwd())):
    if not item.endswith(".csv"):
        continue
    # Non-inplace drop: build the trimmed frame in one expression instead of
    # mutating the frame returned by read_csv.
    df = pd.read_csv(item).drop(columns=["loss", "iou_cup", "iou_disc"])
    smallest_max_epoch = min(smallest_max_epoch, df["epoch"].max())
    dfs.append(df)

# Keep only the epochs shared by all runs so the frames can be compared cell by cell.
dfs = [df[df["epoch"] <= smallest_max_epoch].reset_index(drop=True) for df in dfs]

In [47]:
from itertools import combinations

# Print the number of differing cells for every unordered pair of runs (0 means the
# two frames are identical). combinations() yields the pairs in the order
# (0, 1), (0, 2), ..., (1, 2), ... and works for any number of loaded CSVs instead
# of assuming exactly four.
for left, right in combinations(dfs, 2):
    print((left != right).sum().sum())

0
0
0
0
0
0


In [63]:
# Within the first run, check that the rows of type "VL" are the same at every
# inspected epoch: compare epoch 1 against epochs 3, 5, ..., 33 and print the number
# of differing cells (0 means identical).
# NOTE(review): "VL" presumably marks validation rows — confirm against the CSV writer.
df = dfs[0]
df = df[df["type"] == "VL"]
# The epoch column is dropped and the index reset so that only the remaining columns
# are compared, row by row.
df_1 = df[df["epoch"] == 1].drop(columns="epoch").reset_index(drop=True)

for n in range(3, 35, 2):
    df_n = df[df["epoch"] == n].drop(columns="epoch").reset_index(drop=True)
    print((df_1 != df_n).sum().sum())

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [64]:
# Extract the rows of type "TR" of the first run at epochs 1 and 3 (epoch column
# removed, index reset) so the two epochs can be inspected side by side below.
# NOTE(review): "TR" presumably marks training rows — confirm against the CSV writer.
df = dfs[0]
df = df[df["type"] == "TR"]
df_1 = df[df["epoch"] == 1].drop(columns="epoch").reset_index(drop=True)
df_3 = df[df["epoch"] == 3].drop(columns="epoch").reset_index(drop=True)

In [65]:
df_1

Unnamed: 0,type,batch,shot,sparsity_mode,sparsity_value
0,TR,0,6,point grid contour skeleton region point,7 0.873 0.964 0.37 0.728 19
1,TR,1,6,point grid contour skeleton region point,32 0.328 0.103 0.867 0.275 41
2,TR,2,17,contour skeleton region point grid contour ske...,0.516 0.729 0.26 33 0.348 0.886 0.76 0.631 40 ...
3,TR,3,16,point grid contour skeleton region point grid ...,24 0.545 0.935 0.501 0.529 31 0.164 0.167 0.76...
4,TR,4,18,region point grid contour skeleton region poin...,0.173 18 0.175 0.479 0.921 0.592 39 0.434 0.27...
...,...,...,...,...,...
95,TR,95,10,skeleton region point grid contour skeleton re...,0.391 0.605 31 0.247 0.247 0.329 0.635 22 0.97...
96,TR,96,3,contour skeleton region,0.116 0.577 0.465
97,TR,97,13,contour skeleton region point grid contour ske...,0.57 0.263 0.709 48 0.248 0.962 0.932 0.741 29...
98,TR,98,14,grid contour skeleton region point grid contou...,0.987 0.465 0.826 0.648 25 0.72 0.887 0.534 0....


In [66]:
df_3

Unnamed: 0,type,batch,shot,sparsity_mode,sparsity_value
0,TR,0,13,contour skeleton region point grid contour ske...,0.66 0.816 0.207 44 0.719 0.402 0.98 0.952 13 ...
1,TR,1,10,contour skeleton region point grid contour ske...,0.317 0.12 0.128 13 0.531 0.151 0.565 0.68 9 0.66
2,TR,2,8,grid contour skeleton region point grid contou...,0.5 0.799 0.584 0.437 8 0.54 0.385 0.688
3,TR,3,8,contour skeleton region point grid contour ske...,0.914 0.678 0.423 25 0.139 0.229 0.766 0.694
4,TR,4,11,point grid contour skeleton region point grid ...,18 0.127 0.666 0.199 0.923 44 0.472 0.576 0.45...
...,...,...,...,...,...
95,TR,95,7,grid contour skeleton region point grid contour,0.825 0.565 0.576 0.515 48 0.239 0.583
96,TR,96,10,skeleton region point grid contour skeleton re...,0.391 0.605 31 0.247 0.247 0.329 0.635 22 0.97...
97,TR,97,1,region,0.843
98,TR,98,17,point grid contour skeleton region point grid ...,30 0.624 0.209 0.269 0.439 23 0.604 0.415 0.88...


In [82]:
# Read the full text of every "val*.txt" file in the working directory.
val_indices = []

# os.listdir returns entries in arbitrary order; sorting makes the position of each
# file's content in `val_indices` reproducible across re-runs.
for item in sorted(os.listdir(os.getcwd())):
    if item.startswith("val") and item.endswith(".txt"):
        with open(item, "r") as f:
            val_indices.append(f.read())

In [85]:
# Check that the first file's content is identical to that of the second, third and
# fourth file (this assumes at least four files were loaded).
print(val_indices[0] == val_indices[1])
print(val_indices[0] == val_indices[2])
print(val_indices[0] == val_indices[3])

# Display the raw text of the first file as the cell output.
val_indices[0]

True
True
True


'[200, 201, 202, 203, 204] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n[205, 206, 207, 208, 209] [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n[210, 211, 212, 213, 214] [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]\n[215, 216, 217, 218, 219] [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]\n[220, 221, 222, 223, 224] [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]\n[225, 226, 227, 228, 229] [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]\n[230, 231, 232, 233, 234] [60, 61, 62, 63, 64, 65, 66, 67, 68, 69]\n[235, 236, 237, 238, 239] [70, 71, 72, 73, 74, 75, 76, 77, 78, 79]\n[240, 241, 242, 243, 244] [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]\n[245, 246, 247, 248, 249] [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]\n[250, 251, 252, 253, 254] [100, 101, 102, 103, 104, 105, 106, 107, 108, 109]\n[255, 256, 257, 258, 259] [110, 111, 112, 113, 114, 115, 116, 117, 118, 119]\n[260, 261, 262, 263, 264] [120, 121, 122, 123, 124, 125, 126, 127, 128, 129]\n[265, 266, 267, 268, 269] [130, 131, 132, 133, 134, 135, 136, 137, 138, 139]\n[270, 271, 272, 2