In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Exponential learning-rate decay: lr(epoch) = initial_lr * k ** epoch.
epochs = 50

k = 0.93           # multiplicative decay applied once per epoch
initial_lr = 0.01  # learning rate at epoch 0
x = np.arange(0, epochs, 1)  # epoch indices 0 .. epochs - 1

# `np.float` was only an alias for the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use the builtin (still float64).
y = np.ones(shape=x.shape, dtype=float) * initial_lr
y = y * k ** x

# Draw the schedule on explicit axes and label it so the figure can be read
# on its own; finishing with plt.show() keeps the Line2D repr out of the
# cell output.
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_xlabel('epoch')
ax.set_ylabel('learning rate')
ax.set_title('Exponential learning-rate decay')
plt.show()

In [None]:
from pytorch_lightning.loggers import NeptuneLogger

# Never commit the Neptune token to the notebook. With `api_key=None` the
# Neptune client is documented to fall back to the NEPTUNE_API_TOKEN
# environment variable, whereas an empty string is passed through as a
# (non-working) token.
logger = NeptuneLogger(
    api_key=None,
    project_name='szymswiat5/nih-classification',
)

In [None]:
# Keep a handle on the logger's underlying experiment object.
# NOTE(review): Lightning loggers usually create the remote experiment lazily
# on first access of `.experiment` — confirm whether this line is what
# actually starts the Neptune run.
exp = logger.experiment

In [None]:

# Smoke-test the logger by replaying the learning-rate curve as a dummy
# metric: one value per epoch index (x and y both have `epochs` entries).
for epoch_idx, lr_value in zip(x, y):
    logger.log_metric('test/test_metric', lr_value, epoch_idx)

In [None]:
# Close out the logging session started above.
# NOTE(review): 'ok' is presumably the final run status expected by the
# logger API — confirm against the NeptuneLogger version in use.
logger.finalize('ok')

In [None]:
from models.efficient_net_v2_module import EfficientNetV2Module
import torch

# Restore the classifier from a saved Lightning checkpoint. The constructor
# arguments are passed explicitly alongside the checkpoint path.
checkpoint_path = 'lightning_logs/hpc_ckpt_1.ckpt'

# NOTE(review): 14 entries, matching num_classes=14 — presumably one
# positive-class weight per label; confirm against EfficientNetV2Module.
class_pos_weight = [
    0.09084507042253522,
    0.027230046948356807,
    0.03943661971830986,
    0.0215962441314554,
    0.11455399061032864,
    0.023004694835680753,
    0.014788732394366197,
    0.002112676056338028,
    0.1779342723004695,
    0.0528169014084507,
    0.056572769953051646,
    0.031220657276995304,
    0.011032863849765259,
    0.04765258215962441,
]

mod = EfficientNetV2Module.load_from_checkpoint(
    checkpoint_path,
    num_classes=14,
    pos_weight=class_pos_weight,
)

# with open('lightning_logs/epoch=22_val_auc_roc=0.812_top.ckpt', 'r') as f:
#     ckpt = torch.load(f)

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import json

def split_df(df: pd.DataFrame, chunk1_frac=0.95, seed=0):
    """Randomly partition ``df`` into two disjoint chunks.

    Args:
        df: Frame to split. Its index is carried over unchanged into both
            chunks and does not have to be unique.
        chunk1_frac: Fraction of the rows that goes into the first chunk.
        seed: Seed for the random sampling, making the split reproducible.

    Returns:
        A ``(chunk1, chunk2)`` tuple. ``chunk1`` holds the sampled rows in
        sampled order; ``chunk2`` holds every other row in original order.
    """
    # Sample row *positions*, not index labels. Dropping by label
    # (``df.drop(chunk1.index)``) removes every row sharing a sampled label,
    # so a frame with duplicate index values would silently lose rows.
    # Sampling a positional Series of the same length with the same seed
    # selects exactly the rows ``df.sample(frac=..., random_state=seed)``
    # would, so existing splits stay reproducible.
    positions = pd.Series(np.arange(len(df)))
    picked = positions.sample(frac=chunk1_frac, random_state=seed).to_numpy()

    remaining = np.ones(len(df), dtype=bool)
    remaining[picked] = False

    chunk1 = df.iloc[picked]
    chunk2 = df.iloc[remaining]

    return chunk1, chunk2

# Locations of the split CSVs. Every file lives under `df_root` and is named
# '<prefix>_<split>_df.csv'.
prefix = 'orig'
df_root = Path('/media/DATA_SSD/datasets/nih_dataset/df_split_files')

train_val_df_path, train_df_path, val_df_path, test_df_path = (
    df_root / f'{prefix}_{split_name}_df.csv'
    for split_name in ('train_val', 'train', 'val', 'test')
)

In [None]:
# Per-split label counts, in the order train, val, test.
# The loop variable is deliberately NOT called `val_df`: this loop visits all
# three splits, and reusing that name would leave the *test* frame bound to
# `val_df` once the cell finishes.
labels_all = []
for split_path in [train_df_path, val_df_path, test_df_path]:
    split_frame = pd.read_csv(split_path)

    # Each 'Label' cell holds a JSON-encoded list; decode all rows into one
    # 2-D array (NOTE(review): presumably multi-hot class vectors — confirm)
    # and sum over rows to get one count per label position.
    split_labels = np.array([json.loads(row) for row in split_frame['Label']])
    labels_all.append(np.sum(split_labels, axis=0))

# Count per label position across all splits combined.
labels_all_np = np.sum(np.array(labels_all), axis=0)

# Share of each label's total that falls in each split (rows: train, val,
# test). Broadcasting divides every split row by the totals; a label with a
# zero total yields NaN.
labels_all_perc = np.array(labels_all) / labels_all_np


In [None]:
# Carve the validation set out of the combined train+val frame (95% / 5%,
# fixed seed) and persist both parts next to the source CSV.
train_val_df = pd.read_csv(train_val_df_path)
train_df, val_df = split_df(train_val_df, chunk1_frac=0.95, seed=0)

for frame, out_path in ((train_df, train_df_path), (val_df, val_df_path)):
    frame.to_csv(out_path, index=False)