In [None]:
from src.sampling.images import plot_frame_count_distributions

# Root directory passed to plot_frame_count_distributions below.
samples_root_dir = "data/samples"

# Presumably plots how many frames each sample under this root contains —
# confirm against src.sampling.images.
plot_frame_count_distributions(samples_root_dir)

# SOTA K-fold cross-validation

In [None]:
from glob import glob

# Root of the k-fold image dataset. The complete, unsplit image set is read
# from "<data_root>/all".
data_root = "data/img/techniques_kf"

# glob() returns paths in arbitrary, filesystem-dependent order. Sorting keeps
# the index -> file mapping stable, so fold indices computed over this list
# refer to the same files on every run and machine.
filenames = sorted(glob(data_root + "/all/**/*.*", recursive=True))

In [None]:
from os import listdir
from os.path import join
from shutil import rmtree, copy
from random import random
from numpy import average
import matplotlib.pyplot as plt

from src.sampling.images import __build_image_dirs
from src.sota.model import SOTA

def build_fold(data_root, fold_idx, train_idx, test_idx, filenames,
               train_ratio=0.9, seed=None):
    """Copy one cross-validation fold into ``<data_root>/current_fold``.

    Files selected by ``train_idx`` are distributed at random between the
    ``train`` and ``val`` splits; files selected by ``test_idx`` all go to
    ``test``. Each file keeps its path relative to ``<data_root>/all``.

    Args:
        data_root: Dataset root containing the ``all`` directory.
        fold_idx: Zero-based fold index (only used for the progress messages,
            which show ``fold_idx + 1``).
        train_idx: Iterable of integer indices into ``filenames``.
        test_idx: Iterable of integer indices into ``filenames``.
        filenames: Sequence of source file paths located under
            ``<data_root>/all``.
        train_ratio: Probability that a training file lands in ``train``
            rather than ``val``.
        seed: Optional seed for a private random generator, making the
            train/val assignment reproducible. ``None`` keeps using the global
            ``random()`` stream.
    """
    # Local imports keep this cell independent of the notebook's import cell.
    from os import makedirs
    from os.path import dirname, relpath
    from random import Random

    fold_num = fold_idx + 1
    all_dir = join(data_root, "all")
    fold_dir = join(data_root, "current_fold")

    # Presumably creates the train/val/test directory skeleton — confirm in
    # src.sampling.images.
    __build_image_dirs(fold_dir)
    print(f"Building fold {fold_num} ...")

    draw = Random(seed).random if seed is not None else random

    def place(src, split):
        # Build the destination from the path relative to "<data_root>/all"
        # rather than by textual replacement of "/all/": replacement would also
        # rewrite any other "/all/" in the path and breaks with non-POSIX
        # separators.
        dest = join(fold_dir, split, relpath(src, all_dir))
        makedirs(dirname(dest), exist_ok=True)
        copy(src, dest)

    for filename_idx in train_idx:
        place(filenames[filename_idx], "train" if draw() < train_ratio else "val")

    for filename_idx in test_idx:
        place(filenames[filename_idx], "test")

    # Count what is actually on disk in each split.
    train_len, val_len, test_len = (
        len(glob(join(fold_dir, split, "**", "*.*"), recursive=True))
        for split in ("train", "val", "test")
    )
    print(f"Fold {fold_num}: Train size = {train_len}, Val size = {val_len}, Test size = {test_len}")
    
def clear_fold(data_root):
    """Delete ``<data_root>/current_fold`` together with its contents.

    A missing directory counts as already cleared, so the function can be
    called repeatedly (e.g. from a standalone cleanup cell) without raising.

    Args:
        data_root: Dataset root that contains the ``current_fold`` directory.
    """
    try:
        rmtree(join(data_root, "current_fold"))
    except FileNotFoundError:
        # Nothing to remove: the fold was never built or is already gone.
        pass

def calculate_avg_test_performance(data_root, base_name):
    """Print the mean top-1 test accuracy over all fold runs and box-plot them.

    Fold runs are the entries of ``<data_root>/runs/sota`` named exactly
    ``<base_name>-f<N>`` with ``N`` a decimal number. For each one the value
    ``get_test_metrics()["metrics/accuracy_top1"]`` is collected.

    Args:
        data_root: Root directory that contains ``runs/sota``; also passed to
            ``SOTA`` when loading each fold run.
        base_name: Run-name prefix shared by all folds, e.g. ``"yolo11m-cls"``.
    """
    import re

    model_root = join(data_root, "runs", "sota")

    # Require "-f" to be followed by digits only. A plain substring test would
    # also pick up unrelated runs such as "<base_name>-full-balanced".
    fold_pattern = re.compile(rf"{re.escape(base_name)}-f\d+")
    fold_models = sorted(
        model_path for model_path in listdir(model_root)
        if fold_pattern.fullmatch(model_path)
    )
    if not fold_models:
        # Averaging or plotting an empty list only yields NaN and warnings.
        print(f"No fold models named '{base_name}-f<N>' found in {model_root}")
        return

    # Load every fold from the same root that was just listed.
    metrics = [
        SOTA(data_root, fold_model).get_test_metrics()["metrics/accuracy_top1"]
        for fold_model in fold_models
    ]

    print(f"Average Top 1 accuracy: {average(metrics)}")

    plt.figure()
    plt.boxplot(metrics)
    plt.show()

In [None]:
from sklearn.model_selection import KFold

# Number of folds, and a cap on how many of them are actually trained.
n_splits = 10
max_folds = 2  # quick-check limit; set to None to train every fold

# A fixed random_state makes the shuffled fold assignment reproducible.
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

for i, (train, test) in enumerate(kf.split(filenames)):
    try:
        build_fold(data_root, i, train, test, filenames)

        sota = SOTA("data", f"yolo11m-cls-f{i + 1}", dataset_name="techniques_kf/current_fold")
        sota.execute_train_runs(model="yolo11m-cls", runs=1, epochs=5, balanced=False)

        sota.test_model(write_to_wandb=False)
    finally:
        # Always remove the fold directory, even when building, training or
        # testing fails, so the next fold never sees leftover files.
        clear_fold(data_root)

    if max_folds is not None and i + 1 >= max_folds:
        break


calculate_avg_test_performance("data", "yolo11m-cls")

In [None]:
from src.sota.model import SOTA

# Short train-then-test pass for the run named "yolo11m-cls" (runs=1, epochs=1).
# No dataset_name is passed, so whatever default SOTA uses applies.
sota = SOTA("data", "yolo11m-cls")
sota.execute_train_runs(model="yolo11m-cls", runs=1, epochs=1, balanced=False)

# Disabled: presumably re-creates the handle so an existing run can be tested
# without running the training line above — confirm before re-enabling.
#sota = SOTA("data", "yolo11m-cls")
sota.test_model(write_to_wandb=False)

In [None]:
from os import rename

# Manual fix-up of the on-disk fold layout. Order matters: "val" is renamed to
# "test" first, which frees the name "val" for the "val_temp" directory.
# NOTE(review): nothing visible in this notebook creates "val_temp", and the
# k-fold loop removes current_fold after each fold, so this cell presumably
# only works on a hand-prepared directory — confirm it is still needed.
rename("data/img/techniques_kf/current_fold/val", "data/img/techniques_kf/current_fold/test")
rename("data/img/techniques_kf/current_fold/val_temp", "data/img/techniques_kf/current_fold/val")

In [None]:
# Manual cleanup: remove "<data_root>/current_fold" from disk.
clear_fold(data_root)

## SOTA model training

In [None]:
from src.sota.model import SOTA

# Experiment "yolo11m-cls": model "yolo11m-cls" with balanced=False
# (presumably 3 separate runs of 10 epochs each — confirm in
# SOTA.execute_train_runs).
sota = SOTA("data", "yolo11m-cls")
sota.execute_train_runs(model="yolo11m-cls", runs=3, epochs=10, balanced=False)
# Disabled alternative: a single train_model call with explicit optimizer settings.
#sota.train_model(optimizer="AdamW", lr0=0.0005)

# Disabled validation call. NOTE(review): `model` is not defined anywhere in
# the visible notebook.
#metrics = model.val(data="data/img/techniques/val")

In [None]:
from src.sota.model import SOTA

# Experiment "yolo11n-cls": model "yolo11n-cls" with runs=5, epochs=10 and
# balanced=False.
sota = SOTA("data", "yolo11n-cls")
sota.execute_train_runs(model="yolo11n-cls", runs=5, epochs=10, balanced=False)

In [None]:
from src.sota.model import SOTA

# Experiment "yolo11m-cls-balanced": model "yolo11m-cls" with balanced=True
# (runs=5, epochs=10).
sota = SOTA("data", "yolo11m-cls-balanced")
sota.execute_train_runs(model="yolo11m-cls", runs=5, epochs=10, balanced=True)

In [None]:
from src.sota.model import SOTA

# Experiment "yolo11n-cls-full-balanced": model "yolo11n-cls" with
# balanced=True (runs=2, epochs=5).
sota = SOTA("data", "yolo11n-cls-full-balanced")
sota.execute_train_runs(model="yolo11n-cls", runs=2, epochs=5, balanced=True)

In [None]:
from src.sota.model import SOTA

# Experiment "yolo11m-balance-50-155" on the "techniques_balanced" dataset.
# Uses the explicit initialize_model / train_model calls (with train_model's
# defaults) instead of execute_train_runs.
sota = SOTA("data", "yolo11m-balance-50-155", dataset_name="techniques_balanced")
sota.initialize_model("yolo11m-cls")
sota.train_model()

In [None]:
from src.sota.model import SOTA

# Experiment "yolo11n-balance-50-155" on the "techniques_balanced" dataset.
# Training is currently disabled; only the test step runs.
# NOTE(review): test_model() is called with its defaults here, whereas other
# cells pass write_to_wandb=False — confirm which behaviour is intended.
sota = SOTA("data", "yolo11n-balance-50-155", dataset_name="techniques_balanced")
sota.initialize_model("yolo11n-cls")
#sota.train_model()
sota.test_model()

# SOTA model testing

In [None]:
from src.sota.model import SOTA

# Test the run named "yolo11m-cls" with write_to_wandb=False and keep the
# returned metrics object for inspection in the next cell.
sota = SOTA("data", "yolo11m-cls")
metrics = sota.test_model(write_to_wandb=False)

print(metrics)

In [None]:
# Display the `top1` attribute (presumably top-1 accuracy) of the metrics
# object assigned in the previous cell; that cell must have been run first.
metrics.top1

## HPE DNN model training

In [None]:
from src.hpe_dnn.model import HpeDnn

# Experiment "arch1": runs=5, epochs=10, augment=True. `balanced` is not
# passed, so its default applies.
hpednn = HpeDnn("data", "arch1")
hpednn.execute_train_runs(runs=5, epochs=10, augment=True)

In [None]:
from src.hpe_dnn.model import HpeDnn

# Experiment "arch1-balanced": runs=5, epochs=10, augment=True, balanced=True.
hpednn = HpeDnn("data", "arch1-balanced")
hpednn.execute_train_runs(runs=5, epochs=10, augment=True, balanced=True)

In [None]:
%load_ext tensorboard

from src.hpe_dnn.model import HpeDnn

# Experiment "arch1_balanced": initialize_model() and train_model() are both
# called with their defaults. The third positional argument is presumably the
# dataset name — confirm in HpeDnn.__init__.
hpednn = HpeDnn("data", "arch1_balanced", "techniques_balanced")
hpednn.initialize_model()
hpednn.train_model()

In [None]:
from os.path import join
# Build and display a run-directory path.
# NOTE(review): calculate_avg_test_performance in this notebook looks for SOTA
# runs under "<data_root>/runs/sota", while this path has no "runs" component —
# confirm which layout is correct.
run_dir = join("data", "sota", "yolo11n-cls-full-balanced")
run_dir

In [None]:
from src.hpe_dnn.model import HpeDnn

# Experiment "arch1_balanced_augmented": default initialize_model(), then
# train_model with augment=True, using "techniques_balanced".
hpednn = HpeDnn("data", "arch1_balanced_augmented", "techniques_balanced")
hpednn.initialize_model()
hpednn.train_model(augment=True)

In [None]:
from src.hpe_dnn.model import HpeDnn, DnnArch

# Experiment "arch2_balanced": model initialised with DnnArch.ARCH2, using
# "techniques_balanced".
hpednn = HpeDnn("data", "arch2_balanced", "techniques_balanced")
hpednn.initialize_model(DnnArch.ARCH2)
hpednn.train_model()

In [None]:
from src.hpe_dnn.model import HpeDnn, DnnArch

# Experiment "arch3_balanced": model initialised with DnnArch.ARCH3, using
# "techniques_balanced".
hpednn = HpeDnn("data", "arch3_balanced", "techniques_balanced")
hpednn.initialize_model(DnnArch.ARCH3)
hpednn.train_model()

In [None]:
from src.hpe_dnn.model import HpeDnn

# Experiment "arch1_full_balanced": runs=2, epochs=10, augment=True,
# balanced=True. No dataset argument is passed, so HpeDnn's default applies.
hpednn = HpeDnn("data", "arch1_full_balanced")
hpednn.execute_train_runs(runs=2, epochs=10, augment=True, balanced=True)

In [None]:
from src.hpe_dnn.model import HpeDnn, DnnArch

# Experiment "arch1_balanced_not_norm": DnnArch.ARCH1 initialised with
# normalize=False, using "techniques_balanced".
hpednn = HpeDnn("data", "arch1_balanced_not_norm", "techniques_balanced")
hpednn.initialize_model(DnnArch.ARCH1, normalize=False)
hpednn.train_model()

In [None]:
from src.hpe_dnn.model import HpeDnn, DnnArch

# Experiment "arch1_balanced_dr_0.3": DnnArch.ARCH1 initialised with
# dropout_rate=0.3, using "techniques_balanced".
hpednn = HpeDnn("data", "arch1_balanced_dr_0.3", "techniques_balanced")
hpednn.initialize_model(DnnArch.ARCH1, dropout_rate=0.3)
hpednn.train_model()

In [None]:
%tensorboard --logdir data/runs/hpe_dnn/arch1_balanced/train1/logs/train

In [None]:
from src.hpe_dnn.model import read_data

# Load the file at df_path through read_data (presumably a pickled DataFrame
# holding the training split — confirm in src.hpe_dnn.model).
# NOTE(review): `train` is also the loop variable of the k-fold cell; running
# this cell overwrites it.
df_path = "data/df/techniques/train.pkl"
train = read_data(df_path)