In [1]:
# === FLAT (filtered) -> NLQ ufficiale (compatibile prepare) ===
import json, os, math, hashlib
from collections import defaultdict

# --- CONFIG ---
IN_PATH  = "nlq_pretrain.json"
OUT_PATH = "nlq_pretrain.json"



def sfloat(x, default=None):
    try: return float(x)
    except: return default

def mk_clip_uid(video_uid, cs, ce, idx):
    base = f"{video_uid}_{int(math.floor(cs or 0))}_{int(math.floor(ce or 0))}_{idx}"
    return base.replace(" ", "_")

def mk_ann_uid(clip_uid, ann_idx=0):
    return f"{clip_uid}-ann{ann_idx}"

def mk_query_id(video_uid, clip_uid, local_idx):
    raw = f"{video_uid}|{clip_uid}|{local_idx}"
    return hashlib.md5(raw.encode("utf-8")).hexdigest()[:16]

def get_query_times(item, cs, ce):
    """
    Ordine di priorità: video_*  -> query_* -> start/end.
    Clamp dentro [cs, ce] e corregge eventuali swap.
    """
    cands = [
        (item.get("video_start_sec"), item.get("video_end_sec")),
        (item.get("query_start_sec"), item.get("query_end_sec")),
        (item.get("start_sec"),       item.get("end_sec")),
    ]
    qs = qe = None
    for a, b in cands:
        qa, qb = sfloat(a), sfloat(b)
        if qa is not None and qb is not None:
            qs, qe = qa, qb
            break
    if qs is None or qe is None:
        qs, qe = cs, ce
    if qe < qs:
        qs, qe = qe, qs
    # clamp nella clip
    qs = max(cs, min(qs, ce))
    qe = max(cs, min(qe, ce))
    # evita finestra degenerata
    if qe == qs:
        mid = (cs + ce) / 2.0
        eps = min(0.01, max(1e-3, (ce - cs) * 1e-4))
        qs = max(cs, mid - eps)
        qe = min(ce, mid + eps)
    return float(qs), float(qe)

# --- Carica flat filtrato ---
with open(IN_PATH, "r", encoding="utf-8") as f:
    flat = json.load(f)

# --- Raggruppa per (video_uid, clip_start_sec, clip_end_sec) ---
groups = defaultdict(list)
for it in flat:
    vid = it.get("video_uid")
    cs  = sfloat(it.get("clip_start_sec"))
    ce  = sfloat(it.get("clip_end_sec"))
    if not vid or cs is None or ce is None:
        continue
    if ce < cs:
        cs, ce = ce, cs
    groups[(str(vid), cs, ce)].append(it)

# --- Costruisci struttura NLQ ---
videos = defaultdict(list)
per_video_counter = defaultdict(int)

kept_queries = 0
for (vid, cs, ce), items in groups.items():
    idx = per_video_counter[vid]; per_video_counter[vid] += 1
    clip_uid = mk_clip_uid(vid, cs, ce, idx)

    lqs = []
    for i, x in enumerate(items):
        qtext = (x.get("query") or "").strip()
        if not qtext:
            continue
        qs, qe = get_query_times(x, cs, ce)
        lqs.append({
            "query": qtext,
            "template": x.get("template"),
            "template_id": int(x["template_id"]) if x.get("template_id") is not None else None,
            # richiesti in molti prepare:
            "clip_start_sec": float(cs),
            "clip_end_sec":   float(ce),
            "video_start_sec": float(qs),
            "video_end_sec":   float(qe),
            "rationale": x.get("rationale"),
            "query_id": mk_query_id(vid, clip_uid, i),
        })
    if not lqs:
        continue

    videos[vid].append({
        "clip_uid": clip_uid,
        "clip_start_sec": float(cs),
        "clip_end_sec":   float(ce),
        "video_start_sec": float(cs),
        "video_end_sec":   float(ce),
        "annotations": [{
            "annotation_uid": mk_ann_uid(clip_uid, 0),
            "language_queries": lqs
        }]
    })
    kept_queries += len(lqs)

nlq = {
    "videos": [{"video_uid": vid, "clips": clips} for vid, clips in videos.items()]
}

with open(OUT_PATH, "w", encoding="utf-8") as f:
    json.dump(nlq, f, ensure_ascii=False, indent=2)

print(f"[OK] NLQ scritto: {OUT_PATH}")
print("  - #videos :", len(nlq["videos"]))
print("  - #clips  :", sum(len(v['clips']) for v in nlq['videos']))
print("  - #queries:", sum(len(a['language_queries']) for v in nlq['videos'] for c in v['clips'] for a in c['annotations']))

# sanity: mostra le chiavi del primo datum
for v in nlq["videos"][:1]:
    for c in v["clips"][:1]:
        ex = c["annotations"][0]["language_queries"][0]
        print("Esempio language_query keys:", sorted(ex.keys()))
        break
    break

AttributeError: 'str' object has no attribute 'get'

In [None]:
%%bash

BASE="/home/nicolo/ingegneriaMatematica/machineLearning"
REPO="$BASE/episodic-memory/NLQ/VSLNet"
DATA="$BASE/ego4d_data"

mkdir -p "$REPO/data/features/nlq_official_v1_omnivore/official"
mkdir -p "$REPO/data/dataset/nlq_official_v1_omnivore"

cd "$REPO/utils"
python3 prepare_ego4d_dataset.py \
  --input_train_split "$BASE/nlq_pretrain.json" \
  --input_val_split   "$DATA/v1/annotations/nlq_val.json" \
  --input_test_split  "$DATA/v1/annotations/nlq_test_unannotated.json" \
  --video_feature_read_path "$DATA/v1/omnivore_video_swinl_fp16/v1/omnivore_video_swinl_fp16/" \
  --clip_feature_save_path  "$REPO/data/features/nlq_pretrain_v1_omnivore/official" \
  --output_save_path        "$REPO/data/dataset/nlq_pretrain_v1_omnivore"


Reading [train]: /home/nicolo/ingegneriaMatematica/machineLearning/nlq_pretrain.json
# train: 22981
Writing [train]: /home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/data/dataset/nlq_pretrain_v1_omnivore/train.json
Reading [val]: /home/nicolo/ingegneriaMatematica/machineLearning/ego4d_data/v1/annotations/nlq_val.json
# val: 3874
Writing [val]: /home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/data/dataset/nlq_pretrain_v1_omnivore/val.json
Reading [test]: /home/nicolo/ingegneriaMatematica/machineLearning/ego4d_data/v1/annotations/nlq_test_unannotated.json
# test: 4004
Writing [test]: /home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/data/dataset/nlq_pretrain_v1_omnivore/test.json


Extracting features:  26%|██▌       | 6016/23394 [50:37<6:31:44,  1.35s/it] 

Error while terminating subprocess (pid=13020): 


TypeError: %d format: a real number is required, not NoneType

Extracting features:  26%|██▌       | 6016/23394 [51:21<2:28:20,  1.95it/s]
Traceback (most recent call last):
  File "/home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/utils/prepare_ego4d_dataset.py", line 157, in <module>
    convert_ego4d_dataset(parsed_args)
  File "/home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/utils/prepare_ego4d_dataset.py", line 123, in convert_ego4d_dataset
    torch.save(clip_feature.to(torch.float16), feature_save_path)
  File "/home/nicolo/ingegneriaMatematica/machineLearning/venv-ia/lib/python3.10/site-packages/torch/serialization.py", line 977, in save
    _save(
  File "/home/nicolo/ingegneriaMatematica/machineLearning/venv-ia/lib/python3.10/site-packages/torch/serialization.py", line 1278, in _save
    zip_file.write_record(name, storage, num_bytes)
KeyboardInterrupt


In [6]:
%%bash
REPO="/home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet"
mkdir -p "$REPO/data/features/nlq" "$REPO/data/dataset"


# link a feature e dataset del tuo tag
ln -sfn "$REPO/data/features/nlq_pretrain_v1_omnivore/official" "$REPO/data/features/nlq/official"
ln -sfn "$REPO/data/dataset/nlq_pretrain_v1_omnivore" "$REPO/data/dataset/nlq"

# verifica esistenza file chiave
ls -l "$REPO/data/features/nlq/official/feature_shapes.json"
ls -l "$REPO/data/dataset/nlq/train.json" "$REPO/data/dataset/nlq/val.json"



-rw-rw-r-- 1 nicolo nicolo 662259 ott 23 11:09 /home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/data/features/nlq/official/feature_shapes.json
-rw-rw-r-- 1 nicolo nicolo 5856481 ott 23 10:39 /home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/data/dataset/nlq/train.json
-rw-rw-r-- 1 nicolo nicolo  496981 ott 23 10:39 /home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet/data/dataset/nlq/val.json


In [4]:
%%bash

export HSA_OVERRIDE_GFX_VERSION=11.0.0
export HIP_VISIBLE_DEVICES=0
export HIP_LAUNCH_BLOCKING=1
export AMD_SERIALIZE_KERNEL=3

REPO="/home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet"
MODEL_SUBDIR="/home/nicolo/ingegneriaMatematica/machineLearning/checkpoints/omnivore_vslnet_pretrain"

export DATALOADER_WORKERS=1
export NUM_WORKERS=2
export BATCH_SIZE=16
export DIM=128
export NUM_EPOCH=10      
export MAX_POS_LEN=128
export INIT_LR=0.0001    # LR tipica per pretrain su narrations
export VAL_JSON_PATH="/home/nicolo/ingegneriaMatematica/machineLearning/ego4d_data/v1/annotations/nlq_val.json"
export TB_LOG_NAME="pretrain_bs${BATCH_SIZE}_dim${DIM}_ep${NUM_EPOCH}_lr${INIT_LR}"

cd "$REPO"
python main.py \
  --model_name vslnet \
  --task nlq \
  --predictor bert \
  --dim $DIM \
  --mode train \
  --video_feature_dim 1536 \
  --max_pos_len $MAX_POS_LEN \
  --init_lr $INIT_LR \
  --epochs $NUM_EPOCH \
  --batch_size $BATCH_SIZE \
  --fv official \
  --num_workers $NUM_WORKERS \
  --data_loader_workers $DATALOADER_WORKERS \
  --model_dir "$MODEL_SUBDIR" \
  --eval_gt_json "$VAL_JSON_PATH" \
  --log_to_tensorboard $TB_LOG_NAME \
  --tb_log_freq 5 \
  --remove_empty_queries_from train



Running with Namespace(save_dir='datasets', task='nlq', eval_gt_json='/home/nicolo/ingegneriaMatematica/machineLearning/ego4d_data/v1/annotations/nlq_val.json', fv='official', max_pos_len=128, num_workers=2, data_loader_workers=1, word_size=None, char_size=None, word_dim=300, video_feature_dim=1536, char_dim=50, dim=128, highlight_lambda=5.0, num_heads=8, drop_rate=0.2, predictor='bert', gpu_idx='0', seed=12345, mode='train', epochs=10, batch_size=16, num_train_steps=None, init_lr=0.0001, clip_norm=1.0, warmup_proportion=0.0, extend=0.1, period=100, text_agnostic=False, video_agnostic=False, model_dir='/home/nicolo/ingegneriaMatematica/machineLearning/checkpoints/omnivore_vslnet_pretrain', model_name='vslnet', suffix=None, log_to_tensorboard='pretrain_bs16_dim128_ep10_lr0.0001', tb_log_dir='./runs', tb_log_freq=5, slurm=False, slurm_wait=False, slurm_partition='pixar', slurm_constraint='volta', slurm_gpus=1, slurm_cpus=10, slurm_timeout_min=720, slurm_log_folder='slurm_log', remove_emp

  feature = torch.load(filename).to(torch.float32).cpu().numpy()
load video features:   1%|          | 77/11843 [00:01<03:04, 63.82it/s]

load video features: 100%|██████████| 11843/11843 [03:27<00:00, 57.08it/s]


Using device=cuda:0
Writing to tensorboard: ./runs/pretrain_bs16_dim128_ep10_lr0.0001




start training...


Epoch   1 /  10:  50%|████▉     | 780/1563 [01:30<01:29,  8.72it/s]


Epoch:  1 | Step:   781



evaluate val:   0%|          | 0/243 [00:00<?, ?it/s][A
evaluate val:   0%|          | 1/243 [00:00<00:46,  5.25it/s][A
evaluate val:   1%|          | 3/243 [00:00<00:22, 10.69it/s][A
evaluate val:   2%|▏         | 6/243 [00:00<00:15, 15.60it/s][A
evaluate val:   4%|▎         | 9/243 [00:00<00:13, 18.00it/s][A
evaluate val:   5%|▍         | 12/243 [00:00<00:11, 19.33it/s][A
evaluate val:   6%|▌         | 15/243 [00:00<00:11, 20.17it/s][A
evaluate val:   7%|▋         | 18/243 [00:00<00:10, 20.83it/s][A
evaluate val:   9%|▊         | 21/243 [00:01<00:10, 21.25it/s][A
evaluate val:  10%|▉         | 24/243 [00:01<00:10, 21.34it/s][A
evaluate val:  11%|█         | 27/243 [00:01<00:10, 21.48it/s][A
evaluate val:  12%|█▏        | 30/243 [00:01<00:09, 21.67it/s][A
evaluate val:  14%|█▎        | 33/243 [00:01<00:09, 21.85it/s][A
evaluate val:  15%|█▍        | 36/243 [00:01<00:09, 21.86it/s][A
evaluate val:  16%|█▌        | 39/243 [00:01<00:09, 21.96it/s][A
evaluate val:  17%|█▋ 

Evaluated: 3874 / 3875 instances
+Epoch 1, Step 781----+-----------+----------+----------+-----------+----------+----------+-----------+------+
|  Rank@1  |  Rank@1  |   Rank@1  |  Rank@3  |  Rank@3  |   Rank@3  |  Rank@5  |  Rank@5  |   Rank@5  | mIoU |
| mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 |      |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+
|   1.65   |   0.96   |    4.21   |   4.16   |   2.14   |   42.51   |   5.50   |   2.81   |   48.37   | 1.30 |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+


Epoch   1 /  10: 100%|█████████▉| 1561/1563 [03:13<00:00,  8.73it/s]


Epoch:  1 | Step:  1562



evaluate val:   0%|          | 0/243 [00:00<?, ?it/s][A
evaluate val:   0%|          | 1/243 [00:00<00:36,  6.63it/s][A
evaluate val:   2%|▏         | 4/243 [00:00<00:15, 15.10it/s][A
evaluate val:   3%|▎         | 7/243 [00:00<00:12, 18.18it/s][A
evaluate val:   4%|▍         | 10/243 [00:00<00:11, 19.58it/s][A
evaluate val:   5%|▍         | 12/243 [00:00<00:12, 18.60it/s][A
evaluate val:   6%|▌         | 15/243 [00:00<00:11, 19.75it/s][A
evaluate val:   7%|▋         | 18/243 [00:00<00:10, 20.51it/s][A
evaluate val:   9%|▊         | 21/243 [00:01<00:10, 21.01it/s][A
evaluate val:  10%|▉         | 24/243 [00:01<00:12, 17.41it/s][A
evaluate val:  11%|█         | 27/243 [00:01<00:11, 18.53it/s][A
evaluate val:  12%|█▏        | 30/243 [00:01<00:10, 19.49it/s][A
evaluate val:  14%|█▎        | 33/243 [00:01<00:10, 20.23it/s][A
evaluate val:  15%|█▍        | 36/243 [00:01<00:10, 20.62it/s][A
evaluate val:  16%|█▌        | 39/243 [00:02<00:09, 21.00it/s][A
evaluate val:  17%|█▋

Evaluated: 3874 / 3875 instances
+Epoch 1, Step 1562---+-----------+----------+----------+-----------+----------+----------+-----------+------+
|  Rank@1  |  Rank@1  |   Rank@1  |  Rank@3  |  Rank@3  |   Rank@3  |  Rank@5  |  Rank@5  |   Rank@5  | mIoU |
| mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 |      |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+
|   1.65   |   0.96   |    4.21   |   2.56   |   1.32   |   40.55   |   3.30   |   1.57   |   42.33   | 1.30 |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+


Epoch   1 /  10: 100%|█████████▉| 1562/1563 [03:26<00:03,  3.74s/it]


Epoch:  1 | Step:  1563



evaluate val:   0%|          | 0/243 [00:00<?, ?it/s][A
evaluate val:   0%|          | 1/243 [00:00<00:35,  6.87it/s][A
evaluate val:   2%|▏         | 4/243 [00:00<00:15, 15.46it/s][A
evaluate val:   3%|▎         | 7/243 [00:00<00:12, 18.44it/s][A
evaluate val:   4%|▍         | 10/243 [00:00<00:11, 19.81it/s][A
evaluate val:   5%|▌         | 13/243 [00:00<00:11, 20.41it/s][A
evaluate val:   7%|▋         | 16/243 [00:00<00:10, 20.93it/s][A
evaluate val:   8%|▊         | 19/243 [00:00<00:10, 21.27it/s][A
evaluate val:   9%|▉         | 22/243 [00:01<00:10, 21.42it/s][A
evaluate val:  10%|█         | 25/243 [00:01<00:10, 21.38it/s][A
evaluate val:  12%|█▏        | 28/243 [00:01<00:10, 21.41it/s][A
evaluate val:  13%|█▎        | 31/243 [00:01<00:09, 21.57it/s][A
evaluate val:  14%|█▍        | 34/243 [00:01<00:09, 21.63it/s][A
evaluate val:  15%|█▌        | 37/243 [00:01<00:09, 21.69it/s][A
evaluate val:  16%|█▋        | 40/243 [00:01<00:09, 21.79it/s][A
evaluate val:  18%|█▊

Evaluated: 3874 / 3875 instances
+Epoch 1, Step 1563---+-----------+----------+----------+-----------+----------+----------+-----------+------+
|  Rank@1  |  Rank@1  |   Rank@1  |  Rank@3  |  Rank@3  |   Rank@3  |  Rank@5  |  Rank@5  |   Rank@5  | mIoU |
| mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 |      |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+
|   1.65   |   0.96   |    4.21   |   2.45   |   1.24   |   40.27   |   3.10   |   1.47   |   42.13   | 1.30 |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+


Epoch   1 /  10: 100%|██████████| 1563/1563 [03:38<00:00,  7.16it/s]
Epoch   2 /  10:  50%|████▉     | 779/1563 [01:30<01:31,  8.60it/s]


Epoch:  2 | Step:  2343



evaluate val:   0%|          | 0/243 [00:00<?, ?it/s][A
evaluate val:   0%|          | 1/243 [00:00<00:36,  6.66it/s][A
evaluate val:   2%|▏         | 4/243 [00:00<00:15, 15.15it/s][A
evaluate val:   3%|▎         | 7/243 [00:00<00:13, 18.13it/s][A
evaluate val:   4%|▍         | 10/243 [00:00<00:11, 19.45it/s][A
evaluate val:   5%|▌         | 13/243 [00:00<00:11, 20.14it/s][A
evaluate val:   7%|▋         | 16/243 [00:00<00:10, 20.70it/s][A
evaluate val:   8%|▊         | 19/243 [00:00<00:10, 21.10it/s][A
evaluate val:   9%|▉         | 22/243 [00:01<00:10, 21.30it/s][A
evaluate val:  10%|█         | 25/243 [00:01<00:10, 21.23it/s][A
evaluate val:  12%|█▏        | 28/243 [00:01<00:10, 21.28it/s][A
evaluate val:  13%|█▎        | 31/243 [00:01<00:09, 21.41it/s][A
evaluate val:  14%|█▍        | 34/243 [00:01<00:09, 21.44it/s][A
evaluate val:  15%|█▌        | 37/243 [00:01<00:09, 21.51it/s][A
evaluate val:  16%|█▋        | 40/243 [00:01<00:09, 21.64it/s][A
evaluate val:  18%|█▊

Evaluated: 3874 / 3875 instances
+Epoch 2, Step 2343---+-----------+----------+----------+-----------+----------+----------+-----------+------+
|  Rank@1  |  Rank@1  |   Rank@1  |  Rank@3  |  Rank@3  |   Rank@3  |  Rank@5  |  Rank@5  |   Rank@5  | mIoU |
| mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 |      |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+
|   1.65   |   0.96   |    4.21   |   2.09   |   1.11   |   40.11   |   2.37   |   1.26   |   40.63   | 1.30 |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+


Epoch   2 /  10: 100%|█████████▉| 1560/1563 [03:12<00:00,  8.62it/s]


Epoch:  2 | Step:  3124



evaluate val:   0%|          | 0/243 [00:00<?, ?it/s][A
evaluate val:   0%|          | 1/243 [00:00<00:37,  6.38it/s][A
evaluate val:   2%|▏         | 4/243 [00:00<00:16, 14.83it/s][A
evaluate val:   3%|▎         | 7/243 [00:00<00:13, 17.87it/s][A
evaluate val:   4%|▍         | 10/243 [00:00<00:12, 19.30it/s][A
evaluate val:   5%|▌         | 13/243 [00:00<00:11, 20.09it/s][A
evaluate val:   7%|▋         | 16/243 [00:00<00:10, 20.69it/s][A
evaluate val:   8%|▊         | 19/243 [00:00<00:10, 21.06it/s][A
evaluate val:   9%|▉         | 22/243 [00:01<00:10, 21.28it/s][A
evaluate val:  10%|█         | 25/243 [00:01<00:10, 21.23it/s][A
evaluate val:  12%|█▏        | 28/243 [00:01<00:10, 21.29it/s][A
evaluate val:  13%|█▎        | 31/243 [00:01<00:09, 21.46it/s][A
evaluate val:  14%|█▍        | 34/243 [00:01<00:09, 21.50it/s][A
evaluate val:  15%|█▌        | 37/243 [00:01<00:09, 21.62it/s][A
evaluate val:  16%|█▋        | 40/243 [00:01<00:09, 21.73it/s][A
evaluate val:  18%|█▊

Evaluated: 3874 / 3875 instances
+Epoch 2, Step 3124---+-----------+----------+----------+-----------+----------+----------+-----------+------+
|  Rank@1  |  Rank@1  |   Rank@1  |  Rank@3  |  Rank@3  |   Rank@3  |  Rank@5  |  Rank@5  |   Rank@5  | mIoU |
| mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 |      |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+
|   1.65   |   0.96   |    4.21   |   2.12   |   1.11   |   40.11   |   2.66   |   1.32   |   40.97   | 1.30 |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+


Epoch   2 /  10: 100%|█████████▉| 1562/1563 [03:25<00:02,  2.63s/it]


Epoch:  2 | Step:  3126



evaluate val:   0%|          | 0/243 [00:00<?, ?it/s][A
evaluate val:   0%|          | 1/243 [00:00<00:34,  6.93it/s][A
evaluate val:   2%|▏         | 4/243 [00:00<00:15, 15.36it/s][A
evaluate val:   3%|▎         | 7/243 [00:00<00:12, 18.26it/s][A
evaluate val:   4%|▍         | 10/243 [00:00<00:11, 19.63it/s][A
evaluate val:   5%|▌         | 13/243 [00:00<00:11, 20.29it/s][A
evaluate val:   7%|▋         | 16/243 [00:00<00:10, 20.86it/s][A
evaluate val:   8%|▊         | 19/243 [00:00<00:10, 21.16it/s][A
evaluate val:   9%|▉         | 22/243 [00:01<00:10, 21.38it/s][A
evaluate val:  10%|█         | 25/243 [00:01<00:10, 21.29it/s][A
evaluate val:  12%|█▏        | 28/243 [00:01<00:10, 21.33it/s][A
evaluate val:  13%|█▎        | 31/243 [00:01<00:09, 21.50it/s][A
evaluate val:  14%|█▍        | 34/243 [00:01<00:09, 21.54it/s][A
evaluate val:  15%|█▌        | 37/243 [00:01<00:09, 21.64it/s][A
evaluate val:  16%|█▋        | 40/243 [00:01<00:09, 21.69it/s][A
evaluate val:  18%|█▊

Evaluated: 3874 / 3875 instances
+Epoch 2, Step 3126---+-----------+----------+----------+-----------+----------+----------+-----------+------+
|  Rank@1  |  Rank@1  |   Rank@1  |  Rank@3  |  Rank@3  |   Rank@3  |  Rank@5  |  Rank@5  |   Rank@5  | mIoU |
| mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 | mIoU@0.3 | mIoU@0.5 | mIoU@0.01 |      |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+
|   1.65   |   0.96   |    4.21   |   2.14   |   1.11   |   40.11   |   2.68   |   1.34   |   40.99   | 1.30 |
+----------+----------+-----------+----------+----------+-----------+----------+----------+-----------+------+


Epoch   2 /  10: 100%|██████████| 1563/1563 [03:37<00:00,  7.19it/s]
Epoch   3 /  10:   2%|▏         | 27/1563 [00:03<02:57,  8.65it/s]HW Exception by GPU node-1 (Agent handle: 0x63038e5f7890) reason :GPU Hang
bash: line 39: 139975 Aborted                 (core dumped) python main.py --model_name vslnet --task nlq --predictor bert --dim $DIM --mode train --video_feature_dim 1536 --max_pos_len $MAX_POS_LEN --init_lr $INIT_LR --epochs $NUM_EPOCH --batch_size $BATCH_SIZE --fv official --num_workers $NUM_WORKERS --data_loader_workers $DATALOADER_WORKERS --model_dir "$MODEL_SUBDIR" --eval_gt_json "$VAL_JSON_PATH" --log_to_tensorboard $TB_LOG_NAME --tb_log_freq 5 --remove_empty_queries_from train


CalledProcessError: Command 'b'\nexport HSA_OVERRIDE_GFX_VERSION=11.0.0\nexport HIP_VISIBLE_DEVICES=0\nexport HIP_LAUNCH_BLOCKING=1\nexport AMD_SERIALIZE_KERNEL=3\n\nREPO="/home/nicolo/ingegneriaMatematica/machineLearning/episodic-memory/NLQ/VSLNet"\nMODEL_SUBDIR="/home/nicolo/ingegneriaMatematica/machineLearning/checkpoints/omnivore_vslnet_pretrain"\n\nexport DATALOADER_WORKERS=1\nexport NUM_WORKERS=2\nexport BATCH_SIZE=16\nexport DIM=128\nexport NUM_EPOCH=10      \nexport MAX_POS_LEN=128\nexport INIT_LR=0.0001    # LR tipica per pretrain su narrations\nexport VAL_JSON_PATH="/home/nicolo/ingegneriaMatematica/machineLearning/ego4d_data/v1/annotations/nlq_val.json"\nexport TB_LOG_NAME="pretrain_bs${BATCH_SIZE}_dim${DIM}_ep${NUM_EPOCH}_lr${INIT_LR}"\n\ncd "$REPO"\npython main.py \\\n  --model_name vslnet \\\n  --task nlq \\\n  --predictor bert \\\n  --dim $DIM \\\n  --mode train \\\n  --video_feature_dim 1536 \\\n  --max_pos_len $MAX_POS_LEN \\\n  --init_lr $INIT_LR \\\n  --epochs $NUM_EPOCH \\\n  --batch_size $BATCH_SIZE \\\n  --fv official \\\n  --num_workers $NUM_WORKERS \\\n  --data_loader_workers $DATALOADER_WORKERS \\\n  --model_dir "$MODEL_SUBDIR" \\\n  --eval_gt_json "$VAL_JSON_PATH" \\\n  --log_to_tensorboard $TB_LOG_NAME \\\n  --tb_log_freq 5 \\\n  --remove_empty_queries_from train\n\n'' returned non-zero exit status 134.

In [14]:
%load_ext tensorboard
%tensorboard --logdir episodic-memory/NLQ/VSLNet/runs/


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
