In [2]:
import os, re, csv, json, random
from collections import defaultdict, Counter

import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

# Every data and artifact path below is built relative to PROJECT_ROOT.
PROJECT_ROOT = "."
DATA_DIR = os.path.join(PROJECT_ROOT, "Amazon_products")

# Raw product corpora (parsed by load_corpus as tab-separated "pid<TAB>text" lines).
TRAIN_CORPUS_PATH = os.path.join(DATA_DIR, "train", "train_corpus.txt")
TEST_CORPUS_PATH  = os.path.join(DATA_DIR, "test",  "test_corpus.txt")

# Taxonomy inputs: class list, parent/child edges, and per-class keyword lists.
CLASSES_PATH = os.path.join(DATA_DIR, "classes.txt")
HIER_PATH    = os.path.join(DATA_DIR, "class_hierarchy.txt")
KEYWORD_PATH = os.path.join(DATA_DIR, "class_related_keywords.txt")

# Directory for intermediate outputs (silver labels, embeddings, id lists).
ART_DIR = os.path.join(PROJECT_ROOT, "artifacts")
os.makedirs(ART_DIR, exist_ok=True)

NUM_CLASSES = 531   # size of the label space used for depth arrays, adjacency and targets
MIN_LABELS = 2      # lower bound on labels requested per document
MAX_LABELS = 3      # upper bound on labels emitted per document
SEED = 42
# Seed each RNG source used in this notebook (Python, NumPy, PyTorch).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f5f205516b0>

In [3]:
# Clean + Load Corpus

TAG_RE = re.compile(r"<[^>]+>")

def clean_text(s: str) -> str:
    """Normalize raw text for keyword matching and embedding.

    Replaces anything that looks like an HTML/XML tag with a space, lowercases
    the result, collapses every whitespace run into a single space and trims
    both ends. A falsy input (``None`` or ``""``) yields ``""``.
    """
    without_tags = TAG_RE.sub(" ", s if s else "")
    lowered = without_tags.lower()
    return re.sub(r"\s+", " ", lowered).strip()

def load_corpus(path: str):
    """Read a tab-separated corpus file into a ``{pid: cleaned_text}`` dict.

    Each line is stripped and split on the first tab only. Lines that do not
    yield exactly two fields are skipped. The text field is passed through
    ``clean_text``. If a pid occurs more than once, the last occurrence wins.
    """
    pid2text = {}
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            fields = raw_line.strip().split("\t", 1)
            if len(fields) != 2:
                continue
            pid2text[fields[0]] = clean_text(fields[1])
    return pid2text

# Load both corpora; keys are pids (strings), values are cleaned texts.
pid2text_train = load_corpus(TRAIN_CORPUS_PATH)
pid2text_test  = load_corpus(TEST_CORPUS_PATH)
# Test ids in dict (file) order; this list fixes the row order used for test predictions.
id_list_test   = list(pid2text_test.keys())

print("train:", len(pid2text_train), "test:", len(pid2text_test))


train: 29487 test: 19658


In [4]:
def load_classes(path):
    """Parse the class list file into ``(name2id, id2name)`` dictionaries.

    Blank lines are ignored. Each remaining line is split on runs of tabs,
    commas or spaces:

    * if the first token is numeric it is the class id and the second token
      (or the id itself as a string, when absent) is the class name;
    * otherwise the first token is the name and ids are assigned sequentially
      starting at 0, counting only such id-less lines.

    Later lines overwrite earlier entries with the same name or id.
    """
    name2id = {}
    id2name = {}
    auto_id = 0
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry == "":
                continue
            tokens = re.split(r"[\t, ]+", entry)
            head = tokens[0]
            if head.isdigit():
                cid = int(head)
                cname = tokens[1] if len(tokens) > 1 else str(cid)
            else:
                cid, cname = auto_id, head
                auto_id += 1
            name2id[cname] = cid
            id2name[cid] = cname
    return name2id, id2name

# Build the class-name <-> class-id lookups used when parsing the keyword file.
name2id, id2name = load_classes(CLASSES_PATH)
print("num classes loaded:", len(name2id))

num classes loaded: 531


In [5]:
def load_hierarchy(path):
    """Read ``parent<TAB>child`` integer pairs into a child -> parents mapping.

    Blank lines are skipped. Every other line must contain exactly two
    tab-separated integers; anything else raises ``ValueError``.

    Returns:
        ``defaultdict(set)`` mapping each child id to the set of its parent ids.
    """
    child_to_parents = defaultdict(set)
    with open(path, "r", encoding="utf-8") as handle:
        for entry in (raw_line.strip() for raw_line in handle):
            if not entry:
                continue
            parent_tok, child_tok = entry.split("\t")
            parent_id = int(parent_tok)
            child_id = int(child_tok)
            child_to_parents[child_id].add(parent_id)
    return child_to_parents

# Global child -> {parents} map; read by get_ancestors, compute_depths and build_label_adj.
parents = load_hierarchy(HIER_PATH)

def get_ancestors(cid: int) -> set:
    """Return every class reachable from ``cid`` by following parent links.

    Walks the module-level ``parents`` mapping with an explicit stack. The
    ``found`` set doubles as the visited marker, so each ancestor is expanded
    at most once. ``cid`` itself is not included unless the hierarchy loops
    back to it.
    """
    found = set()
    pending = [cid]
    while pending:
        node = pending.pop()
        fresh = [p for p in parents.get(node, []) if p not in found]
        found.update(fresh)
        pending.extend(fresh)
    return found

def compute_depths(num_classes: int):
    """Compute the depth of every class id in ``range(num_classes)``.

    A class with no entry (or an empty entry) in the module-level ``parents``
    mapping has depth 0. Any other class has depth ``1 + max(depth of its
    parents)``, i.e. the longest parent chain above it. Results are memoized
    in the returned list. The recursion has no cycle guard, so the hierarchy
    is assumed to be acyclic.

    Returns:
        List of length ``num_classes`` where entry ``i`` is the depth of class ``i``.
    """
    UNSET = -1
    depth = [UNSET] * num_classes

    def resolve(node):
        cached = depth[node]
        if cached != UNSET:
            return cached
        node_parents = parents.get(node, [])
        if node_parents:
            depth[node] = 1 + max(map(resolve, node_parents))
        else:
            depth[node] = 0
        return depth[node]

    for node in range(num_classes):
        resolve(node)
    return depth

# Per-class depth list; used as a specificity bonus and tie-breaker in the keyword scorer.
depths = compute_depths(NUM_CLASSES)
print("max depth:", max(depths))


max depth: 3


In [6]:
def build_label_adj(num_classes, parents_dict, add_self_loop=True, undirected=True):
    """Build the symmetrically normalized label adjacency ``D^-1/2 A D^-1/2``.

    Args:
        num_classes: Number of label nodes; the result is ``(num_classes, num_classes)``.
        parents_dict: Mapping ``child_id -> iterable of parent ids``. Every id
            must be in ``range(num_classes)``.
        add_self_loop: If true, set the diagonal of ``A`` to 1 before normalizing.
        undirected: If true, add the child -> parent edge as well as parent -> child.

    Returns:
        ``torch.float32`` tensor holding the normalized adjacency. Rows and
        columns of zero-degree nodes (possible only when ``add_self_loop`` is
        false) are all zeros.
    """
    A = np.zeros((num_classes, num_classes), dtype=np.float32)
    for child, ps in parents_dict.items():
        for p in ps:
            A[p, child] = 1.0
            if undirected:
                A[child, p] = 1.0
    if add_self_loop:
        np.fill_diagonal(A, 1.0)

    deg = A.sum(axis=1)
    # Write into a zero-initialized buffer: with `where=` but no `out=`, NumPy
    # leaves the masked-out entries uninitialized.
    deg_inv_sqrt = np.zeros_like(deg)
    np.power(deg, -0.5, out=deg_inv_sqrt, where=deg > 0)
    # Row/column scaling by broadcasting equals D^-1/2 @ A @ D^-1/2 without
    # materializing two dense diagonal matrices.
    A_hat = deg_inv_sqrt[:, None] * A * deg_inv_sqrt[None, :]
    return torch.tensor(A_hat, dtype=torch.float32)

# Normalized label graph (self-loops, undirected edges); later registered as a buffer on the classifier.
A_hat = build_label_adj(NUM_CLASSES, parents)
print("A_hat:", A_hat.shape)


A_hat: torch.Size([531, 531])


In [7]:
def load_keywords_name_format(path, name2id):
    """Parse ``class_name: kw1, kw2, ...`` lines into a keyword -> class-ids map.

    Lines that are blank or contain no ``:`` are ignored. The class name is the
    text before the first ``:`` (stripped). Names missing from ``name2id`` are
    collected and the first ten are reported in a single warning at the end.
    Keywords are comma-separated, normalized with ``clean_text``, and dropped
    if they normalize to an empty string.

    Returns:
        ``defaultdict(set)`` mapping each normalized keyword to the set of
        class ids that list it.
    """
    kw2cids = defaultdict(set)
    unknown_names = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry == "" or ":" not in entry:
                continue
            cname, _, rest = entry.partition(":")
            cname = cname.strip()
            if cname not in name2id:
                unknown_names.append(cname)
                continue
            cid = name2id[cname]
            for piece in rest.split(","):
                kw = clean_text(piece)
                if kw:
                    kw2cids[kw].add(cid)
    if unknown_names:
        print("WARNING missing class names (showing 10):", unknown_names[:10])
    return kw2cids

# Global keyword -> {class ids} index that score_doc iterates over.
kw2cids = load_keywords_name_format(KEYWORD_PATH, name2id)
print("num keywords:", len(kw2cids))

num keywords: 4634


In [8]:
_kw_regex_cache = {}

def kw_match(text, kw):
    if " " in kw:
        return kw in text
    if kw not in _kw_regex_cache:
        _kw_regex_cache[kw] = re.compile(rf"\b{re.escape(kw)}\b")
    return _kw_regex_cache[kw].search(text) is not None

def score_doc(text):
    """Score classes for ``text`` from keyword hits.

    Every keyword in the module-level ``kw2cids`` that matches the text adds
    ``1 / (1 + log(1 + n))`` to each of its ``n`` classes, so keywords shared
    by many classes count for less. Each class that received at least one hit
    then gets a bonus of ``0.02 * depths[cid]`` to favor deeper classes.

    Returns:
        ``defaultdict(float)`` mapping class id to score; only classes with at
        least one keyword hit are present.
    """
    scores = defaultdict(float)
    for kw, cids in kw2cids.items():
        if not kw_match(text, kw):
            continue
        w = 1.0 / (1.0 + np.log(1 + len(cids)))  # generic penalty
        for cid in cids:
            scores[cid] += w
    # Snapshot the keys so the dict is not iterated while being updated.
    for cid in list(scores):
        scores[cid] = scores[cid] + 0.02 * depths[cid]  # specificity bonus
    return scores

def predict_labels(text, k_min=2, k_max=3):
    """Pick silver labels for ``text`` from keyword scores plus ancestors.

    Steps:
      1. Score classes with ``score_doc``; return ``[]`` if nothing matched.
      2. Take the ``k_max`` highest-scoring classes as seeds.
      3. Add every ancestor of each seed to the candidate pool.
      4. Rank candidates by ``(score, depth)`` descending (unscored ancestors
         count as 0.0) and keep the first ``k_max``.
      5. If fewer than ``k_min`` were kept, top up from the score ranking.

    The result can still hold fewer than ``k_min`` ids when too few classes
    were scored. At most ``k_max`` ids are returned.
    """
    scores = score_doc(text)
    if len(scores) == 0:
        return []

    by_score = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    seeds = [cid for cid, _ in by_score[:k_max]]

    candidates = set(seeds)
    for seed in seeds:
        candidates.update(get_ancestors(seed))

    def rank_key(c):
        return (scores.get(c, 0.0), depths[c])

    ordered = sorted(list(candidates), key=rank_key, reverse=True)

    chosen = []
    for cid in ordered:
        if cid not in chosen:
            chosen.append(cid)
        if len(chosen) >= k_max:
            break

    if len(chosen) < k_min:
        for cid, _score in by_score:
            if cid not in chosen:
                chosen.append(cid)
            if len(chosen) >= k_min:
                break

    return chosen[:k_max]

In [9]:
# Where the keyword-derived ("silver") training labels are persisted.
TRAIN_SILVER_PATH = os.path.join(ART_DIR, "train_silver_labels.csv")

# Label every training document with the keyword heuristic. Documents with no
# keyword hit at all (empty label list) are dropped from the silver set.
train_rows = []
for pid, text in tqdm(pid2text_train.items(), desc="Generating train silver labels"):
    labels = predict_labels(text, k_min=MIN_LABELS, k_max=MAX_LABELS)
    if not labels:
        continue
    # Labels are stored as one comma-joined string of class ids per row.
    train_rows.append({"pid": pid, "text": text, "labels": ",".join(map(str, labels))})

train_silver_df = pd.DataFrame(train_rows)
train_silver_df.to_csv(TRAIN_SILVER_PATH, index=False)

print("Saved:", TRAIN_SILVER_PATH)
print("Train silver samples:", len(train_silver_df))
train_silver_df.head()

Generating train silver labels: 100%|██████████| 29487/29487 [15:18<00:00, 32.09it/s]


Saved: ./artifacts/train_silver_labels.csv
Train silver samples: 19500


Unnamed: 0,pid,text,labels
0,0,omron hem 790it automatic blood pressure monit...,502493145
1,1,natural factors whey factors chocolate works w...,355455271
2,2,"clif bar builder 's bar , 2 . 4 ounce bars i l...",4988366
3,4,clif bar energy bars these were cheaper than w...,480449376
4,9,lumiscope stirrup stockings pair these are ver...,382199169


In [10]:
# Reload the silver labels from disk, so the frame below reflects the CSV round trip.
train_silver_df = pd.read_csv(TRAIN_SILVER_PATH)
label_freq = Counter()

# Count how often each class id appears across all silver label strings.
for s in train_silver_df["labels"]:
    for x in str(s).split(","):
        label_freq[int(x)] += 1

# The two most frequent silver labels.
# NOTE(review): `fallback_default` is not referenced again in the visible cells — confirm it is still needed.
fallback_default = [cid for cid, _ in label_freq.most_common(2)]
print("Fallback default:", fallback_default)


Fallback default: [0, 455]


In [11]:
from transformers import AutoTokenizer, AutoModel

# Use the GPU when one is available; every model and batch below is moved to `device`.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

MODEL_NAME = "bert-base-uncased"  # general pretrained, not Amazon-finetuned
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
bert = AutoModel.from_pretrained(MODEL_NAME).to(device)
# The encoder is only used for feature extraction, so keep it in eval mode.
bert.eval()

@torch.no_grad()
def embed_texts(texts, batch_size=32, max_length=256):
    """Embed ``texts`` with the global ``bert`` encoder using masked mean pooling.

    Texts are tokenized in batches of ``batch_size`` (padded to the longest
    item in the batch, truncated to ``max_length`` tokens). For each text the
    last-layer hidden states are averaged over non-padding positions only.

    Returns:
        ``np.ndarray`` with one row per input text, stacked in input order.
    """
    pooled_batches = []
    batch_starts = range(0, len(texts), batch_size)
    for start in tqdm(batch_starts, desc="Embedding"):
        chunk = texts[start:start + batch_size]
        encoded = tokenizer(chunk, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
        hidden = bert(**encoded).last_hidden_state
        # Broadcast the attention mask over the hidden dimension so padding contributes 0.
        token_mask = encoded["attention_mask"].unsqueeze(-1)
        summed = (hidden * token_mask).sum(1)
        # clamp avoids a division by zero for a sequence with no attended tokens.
        token_counts = token_mask.sum(1).clamp(min=1)
        pooled_batches.append((summed / token_counts).cpu().numpy())
    return np.vstack(pooled_batches)

# Texts to embed: silver-labelled training rows, and test docs in id_list_test order.
train_texts = train_silver_df["text"].astype(str).tolist()
test_texts  = [pid2text_test[i] for i in id_list_test]

X_train = embed_texts(train_texts, batch_size=32, max_length=256)
X_test  = embed_texts(test_texts,  batch_size=32, max_length=256)

# Persist embeddings and the matching id lists so later cells can reload them
# without re-running BERT.
np.save(os.path.join(ART_DIR, "train_bert.npy"), X_train)
np.save(os.path.join(ART_DIR, "test_bert.npy"),  X_test)
# Context managers ensure the JSON files are flushed and closed before they are read back.
with open(os.path.join(ART_DIR, "train_ids.json"), "w", encoding="utf-8") as f:
    json.dump(train_silver_df["pid"].astype(str).tolist(), f)
with open(os.path.join(ART_DIR, "test_ids.json"), "w", encoding="utf-8") as f:
    json.dump(id_list_test, f)

print("Saved embeddings:", X_train.shape, X_test.shape)


Device: cuda


Embedding: 100%|██████████| 610/610 [02:02<00:00,  4.98it/s]
Embedding: 100%|██████████| 615/615 [02:10<00:00,  4.72it/s]

Saved embeddings: (19500, 768) (19658, 768)





In [12]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

def labels_str_to_target(labels_str, num_classes=NUM_CLASSES):
    """Convert a comma-separated class-id string into a multi-hot target.

    ``labels_str`` is coerced with ``str()``. Each comma-separated piece is
    stripped, empty pieces are skipped, and the rest are parsed as integer
    indices.

    Returns:
        1-D ``torch.float32`` tensor of length ``num_classes`` with 1.0 at
        every listed id and 0.0 elsewhere.
    """
    y = torch.zeros(num_classes, dtype=torch.float32)
    pieces = (piece.strip() for piece in str(labels_str).split(","))
    for piece in pieces:
        if piece:
            y[int(piece)] = 1.0
    return y

class SilverEmbDataset(Dataset):
    """Dataset pairing precomputed embeddings with silver label strings.

    ``X`` is converted to a float32 tensor once at construction time. Label
    strings are kept as-is and turned into multi-hot targets lazily, per item.
    """

    def __init__(self, X, label_strs):
        self.label_strs = list(label_strs)
        self.X = torch.tensor(X, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        features = self.X[i]
        target = labels_str_to_target(self.label_strs[i])
        return {"X": features, "y": target}

# Reload the cached embeddings written by the embedding cell.
X_train = np.load(os.path.join(ART_DIR, "train_bert.npy"))
X_test  = np.load(os.path.join(ART_DIR, "test_bert.npy"))

# Rows of X_train are paired positionally with the rows of train_silver_df.
label_strs = train_silver_df["labels"].astype(str).tolist()
# Hold out 10% of the silver-labelled data for validation (fixed split seed).
X_tr, X_va, s_tr, s_va = train_test_split(X_train, label_strs, test_size=0.1, random_state=42)

train_loader = DataLoader(SilverEmbDataset(X_tr, s_tr), batch_size=128, shuffle=True)
val_loader   = DataLoader(SilverEmbDataset(X_va, s_va), batch_size=256, shuffle=False)

print("train batches:", len(train_loader), "val batches:", len(val_loader))


train batches: 138 val batches: 8


In [13]:
class LabelGCN(nn.Module):
    """Stack of graph-convolution layers over the label graph.

    Each layer computes ``(A_hat @ H) @ W`` with a square ``emb_dim x emb_dim``
    weight. Every layer except the last is followed by ReLU and dropout.
    """

    def __init__(self, emb_dim, num_layers=2, dropout=0.2):
        super().__init__()
        layer_weights = [nn.Parameter(torch.empty(emb_dim, emb_dim)) for _ in range(num_layers)]
        self.weights = nn.ParameterList(layer_weights)
        for weight in self.weights:
            nn.init.xavier_uniform_(weight)
        self.num_layers = num_layers
        self.dropout = dropout

    def forward(self, H, A_hat):
        """Propagate label features ``H`` through all layers using ``A_hat``."""
        last_idx = self.num_layers - 1
        for layer_idx, W in enumerate(self.weights):
            H = (A_hat @ H) @ W
            if layer_idx >= last_idx:
                # The final layer stays linear: no activation, no dropout.
                continue
            H = F.dropout(F.relu(H), p=self.dropout, training=self.training)
        return H

class GCNEnhancedClassifier(nn.Module):
    """Multi-label classifier that scores documents against GCN-refined label embeddings.

    A linear layer projects document features to ``emb_dim``. Learnable label
    embeddings are propagated through ``LabelGCN`` over the label graph
    ``A_hat``. The logit for (document, label) is the dot product of the two.
    """

    def __init__(self, input_dim, num_labels, emb_dim=256, num_gcn_layers=2, dropout=0.2, A_hat=None):
        super().__init__()
        self.proj = nn.Linear(input_dim, emb_dim)

        initial_labels = torch.empty(num_labels, emb_dim)
        self.label_init_emb = nn.Parameter(initial_labels)
        nn.init.xavier_uniform_(self.label_init_emb)

        self.gcn = LabelGCN(emb_dim=emb_dim, num_layers=num_gcn_layers, dropout=dropout)
        self.dropout = dropout

        # Stored as a buffer: follows the module across devices but is not trained.
        self.register_buffer("A_hat", A_hat)

    def forward(self, x):
        """Return logits of shape (B, C) for a batch ``x`` of shape (B, input_dim)."""
        # Label embeddings are recomputed on every call because the GCN weights are trainable.
        label_emb = self.gcn(self.label_init_emb, self.A_hat)   # (C, d)
        doc_emb = F.dropout(self.proj(x), p=self.dropout, training=self.training)  # (B, d)
        return doc_emb @ label_emb.t()                          # (B, C)

# Instantiate the classifier on top of the cached BERT features; input_dim is
# taken from the embedding matrix so it follows whatever encoder produced it.
model = GCNEnhancedClassifier(
    input_dim=X_train.shape[1],
    num_labels=NUM_CLASSES,
    emb_dim=256,
    num_gcn_layers=2,
    dropout=0.2,
    A_hat=A_hat.to(device),
).to(device)

print(model)


GCNEnhancedClassifier(
  (proj): Linear(in_features=768, out_features=256, bias=True)
  (gcn): LabelGCN(
    (weights): ParameterList(
        (0): Parameter containing: [torch.float32 of size 256x256 (cuda:0)]
        (1): Parameter containing: [torch.float32 of size 256x256 (cuda:0)]
    )
  )
)


In [15]:
import copy

def sample_f1_score(y_true, y_pred):
    """Mean per-sample F1 between two multi-hot matrices of the same shape.

    For each row, ``F1 = 2*TP / (2*TP + FP + FN + eps)``. The small ``eps``
    keeps a row with no true and no predicted labels at 0 instead of NaN.

    Returns:
        Python ``float``: the average of the row-wise F1 scores.
    """
    eps = 1e-9
    hits = (y_true * y_pred).sum(axis=1)
    false_alarms = ((1 - y_true) * y_pred).sum(axis=1)
    misses = (y_true * (1 - y_pred)).sum(axis=1)
    per_sample = (2 * hits) / (2 * hits + false_alarms + misses + eps)
    return float(np.mean(per_sample))

@torch.no_grad()
def evaluate_sample_f1(model, loader, device, thr=0.35, min_l=2, max_l=3):
    """Compute mean sample-F1 of ``model`` over ``loader``.

    Args:
        model: Module mapping a feature batch to per-class logits.
        loader: Iterable of dict batches with keys ``"X"`` (features) and
            ``"y"`` (multi-hot targets).
        device: Device the feature batch is moved to before the forward pass.
        thr: Probability threshold for selecting a label.
        min_l: If fewer than ``min_l`` labels pass ``thr``, the top ``min_l``
            by probability are used instead.
        max_l: Maximum number of labels kept per sample.

    Returns:
        Mean per-sample F1 as computed by ``sample_f1_score``.
    """
    model.eval()
    all_true, all_pred = [], []
    for batch in loader:
        Xb = batch["X"].to(device)
        yb = batch["y"].cpu().numpy()

        # torch.sigmoid is numerically stable; a hand-rolled 1 / (1 + exp(-x))
        # in NumPy raises overflow warnings for large-magnitude negative logits.
        probs = torch.sigmoid(model(Xb)).cpu().numpy()

        yhat = np.zeros_like(probs, dtype=np.float32)
        for i in range(probs.shape[0]):
            p = probs[i]
            idx = np.argsort(p)[::-1]  # class indices, most probable first
            chosen = [j for j in idx[:50] if p[j] >= thr]
            if len(chosen) < min_l:
                chosen = idx[:min_l].tolist()
            chosen = chosen[:max_l]
            yhat[i, chosen] = 1.0

        all_true.append(yb)
        all_pred.append(yhat)

    YT = np.vstack(all_true)
    YP = np.vstack(all_pred)
    return sample_f1_score(YT, YP)

# Multi-label objective: independent binary cross-entropy per class, on raw logits.
crit = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-3)

THR = 0.35  # try 0.25~0.40 later
EPOCHS = 50
patience = 5      # epochs without validation improvement before stopping
pat = 0           # current count of consecutive non-improving epochs
best_val = -1.0   # below any attainable F1, so the first epoch always counts as an improvement
best_state = None

for epoch in range(1, EPOCHS + 1):
    model.train()
    total = 0.0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch}"):
        Xb = batch["X"].to(device)
        yb = batch["y"].to(device)

        loss = crit(model(Xb), yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total += loss.item()

    # Mean batch loss for the epoch.
    train_loss = total / len(train_loader)
    val_f1 = evaluate_sample_f1(model, val_loader, device, thr=THR, min_l=MIN_LABELS, max_l=MAX_LABELS)

    print(f"[Epoch {epoch}] train loss: {train_loss:.4f} | val sample-F1: {val_f1:.4f}")

    # Early stopping on validation sample-F1; keep a deep copy of the best weights.
    if val_f1 > best_val:
        best_val = val_f1
        best_state = copy.deepcopy(model.state_dict())
        pat = 0
        print("  ✅ improved")
    else:
        pat += 1
        print(f"  ⏳ no improvement ({pat}/{patience})")
        if pat >= patience:
            print("[Early Stopping]")
            break

# Restore the best checkpoint so later cells predict with it rather than the last epoch.
if best_state is not None:
    model.load_state_dict(best_state)
    print("Loaded best model. best val sample-F1:", best_val)


Epoch 1: 100%|██████████| 138/138 [00:00<00:00, 262.60it/s]
  probs = 1 / (1 + np.exp(-logits))


[Epoch 1] train loss: 0.0200 | val sample-F1: 0.3288
  ✅ improved


Epoch 2: 100%|██████████| 138/138 [00:00<00:00, 288.00it/s]


[Epoch 2] train loss: 0.0197 | val sample-F1: 0.3135
  ⏳ no improvement (1/5)


Epoch 3: 100%|██████████| 138/138 [00:00<00:00, 299.39it/s]


[Epoch 3] train loss: 0.0196 | val sample-F1: 0.3237
  ⏳ no improvement (2/5)


Epoch 4: 100%|██████████| 138/138 [00:00<00:00, 302.63it/s]


[Epoch 4] train loss: 0.0196 | val sample-F1: 0.3303
  ✅ improved


Epoch 5: 100%|██████████| 138/138 [00:00<00:00, 304.26it/s]


[Epoch 5] train loss: 0.0198 | val sample-F1: 0.3306
  ✅ improved


Epoch 6: 100%|██████████| 138/138 [00:00<00:00, 299.61it/s]


[Epoch 6] train loss: 0.0195 | val sample-F1: 0.3498
  ✅ improved


Epoch 7: 100%|██████████| 138/138 [00:00<00:00, 278.99it/s]


[Epoch 7] train loss: 0.0193 | val sample-F1: 0.3420
  ⏳ no improvement (1/5)


Epoch 8: 100%|██████████| 138/138 [00:00<00:00, 298.46it/s]


[Epoch 8] train loss: 0.0194 | val sample-F1: 0.3486
  ⏳ no improvement (2/5)


Epoch 9: 100%|██████████| 138/138 [00:00<00:00, 304.56it/s]


[Epoch 9] train loss: 0.0194 | val sample-F1: 0.3437
  ⏳ no improvement (3/5)


Epoch 10: 100%|██████████| 138/138 [00:00<00:00, 303.47it/s]


[Epoch 10] train loss: 0.0196 | val sample-F1: 0.3446
  ⏳ no improvement (4/5)


Epoch 11: 100%|██████████| 138/138 [00:00<00:00, 304.29it/s]


[Epoch 11] train loss: 0.0193 | val sample-F1: 0.3404
  ⏳ no improvement (5/5)
[Early Stopping]
Loaded best model. best val sample-F1: 0.34977778792381287


In [17]:
# Reload the test ids saved alongside the embeddings; the context manager
# closes the file instead of leaking the handle.
with open(os.path.join(ART_DIR, "test_ids.json"), encoding="utf-8") as f:
    id_list_test = json.load(f)

# Score the whole test matrix in a single forward pass (no gradients needed).
model.eval()
with torch.no_grad():
    probs = torch.sigmoid(model(torch.tensor(X_test, dtype=torch.float32).to(device))).cpu().numpy()

def probs_to_labels_adaptive(p, thr=0.35, min_l=2, max_l=3):
    """Turn one probability vector into a sorted list of predicted class ids.

    Classes are ranked by probability. Those among the top 50 with
    ``p >= thr`` are selected. If fewer than ``min_l`` qualify, the top
    ``min_l`` classes are used instead. At most ``max_l`` ids are kept, and
    the result is returned in ascending id order.
    """
    order = np.argsort(p)[::-1]
    above_thr = []
    for k in order[:50]:
        if p[k] >= thr:
            above_thr.append(k)
    picked = above_thr if len(above_thr) >= min_l else order[:min_l].tolist()
    return sorted(picked[:max_l])

# One label list per test document, in the same order as id_list_test.
pred_labels = [probs_to_labels_adaptive(p, thr=THR, min_l=MIN_LABELS, max_l=MAX_LABELS) for p in probs]

SUBMISSION_PATH = "submission_final_bert_labelgcn.csv"
with open(SUBMISSION_PATH, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["id", "label"])  # competition-required header
    for id_, labs in zip(id_list_test, pred_labels):
        # Labels go into one cell as a comma-joined string; csv.writer quotes it.
        w.writerow([id_, ",".join(map(str, labs))])

print("Saved:", SUBMISSION_PATH, "rows:", len(pred_labels))


Saved: submission_final_bert_labelgcn.csv rows: 19658


In [18]:
import copy
import numpy as np
from tqdm.auto import tqdm

# NOTE(review): this cell repeats the earlier training setup. A fresh optimizer
# is created, but `model` is not re-instantiated here, so training continues
# from whatever weights it currently holds.
crit = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-3)

THR = 0.35      # later we will try 0.25~0.40
EPOCHS = 50
patience = 5      # epochs without validation improvement before stopping
pat = 0           # current count of consecutive non-improving epochs
best_val = -1.0   # reset, so the first epoch of this run always counts as an improvement
best_state = None

def sample_f1_score(y_true, y_pred):
    """Mean per-sample F1 between two multi-hot matrices of the same shape.

    For each row, ``F1 = 2*TP / (2*TP + FP + FN + eps)``. The small ``eps``
    keeps a row with no true and no predicted labels at 0 instead of NaN.

    Returns:
        Python ``float``: the average of the row-wise F1 scores.
    """
    eps = 1e-9
    hits = (y_true * y_pred).sum(axis=1)
    false_alarms = ((1 - y_true) * y_pred).sum(axis=1)
    misses = (y_true * (1 - y_pred)).sum(axis=1)
    per_sample = (2 * hits) / (2 * hits + false_alarms + misses + eps)
    return float(np.mean(per_sample))

@torch.no_grad()
def evaluate_sample_f1(model, loader, device, thr=0.35, min_l=2, max_l=3):
    """Compute mean sample-F1 of ``model`` over ``loader``.

    Args:
        model: Module mapping a feature batch to per-class logits.
        loader: Iterable of dict batches with keys ``"X"`` (features) and
            ``"y"`` (multi-hot targets).
        device: Device the feature batch is moved to before the forward pass.
        thr: Probability threshold for selecting a label.
        min_l: If fewer than ``min_l`` labels pass ``thr``, the top ``min_l``
            by probability are used instead.
        max_l: Maximum number of labels kept per sample.

    Returns:
        Mean per-sample F1 as computed by ``sample_f1_score``.
    """
    model.eval()
    all_true, all_pred = [], []

    for batch in loader:
        Xb = batch["X"].to(device)
        yb = batch["y"].cpu().numpy()

        # torch.sigmoid is numerically stable; a hand-rolled 1 / (1 + exp(-x))
        # in NumPy raises overflow warnings for large-magnitude negative logits.
        probs = torch.sigmoid(model(Xb)).cpu().numpy()

        yhat = np.zeros_like(probs, dtype=np.float32)
        for i in range(probs.shape[0]):
            p = probs[i]
            idx = np.argsort(p)[::-1]  # class indices, most probable first
            chosen = [j for j in idx[:50] if p[j] >= thr]
            if len(chosen) < min_l:
                chosen = idx[:min_l].tolist()
            chosen = chosen[:max_l]
            yhat[i, chosen] = 1.0

        all_true.append(yb)
        all_pred.append(yhat)

    YT = np.vstack(all_true)
    YP = np.vstack(all_pred)
    return sample_f1_score(YT, YP)

# Training loop with early stopping on validation sample-F1. `model` is not
# rebuilt in this cell, so the loop starts from its current weights.
for epoch in range(1, EPOCHS + 1):
    model.train()
    total = 0.0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch}"):
        Xb = batch["X"].to(device)
        yb = batch["y"].to(device)

        loss = crit(model(Xb), yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total += loss.item()

    # Mean batch loss for the epoch.
    train_loss = total / len(train_loader)
    val_f1 = evaluate_sample_f1(model, val_loader, device, thr=THR, min_l=MIN_LABELS, max_l=MAX_LABELS)
    print(f"[Epoch {epoch}] train loss: {train_loss:.4f} | val sample-F1: {val_f1:.4f}")

    # Keep a deep copy of the best weights; stop after `patience` stale epochs.
    if val_f1 > best_val:
        best_val = val_f1
        best_state = copy.deepcopy(model.state_dict())
        pat = 0
        print("  ✅ improved")
    else:
        pat += 1
        print(f"  ⏳ no improvement ({pat}/{patience})")
        if pat >= patience:
            print("[Early Stopping]")
            break

# Restore the best checkpoint of this run before predicting.
if best_state is not None:
    model.load_state_dict(best_state)
    print("Loaded best model. best val sample-F1:", best_val)


Epoch 1: 100%|██████████| 138/138 [00:00<00:00, 306.40it/s]
  probs = 1 / (1 + np.exp(-logits))


[Epoch 1] train loss: 0.0198 | val sample-F1: 0.3456
  ✅ improved


Epoch 2: 100%|██████████| 138/138 [00:00<00:00, 304.28it/s]


[Epoch 2] train loss: 0.0193 | val sample-F1: 0.3345
  ⏳ no improvement (1/5)


Epoch 3: 100%|██████████| 138/138 [00:00<00:00, 300.33it/s]


[Epoch 3] train loss: 0.0192 | val sample-F1: 0.3247
  ⏳ no improvement (2/5)


Epoch 4: 100%|██████████| 138/138 [00:00<00:00, 292.74it/s]


[Epoch 4] train loss: 0.0193 | val sample-F1: 0.3453
  ⏳ no improvement (3/5)


Epoch 5: 100%|██████████| 138/138 [00:00<00:00, 296.43it/s]


[Epoch 5] train loss: 0.0194 | val sample-F1: 0.3318
  ⏳ no improvement (4/5)


Epoch 6: 100%|██████████| 138/138 [00:00<00:00, 295.71it/s]


[Epoch 6] train loss: 0.0194 | val sample-F1: 0.3387
  ⏳ no improvement (5/5)
[Early Stopping]
Loaded best model. best val sample-F1: 0.34564104676246643


In [19]:
import csv, json
import numpy as np
import torch

# Reload cached test embeddings and ids through ART_DIR, like the cells that
# wrote them, rather than a hardcoded relative path. The context manager
# closes the JSON file instead of leaking the handle.
X_test = np.load(os.path.join(ART_DIR, "test_bert.npy"))
with open(os.path.join(ART_DIR, "test_ids.json"), encoding="utf-8") as f:
    id_list_test = json.load(f)

# Score the whole test matrix in a single forward pass (no gradients needed).
model.eval()
with torch.no_grad():
    probs = torch.sigmoid(
        model(torch.tensor(X_test, dtype=torch.float32).to(device))
    ).cpu().numpy()

def probs_to_labels_adaptive(p, thr=0.35, min_l=2, max_l=3):
    """Turn one probability vector into a sorted list of predicted class ids.

    Classes are ranked by probability. Those among the top 50 with
    ``p >= thr`` are selected. If fewer than ``min_l`` qualify, the top
    ``min_l`` classes are used instead. At most ``max_l`` ids are kept, and
    the result is returned in ascending id order.
    """
    order = np.argsort(p)[::-1]
    above_thr = []
    for k in order[:50]:
        if p[k] >= thr:
            above_thr.append(k)
    picked = above_thr if len(above_thr) >= min_l else order[:min_l].tolist()
    return sorted(picked[:max_l])

# One label list per test document, in the same order as id_list_test.
pred_labels = [probs_to_labels_adaptive(p, thr=THR, min_l=MIN_LABELS, max_l=MAX_LABELS) for p in probs]

SUBMISSION_PATH = "submission_labelgcn.csv"
with open(SUBMISSION_PATH, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["id", "label"])
    for id_, labs in zip(id_list_test, pred_labels):
        # Labels go into one cell as a comma-joined string; csv.writer quotes it.
        w.writerow([id_, ",".join(map(str, labs))])

print("Saved:", SUBMISSION_PATH, "rows:", len(pred_labels))


Saved: submission_labelgcn.csv rows: 19658
