In [None]:
!pip -q uninstall -y transformers sentence-transformers accelerate tokenizers safetensors
!pip -q install --no-cache-dir -U \
  "transformers==4.41.2" \
  "sentence-transformers==2.7.0" \
  "accelerate==0.30.1" \
  "tokenizers==0.19.1" \
  "safetensors>=0.4.3"

import transformers, sentence_transformers
print("transformers:", transformers.__version__)
print("sentence-transformers:", sentence_transformers.__version__)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m114.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB[0m [31m302.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m314.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.2/507.2 kB[0m [31m321.7 MB/s[0m eta [36m0:00:00[0m
[?25htransformers: 4.41.2
sentence-transformers: 2.7.0


In [None]:
import os, gc, json, random, traceback
import torch
from torch.utils.data import DataLoader
import pandas as pd

from datasets import load_dataset
from sentence_transformers import SentenceTransformer, InputExample, losses, evaluation
from safetensors.torch import load_file, save_file

from google.colab import drive
drive.mount('/content/drive')

# ----------------------------
# CONFIG
# ----------------------------
PROJE_PATH = "/content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated"
os.makedirs(PROJE_PATH, exist_ok=True)

MODEL_NAME = "ytu-ce-cosmos/turkish-e5-large"

SEEDS = [42, 123, 999]
SUBSET_SIZE = 2500
EPOCHS = 3
LR = 2e-5

TRAIN_BATCH = 4
EVAL_QA_COUNT = 500
EVAL_BATCH = 4

task_instruct = "Verilen Türkçe arama sorgusu ile ilgili pasajları getir."

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

torch.manual_seed(0)
random.seed(0)

# ----------------------------
# ----------------------------
def clear_mem():
    torch.cuda.empty_cache()
    gc.collect()

def build_model(model_name_or_path, device="cuda"):
    m = SentenceTransformer(model_name_or_path, device=device)
    try:
        first = m._first_module()
        if hasattr(first, "auto_model") and hasattr(first.auto_model, "gradient_checkpointing_enable"):
            first.auto_model.gradient_checkpointing_enable()
            if hasattr(first.auto_model, "config"):
                first.auto_model.config.use_cache = False
            print("✅ gradient checkpointing enabled")
    except Exception as e:
        print(" checkpointing açılamadı:", e)
    return m

def is_valid_pair(q, p):
    if (q is None) or (p is None): return False
    if len(str(q)) < 5: return False
    if len(str(p)) < 20: return False
    return True

def make_query_text(q):
    return f"Instruct: {task_instruct}\nQuery: {q}"

def build_examples_from_indices(ds, indices):
    ex = []
    kept = []
    for i in indices:
        row = ds[i]
        q = row.get("question", None)
        p = row.get("context", None)
        if not is_valid_pair(q, p):
            continue
        ex.append(InputExample(texts=[make_query_text(q), p]))  # (q, pos) only
        kept.append(i)
    return ex, kept

def sample_subset_indices(ds, seed, subset_size):
    N = len(ds)
    perm = list(range(N))
    rng = random.Random(seed)
    rng.shuffle(perm)

    chosen = []
    for idx in perm:
        row = ds[idx]
        q = row.get("question", None)
        p = row.get("context", None)
        if is_valid_pair(q, p):
            chosen.append(idx)
            if len(chosen) >= subset_size:
                break

    if len(chosen) < subset_size:
        raise RuntimeError(f"Seed {seed}: Only {len(chosen)} valid samples found, need {subset_size}.")
    return chosen

def train_with_oom_fallback(model, train_examples, out_path, epochs, lr, prefer_batch):
    for bs in [prefer_batch, 2, 1]:
        try:
            print(f"🚀 Training -> {out_path} | batch={bs} | epochs={epochs} | lr={lr}")
            train_loader = DataLoader(train_examples, shuffle=True, batch_size=bs)
            train_loss = losses.MultipleNegativesRankingLoss(model=model)

            model.fit(
                train_objectives=[(train_loader, train_loss)],
                epochs=epochs,
                warmup_steps=int(len(train_loader) * 0.1),
                output_path=out_path,
                optimizer_params={"lr": lr},
                use_amp=True,
                show_progress_bar=True
            )
            return bs
        except RuntimeError as e:
            msg = str(e).lower()
            if "out of memory" in msg or "cuda out of memory" in msg:
                print(f"⚠️ OOM with batch={bs}. Retrying with smaller batch...")
                clear_mem()
                continue
            raise
    raise RuntimeError("OOM: Even batch=1 failed.")

def load_state_dict_any(path):
    st_path = os.path.join(path, "model.safetensors")
    bin_path = os.path.join(path, "pytorch_model.bin")
    if os.path.exists(st_path):
        return load_file(st_path)
    if os.path.exists(bin_path):
        return torch.load(bin_path, map_location="cpu")
    raise FileNotFoundError(f"No weights found in {path}")

def merge_models_average(model_paths, out_path, base_model_name):
    """
    Model soup: arithmetic mean of weights.
    """
    print("\n Merging models (weight average)...")
    merged = None
    for i, p in enumerate(model_paths):
        sd = load_state_dict_any(p)
        if merged is None:
            merged = {k: v.float().clone() for k, v in sd.items()}
        else:
            for k in merged:
                merged[k] += sd[k].float()

    K = len(model_paths)
    for k in merged:
        merged[k] /= K

    final_model = SentenceTransformer(base_model_name)
    final_model[0].auto_model.load_state_dict(merged, strict=False)
    final_model.save(out_path, safe_serialization=True)
    print(f"✅ MERGED saved: {out_path}")

# ----------------------------
# 1) LOAD DATA
# ----------------------------
print(" Loading dataset: boun-tabilab/TQuad-2 ...")
train_ds = load_dataset("boun-tabilab/TQuad-2", split="train")
val_ds   = load_dataset("boun-tabilab/TQuad-2", split="validation")
print("Train size:", len(train_ds), "| Val size:", len(val_ds))

# ----------------------------
# 2) BUILD EVALUATOR (500 validation queries)
# ----------------------------
print("\n🧪 Building evaluator (500 val queries)...")
val_sub = val_ds.select(range(min(EVAL_QA_COUNT, len(val_ds))))

queries, corpus, relevant_docs = {}, {}, {}
for row in val_sub:
    q_id = str(row.get("id", hash(row["question"])))
    doc_id = q_id + "_doc"
    queries[q_id] = make_query_text(row["question"])
    corpus[doc_id] = row["context"]
    relevant_docs[q_id] = {doc_id}

evaluator = evaluation.InformationRetrievalEvaluator(
    queries=queries,
    corpus=corpus,
    relevant_docs=relevant_docs,
    name="TQuad_Benchmark",
    show_progress_bar=False,
    mrr_at_k=[10],
    accuracy_at_k=[1, 5],
    batch_size=EVAL_BATCH
)

def eval_model(name, path_or_name):
    clear_mem()
    print(f"\n🔍 Evaluating: {name} -> {path_or_name}")
    m = SentenceTransformer(path_or_name, device="cuda")
    with torch.no_grad():
        s = evaluator(m)
    return s

def pick_metrics(scores):
    return {
        "MRR@10": scores["TQuad_Benchmark_cosine_mrr@10"],
        "Acc@1":  scores["TQuad_Benchmark_cosine_accuracy@1"],
        "Acc@5":  scores["TQuad_Benchmark_cosine_accuracy@5"],
        "nDCG@10": scores["TQuad_Benchmark_cosine_ndcg@10"],
        "MAP@100": scores["TQuad_Benchmark_cosine_map@100"],
    }

# ----------------------------
# 3) SAMPLE 3 TRUE SUBSETS + TRAIN 3 MODELS
# ----------------------------
print("\n🧩 Sampling 3 subsets (true subsets, seed affects chosen examples)...")
subset_indices = {}
seed_train_paths = []
seed_used_batches = {}

for seed in SEEDS:
    idx = sample_subset_indices(train_ds, seed, SUBSET_SIZE)
    subset_indices[seed] = idx

subset_path = os.path.join(PROJE_PATH, "subset_indices.json")
with open(subset_path, "w") as f:
    json.dump(subset_indices, f)
print("✅ Saved subset indices:", subset_path)
print("Subset sizes:", {s: len(subset_indices[s]) for s in SEEDS})

for seed in SEEDS:
    print(f"\n==============================")
    print(f" TRAINING SEED MODEL: {seed}")
    print(f"==============================")

    # build examples for that subset (with filtering)
    train_ex, kept = build_examples_from_indices(train_ds, subset_indices[seed])
    print(f"Subset {seed}: requested={len(subset_indices[seed])}, kept(valid)={len(kept)}")

    save_path = os.path.join(PROJE_PATH, f"model_seed_{seed}")
    seed_train_paths.append(save_path)

    clear_mem()
    model = build_model(MODEL_NAME, device="cuda")

    used_bs = train_with_oom_fallback(
        model=model,
        train_examples=train_ex,
        out_path=save_path,
        epochs=EPOCHS,
        lr=LR,
        prefer_batch=TRAIN_BATCH
    )
    seed_used_batches[seed] = used_bs
    print(f"✅ Seed {seed} trained. Used batch={used_bs}. Saved: {save_path}")

    del model
    clear_mem()

# ----------------------------
# 4) MERGE (MODEL SOUP)
# ----------------------------
MERGED_PATH = os.path.join(PROJE_PATH, "cosmos_merged_final")
merge_models_average(seed_train_paths, MERGED_PATH, MODEL_NAME)

# ----------------------------
# 5) UNION MODEL (union of the 3 subsets)
# ----------------------------
print("\n Building UNION indices and training a single union model...")
union_idx = sorted(set().union(*[set(subset_indices[s]) for s in SEEDS]))
union_ex, union_kept = build_examples_from_indices(train_ds, union_idx)
print(f"Union indices size={len(union_idx)} | kept(valid)={len(union_kept)}")

UNION_PATH = os.path.join(PROJE_PATH, "model_union_3subsets")
clear_mem()
union_model = build_model(MODEL_NAME, device="cuda")
union_used_bs = train_with_oom_fallback(
    model=union_model,
    train_examples=union_ex,
    out_path=UNION_PATH,
    epochs=EPOCHS,
    lr=LR,
    prefer_batch=TRAIN_BATCH
)
print(f" Union trained. Used batch={union_used_bs}. Saved: {UNION_PATH}")
del union_model
clear_mem()

# 6) EVALUATE ALL
print("\n Evaluating BASE + SEEDS + MERGED + UNION (same evaluator)...")

results = []

# BASE
scores_base = eval_model("BASE (cosmos)", MODEL_NAME)
results.append({"Model": "BASE (cosmos)", **pick_metrics(scores_base)})

# SEED models
for seed in SEEDS:
    p = os.path.join(PROJE_PATH, f"model_seed_{seed}")
    sc = eval_model(f"Model (seed {seed})", p)
    results.append({"Model": f"Model (seed {seed})", **pick_metrics(sc)})

# MERGED
scores_merged = eval_model("MERGED ENSEMBLE", MERGED_PATH)
results.append({"Model": "MERGED ENSEMBLE", **pick_metrics(scores_merged)})

# UNION
scores_union = eval_model("UNION (3 subsets)", UNION_PATH)
results.append({"Model": "UNION (3 subsets)", **pick_metrics(scores_union)})

df = pd.DataFrame(results).sort_values("MRR@10", ascending=False)

print("\n✅ RESULTS (same evaluator):")
print(df.to_string(index=False))

# Save results
csv_path = os.path.join(PROJE_PATH, "results_same_evaluator.csv")
json_path = os.path.join(PROJE_PATH, "results_same_evaluator.json")
df.to_csv(csv_path, index=False)
with open(json_path, "w") as f:
    json.dump(results, f, indent=2)

print("\n Saved:")
print(" -", csv_path)
print(" -", json_path)

print(" DONE.")


Mounted at /content/drive
 Loading dataset: boun-tabilab/TQuad-2 ...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/8.86M [00:00<?, ?B/s]

data/validation-00000-of-00001.parquet:   0%|          | 0.00/921k [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/422k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/11803 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2418 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2520 [00:00<?, ? examples/s]

Train size: 11803 | Val size: 2418

🧪 Building evaluator (500 val queries)...

🧩 Sampling 3 subsets (true subsets, seed affects chosen examples)...
✅ Saved subset indices: /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/subset_indices.json
Subset sizes: {42: 2500, 123: 2500, 999: 2500}

 TRAINING SEED MODEL: 42
Subset 42: requested=2500, kept(valid)=2500


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/205 [00:00<?, ?B/s]






README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

✅ gradient checkpointing enabled
🚀 Training -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_42 | batch=4 | epochs=3 | lr=2e-05


  scaler = torch.cuda.amp.GradScaler()


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

✅ Seed 42 trained. Used batch=4. Saved: /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_42

 TRAINING SEED MODEL: 123
Subset 123: requested=2500, kept(valid)=2500







✅ gradient checkpointing enabled
🚀 Training -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_123 | batch=4 | epochs=3 | lr=2e-05


  scaler = torch.cuda.amp.GradScaler()


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

✅ Seed 123 trained. Used batch=4. Saved: /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_123

 TRAINING SEED MODEL: 999
Subset 999: requested=2500, kept(valid)=2500







✅ gradient checkpointing enabled
🚀 Training -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_999 | batch=4 | epochs=3 | lr=2e-05


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

Iteration:   0%|          | 0/625 [00:00<?, ?it/s]

✅ Seed 999 trained. Used batch=4. Saved: /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_999

 Merging models (weight average)...







✅ MERGED saved: /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/cosmos_merged_final

 Building UNION indices and training a single union model...
Union indices size=6052 | kept(valid)=6052







✅ gradient checkpointing enabled
🚀 Training -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_union_3subsets | batch=4 | epochs=3 | lr=2e-05


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1513 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1513 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1513 [00:00<?, ?it/s]

In [None]:
!pip -q uninstall -y transformers sentence-transformers accelerate tokenizers safetensors
!pip -q install --no-cache-dir -U \
  "transformers==4.41.2" \
  "sentence-transformers==2.7.0" \
  "accelerate==0.30.1" \
  "tokenizers==0.19.1" \
  "safetensors>=0.4.3"

import transformers, sentence_transformers
print("transformers:", transformers.__version__)
print("sentence-transformers:", sentence_transformers.__version__)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m83.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB[0m [31m429.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m424.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m369.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.2/507.2 kB[0m [31m443.7 MB/s[0m eta [36m0:00:00[0m
[?25htransformers: 4.41.2
sentence-transformers: 2.7.0


In [None]:
import os, gc, json
import torch
from torch.utils.data import DataLoader
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, InputExample, losses

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ---- CONFIG ----
PROJE_PATH = "/content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated"
MODEL_NAME = "ytu-ce-cosmos/turkish-e5-large"
SEEDS = [42, 123, 999]

EPOCHS = 3
LR = 2e-5
TRAIN_BATCH = 4
task_instruct = "Verilen Türkçe arama sorgusu ile ilgili pasajları getir."

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

def clear_mem():
    torch.cuda.empty_cache()
    gc.collect()

def exists_model(path):
    return os.path.exists(os.path.join(path, "modules.json")) and (
        os.path.exists(os.path.join(path, "model.safetensors")) or
        os.path.exists(os.path.join(path, "pytorch_model.bin"))
    )

def is_valid_pair(q, p):
    return (q is not None) and (p is not None) and len(str(q)) >= 5 and len(str(p)) >= 20

def make_query_text(q):
    return f"Instruct: {task_instruct}\nQuery: {q}"

def build_examples_from_indices(ds, indices):
    ex, kept = [], []
    for i in indices:
        row = ds[i]
        q = row.get("question")
        p = row.get("context")
        if not is_valid_pair(q, p):
            continue
        ex.append(InputExample(texts=[make_query_text(q), p]))
        kept.append(i)
    return ex, kept

def build_model(model_name_or_path, device="cuda"):
    m = SentenceTransformer(model_name_or_path, device=device)
    try:
        first = m._first_module()
        if hasattr(first, "auto_model") and hasattr(first.auto_model, "gradient_checkpointing_enable"):
            first.auto_model.gradient_checkpointing_enable()
            if hasattr(first.auto_model, "config"):
                first.auto_model.config.use_cache = False
            print("✅ gradient checkpointing enabled")
    except Exception as e:
        print("⚠️ checkpointing açılamadı:", e)
    return m

def train_with_oom_fallback(model, train_examples, out_path, epochs, lr, prefer_batch):
    for bs in [prefer_batch, 2, 1]:
        try:
            print(f"🚀 Training UNION -> {out_path} | batch={bs} | epochs={epochs} | lr={lr}")
            dl = DataLoader(train_examples, shuffle=True, batch_size=bs)
            loss = losses.MultipleNegativesRankingLoss(model=model)
            model.fit(
                train_objectives=[(dl, loss)],
                epochs=epochs,
                warmup_steps=int(len(dl) * 0.1),
                output_path=out_path,
                optimizer_params={"lr": lr},
                use_amp=True,
                show_progress_bar=True
            )
            return bs
        except RuntimeError as e:
            if "out of memory" in str(e).lower():
                print(f"⚠️ OOM with batch={bs}, retry smaller...")
                clear_mem()
                continue
            raise
    raise RuntimeError("OOM even with batch=1")

print("📦 Loading train split...")
train_ds = load_dataset("boun-tabilab/TQuad-2", split="train")

subset_path = os.path.join(PROJE_PATH, "subset_indices.json")
with open(subset_path, "r") as f:
    subset_indices = json.load(f)
subset_indices = {int(k): v for k, v in subset_indices.items()}

union_idx = sorted(set().union(*[set(subset_indices[s]) for s in SEEDS]))
union_ex, kept = build_examples_from_indices(train_ds, union_idx)
print(f"Union indices size={len(union_idx)} | kept(valid)={len(kept)}")

UNION_PATH = os.path.join(PROJE_PATH, "model_union_3subsets")

if exists_model(UNION_PATH):
    print("✅ UNION already exists, skipping:", UNION_PATH)
else:
    clear_mem()
    model = build_model(MODEL_NAME, device="cuda")
    used_bs = train_with_oom_fallback(model, union_ex, UNION_PATH, EPOCHS, LR, TRAIN_BATCH)
    print(f"✅ UNION DONE. Used batch={used_bs}. Saved:", UNION_PATH)
    del model
    clear_mem()

📦 Loading train split...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/8.86M [00:00<?, ?B/s]

data/validation-00000-of-00001.parquet:   0%|          | 0.00/921k [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/422k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/11803 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2418 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2520 [00:00<?, ? examples/s]

Union indices size=6052 | kept(valid)=6052


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/205 [00:00<?, ?B/s]






README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

✅ gradient checkpointing enabled
🚀 Training UNION -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_union_3subsets | batch=4 | epochs=3 | lr=2e-05


  scaler = torch.cuda.amp.GradScaler()


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1513 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1513 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1513 [00:00<?, ?it/s]

✅ UNION DONE. Used batch=4. Saved: /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_union_3subsets


In [None]:
!pip -q uninstall -y sentence-transformers transformers accelerate tokenizers
!pip -q cache purge

!pip -q install --no-cache-dir -U \
  "transformers==4.47.1" \
  "sentence-transformers==3.1.1" \
  "accelerate>=0.26.0" \
  "tokenizers>=0.20.0" \
  "safetensors>=0.4.3"

import transformers, sentence_transformers
print("transformers:", transformers.__version__)
print("sentence-transformers:", sentence_transformers.__version__)


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.9/380.9 kB[0m [31m297.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m89.4 MB/s[0m eta [36m0:00:00[0m
[?25htransformers: 4.47.1
sentence-transformers: 3.1.1


In [None]:
import os, gc
import torch, pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, evaluation

PROJE_PATH = "/content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated"
MODEL_NAME = "ytu-ce-cosmos/turkish-e5-large"
task_instruct = "Verilen Türkçe arama sorgusu ile ilgili pasajları getir."

EVAL_QA_COUNT = 500
EVAL_BATCH = 4

def clear_mem():
    torch.cuda.empty_cache(); gc.collect()

def make_query_text(q):
    return f"Instruct: {task_instruct}\nQuery: {q}"

# --- Build evaluator once ---
val_ds = load_dataset("boun-tabilab/TQuad-2", split="validation").select(range(EVAL_QA_COUNT))
queries, corpus, relevant_docs = {}, {}, {}
for row in val_ds:
    q_id = str(row.get("id", hash(row["question"])))
    doc_id = q_id + "_doc"
    queries[q_id] = make_query_text(row["question"])
    corpus[doc_id] = row["context"]
    relevant_docs[q_id] = {doc_id}

evaluator = evaluation.InformationRetrievalEvaluator(
    queries=queries, corpus=corpus, relevant_docs=relevant_docs,
    name="TQuad_Benchmark", show_progress_bar=False,
    mrr_at_k=[10], accuracy_at_k=[1, 5], batch_size=EVAL_BATCH
)

def ensure_metrics_dict(out, name="TQuad_Benchmark"):
    """
    evaluator(model) dönüşü dict değilse (float vs.) yakala ve anlamlı hata ver.
    """
    if isinstance(out, dict):
        return out
    raise TypeError(f"Evaluator output is not a dict (got {type(out)}: {out}). "
                    f"This usually indicates version mismatch. Please ensure sentence-transformers>=3.x.")

def pick(scores):
    # ST 3.x key formatı bu şekilde geliyor (senin eski çıktın gibi)
    return {
        "MRR@10": scores[f"{evaluator.name}_cosine_mrr@10"],
        "Acc@1":  scores[f"{evaluator.name}_cosine_accuracy@1"],
        "Acc@5":  scores[f"{evaluator.name}_cosine_accuracy@5"],
        "nDCG@10": scores[f"{evaluator.name}_cosine_ndcg@10"],
        "MAP@100": scores[f"{evaluator.name}_cosine_map@100"],
    }

models_to_test = {
    "BASE (cosmos)": MODEL_NAME,
    "Model (seed 42)": os.path.join(PROJE_PATH, "model_seed_42"),
    "Model (seed 123)": os.path.join(PROJE_PATH, "model_seed_123"),
    "Model (seed 999)": os.path.join(PROJE_PATH, "model_seed_999"),
    "MERGED ENSEMBLE": os.path.join(PROJE_PATH, "cosmos_merged_final"),
    "UNION (3 subsets)": os.path.join(PROJE_PATH, "model_union_3subsets"),
}

rows = []
for name, path in models_to_test.items():
    print("\n🔍", name, "->", path)
    clear_mem()
    m = SentenceTransformer(path, device="cuda")

    with torch.no_grad():
        out = evaluator(m)

    scores = ensure_metrics_dict(out)
    rows.append({"Model": name, **pick(scores)})

    del m
    clear_mem()

df = pd.DataFrame(rows).sort_values("MRR@10", ascending=False)
print("\n✅ RESULTS:")
print(df.to_string(index=False))

csv_path = os.path.join(PROJE_PATH, "results_same_evaluator_updated.csv")
df.to_csv(csv_path, index=False)
print("\n💾 Saved:", csv_path)



🔍 BASE (cosmos) -> ytu-ce-cosmos/turkish-e5-large








🔍 Model (seed 42) -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_42








🔍 Model (seed 123) -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_123








🔍 Model (seed 999) -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_seed_999








🔍 MERGED ENSEMBLE -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/cosmos_merged_final








🔍 UNION (3 subsets) -> /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/model_union_3subsets








✅ RESULTS:
            Model   MRR@10  Acc@1  Acc@5  nDCG@10  MAP@100
UNION (3 subsets) 0.725337  0.594  0.906 0.779073 0.728196
    BASE (cosmos) 0.716844  0.578  0.894 0.771271 0.719686
  MERGED ENSEMBLE 0.713148  0.576  0.882 0.769994 0.715460
 Model (seed 999) 0.700317  0.566  0.874 0.755961 0.703642
  Model (seed 42) 0.699829  0.558  0.890 0.754731 0.703573
 Model (seed 123) 0.694051  0.554  0.872 0.750406 0.697480

💾 Saved: /content/drive/MyDrive/Contrastive_Ensembles_Proje_Final_Updated/results_same_evaluator_updated.csv
