In [2]:
import os
from pathlib import Path
import joblib

CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)
VECTORS_PATH = CACHE_DIR / "dense_vectors.pkl"
FILENAMES_PATH = CACHE_DIR / "filenames.pkl"
VECTORIZER_PATH = CACHE_DIR / "vectorizer.pkl"
LSH_CACHE_PATH = CACHE_DIR / "lsh_index.pkl"

def save_to_cache(dense_vectors, filenames, vectorizer):
    joblib.dump(dense_vectors, VECTORS_PATH)
    joblib.dump(filenames, FILENAMES_PATH)
    joblib.dump(vectorizer, VECTORIZER_PATH)

def load_from_cache():
    dense_vectors = joblib.load(VECTORS_PATH)
    filenames = joblib.load(FILENAMES_PATH)
    vectorizer = joblib.load(VECTORIZER_PATH)
    return dense_vectors, filenames, vectorizer

def is_cache_available():
    return VECTORS_PATH.exists() and FILENAMES_PATH.exists() and VECTORIZER_PATH.exists()


In [3]:
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

def load_license_texts_parallel(data_dir, max_workers=16):
    data_dir = Path(data_dir)
    files = list(data_dir.rglob("*.txt"))
    
    texts = []
    filenames = []

    def read_file(file_path):
        try:
            text = file_path.read_text(encoding="utf-8")
            return str(file_path.relative_to(data_dir)), text
        except Exception as e:
            return None

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for result in tqdm(executor.map(read_file, files), total=len(files), desc="Reading files"):
            if result:
                rel_path, text = result
                filenames.append(rel_path)
                texts.append(text)

    return filenames, texts

In [4]:
import time
from pathlib import Path
from sentence_transformers import SentenceTransformer

project_root = Path("..").resolve()
output_base = project_root / "Combined-Licenses"

start_time = time.time()

if is_cache_available():
    print("[CACHE HIT] Loading Dense vectors...")
    dense_vectors, filenames, vectorizer = load_from_cache()
else:
    print("[CACHE MISS] Processing license texts...")
    t0 = time.time()
    filenames, texts = load_license_texts_parallel(output_base)
    sample_texts = texts[:10000]
    sample_filenames = filenames[:10000]
    
    model = SentenceTransformer("all-MiniLM-L6-v2")
    print(" Creating vectors on 10,000 samples only...")
    dense_vectors = model.encode(sample_texts, show_progress_bar=True)
    print("Vectors created for 10,000 samples.")
    save_to_cache(dense_vectors, sample_filenames, model)
    print(f"--- TF-IDF built in {time.time() - t0:.2f} sec")

[CACHE MISS] Processing license texts...


Reading files: 100%|██████████████████| 162835/162835 [00:21<00:00, 7570.79it/s]


 Creating vectors on 10,000 samples only...


Batches:   0%|          | 0/313 [00:00<?, ?it/s]

Vectors created for 10,000 samples.
--- TF-IDF built in 291.19 sec


In [7]:
# dense_vectors.shape
# len(sample_filenames)
# len(filenames)

162835

In [8]:
import sys
import time
import joblib
from tqdm import tqdm
from pathlib import Path

# Setup paths and import
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))
from LSH import LSH

start_time = time.time()

if LSH_CACHE_PATH.exists():
    print("[CACHE HIT] Loading existing LSH index...")
    lsh = joblib.load(LSH_CACHE_PATH)
else:
    print("[CACHE MISS] Building LSH index...")
    hash_size = 32
    input_dim = dense_vectors.shape[1]
    num_tables = 30
    lsh = LSH(hash_size, input_dim, num_tables)

    BATCH_SIZE = 500
    num_vectors = dense_vectors.shape[0]
    total_added = 0

    for start in tqdm(range(0, num_vectors, BATCH_SIZE), desc=" Adding vectors to LSH"):
        end = min(start + BATCH_SIZE, num_vectors)
        dense_batch = dense_vectors[start:end]
        for i, vector in enumerate(dense_batch):
            lsh.add(vector, sample_filenames[start + i])
            total_added += 1
            if total_added % 10000 == 0:
                print(f"[DEBUG] Added {total_added} vectors...")

    print("Saving LSH index to cache...")
    LSH_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(lsh, LSH_CACHE_PATH)

print(f"Done! Total time: {time.time() - start_time:.2f} sec")


[CACHE MISS] Building LSH index...


🔗 Adding vectors to LSH:   0%|                          | 0/20 [00:00<?, ?it/s]

[DEBUG] Collision in table 0, bucket 10010110... → 2 items
[DEBUG] Collision in table 1, bucket 00100100... → 2 items
[DEBUG] Collision in table 2, bucket 10100000... → 2 items
[DEBUG] Collision in table 3, bucket 00000100... → 2 items
[DEBUG] Collision in table 4, bucket 00110000... → 2 items
[DEBUG] Collision in table 5, bucket 00011000... → 2 items
[DEBUG] Collision in table 6, bucket 00001011... → 2 items
[DEBUG] Collision in table 7, bucket 00000010... → 2 items
[DEBUG] Collision in table 8, bucket 10000000... → 2 items
[DEBUG] Collision in table 9, bucket 10100110... → 2 items
[DEBUG] Collision in table 10, bucket 00010000... → 2 items
[DEBUG] Collision in table 11, bucket 00000001... → 2 items
[DEBUG] Collision in table 12, bucket 00001100... → 2 items
[DEBUG] Collision in table 13, bucket 01000001... → 2 items
[DEBUG] Collision in table 14, bucket 00000000... → 2 items
[DEBUG] Collision in table 15, bucket 01010000... → 2 items
[DEBUG] Collision in table 16, bucket 00100000... 

🔗 Adding vectors to LSH:   5%|▉                 | 1/20 [00:00<00:04,  3.99it/s]

[DEBUG] Collision in table 0, bucket 00010011... → 2 items
[DEBUG] Collision in table 1, bucket 00000010... → 2 items
[DEBUG] Collision in table 2, bucket 10110001... → 2 items
[DEBUG] Collision in table 3, bucket 00111000... → 2 items
[DEBUG] Collision in table 4, bucket 00100000... → 2 items
[DEBUG] Collision in table 5, bucket 11000001... → 2 items
[DEBUG] Collision in table 6, bucket 00000000... → 2 items
[DEBUG] Collision in table 7, bucket 00000000... → 2 items
[DEBUG] Collision in table 8, bucket 10000000... → 2 items
[DEBUG] Collision in table 9, bucket 11100110... → 2 items
[DEBUG] Collision in table 10, bucket 01011011... → 2 items
[DEBUG] Collision in table 11, bucket 00000101... → 2 items
[DEBUG] Collision in table 12, bucket 10001001... → 2 items
[DEBUG] Collision in table 13, bucket 01000000... → 2 items
[DEBUG] Collision in table 14, bucket 00100001... → 2 items
[DEBUG] Collision in table 15, bucket 01010100... → 2 items
[DEBUG] Collision in table 16, bucket 00000000... 

🔗 Adding vectors to LSH:  15%|██▋               | 3/20 [00:00<00:04,  4.04it/s]

[DEBUG] Collision in table 21, bucket 10000000... → 2 items
[DEBUG] Collision in table 25, bucket 00000110... → 2 items
[DEBUG] Collision in table 4, bucket 00000000... → 2 items
[DEBUG] Collision in table 0, bucket 10000011... → 2 items
[DEBUG] Collision in table 1, bucket 00010010... → 2 items
[DEBUG] Collision in table 2, bucket 00000001... → 2 items
[DEBUG] Collision in table 3, bucket 00000000... → 2 items
[DEBUG] Collision in table 4, bucket 00000000... → 3 items
[DEBUG] Collision in table 5, bucket 10000001... → 2 items
[DEBUG] Collision in table 6, bucket 00001000... → 2 items
[DEBUG] Collision in table 7, bucket 01001000... → 2 items
[DEBUG] Collision in table 8, bucket 00100000... → 2 items
[DEBUG] Collision in table 9, bucket 00001010... → 2 items
[DEBUG] Collision in table 10, bucket 00011111... → 2 items
[DEBUG] Collision in table 11, bucket 01000001... → 2 items
[DEBUG] Collision in table 12, bucket 10000100... → 2 items
[DEBUG] Collision in table 13, bucket 11100000... →

🔗 Adding vectors to LSH:  20%|███▌              | 4/20 [00:01<00:03,  4.00it/s]

[DEBUG] Collision in table 3, bucket 00000000... → 2 items
[DEBUG] Collision in table 0, bucket 10000001... → 3 items
[DEBUG] Collision in table 1, bucket 00100110... → 3 items
[DEBUG] Collision in table 2, bucket 01100001... → 3 items
[DEBUG] Collision in table 3, bucket 00001000... → 3 items
[DEBUG] Collision in table 4, bucket 00000000... → 3 items
[DEBUG] Collision in table 5, bucket 01111100... → 4 items
[DEBUG] Collision in table 6, bucket 00001000... → 3 items
[DEBUG] Collision in table 7, bucket 00000100... → 3 items
[DEBUG] Collision in table 8, bucket 00000000... → 3 items
[DEBUG] Collision in table 9, bucket 00100110... → 3 items
[DEBUG] Collision in table 10, bucket 00011110... → 3 items
[DEBUG] Collision in table 11, bucket 01000001... → 3 items
[DEBUG] Collision in table 12, bucket 10000001... → 3 items
[DEBUG] Collision in table 13, bucket 11100001... → 3 items
[DEBUG] Collision in table 14, bucket 01000001... → 3 items
[DEBUG] Collision in table 15, bucket 10010100... →

🔗 Adding vectors to LSH:  25%|████▌             | 5/20 [00:01<00:03,  3.84it/s]

[DEBUG] Collision in table 22, bucket 00011000... → 2 items
[DEBUG] Collision in table 0, bucket 10000111... → 2 items
[DEBUG] Collision in table 1, bucket 01000010... → 2 items
[DEBUG] Collision in table 2, bucket 11010001... → 2 items
[DEBUG] Collision in table 3, bucket 10010000... → 2 items
[DEBUG] Collision in table 4, bucket 00100000... → 2 items
[DEBUG] Collision in table 5, bucket 10010010... → 2 items
[DEBUG] Collision in table 6, bucket 01000000... → 2 items
[DEBUG] Collision in table 7, bucket 00000001... → 2 items
[DEBUG] Collision in table 8, bucket 00010000... → 2 items
[DEBUG] Collision in table 9, bucket 11001010... → 2 items
[DEBUG] Collision in table 10, bucket 01011110... → 2 items
[DEBUG] Collision in table 11, bucket 00000101... → 2 items
[DEBUG] Collision in table 12, bucket 10001010... → 2 items
[DEBUG] Collision in table 13, bucket 01010001... → 2 items
[DEBUG] Collision in table 14, bucket 01100000... → 2 items
[DEBUG] Collision in table 15, bucket 11000000... 

🔗 Adding vectors to LSH:  30%|█████▍            | 6/20 [00:01<00:03,  3.89it/s]

[DEBUG] Collision in table 1, bucket 00000000... → 2 items
[DEBUG] Collision in table 14, bucket 00100001... → 2 items
[DEBUG] Collision in table 1, bucket 01000000... → 2 items
[DEBUG] Collision in table 28, bucket 10010000... → 2 items
[DEBUG] Collision in table 21, bucket 00010010... → 2 items
[DEBUG] Collision in table 0, bucket 11000111... → 2 items
[DEBUG] Collision in table 13, bucket 11000011... → 2 items
[DEBUG] Collision in table 21, bucket 10010000... → 2 items
[DEBUG] Collision in table 27, bucket 00000010... → 2 items
[DEBUG] Collision in table 0, bucket 11000111... → 3 items
[DEBUG] Collision in table 1, bucket 00011100... → 2 items
[DEBUG] Collision in table 3, bucket 00110010... → 2 items
[DEBUG] Collision in table 5, bucket 00101100... → 2 items
[DEBUG] Collision in table 7, bucket 10000000... → 2 items
[DEBUG] Collision in table 9, bucket 10001010... → 2 items
[DEBUG] Collision in table 10, bucket 01110110... → 2 items
[DEBUG] Collision in table 12, bucket 00000101...

🔗 Adding vectors to LSH:  35%|██████▎           | 7/20 [00:01<00:03,  3.90it/s]

[DEBUG] Collision in table 26, bucket 01011111... → 2 items
[DEBUG] Collision in table 28, bucket 00111000... → 2 items
[DEBUG] Collision in table 0, bucket 00001111... → 2 items
[DEBUG] Collision in table 1, bucket 00011001... → 2 items
[DEBUG] Collision in table 2, bucket 01001001... → 2 items
[DEBUG] Collision in table 3, bucket 01010000... → 2 items
[DEBUG] Collision in table 4, bucket 10000000... → 2 items
[DEBUG] Collision in table 5, bucket 00100010... → 2 items
[DEBUG] Collision in table 6, bucket 11001000... → 2 items
[DEBUG] Collision in table 7, bucket 10000100... → 2 items
[DEBUG] Collision in table 8, bucket 00000101... → 2 items
[DEBUG] Collision in table 9, bucket 00000010... → 2 items
[DEBUG] Collision in table 10, bucket 00110100... → 2 items
[DEBUG] Collision in table 11, bucket 00000001... → 2 items
[DEBUG] Collision in table 12, bucket 10000000... → 2 items
[DEBUG] Collision in table 13, bucket 11000011... → 2 items
[DEBUG] Collision in table 14, bucket 00110000... 

🔗 Adding vectors to LSH:  40%|███████▏          | 8/20 [00:02<00:03,  3.96it/s]

[DEBUG] Collision in table 1, bucket 00011000... → 2 items
[DEBUG] Collision in table 2, bucket 01000001... → 2 items
[DEBUG] Collision in table 11, bucket 01000001... → 2 items
[DEBUG] Collision in table 23, bucket 10000100... → 3 items
[DEBUG] Collision in table 17, bucket 00000000... → 2 items
[DEBUG] Collision in table 14, bucket 01100000... → 2 items
[DEBUG] Collision in table 1, bucket 00011000... → 2 items
[DEBUG] Collision in table 1, bucket 00011000... → 3 items
[DEBUG] Collision in table 5, bucket 10100000... → 2 items
[DEBUG] Collision in table 17, bucket 00000000... → 2 items
[DEBUG] Collision in table 7, bucket 00000100... → 2 items
[DEBUG] Collision in table 1, bucket 00011000... → 2 items
[DEBUG] Collision in table 10, bucket 00011100... → 2 items
[DEBUG] Collision in table 12, bucket 10000000... → 2 items
[DEBUG] Collision in table 11, bucket 00100001... → 2 items
[DEBUG] Collision in table 24, bucket 00000010... → 2 items
[DEBUG] Collision in table 5, bucket 00100101..

🔗 Adding vectors to LSH:  45%|████████          | 9/20 [00:02<00:02,  3.98it/s]

[DEBUG] Collision in table 3, bucket 01010000... → 2 items
[DEBUG] Collision in table 8, bucket 00000101... → 2 items
[DEBUG] Collision in table 12, bucket 10010000... → 2 items
[DEBUG] Collision in table 18, bucket 01001111... → 2 items
[DEBUG] Collision in table 0, bucket 10001111... → 2 items
[DEBUG] Collision in table 1, bucket 01000001... → 2 items
[DEBUG] Collision in table 2, bucket 01000001... → 2 items
[DEBUG] Collision in table 3, bucket 01010000... → 2 items
[DEBUG] Collision in table 4, bucket 00001100... → 2 items
[DEBUG] Collision in table 5, bucket 00100101... → 4 items
[DEBUG] Collision in table 6, bucket 01001000... → 2 items
[DEBUG] Collision in table 7, bucket 00000100... → 2 items
[DEBUG] Collision in table 8, bucket 00000101... → 2 items
[DEBUG] Collision in table 9, bucket 00001000... → 2 items
[DEBUG] Collision in table 10, bucket 01110100... → 2 items
[DEBUG] Collision in table 11, bucket 00000001... → 4 items
[DEBUG] Collision in table 12, bucket 10010000... → 

🔗 Adding vectors to LSH:  50%|████████▌        | 10/20 [00:02<00:02,  3.99it/s]

[DEBUG] Collision in table 3, bucket 00010000... → 4 items
[DEBUG] Collision in table 9, bucket 01100111... → 4 items
[DEBUG] Collision in table 14, bucket 01000000... → 4 items
[DEBUG] Collision in table 20, bucket 00000100... → 4 items
[DEBUG] Collision in table 22, bucket 00010100... → 4 items
[DEBUG] Collision in table 24, bucket 00101000... → 4 items
[DEBUG] Collision in table 28, bucket 10000000... → 4 items
[DEBUG] Collision in table 0, bucket 11010101... → 2 items
[DEBUG] Collision in table 1, bucket 00000001... → 2 items
[DEBUG] Collision in table 2, bucket 10000000... → 2 items
[DEBUG] Collision in table 3, bucket 01010000... → 2 items
[DEBUG] Collision in table 4, bucket 00001000... → 2 items
[DEBUG] Collision in table 5, bucket 00000000... → 2 items
[DEBUG] Collision in table 6, bucket 00000000... → 2 items
[DEBUG] Collision in table 7, bucket 00000100... → 2 items
[DEBUG] Collision in table 8, bucket 10000001... → 2 items
[DEBUG] Collision in table 9, bucket 00100000... → 

🔗 Adding vectors to LSH:  55%|█████████▎       | 11/20 [00:02<00:02,  4.03it/s]

[DEBUG] Collision in table 0, bucket 11000011... → 2 items
[DEBUG] Collision in table 1, bucket 00000001... → 2 items
[DEBUG] Collision in table 3, bucket 01010000... → 4 items
[DEBUG] Collision in table 4, bucket 00101000... → 2 items
[DEBUG] Collision in table 5, bucket 00000010... → 4 items
[DEBUG] Collision in table 6, bucket 01001000... → 2 items
[DEBUG] Collision in table 7, bucket 00010000... → 2 items
[DEBUG] Collision in table 8, bucket 00000001... → 4 items
[DEBUG] Collision in table 9, bucket 00100100... → 3 items
[DEBUG] Collision in table 10, bucket 00011110... → 2 items
[DEBUG] Collision in table 11, bucket 00000101... → 2 items
[DEBUG] Collision in table 12, bucket 00000000... → 2 items
[DEBUG] Collision in table 13, bucket 11100011... → 2 items
[DEBUG] Collision in table 15, bucket 10010010... → 2 items
[DEBUG] Collision in table 16, bucket 00100000... → 2 items
[DEBUG] Collision in table 17, bucket 01001000... → 2 items
[DEBUG] Collision in table 18, bucket 00001000...

🔗 Adding vectors to LSH:  60%|██████████▏      | 12/20 [00:03<00:02,  3.94it/s]

[DEBUG] Collision in table 9, bucket 00101001... → 3 items
[DEBUG] Collision in table 0, bucket 10000001... → 2 items
[DEBUG] Collision in table 1, bucket 10000000... → 2 items
[DEBUG] Collision in table 2, bucket 01100001... → 2 items
[DEBUG] Collision in table 3, bucket 10100000... → 2 items
[DEBUG] Collision in table 4, bucket 00101100... → 2 items
[DEBUG] Collision in table 5, bucket 01101010... → 2 items
[DEBUG] Collision in table 6, bucket 01000001... → 2 items
[DEBUG] Collision in table 7, bucket 00000100... → 2 items
[DEBUG] Collision in table 8, bucket 10000001... → 2 items
[DEBUG] Collision in table 9, bucket 11100000... → 2 items
[DEBUG] Collision in table 10, bucket 01010110... → 2 items
[DEBUG] Collision in table 11, bucket 00001001... → 2 items
[DEBUG] Collision in table 12, bucket 00001001... → 2 items
[DEBUG] Collision in table 13, bucket 10101000... → 2 items
[DEBUG] Collision in table 14, bucket 00110000... → 2 items
[DEBUG] Collision in table 15, bucket 00010001... →

🔗 Adding vectors to LSH:  65%|███████████      | 13/20 [00:03<00:01,  3.97it/s]

[DEBUG] Collision in table 0, bucket 10000110... → 2 items
[DEBUG] Collision in table 1, bucket 00000100... → 2 items
[DEBUG] Collision in table 2, bucket 10100000... → 2 items
[DEBUG] Collision in table 3, bucket 00110000... → 2 items
[DEBUG] Collision in table 4, bucket 00101000... → 2 items
[DEBUG] Collision in table 5, bucket 00001011... → 2 items
[DEBUG] Collision in table 6, bucket 01000011... → 2 items
[DEBUG] Collision in table 7, bucket 00000000... → 2 items
[DEBUG] Collision in table 8, bucket 10110000... → 2 items
[DEBUG] Collision in table 9, bucket 10100110... → 2 items
[DEBUG] Collision in table 10, bucket 01011000... → 2 items
[DEBUG] Collision in table 11, bucket 00000001... → 2 items
[DEBUG] Collision in table 12, bucket 01001000... → 2 items
[DEBUG] Collision in table 13, bucket 10110001... → 2 items
[DEBUG] Collision in table 14, bucket 00000000... → 2 items
[DEBUG] Collision in table 15, bucket 01000001... → 2 items
[DEBUG] Collision in table 16, bucket 00010000... 

🔗 Adding vectors to LSH:  70%|███████████▉     | 14/20 [00:03<00:01,  3.89it/s]

[DEBUG] Collision in table 0, bucket 10000011... → 2 items
[DEBUG] Collision in table 3, bucket 00011000... → 2 items
[DEBUG] Collision in table 6, bucket 00001000... → 3 items
[DEBUG] Collision in table 12, bucket 10000101... → 2 items
[DEBUG] Collision in table 17, bucket 00001000... → 2 items
[DEBUG] Collision in table 25, bucket 00000010... → 2 items
[DEBUG] Collision in table 29, bucket 00000110... → 3 items
[DEBUG] Collision in table 1, bucket 00000000... → 2 items
[DEBUG] Collision in table 24, bucket 00101000... → 2 items
[DEBUG] Collision in table 0, bucket 10000110... → 2 items
[DEBUG] Collision in table 1, bucket 00000000... → 2 items
[DEBUG] Collision in table 2, bucket 10100001... → 2 items
[DEBUG] Collision in table 3, bucket 00010000... → 2 items
[DEBUG] Collision in table 4, bucket 01100000... → 2 items
[DEBUG] Collision in table 5, bucket 00001000... → 2 items
[DEBUG] Collision in table 6, bucket 01001000... → 2 items
[DEBUG] Collision in table 7, bucket 00000000... → 

🔗 Adding vectors to LSH:  75%|████████████▊    | 15/20 [00:03<00:01,  3.75it/s]

[DEBUG] Collision in table 25, bucket 11000010... → 2 items
[DEBUG] Collision in table 26, bucket 00001000... → 2 items
[DEBUG] Collision in table 29, bucket 00000010... → 2 items
[DEBUG] Collision in table 0, bucket 10000010... → 2 items
[DEBUG] Collision in table 2, bucket 00000001... → 2 items
[DEBUG] Collision in table 3, bucket 00010100... → 2 items
[DEBUG] Collision in table 4, bucket 00001000... → 2 items
[DEBUG] Collision in table 5, bucket 00001111... → 2 items
[DEBUG] Collision in table 6, bucket 01000001... → 2 items
[DEBUG] Collision in table 7, bucket 00000000... → 2 items
[DEBUG] Collision in table 8, bucket 11000000... → 2 items
[DEBUG] Collision in table 9, bucket 00001011... → 2 items
[DEBUG] Collision in table 10, bucket 01011001... → 2 items
[DEBUG] Collision in table 11, bucket 10000101... → 2 items
[DEBUG] Collision in table 12, bucket 10001000... → 2 items
[DEBUG] Collision in table 13, bucket 11100100... → 2 items
[DEBUG] Collision in table 14, bucket 00100001...

🔗 Adding vectors to LSH:  80%|█████████████▌   | 16/20 [00:04<00:01,  3.83it/s]

[DEBUG] Collision in table 28, bucket 10010000... → 2 items
[DEBUG] Collision in table 0, bucket 01000001... → 5 items
[DEBUG] Collision in table 1, bucket 10101001... → 5 items
[DEBUG] Collision in table 2, bucket 11000001... → 5 items
[DEBUG] Collision in table 3, bucket 00111101... → 5 items
[DEBUG] Collision in table 4, bucket 01110111... → 5 items
[DEBUG] Collision in table 5, bucket 00001010... → 5 items
[DEBUG] Collision in table 6, bucket 10000001... → 5 items
[DEBUG] Collision in table 7, bucket 01101000... → 5 items
[DEBUG] Collision in table 8, bucket 00000010... → 5 items
[DEBUG] Collision in table 9, bucket 11000000... → 5 items
[DEBUG] Collision in table 10, bucket 00000010... → 5 items
[DEBUG] Collision in table 11, bucket 00100001... → 5 items
[DEBUG] Collision in table 12, bucket 10000000... → 5 items
[DEBUG] Collision in table 13, bucket 01100101... → 5 items
[DEBUG] Collision in table 14, bucket 00000000... → 5 items
[DEBUG] Collision in table 15, bucket 01000000... 

🔗 Adding vectors to LSH:  85%|██████████████▍  | 17/20 [00:04<00:00,  3.75it/s]

[DEBUG] Collision in table 28, bucket 00010000... → 2 items
[DEBUG] Collision in table 5, bucket 01001000... → 2 items
[DEBUG] Collision in table 8, bucket 00000000... → 2 items
[DEBUG] Collision in table 19, bucket 00000110... → 2 items
[DEBUG] Collision in table 26, bucket 01000001... → 2 items
[DEBUG] Collision in table 24, bucket 00101010... → 2 items
[DEBUG] Collision in table 22, bucket 00010100... → 2 items
[DEBUG] Collision in table 6, bucket 00000001... → 2 items
[DEBUG] Collision in table 7, bucket 00000110... → 2 items
[DEBUG] Collision in table 26, bucket 01000001... → 3 items
[DEBUG] Collision in table 2, bucket 00100001... → 2 items
[DEBUG] Collision in table 3, bucket 00010000... → 2 items
[DEBUG] Collision in table 6, bucket 01000000... → 2 items
[DEBUG] Collision in table 8, bucket 00000000... → 2 items
[DEBUG] Collision in table 13, bucket 11100011... → 2 items
[DEBUG] Collision in table 19, bucket 00000110... → 3 items
[DEBUG] Collision in table 22, bucket 00010000..

🔗 Adding vectors to LSH:  90%|███████████████▎ | 18/20 [00:04<00:00,  3.66it/s]

[DEBUG] Collision in table 0, bucket 00011110... → 2 items
[DEBUG] Collision in table 1, bucket 10100000... → 2 items
[DEBUG] Collision in table 2, bucket 10011010... → 2 items
[DEBUG] Collision in table 3, bucket 00001000... → 2 items
[DEBUG] Collision in table 4, bucket 00010000... → 2 items
[DEBUG] Collision in table 5, bucket 00011010... → 2 items
[DEBUG] Collision in table 6, bucket 00100000... → 2 items
[DEBUG] Collision in table 7, bucket 00011001... → 2 items
[DEBUG] Collision in table 8, bucket 00000000... → 2 items
[DEBUG] Collision in table 9, bucket 10100011... → 2 items
[DEBUG] Collision in table 10, bucket 01001011... → 2 items
[DEBUG] Collision in table 11, bucket 00100101... → 2 items
[DEBUG] Collision in table 12, bucket 00000000... → 2 items
[DEBUG] Collision in table 13, bucket 00110000... → 2 items
[DEBUG] Collision in table 14, bucket 00100000... → 2 items
[DEBUG] Collision in table 15, bucket 10010000... → 2 items
[DEBUG] Collision in table 16, bucket 00100101... 

🔗 Adding vectors to LSH:  95%|████████████████▏| 19/20 [00:04<00:00,  3.72it/s]

[DEBUG] Collision in table 0, bucket 10000000... → 2 items
[DEBUG] Collision in table 1, bucket 10000010... → 2 items
[DEBUG] Collision in table 2, bucket 00010010... → 2 items
[DEBUG] Collision in table 3, bucket 11010000... → 2 items
[DEBUG] Collision in table 4, bucket 00100100... → 2 items
[DEBUG] Collision in table 5, bucket 00011110... → 2 items
[DEBUG] Collision in table 6, bucket 01001000... → 2 items
[DEBUG] Collision in table 7, bucket 10000110... → 2 items
[DEBUG] Collision in table 8, bucket 10000010... → 2 items
[DEBUG] Collision in table 9, bucket 11100010... → 2 items
[DEBUG] Collision in table 10, bucket 01110110... → 2 items
[DEBUG] Collision in table 11, bucket 10000010... → 2 items
[DEBUG] Collision in table 12, bucket 01100100... → 2 items
[DEBUG] Collision in table 13, bucket 00000000... → 2 items
[DEBUG] Collision in table 14, bucket 00101000... → 2 items
[DEBUG] Collision in table 15, bucket 10110000... → 2 items
[DEBUG] Collision in table 16, bucket 00110000... 

🔗 Adding vectors to LSH: 100%|█████████████████| 20/20 [00:05<00:00,  3.88it/s]

[DEBUG] Collision in table 0, bucket 10010111... → 4 items
[DEBUG] Collision in table 1, bucket 00000000... → 4 items
[DEBUG] Collision in table 2, bucket 01110000... → 4 items
[DEBUG] Collision in table 4, bucket 00000000... → 4 items
[DEBUG] Collision in table 5, bucket 01101101... → 4 items
[DEBUG] Collision in table 6, bucket 00001010... → 4 items
[DEBUG] Collision in table 7, bucket 00000001... → 4 items
[DEBUG] Collision in table 8, bucket 10000000... → 4 items
[DEBUG] Collision in table 9, bucket 00100111... → 4 items
[DEBUG] Collision in table 10, bucket 00111110... → 4 items
[DEBUG] Collision in table 11, bucket 01000001... → 4 items
[DEBUG] Collision in table 12, bucket 00000101... → 4 items
[DEBUG] Collision in table 13, bucket 01101001... → 4 items
[DEBUG] Collision in table 14, bucket 01000000... → 5 items
[DEBUG] Collision in table 15, bucket 10010100... → 4 items
[DEBUG] Collision in table 16, bucket 00110001... → 4 items
[DEBUG] Collision in table 18, bucket 00001110...




Done! Total time: 5.62 sec


162835

In [11]:
import numpy as np
import time
from tqdm import tqdm

def query_lsh(
    lsh,
    dense_vectors,
    filenames,
    query_text,
    model,
    top_k=5,
    similarity_metric="euclidean"
):
    print("\n [STEP 1] Embedding query text...")
    t0 = time.time()

    query_vector = model.encode([query_text])[0]
    nonzero_features = np.count_nonzero(query_vector)

    print(f"  - Query vector shape: {query_vector.shape}")
    print(f"  - Non-zero embedding dimensions: {nonzero_features}")
    if nonzero_features == 0:
        print("Query vector is empty. Try another query.")
        return []

    print("\n [STEP 2] Querying LSH index...")
    candidate_ids = lsh.query(query_vector)
    t1 = time.time()

    print(f" - LSH returned {len(candidate_ids)} candidates in {t1 - t0:.3f} sec")

    if not candidate_ids:
        print(" No candidates found. Possibly due to hash size or vector sparsity.")
        return []

    print("\n [STEP 3] Matching candidate filenames to vectors...")
    candidate_indices = []
    for f in candidate_ids:
        try:
            idx = filenames.index(f)
            candidate_indices.append(idx)
        except ValueError:
            print(f" Candidate {f} not found in filenames list.")

    if not candidate_indices:
        print(" No valid candidate indices found.")
        return []

    candidate_vectors = dense_vectors[candidate_indices]
    print(f"  - Found {len(candidate_vectors)} vectors for comparison.")

    print("\n [STEP 4] Computing distances...")
    distances = []
    for i, vec in enumerate(tqdm(candidate_vectors, desc="Scoring")):
        if similarity_metric == "cosine":
            denom = np.linalg.norm(query_vector) * np.linalg.norm(vec) + 1e-8
            score = 1 - np.dot(query_vector, vec) / denom
        else:
            score = np.linalg.norm(query_vector - vec)
        distances.append((filenames[candidate_indices[i]], score))

    print("\n [STEP 5] Selecting top matches...")
    sorted_results = sorted(distances, key=lambda x: x[1])[:top_k]

    print(f"\n Top-{top_k} nearest results:")
    for name, score in sorted_results:
        print(f"  • {name} → Score: {score:.4f}")

    return sorted_results

In [18]:
query_text_mit = """
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
query_text_no_license = """
The gentle rustling of leaves echoed through the forest as the sun dipped below the horizon, casting long shadows across the mossy
ground. Birds chirped their final songs of the day while the air grew cooler, carrying the scent of pine and damp earth. It was a
moment of calm, a brief pause in the world's endless motion.
"""

In [19]:
results = query_lsh(lsh, dense_vectors, sample_filenames, query_text_mit, model, top_k=5)


 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 3 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 11 hit → 1 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table 13 hit → 2 candidates
[DEBUG] Table 15 hit → 2 candidates
[DEBUG] Table 18 hit → 1 candidates
[DEBUG] Table 19 hit → 1 candidates
[DEBUG] Table 22 hit → 2 candidates
[DEBUG] Table 25 hit → 1 candidates
[DEBUG] Table 28 hit → 3 candidates
[DEBUG] Table 29 hit → 1 candidates
 - LSH returned 4 candidates in 0.592 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors for comparison.

 [STEP 4] Computing distances...


Scoring: 100%|███████████████████████████████████| 4/4 [00:00<00:00, 681.59it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-6.txt → Score: 0.0705
  • Imlib2/Imlib2-20.txt → Score: 0.1726
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 0.2865
  • Intel/Intel-26.txt → Score: 0.5118





In [20]:
results = query_lsh(lsh, dense_vectors, sample_filenames, query_text_no_license, model, top_k=5)


 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 1.087 sec
 No candidates found. Possibly due to hash size or vector sparsity.


## Let's try on some more samples

In [13]:
import pandas as pd
df = pd.DataFrame(sample_filenames)

In [17]:
df

Unnamed: 0,0
0,OSL-2.1/OSL-2.1-37_Split_DB_Foss_Licenses.txt
1,OSL-2.1/OSL-2.1-117_Split_DB_Foss_Licenses.txt
2,OSL-2.1/OSL-2.1-141_Split_SPDX_Licenses.txt
3,OSL-2.1/OSL-2.1-206_Split_DB_Foss_Licenses.txt
4,OSL-2.1/OSL-2.1-65_Split_SPDX_Licenses.txt
...,...
9995,GFDL-1.2-only/GFDL-1.2-only-110.txt
9996,GFDL-1.2-only/GFDL-1.2-only-14.txt
9997,GFDL-1.2-only/GFDL-1.2-only-104.txt
9998,GFDL-1.2-only/GFDL-1.2-only-312.txt


In [26]:
unique_licenses = sorted({filename.split("/")[0] for filename in sample_filenames})

In [30]:
len(unique_licenses)

46

In [29]:
unique_licenses

['AAL',
 'BSD-3-Clause-Clear',
 'CATOSL',
 'CC-BY-3.0',
 'CC-BY-SA-3.0-AT',
 'CPL-0.5',
 'CrystalStacker',
 'FTL',
 'Ferris-1.2',
 'GFDL-1.1-no-invariants-or-later',
 'GFDL-1.2',
 'GFDL-1.2-invariants-or-later',
 'GFDL-1.2-only',
 'GFDL-1.3',
 'GFDL-1.3-invariants-or-later',
 'GPL-3.0-only',
 'GPL-3.0-with-bison-exception',
 'Imlib2',
 'Intel',
 'LiLiQ-P-1.1',
 'MITNFA',
 'MX4J',
 'NCGL-UK-2.0',
 'NLOD-1.0',
 'OASIS',
 'OLDAP-2.1',
 'OLDAP-2.6',
 'OLDAP-2.8',
 'OSL-2.1',
 'OpenMarket',
 'Parity-7.0.0',
 'PostgreSQL',
 'RealNetworks-EULA',
 'SCEA',
 'VIM',
 'W3C',
 'WashU',
 'YPL-1.0',
 'YPL-1.1',
 'ZPL-2.0',
 'ZPL-2.1',
 'Zed',
 'ZoneAlarm-EULA',
 'gnu-javamail-exception',
 'info-zip',
 'libtiff']

## Take samples for each license and non license too

In [33]:
import os
import random
from pathlib import Path

project_root = Path("..").resolve()
output_base = project_root / "Combined-Licenses"

def get_license_samples(base_dir):
    license_samples = []
    for subfolder in os.listdir(base_dir):
        full_path = os.path.join(base_dir, subfolder)
        if os.path.isdir(full_path):
            files = [f for f in os.listdir(full_path) if os.path.isfile(os.path.join(full_path, f))]
            if files:
                chosen_file = random.choice(files)
                file_path = os.path.join(full_path, chosen_file)
                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                    content = f.read().strip()
                    license_samples.append([content, 1, subfolder])  # Include folder name
    return license_samples

# Non-license texts: label = 0, folder name = None
non_license_texts = [
    "The quick brown fox jumps over the lazy dog.",
    "Artificial intelligence is transforming the world rapidly.",
    "He bought some apples and oranges from the supermarket.",
    "Python is a popular programming language for data science.",
    "The concert will be held at the downtown auditorium.",
    "Mountains are formed by tectonic plate movement.",
    "User authentication is essential for secure systems.",
    "She traveled to Europe last summer with her family.",
    "Electric vehicles are becoming more common in urban areas.",
    "Coffee consumption has increased during remote work periods.",
    "The novel was set in 19th-century London and Paris.",
    "Biology explores the diversity of life on Earth.",
    "He set up a local server for the development team.",
    "They planned to watch the meteor shower on Friday night.",
    "The history of computing dates back to the 1940s.",
    "Nutrition and exercise are crucial for a healthy life.",
    "The package will be delivered within 3-5 business days.",
    "Global warming poses a serious threat to ecosystems.",
    "The cat knocked over a vase while chasing a fly.",
    "Digital art has gained popularity through NFTs."
]
non_license_samples = [[text, 0, None] for text in non_license_texts]

license_samples = get_license_samples(output_base)
combined = license_samples + non_license_samples
random.shuffle(combined)


print(f"Total samples: {len(combined)} (License: {len(license_samples)}, Non-license: {len(non_license_samples)})")
for i, (text, label, folder) in enumerate(combined[:3]):
    print(f"\nSample {i+1} — Label: {label}, Folder: {folder}\n{text[:200]}...")  # Preview first 200 chars

Total samples: 674 (License: 654, Non-license: 20)

Sample 1 — Label: 1, Folder: Interbase
INTERBASE PUBLIC LICENSE Version 1.0 1 Definitions. 1.0.1 "Commercial Use" means distribution or otherwise making the Covered Code available to a third party. 1.1 ''Contributor'' means each entity tha...

Sample 2 — Label: 1, Folder: Glide
3DFX GLIDE Source Code General Public License 1 PREAMBLE This license is for software that provides a 3D graphics application program interface (API).The license is intended to offer terms similar to ...

Sample 3 — Label: 1, Folder: Font-exception-2.0
As a special exception, if you create a document which uses this font, and embed this font or unaltered portions of this font into the document, this font does not by itself cause the resulting docume...


In [34]:
def query_lsh_filenames_only(
    lsh,
    dense_vectors,
    filenames,
    query_text,
    model,
    top_k=5,
    similarity_metric="euclidean"
):
    results = query_lsh(
        lsh,
        dense_vectors,
        filenames,
        query_text,
        model,
        top_k=top_k,
        similarity_metric=similarity_metric
    )
    return [name for name, _ in results]

In [35]:
import pandas as pd

def evaluate_license_classification(dataset, lsh, dense_vectors, filenames, model):
    results = []

    for i, (text, label, folder) in enumerate(tqdm(dataset, desc="🔍 Evaluating Queries")):
        matched_files = query_lsh_filenames_only(
            lsh, dense_vectors, filenames, text, model, top_k=5
        )

        is_match = len(matched_files) > 0
        correct = (label == 1 and is_match) or (label == 0 and not is_match)

        results.append({
            "query_index": i,
            "text_preview": text[:100].replace("\n", " ") + "...",
            "true_label": label,
            "matched_files": matched_files,
            "correct": correct,
            "license_folder": folder
        })

    df_results = pd.DataFrame(results)
    accuracy = df_results["correct"].mean()
    print(f"\n Overall accuracy: {accuracy * 100:.2f}% ({df_results['correct'].sum()}/{len(df_results)})")
    return df_results


In [36]:
df_eval = evaluate_license_classification(combined, lsh, dense_vectors, sample_filenames, model)

🔍 Evaluating Queries:   0%|                            | 0/674 [00:00<?, ?it/s]


 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   0%|                    | 1/674 [00:01<17:02,  1.52s/it]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 1.519 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
[DEBUG] Table 7 hit → 1 candidates
 - LSH returned 4 candidates in 0.062 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████████| 4/4 [00:00<00:00, 872.04it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-371.txt → Score: 0.9962
  • SCEA/SCEA-41_Split_SPDX_Licenses.txt → Score: 1.0061
  • SCEA/SCEA-114.txt → Score: 1.0061
  • SCEA/SCEA-119.txt → Score: 1.0884

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 22 hit → 1 candidates
 - LSH returned 1 candidates in 0.458 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 14122.24it/s][A
🔍 Evaluating Queries:   0%|                    | 3/674 [00:02<06:37,  1.69it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • gnu-javamail-exception/gnu-javamail-exception-3_Split_SPDX_Licenses.txt → Score: 0.9034

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   1%|                    | 4/674 [00:02<05:25,  2.06it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.285 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   1%|▏                   | 5/674 [00:02<04:40,  2.38it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.286 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 3 candidates
[DEBUG] Table 7 hit → 3 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table 13 hit → 1 candidates
[DEBUG] Table 18 hit → 1 candidates
[DEBUG] Table 21 hit → 3 candidates
[DEBUG] Table 27 hit → 1 candidates
 - LSH returned 7 candidates in 0.373 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 7 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 7/7 [00:00<00:00, 75475.91it/s][A
🔍 Evaluating Queries:   1%|▏                   | 6/674 [00:03<04:31,  2.46it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-8.txt → Score: 0.2593
  • ZPL-2.1/ZPL-2.1-26.txt → Score: 0.3482
  • ZPL-2.1/ZPL-21-33.txt → Score: 0.3482
  • info-zip/info-zip-28.txt → Score: 0.3521
  • info-zip/info-zip-28_Split_DB_Foss_Licenses.txt → Score: 0.3521

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
 - LSH returned 3 candidates in 0.033 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 3 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 38362.54it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-412.txt → Score: 1.0884
  • GFDL-1.3/GFDL-1.3-489.txt → Score: 1.1151
  • Ferris-1.2/Ferris-1.2-403.txt → Score: 1.1446

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 42 candidates
[DEBUG] Table 1 hit → 83 candidates
[DEBUG] Table 2 hit → 40 candidates
[DEBUG] Table 3 hit → 40 candidates
[DEBUG] Table 4 hit → 42 candidates
[DEBUG] Table 5 hit → 42 candidates
[DEBUG] Table 6 hit → 40 candidates
[DEBUG] Table 7 hit → 42 candidates
[DEBUG] Table 8 hit → 46 candidates
[DEBUG] Table 9 hit → 42 candidates
[DEBUG] Table 10 hit → 40 candidates
[DEBUG] Table 11 hit → 42 candidates
[DEBUG] Table 12 hit → 40 candidates
[DEBUG] Table 13 hit → 40 candidates
[DEBUG] Table 14 hit → 42 candidates
[DEBUG] Table 15 hit → 44 candidates
[DEBUG] Table 16 hit → 42 candidates
[DEBUG] Table 17 hit → 45 candidates


Scoring: 100%|████████████████████████████| 167/167 [00:00<00:00, 162328.80it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27.txt → Score: 0.0000
  • OLDAP-2.6/OLDAP-2.6-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • OLDAP-2.6/OLDAP-2.6-25.txt → Score: 0.0000
  • OLDAP-2.6/OLDAP-2.6-24.txt → Score: 0.0000
  • OLDAP-2.6/OLDAP-2.6-26.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1233 candidates
[DEBUG] Table 2 hit → 1236 candidates
[DEBUG] Table 3 hit → 1236 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 1236 candidates
[DEBUG] Table 6 hit → 1236 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1236 candidates
[DEBUG] Table 9 hit → 1236 candidates
[DEBUG] Table 10 hit → 1236 candidates
[DEBUG] Table 11 hit → 1233 candidates
[DEBUG] Table 12 hit → 1233 candidates
[DEBUG] Table 13 hit → 1242 candidates
[DEBUG] Tab




  - Found 2900 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████| 2900/2900 [00:00<00:00, 115509.35it/s][A
🔍 Evaluating Queries:   1%|▎                   | 9/674 [00:03<02:41,  4.12it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-265.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-363_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-188_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-276.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-276_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 18641.35it/s][A
🔍 Evaluating Queries:   1%|▎                  | 10/674 [00:05<07:04,  1.56it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   2%|▎                  | 11/674 [00:07<09:57,  1.11it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 1.748 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   2%|▎                  | 12/674 [00:08<11:48,  1.07s/it]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 1.574 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 234 candidates
[DEBUG] Table 1 hit → 472 candidates
[DEBUG] Table 2 hit → 472 candidates
[DEBUG] Table 3 hit → 472 candidates
[DEBUG] Table 4 hit → 472 candidates
[DEBUG] Table 5 hit → 472 candidates
[DEBUG] Table 6 hit → 472 candidates
[DEBUG] Table 7 hit → 472 candidates
[DEBUG] Table 8 hit → 472 candidates
[DEBUG] Table 9 hit → 472 candidates
[DEBUG] Table 10 hit → 234 candidates
[DEBUG] Table 11 hit → 472 candidates
[DEBUG] Table 12 hit → 472 candidates
[DEBUG] Table 13 hit → 472 candidates
[DEBUG] Table 14 hit → 472 candidates
[DEBUG] Table 15 hit → 472 candidates
[DEBUG] Table 16 hit → 472 candidates
[DEBUG] Table


Scoring: 100%|████████████████████████████| 472/472 [00:00<00:00, 201970.16it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • YPL-1.1/YPL-1.1-108.txt → Score: 0.0000
  • YPL-1.1/YPL-1.1-92.txt → Score: 0.0000
  • YPL-1.1/YPL-1.1-26_Split_SPDX_Licenses.txt → Score: 0.0000
  • YPL-1.1/YPL-1.1-1.txt → Score: 0.0000
  • YPL-1.1/YPL-1.1-55.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:   2%|▍                  | 14/674 [00:09<07:23,  1.49it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.162 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 3 hit → 1 candidates
 - LSH returned 1 candidates in 0.043 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 7463.17it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • AAL/AAL-17.txt → Score: 0.9957

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:   2%|▍                  | 16/674 [00:09<05:56,  1.84it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.604 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.018 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 1


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 17360.53it/s][A
🔍 Evaluating Queries:   3%|▌                  | 19/674 [00:09<03:29,  3.13it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.084 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 1 candidates
 - LSH returned 1 candidates in 0.017 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 12052.60it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-419.txt → Score: 1.0815

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 24 hit → 3 candidates
 - LSH returned 3 candidates in 0.052 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 3 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 25627.11it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-369_Split_SPDX_Licenses.txt → Score: 0.8469
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-369.txt → Score: 0.8469
  • GFDL-1.3/GFDL-1.3-386.txt → Score: 0.8469

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 6 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG] Table 14 hit → 6 candidates
[DEBUG] Table 15 hit → 5 candidates
[DEBUG] Table 16 hit → 5 candidates
[DEBUG] Table 17 hit → 5 candidates
[DEBUG] Table 18 hit → 5 candidates
[DEBUG] Table 19 hit → 5 candidates
[DEBUG] Table 20 hit → 5 candidates
[DEBUG] Table 21 hit → 5 candidates
[DEBUG] Table 22 hit → 6 candidates
[DEBUG] Table 23 hit → 5 candidates
[DEBUG] Table 24 hit → 5 candidates
[DEBUG]


Scoring: 100%|█████████████████████████████████| 6/6 [00:00<00:00, 42871.93it/s][A
🔍 Evaluating Queries:   3%|▌                  | 22/674 [00:10<02:30,  4.32it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-20-32.txt → Score: 0.0000
  • ZPL-2.0/ZPL-2.0-38.txt → Score: 0.0000
  • ZPL-2.1/ZPL-2.1-25.txt → Score: 0.0000
  • ZPL-2.1/ZPL-21-32.txt → Score: 0.0000
  • Intel/Intel-22.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.164 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 25 hit → 479 candidates
 - LSH returned 479 candidates in 0.040 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 479 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 479/479 [00:00<00:00, 163485.36it/s][A
🔍 Evaluating Queries:   4%|▋                  | 24/674 [00:10<02:11,  4.94it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.9286
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.9286
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.9286
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.9286
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.9286

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.045 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   4%|▋                  | 26/674 [00:10<01:53,  5.72it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.161 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.056 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 27 hit → 1 candidates
 - LSH returned 1 candidates in 0.024 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 11214.72it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NCGL-UK-2.0/NCGL-UK-20-13.txt → Score: 0.9983

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 4 hit → 1 candidates
 - LSH returned 1 candidates in 0.152 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 14463.12it/s][A
🔍 Evaluating Queries:   4%|▊                  | 29/674 [00:10<01:29,  7.19it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NCGL-UK-2.0/NCGL-UK-2.0-37.txt → Score: 1.0679

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG] Table 14 hit → 5 candidates
[DEBUG] Table 15 hit → 5 candidates
[DEBUG] Table 16 hit → 5 candidates
[DEBUG] Table 17 hit → 5 candidates
[DEBUG] Table 18 hit → 5 candidates
[DEBUG] Table 19 hit → 5 candidates
[DEBUG] Table 20 hit → 5 candidates
[DEB


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 55043.36it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 22 hit → 1 candidates
 - LSH returned 1 candidates in 0.028 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 17331.83it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-386.txt → Score: 1.0672

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 2 candidates
 - LSH returned 2 candidates in 0.036 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 26715.31it/s][A
🔍 Evaluating Queries:   5%|▉                  | 33/674 [00:10<00:59, 10.76it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-245.txt → Score: 0.6086
  • CC-BY-3.0/CC-BY-3.0-157_Split_SPDX_Licenses.txt → Score: 0.6086

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.020 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 7 hit → 2 candidates
[DEBUG] Table 8 hit → 3 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 27165.18it/s][A
🔍 Evaluating Queries:   5%|▉                  | 35/674 [00:11<00:58, 10.89it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-6.txt → Score: 0.1160
  • Imlib2/Imlib2-20.txt → Score: 0.1925
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 0.2866
  • ZPL-2.1/ZPL-2.1-32.txt → Score: 0.4597
  • Intel/Intel-26.txt → Score: 0.5110

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   5%|█                  | 37/674 [00:11<01:11,  8.93it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.305 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.041 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.054 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 20 hit → 479 candidates
 - LSH returned 479 candidates in 0.029 sec

 [STEP 3] Matching candidate file


Scoring: 100%|████████████████████████████| 479/479 [00:00<00:00, 194113.20it/s][A
🔍 Evaluating Queries:   6%|█                  | 39/674 [00:11<01:02, 10.12it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.3580

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   6%|█▏                 | 43/674 [00:11<00:47, 13.42it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.047 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.018 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.032 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.070 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer

🔍 Evaluating Queries:   7%|█▎                 | 46/674 [00:11<00:51, 12.21it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.236 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 1.696 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   7%|█▎                 | 48/674 [00:13<03:00,  3.47it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.261 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   7%|█▍                 | 50/674 [00:14<02:37,  3.95it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.252 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.038 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   8%|█▌                 | 54/674 [00:14<01:43,  5.97it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.244 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.023 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.018 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.068 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|█████████████████████████████████| 6/6 [00:00<00:00, 31575.69it/s][A
🔍 Evaluating Queries:   8%|█▌                 | 56/674 [00:14<01:45,  5.85it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • gnu-javamail-exception/gnu-javamail-exception-2.txt → Score: 0.0000
  • gnu-javamail-exception/gnu-javamail-exception-1_Split_SPDX_Licenses.txt → Score: 0.0405
  • gnu-javamail-exception/gnu-javamail-exception-3.txt → Score: 0.0514
  • gnu-javamail-exception/gnu-javamail-exception-2_Split_SPDX_Licenses.txt → Score: 0.2178
  • gnu-javamail-exception/gnu-javamail-exception-1.txt → Score: 0.2178

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.025 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 153 candidates
[DEBUG] Table 1 hit → 153 candidates
[DEBUG] Table 2 hit → 153 candidates
[DEBUG] Table 3 hit → 153 candidates
[DEBU


Scoring: 100%|████████████████████████████| 153/153 [00:00<00:00, 141818.46it/s][A
🔍 Evaluating Queries:   9%|█▋                 | 60/674 [00:15<01:06,  9.18it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CPL-0.5/CPL-0.5-52.txt → Score: 0.0000
  • CPL-0.5/CPL-0.5-151.txt → Score: 0.0000
  • CPL-0.5/CPL-0.5-84.txt → Score: 0.0000
  • CPL-0.5/CPL-0.5-156.txt → Score: 0.0000
  • CPL-0.5/CPL-0.5-11.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.015 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.020 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.042 sec
 No candidates found. Possibly due to hash siz

🔍 Evaluating Queries:   9%|█▋                 | 62/674 [00:15<00:59, 10.26it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.066 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.251 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:   9%|█▊                 | 64/674 [00:15<01:23,  7.31it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.253 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  10%|█▊                 | 66/674 [00:15<01:24,  7.21it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.253 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.035 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 17 hit → 1 candidates
[DEBUG] Table 18 hit → 1 candidates
[DEBUG] Table 26 hit → 1 candidates
 - LSH returned 2 candidates in 0.252 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 26973.02it/s][A
🔍 Evaluating Queries:  10%|█▉                 | 68/674 [00:16<01:26,  7.03it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-with-bison-exception/GPL-3.0-with-bison-exception-4.txt → Score: 0.1537
  • GPL-3.0-with-bison-exception/GPL-3.0-with-bison-exception-2.txt → Score: 0.3269

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.048 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.014 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 582 candidates
[DEBUG] Table 1 hit → 582 candidates
[DEBUG] Table 2 hit → 582 candidates
[DEBUG] Table 3 hit → 582 


Scoring: 100%|████████████████████████████| 947/947 [00:00<00:00, 217608.39it/s][A
🔍 Evaluating Queries:  11%|██                 | 71/674 [00:16<01:02,  9.68it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-123_Split_SPDX_Licenses.txt → Score: 0.0000
  • OSL-2.1/OSL-2.1-85_Split_DB_Foss_Licenses.txt → Score: 0.0000
  • OSL-2.1/OSL-2.1-79_Split_DB_Foss_Licenses.txt → Score: 0.0000
  • OSL-2.1/OSL-2.1-152_Split_SPDX_Licenses.txt → Score: 0.0000
  • OSL-2.1/OSL-2.1-178.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.047 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2 candidates
 - LSH returned 2 candidates in 0.022 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 19972.88it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-2.0-24_Split_SPDX_Licenses.txt → Score: 1.0452
  • SCEA/SCEA-130.txt → Score: 1.1600

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  11%|██                 | 74/674 [00:16<01:00,  9.84it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.240 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.024 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 1 candidates
[DEBUG] Table 1


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 12826.62it/s][A
🔍 Evaluating Queries:  11%|██▏                | 76/674 [00:16<01:05,  9.07it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CrystalStacker/CrystalStacker-18.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.255 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
 - LSH returned 2 candidates in 0.099 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 25040.62it/s][A
🔍 Evaluating Queries:  12%|██▏                | 78/674 [00:17<01:16,  7.81it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-453.txt → Score: 1.1819
  • GFDL-1.2/GFDL-1.2-423.txt → Score: 1.1819

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  12%|██▏                | 79/674 [00:17<01:26,  6.86it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.240 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 17 hit → 1 candidates
[DEBUG] Table 25 hit → 718 candidates
 - LSH returned 719 candidates in 0.035 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 719 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 719/719 [00:00<00:00, 211346.60it/s][A
🔍 Evaluating Queries:  12%|██▎                | 81/674 [00:17<01:10,  8.44it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 1.0033
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 1.0033
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 1.0033
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 1.0033
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 1.0033

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 1 candidates
 - LSH returned 1 candidates in 0.247 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 12787.51it/s][A
🔍 Evaluating Queries:  12%|██▎                | 83/674 [00:17<01:13,  7.99it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2/GFDL-1.2-460.txt → Score: 1.1416

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.031 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.018 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH retu

🔍 Evaluating Queries:  13%|██▍                | 87/674 [00:18<01:00,  9.78it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.250 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.263 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 7 hit → 1 candidates
 - LSH returned 1 candidates in 0.041 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████████| 1/1 [00:00<00:00, 683.00it/s][A
🔍 Evaluating Queries:  14%|██▌                | 92/674 [00:18<00:52, 11.14it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • gnu-javamail-exception/gnu-javamail-exception-3_Split_SPDX_Licenses.txt → Score: 1.0578

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.032 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.043 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.048 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 38


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 215197.56it/s][A
🔍 Evaluating Queries:  14%|██▋                | 94/674 [00:18<00:57, 10.11it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.055 sec
 No candidates found. Possibly due to hash size or vec


Scoring: 100%|██████████████████████████████| 62/62 [00:00<00:00, 188850.29it/s][A
🔍 Evaluating Queries:  14%|██▋                | 97/674 [00:19<00:44, 12.92it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-2.0-27.txt → Score: 0.4277
  • ZPL-2.1/ZPL-2.1-4.txt → Score: 0.4298
  • ZPL-2.1/ZPL-2.1-20_Split_SPDX_Licenses.txt → Score: 0.4298
  • ZPL-2.0/ZPL-2.0-5_Split_SPDX_Licenses.txt → Score: 0.4356
  • ZPL-2.1/ZPL-2.1-10.txt → Score: 0.4374

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.023 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  15%|██▊                | 99/674 [00:19<00:52, 10.85it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.249 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 11 hit → 2 candidates
[DEBUG] Table 20 hit → 42 candidates
[DEBUG] Table 23 hit → 4 candidates
[DEBUG] Table 29 hit → 4 candidates
 - LSH returned 46 candidates in 0.026 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 46 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████| 46/46 [00:00<00:00, 166757.12it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-6.txt → Score: 0.3649
  • OLDAP-2.6/OLDAP-2.6-27_Split_SPDX_Licenses.txt → Score: 0.3649
  • OLDAP-2.6/OLDAP-2.6-1_Split_SPDX_Licenses.txt → Score: 0.3776
  • OLDAP-2.6/OLDAP-2.6-22.txt → Score: 0.3776
  • OLDAP-2.6/OLDAP-2.6-5_Split_SPDX_Licenses.txt → Score: 0.3936

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 1 candidates
 - LSH returned 1 candidates in 0.260 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 12052.60it/s][A
🔍 Evaluating Queries:  15%|██▋               | 101/674 [00:19<01:01,  9.38it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • SCEA/SCEA-147.txt → Score: 0.9391

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1322 candidates
[DEBUG] Table 2 hit → 1323 candidates
[DEBUG] Table 3 hit → 1326 candidates
[DEBUG] Table 4 hit → 1322 candidates
[DEBUG] Table 5 hit → 1322 candidates
[DEBUG] Table 6 hit → 1326 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 1326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] Table 14 hit → 1322 candidates
[DEBUG] Table 15 hit → 1326 candidates
[DEBUG] Table 16 hit → 1648 candidates
[DEBUG] Table 17 hit → 2889 candidates
[DEBUG] Table 18 hit → 2555 candidates
[DEBUG] Table 19 hit → 1649 cand


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 197334.81it/s][A
🔍 Evaluating Queries:  15%|██▊               | 103/674 [00:19<01:09,  8.25it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.047 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 17 hit → 1 candidates
 - LSH returned 1 candidates in 0.244 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing d


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 15650.39it/s][A
🔍 Evaluating Queries:  16%|██▊               | 105/674 [00:20<01:12,  7.88it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • W3C/W3C-19.txt → Score: 0.4381

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.049 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 487 candidates
[DEBUG] Table 1 hit → 488 candidates
[DEBUG] Table 2 hit → 484 candidates
[DEBUG] Table 3 hit → 484 candidates
[DEBUG] Table 4 hit → 487 candidates
[DEBUG] Table 5 hit → 485 candidates
[DEBUG] Table 6 hit → 484 candidates
[DEBUG] Tab


Scoring: 100%|████████████████████████████| 489/489 [00:00<00:00, 214496.41it/s][A
🔍 Evaluating Queries:  16%|██▊               | 107/674 [00:20<01:02,  9.13it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZoneAlarm-EULA/ZoneAlarm-EULA-368.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-72.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-435.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-342.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-481.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
 - LSH returned 1 candidates in 0.025 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 16980.99it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-11.txt → Score: 0.6944

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.025 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 29 hit → 2 candidates
 - LSH returned 2 candidates in 0.034 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 24036.13it/s][A
🔍 Evaluating Queries:  16%|██▉               | 111/674 [00:20<00:40, 13.82it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-67_Split_SPDX_Licenses.txt → Score: 0.5065
  • CC-BY-3.0/CC-BY-3.0-215.txt → Score: 0.5065

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.037 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  17%|███               | 115/674 [00:21<01:16,  7.28it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.828 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.103 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.040 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.042 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|██████████████████████████████████| 2/2 [00:00<00:00, 8330.30it/s][A
🔍 Evaluating Queries:  17%|███               | 117/674 [00:21<01:04,  8.63it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • YPL-1.1/YPL-1.1-140.txt → Score: 1.2232
  • YPL-1.0/YPL-1.0-140.txt → Score: 1.2232

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  18%|███▏              | 119/674 [00:21<01:09,  7.95it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.269 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 33 candidates
[DEBUG] Table 1 hit → 32 candidates
[DEBUG] Table 2 hit → 33 candidates
[DEBUG] Table 3 hit → 32 candidates
[DEBUG] Table 4 hit → 31 candidates
[DEBUG] Table 5 hit → 31 candidates
[DEBUG] Table 6 hit → 31 candidates
[DEBUG] Table 7 hit → 31 candidates
[DEBUG] Table 8 hit → 32 candidates
[DEBUG] Table 9 hit → 32 candidates
[DEBUG] Table 10 hit → 33 candidates
[DEBUG] Table 11 hit → 32 candidates
[DEBUG] Table 12 hit → 32 candidates
[DEBUG] Table 13 hit → 32 candidates
[DEBUG] Table 14 hit → 32 candidates
[DEBUG] Table 15 hit → 31 candidates
[DEBUG] Table 16 hit → 32 candidates
[DEBUG] Table 17 hit → 33 cand


Scoring: 100%|██████████████████████████████| 37/37 [00:00<00:00, 161487.25it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Parity-7.0.0/Parity-7.0.0-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • Parity-7.0.0/Parity-7.0.0-36_Split_SPDX_Licenses.txt → Score: 0.0000
  • Parity-7.0.0/Parity-7.0.0-22_Split_SPDX_Licenses.txt → Score: 0.0000
  • Parity-7.0.0/Parity-7.0.0-10_Split_SPDX_Licenses.txt → Score: 0.0000
  • Parity-7.0.0/Parity-7.0.0-32_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  18%|███▎              | 123/674 [00:22<01:27,  6.28it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.731 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.042 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.079 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.034 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 13530.01it/s][A
🔍 Evaluating Queries:  19%|███▎              | 125/674 [00:23<01:25,  6.45it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-with-bison-exception/GPL-3.0-with-bison-exception-7.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.030 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  20%|███▌              | 132/674 [00:23<00:47, 11.41it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.257 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.027 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.024 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.018 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|██████████████████████████| 2900/2900 [00:00<00:00, 217913.25it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-265.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-363_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-188_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-276.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-276_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  20%|███▌              | 134/674 [00:23<00:55,  9.77it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 1 candidates
[DEBUG] Table 1


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 15363.75it/s][A
🔍 Evaluating Queries:  20%|███▋              | 136/674 [00:24<00:59,  9.01it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-388.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.249 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  20%|███▋              | 138/674 [00:24<01:20,  6.69it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.270 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 23 hit → 2 candidates
 - LSH returned 2 candidates in 0.045 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 25420.02it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-2.0-3_Split_SPDX_Licenses.txt → Score: 0.9077
  • ZPL-2.0/ZPL-2.0-35.txt → Score: 0.9077

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  21%|███▋              | 140/674 [00:24<01:20,  6.63it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.261 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  21%|███▊              | 142/674 [00:25<01:18,  6.74it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.257 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.030 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.028 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|████████████████████████████| 718/718 [00:00<00:00, 182173.51it/s][A
🔍 Evaluating Queries:  22%|███▉              | 146/674 [00:25<00:53,  9.86it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.2497
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.2497
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.2497
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.2497
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.2497

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.028 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 9 candidates
 - LSH returned 9 candidates in 0.022 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 9 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 9/9 [00:00<00:00, 79638.68it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-57_Split_DB_Foss_Licenses.txt → Score: 0.5237
  • OSL-2.1/OSL-2.1-147_Split_SPDX_Licenses.txt → Score: 0.5237
  • OSL-2.1/OSL-2.1-57.txt → Score: 0.5237
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-4.txt → Score: 0.9065
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-3.txt → Score: 0.9065

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1233 candidates
[DEBUG] Table 2 hit → 1236 candidates
[DEBUG] Table 3 hit → 1236 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 1236 candidates
[DEBUG] Table 6 hit → 1236 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1236 candidates
[DEBUG] Table 9 hit → 1236 candidates
[DEBUG] Table 10 hit → 1236 candidates
[DEBUG] Table 11 hit → 1233 candidates
[DEBUG] Table 12 hit → 1233 candidates
[DE


Scoring: 100%|██████████████████████████| 2900/2900 [00:00<00:00, 159701.19it/s][A
🔍 Evaluating Queries:  22%|███▉              | 149/674 [00:25<00:52, 10.08it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-265.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-363_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-188_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-276.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-276_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.040 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  23%|████              | 152/674 [00:25<00:53,  9.75it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.263 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 2 candidates
 - LSH returned 2 candidates in 0.031 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 24745.16it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-245.txt → Score: 0.6140
  • CC-BY-3.0/CC-BY-3.0-157_Split_SPDX_Licenses.txt → Score: 0.6140

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  23%|████              | 154/674 [00:26<00:58,  8.95it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.251 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1322 candidates
[DEBUG] Table 2 hit → 1323 candidates
[DEBUG] Table 3 hit → 1326 candidates
[DEBUG] Table 4 hit → 1322 candidates
[DEBUG] Table 5 hit → 1322 candidates
[DEBUG] Table 6 hit → 1326 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 1326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] Table 14 hit → 1322 candidates
[DEBUG] Table 15 hit → 1326 candidates
[DEBUG] Table 16 hit → 1648 candida


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 223241.85it/s][A
🔍 Evaluating Queries:  23%|████▏             | 155/674 [00:26<01:05,  7.92it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.027 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  23%|████▏             | 157/674 [00:26<01:08,  7.54it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.269 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 1 hit → 482 candidates
[DEBUG] Table 2 hit → 478 candidates
[DEBUG] Table 3 hit → 478 candidates
[DEBUG] Table 4 hit → 478 candidates
[DEBUG] Table 5 hit → 478 candidates
[DEBUG] Table 6 hit → 478 candidates
[DEBUG] Table 7 hit → 478 candidates
[DEBUG] Table 8 hit → 479 candidates
[DEBUG] Table 9 hit → 478 candidates
[DEBUG] Table 10 hit → 478 candidates
[DEBUG] Table 11 hit → 478 candidates
[DEBUG] Table 12 hit → 478 candidates
[DEBUG] Table 13 hit → 481 candidates
[DEBUG] Table 14 hit → 479 candidates
[DEBUG] Table 15 hit → 478 candidates
[DEBUG] Table 16 hit → 478 candidates
[DEBUG] Table


Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 179189.97it/s][A
🔍 Evaluating Queries:  23%|████▏             | 158/674 [00:26<01:11,  7.22it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.0000

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  24%|████▏             | 159/674 [00:27<01:22,  6.21it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.253 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.053 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 4 candidates
 - LSH returned 4 candidates in 0.035 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors fo


Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 34807.50it/s][A
🔍 Evaluating Queries:  24%|████▎             | 162/674 [00:27<00:53,  9.57it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NCGL-UK-2.0/NCGL-UK-2.0-4.txt → Score: 0.9992
  • NCGL-UK-2.0/NCGL-UK-2.0-2.txt → Score: 0.9992
  • NCGL-UK-2.0/NCGL-UK-2.0-3.txt → Score: 0.9992
  • NCGL-UK-2.0/NCGL-UK-2.0-1.txt → Score: 0.9992

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.259 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1233 candidates
[DEBUG] Table 2 hit → 1236 candidates
[DEBUG] Table 3 hit → 1236 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 1236 candidates
[DEBUG] Table 6 hit → 1236 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1236 candidates
[DEBUG] Tab


Scoring: 100%|██████████████████████████| 2900/2900 [00:00<00:00, 209052.00it/s][A
🔍 Evaluating Queries:  25%|████▍             | 167/674 [00:27<00:53,  9.46it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-265.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-363_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-188_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-276.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-276_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.061 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.037 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 


Scoring: 100%|█████████████████████████████████| 8/8 [00:00<00:00, 62484.98it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-283.txt → Score: 0.9843
  • GFDL-1.2-only/GFDL-1.2-only-129.txt → Score: 0.9940
  • GFDL-1.2/GFDL-1.2-129_Split_SPDX_Licenses.txt → Score: 0.9940
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-129.txt → Score: 0.9940
  • GFDL-1.2/GFDL-1.2-39.txt → Score: 0.9940

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  25%|████▌             | 169/674 [00:28<00:46, 10.97it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.046 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.016 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 27 hit → 1 candidates
 - LSH returned 1 candidates in 0.243 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 7570.95it/s][A
🔍 Evaluating Queries:  25%|████▌             | 171/674 [00:28<00:51,  9.77it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OASIS/OASIS-45.txt → Score: 1.0561

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 1 candidates
 - LSH returned 1 candidates in 0.248 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 14768.68it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-396.txt → Score: 0.9261

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 9 hit → 4 candidates
[DEBUG] Table 10 hit → 4 candidates
 - LSH returned 4 candidates in 0.028 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 52593.15it/s][A
🔍 Evaluating Queries:  26%|████▋             | 176/674 [00:28<00:42, 11.70it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NCGL-UK-2.0/NCGL-UK-2.0-3_Split_SPDX_Licenses.txt → Score: 0.5149
  • NCGL-UK-2.0/NCGL-UK-2.0-4_Split_SPDX_Licenses.txt → Score: 0.5149
  • NCGL-UK-2.0/NCGL-UK-2.0-1_Split_SPDX_Licenses.txt → Score: 0.5149
  • NCGL-UK-2.0/NCGL-UK-2.0-2_Split_SPDX_Licenses.txt → Score: 0.5149

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.025 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candida

🔍 Evaluating Queries:  26%|████▊             | 178/674 [00:29<00:57,  8.57it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.381 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.025 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 19 hit → 718 candidates
 - LSH returned 718 candidates in 0.062 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 718 vec


Scoring: 100%|████████████████████████████| 718/718 [00:00<00:00, 236902.95it/s][A
🔍 Evaluating Queries:  27%|████▊             | 181/674 [00:29<00:45, 10.74it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.5826
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.5826
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.5826
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.5826
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.5826

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 19 hit → 1 candidates
 - LSH returned 1 candidates in 0.030 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 16384.00it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CPL-0.5/CPL-0.5-172.txt → Score: 0.8504

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 21 hit → 2 candidates
 - LSH returned 4 candidates in 0.687 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 49200.05it/s][A
🔍 Evaluating Queries:  27%|████▉             | 184/674 [00:29<01:11,  6.87it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-383.txt → Score: 0.9087
  • GFDL-1.3/GFDL-1.3-460.txt → Score: 0.9790
  • GFDL-1.2/GFDL-1.2-457.txt → Score: 1.0300
  • GFDL-1.3/GFDL-1.3-487.txt → Score: 1.0431

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 5 hit → 2 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 2 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 2 candidates
[DEBUG] Table 11 hit → 2 candidates
[DEBUG] Table 12 hit → 2 candidates
[DEBUG] Table 13 hit → 2 candidates
[DEBUG] Table 14 hit → 2 candidates
[DEBUG] Table 15 hit → 2 candidates
[DEBUG] Table 16 hit → 2 candidates
[DEBUG] Table


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 25266.89it/s][A
🔍 Evaluating Queries:  28%|████▉             | 186/674 [00:30<01:09,  6.97it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MX4J/MX4J-1.txt → Score: 0.0000
  • MX4J/MX4J-3.txt → Score: 0.0655

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.027 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 22 hit → 1 candidates
 - LSH returned 2 candidates in 0.240 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 25970.92it/s][A
🔍 Evaluating Queries:  28%|█████             | 188/674 [00:30<01:08,  7.05it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-33.txt → Score: 0.5702
  • OLDAP-2.8/OLDAP-2.8-30.txt → Score: 0.6671

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.060 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  28%|█████             | 190/674 [00:30<01:10,  6.91it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.244 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.035 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.024 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  29%|█████▏            | 193/674 [00:31<01:01,  7.78it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.241 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 1 candidates
[DEBUG] Table 11 hit → 1 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table 13 hit → 1 candidates
[DEBUG] Table 14 hit → 1 candidates
[DEBUG] Table 15 hit → 1 candidates
[DEBUG] Table 16 hit → 1 candidates
[DEBUG] Table 17 hit → 1 candidates
[DEBUG] Tab


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 13842.59it/s][A
🔍 Evaluating Queries:  29%|█████▏            | 194/674 [00:31<01:09,  6.86it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CPL-0.5/CPL-0.5-188.txt → Score: 0.0000

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  29%|█████▏            | 195/674 [00:31<01:17,  6.15it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.242 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 20 hit → 479 candidates
 - LSH returned 479 candidates in 0.072 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 479 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 479/479 [00:00<00:00, 167898.35it/s][A
🔍 Evaluating Queries:  29%|█████▏            | 196/674 [00:31<01:13,  6.54it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.3580

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 24 hit → 6 candidates
 - LSH returned 484 candidates in 0.068 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 484 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 223671.57it/s][A
🔍 Evaluating Queries:  29%|█████▎            | 197/674 [00:31<01:08,  6.98it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.4778

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 15 hit → 21 candidates
 - LSH returned 21 candidates in 0.030 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 21 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████| 21/21 [00:00<00:00, 120000.52it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-2.0-26_Split_SPDX_Licenses.txt → Score: 0.8253
  • ZPL-2.0/ZPL-2.0-23_Split_SPDX_Licenses.txt → Score: 0.8253
  • ZPL-2.0/ZPL-2.0-20_Split_SPDX_Licenses.txt → Score: 0.8253
  • ZPL-2.0/ZPL-2.0-14_Split_SPDX_Licenses.txt → Score: 0.8253
  • ZPL-2.0/ZPL-2.0-10_Split_SPDX_Licenses.txt → Score: 0.8253

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 7 hit → 2 candidates
 - LSH returned 2 candidates in 0.018 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 27776.85it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • W3C/W3C-11_Split_SPDX_Licenses.txt → Score: 0.9030
  • W3C/W3C-13.txt → Score: 0.9030

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 1 candidates
 - LSH returned 1 candidates in 0.025 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 17549.39it/s][A
🔍 Evaluating Queries:  30%|█████▎            | 201/674 [00:31<00:36, 12.97it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-419.txt → Score: 1.1602

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.038 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  30%|█████▍            | 203/674 [00:32<00:44, 10.68it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.231 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 10 candidates
[DEBUG] Table 1 hit → 13 candidates
[DEBUG] Table 2 hit → 10 candidates
[DEBUG] Table 3 hit → 11 candidates
[DEBUG] Table 4 hit → 10 candidates
[DEBUG] Table 5 hit → 10 candidates
[DEBUG] Table 6 hit → 10 candidates
[DEBUG] Table 7 hit → 10 candidates
[DEBUG] Table 8 hit → 10 candidates
[DEBUG] Table 9 hit → 10 candidates
[DEBUG] Table 10 hit → 11 candidates
[DEBUG] Table 11 hit → 13 candidates
[DEBUG] Table 12 hit → 10 candidates
[DEBUG] Table 13 hit → 11 candidates
[DEBUG] Table 14 hit → 10 candidates
[DEBUG] Table 15 hit → 10 candidates
[DEBUG] Table 16 hit → 10 candidates
[DEBUG] Table 17 hit → 12 cand


Scoring: 100%|███████████████████████████████| 16/16 [00:00<00:00, 88301.14it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OpenMarket/OpenMarket-14.txt → Score: 0.0000
  • OpenMarket/OpenMarket-4.txt → Score: 0.0000
  • OpenMarket/OpenMarket-1.txt → Score: 0.0000
  • OpenMarket/OpenMarket-15.txt → Score: 0.0000
  • OpenMarket/OpenMarket-10.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 18 hit → 1 candidates
 - LSH returned 1 candidates in 0.020 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 14074.85it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • W3C/W3C-20.txt → Score: 0.7143

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  31%|█████▌            | 209/674 [00:32<00:34, 13.37it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.244 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.014 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.056 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 16384.00it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OASIS/OASIS-45.txt → Score: 1.1754

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1322 candidates
[DEBUG] Table 2 hit → 1323 candidates
[DEBUG] Table 3 hit → 1326 candidates
[DEBUG] Table 4 hit → 1322 candidates
[DEBUG] Table 5 hit → 1322 candidates
[DEBUG] Table 6 hit → 1326 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 1326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] Table 14 hit → 1322 candidates
[DEBUG] Table 15 hit → 1326 candidates
[DEBUG] Table 16 hit → 1648 candidates
[DEBUG] Table 17 hit → 2889 candidates
[DEBUG] Table 18 hit → 2555 candidates
[DEBUG] Table 19 hit → 1649 candidates
[DEBUG] Table 20 hit → 1653 candidates
[DEBUG] Table 21 hit → 1652 candidates
[DEBUG] Table 22 hit → 2558 candidates
[DEBUG] Ta


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 216385.01it/s][A
🔍 Evaluating Queries:  32%|█████▋            | 213/674 [00:32<00:36, 12.70it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 1


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 63550.06it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  32%|█████▋            | 215/674 [00:33<00:43, 10.55it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.262 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2 candidates
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 2 hit → 2 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 5 hit → 2 candidates
[DEBUG] Table 6 hit → 2 candidates
[DEBUG] Table 7 hit → 2 candidates
[DEBUG] Table 8 hit → 2 candidates
[DEBUG] Table 9 hit → 2 candidates
[DEBUG] Table 10 hit → 2 candidates
[DEBUG] Table 11 hit → 2 candidates
[DEBUG] Table 12 hit → 2 candidates
[DEBUG] Table 13 hit → 2 candidates
[DEBUG] Table 14 hit → 2 candidates
[DEBUG] Table 15 hit → 2 candidates
[DEBUG] Table 16 hit → 2 candidates
[DEBUG] Table 17 hit → 2 candidates
[DEBUG] Tab


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 26296.58it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-211_Split_DB_Foss_Licenses.txt → Score: 0.0000
  • OSL-2.1/OSL-2.1-211.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 24 hit → 3 candidates
 - LSH returned 3 candidates in 0.045 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 3 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 25165.82it/s][A
🔍 Evaluating Queries:  32%|█████▊            | 217/674 [00:33<00:49,  9.32it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-369_Split_SPDX_Licenses.txt → Score: 0.8469
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-369.txt → Score: 0.8469
  • GFDL-1.3/GFDL-1.3-386.txt → Score: 0.8469

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 8 candidates
[DEBUG] Table 1 hit → 7 candidates
[DEBUG] Table 2 hit → 2 candidates
[DEBUG] Table 3 hit → 6 candidates
[DEBUG] Table 4 hit → 4 candidates
[DEBUG] Table 5 hit → 4 candidates
[DEBUG] Table 6 hit → 2 candidates
[DEBUG] Table 7 hit → 2 candidates
[DEBUG] Table 8 hit → 6 candidates
[DEBUG] Table 9 hit → 2 candidates
[DEBUG] Table 10 hit → 4 candidates
[DEBUG] Table 11 hit → 2 candidates
[DEBUG] Table 12 hit → 4 candidates
[DEBUG] Table 13 hit → 2 candidates
[DEBUG] Table 14 hit → 2 candidates
[DEBUG] Table 15 hit → 4 candidates
[DEBUG] Table 16 hit → 2 candidates
[


Scoring: 100%|██████████████████████████████| 45/45 [00:00<00:00, 146199.60it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.1/ZPL-2.1-15.txt → Score: 0.0000
  • ZPL-2.1/ZPL-2.1-13_Split_SPDX_Licenses.txt → Score: 0.0000
  • ZPL-2.1/ZPL-2.1-1.txt → Score: 0.1101
  • ZPL-2.1/ZPL-2.1-11_Split_SPDX_Licenses.txt → Score: 0.1101
  • ZPL-2.1/ZPL-2.1-4_Split_SPDX_Licenses.txt → Score: 0.2399

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 22 hit → 1 candidates
 - LSH returned 2 candidates in 0.030 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 24456.58it/s][A
🔍 Evaluating Queries:  32%|█████▊            | 219/674 [00:33<00:52,  8.59it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CrystalStacker/CrystalStacker-19.txt → Score: 1.0381
  • CC-BY-3.0/CC-BY-3.0-371.txt → Score: 1.1604

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 1 hit → 482 candidates
[DEBUG] Table 2 hit → 478 candidates
[DEBUG] Table 3 hit → 478 candidates
[DEBUG] Table 4 hit → 478 candidates
[DEBUG] Table 5 hit → 478 candidates
[DEBUG] Table 6 hit → 478 candidates
[DEBUG] Table 7 hit → 478 candidates
[DEBUG] Table 8 hit → 479 candidates
[DEBUG] Table 9 hit → 478 candidates
[DEBUG] Table 10 hit → 478 candidates
[DEBUG] Table 11 hit → 478 candidates
[DEBUG] Table 12 hit → 478 candidates
[DEBUG] Table 13 hit → 481 candidates
[DEBUG] Table 14 hit → 479 candidates
[DEBUG] Table 15 hit → 478 candidates
[DEBUG] Table 16 hit → 478 candidates
[DEBUG] Table 17 hit → 478 candidates
[DEBUG] Table 18 hit →


Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 219796.79it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.0000

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 18 hit → 1 candidates
 - LSH returned 2 candidates in 0.242 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 13005.59it/s][A
🔍 Evaluating Queries:  33%|█████▉            | 221/674 [00:34<00:59,  7.65it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • W3C/W3C-20.txt → Score: 0.5932
  • CPL-0.5/CPL-0.5-180.txt → Score: 0.8486

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 5 candidates
 - LSH returned 5 candidates in 0.231 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 5 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 28532.68it/s][A
🔍 Evaluating Queries:  33%|█████▉            | 222/674 [00:34<01:06,  6.78it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-32.txt → Score: 0.6741
  • GFDL-1.2-only/GFDL-1.2-only-72.txt → Score: 0.9080
  • GFDL-1.2/GFDL-1.2-381.txt → Score: 0.9080
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-72.txt → Score: 0.9080
  • GFDL-1.2/GFDL-1.2-72_Split_SPDX_Licenses.txt → Score: 0.9080

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  33%|█████▉            | 223/674 [00:34<01:14,  6.04it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.244 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.056 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 26 hit → 1 candidates
 - LSH returned 1 candidates in 0.039 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 4993.22it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-412.txt → Score: 1.0077

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  34%|██████▏           | 230/674 [00:35<00:40, 10.86it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.244 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.035 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.021 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|██████████████████████████| 2900/2900 [00:00<00:00, 176476.72it/s][A
🔍 Evaluating Queries:  35%|██████▏           | 233/674 [00:35<00:42, 10.36it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-265.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-363_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-188_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-276.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-276_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 2 candidates
 - LSH returned 2 candidates in 0.046 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 20867.18it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-245.txt → Score: 0.6067
  • CC-BY-3.0/CC-BY-3.0-157_Split_SPDX_Licenses.txt → Score: 0.6067

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.040 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 17 hit → 8 candidates
 - LSH returned 10 candidates in 0.086 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 10 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████| 10/10 [00:00<00:00, 93414.34it/s][A
🔍 Evaluating Queries:  35%|██████▎           | 236/674 [00:35<00:37, 11.83it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.8/OLDAP-2.8-13.txt → Score: 1.0924
  • OLDAP-2.8/OLDAP-2.8-7_Split_SPDX_Licenses.txt → Score: 1.0924
  • OSL-2.1/OSL-2.1-216.txt → Score: 1.1012
  • OSL-2.1/OSL-2.1-216_Split_DB_Foss_Licenses.txt → Score: 1.1012
  • OLDAP-2.8/OLDAP-2.8-16_Split_SPDX_Licenses.txt → Score: 1.1016

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.336 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 3 hit → 478 candidates
[DEBUG] Table 15 hit → 478 candidates
[DEBUG] Table 17 hit → 478 candidates
[DEBUG] Table 25 hit → 478 candidates
[DEBUG] Table 26 hit → 1 candidates
 - LSH returned 479 candidates in 0.056 sec

 [STEP 3] Matching candidate f


Scoring: 100%|████████████████████████████| 479/479 [00:00<00:00, 203141.72it/s][A
🔍 Evaluating Queries:  36%|██████▍           | 242/674 [00:36<00:34, 12.53it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-234_Split_SPDX_Licenses.txt → Score: 0.3843
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-301_Split_SPDX_Licenses.txt → Score: 0.3843
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-339_Split_SPDX_Licenses.txt → Score: 0.3843
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-271_Split_SPDX_Licenses.txt → Score: 0.3843
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-236_Split_SPDX_Licenses.txt → Score: 0.3843

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.030 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 4951.95it/s][A
🔍 Evaluating Queries:  36%|██████▌           | 245/674 [00:36<00:29, 14.37it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • FTL/FTL-131.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.092 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 5 candidates
[DEBUG] Table 29 hit → 6 candidates
 - LSH returned 11 candidates in 0.061 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 11 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████| 11/11 [00:00<00:00, 66289.29it/s][A
🔍 Evaluating Queries:  37%|██████▌           | 247/674 [00:36<00:30, 13.89it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-8.txt → Score: 0.6036
  • info-zip/info-zip-28.txt → Score: 0.6067
  • info-zip/info-zip-28_Split_DB_Foss_Licenses.txt → Score: 0.6067
  • ZPL-2.1/ZPL-21-34.txt → Score: 0.6454
  • ZPL-2.1/ZPL-2.1-27.txt → Score: 0.6454

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 4 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 4 candidates
[DEBUG] Table 3 hit → 4 candidates
[DEBUG] Table 4 hit → 4 candidates
[DEBUG] Table 5 hit → 4 candidates
[DEBUG] Table 6 hit → 4 candidates
[DEBUG] Table 7 hit → 4 candidates
[DEBUG] Tab


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 12810.95it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NCGL-UK-2.0/NCGL-UK-2.0-4_Split_SPDX_Licenses.txt → Score: 0.0000
  • NCGL-UK-2.0/NCGL-UK-2.0-1_Split_SPDX_Licenses.txt → Score: 0.0000
  • NCGL-UK-2.0/NCGL-UK-2.0-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • NCGL-UK-2.0/NCGL-UK-2.0-2_Split_SPDX_Licenses.txt → Score: 0.0000
  • NLOD-1.0/NLOD-1.0-142.txt → Score: 0.9506

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  37%|██████▋           | 250/674 [00:36<00:31, 13.59it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.158 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.065 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 25 hit → 84 candidates
[DEBUG] Table 29 hit → 4 candidates
 - LSH returned 93 candidates in 0.023 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 93 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████| 93/93 [00:00<00:00, 127808.08it/s][A
🔍 Evaluating Queries:  37%|██████▋           | 252/674 [00:36<00:28, 14.64it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27.txt → Score: 0.3992
  • OLDAP-2.6/OLDAP-2.6-3_Split_SPDX_Licenses.txt → Score: 0.3992
  • OLDAP-2.6/OLDAP-2.6-25.txt → Score: 0.3992
  • OLDAP-2.6/OLDAP-2.6-24.txt → Score: 0.3992
  • OLDAP-2.6/OLDAP-2.6-26.txt → Score: 0.3992

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.059 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit


Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 31242.49it/s][A
🔍 Evaluating Queries:  38%|██████▊           | 254/674 [00:36<00:28, 14.52it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-6.txt → Score: 0.0000
  • Imlib2/Imlib2-20.txt → Score: 0.1561
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 0.2701
  • Intel/Intel-26.txt → Score: 0.5186

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 29 hit → 1 candidates
 - LSH returned 1 candidates in 0.341 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 8473.34it/s][A
🔍 Evaluating Queries:  38%|██████▊           | 256/674 [00:37<00:44,  9.43it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-369.txt → Score: 1.0954

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.074 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.017 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.051 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  38%|██████▉           | 259/674 [00:37<00:53,  7.78it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.436 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.135 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 40 candidates
[DEBUG] Table 1 hit → 83 candidates
[DEBUG] Table 2 hit → 40 candidates
[DEBUG] Table 3 hit → 40 candidates
[DEBUG] Table 4 hit → 40 candidates
[DEBUG] Table 5 hit → 40 candidates
[DEBUG] Table 6 hit → 40 candidates
[DEBUG] Table 7 hit → 42 candidates
[DEBUG] Table 8 hit → 42 candidates
[DEBUG] Table 9 hit → 40 candidates
[DEBUG] Table 10 hit → 42 candidates
[DEB


Scoring: 100%|█████████████████████████████| 102/102 [00:00<00:00, 86428.08it/s][A
🔍 Evaluating Queries:  39%|██████▉           | 261/674 [00:38<00:48,  8.46it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.8/OLDAP-2.8-2.txt → Score: 0.0000
  • OLDAP-2.8/OLDAP-2.8-5_Split_SPDX_Licenses.txt → Score: 0.0000
  • OLDAP-2.8/OLDAP-2.8-14_Split_SPDX_Licenses.txt → Score: 0.0000
  • OLDAP-2.8/OLDAP-2.8-11.txt → Score: 0.0000
  • OLDAP-2.8/OLDAP-2.8-26.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.087 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
 - LSH returned 2 candidates in 0.046 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 2/2 [00:00<00:00, 6620.84it/s][A
🔍 Evaluating Queries:  39%|███████           | 263/674 [00:38<00:43,  9.51it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • YPL-1.0/YPL-1.0-131.txt → Score: 0.9169
  • YPL-1.1/YPL-1.1-131.txt → Score: 0.9169

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  39%|███████           | 265/674 [00:38<00:59,  6.88it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.471 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 2 hit → 2 candidates
 - LSH returned 2 candidates in 0.023 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 29433.71it/s][A
🔍 Evaluating Queries:  40%|███████▏          | 267/674 [00:38<00:49,  8.26it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NCGL-UK-2.0/NCGL-UK-2.0-10.txt → Score: 0.9060
  • NCGL-UK-2.0/NCGL-UK-20-9.txt → Score: 0.9060

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.095 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 24 hit → 3 candidates
 - LSH returned 3 candidates in 0.043 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 3 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 24244.53it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-369_Split_SPDX_Licenses.txt → Score: 0.8469
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-369.txt → Score: 0.8469
  • GFDL-1.3/GFDL-1.3-386.txt → Score: 0.8469

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG] Table 14 hit → 5 candidates
[DEBUG] Table 15 hit → 5 candidates
[DEBUG] Table 16 hit → 5 candidates
[DEBUG] Table 17 hit → 5 candidates
[DEBUG] Table 18 hit → 5 candidates
[DEBUG] Table 19 hit → 5 candidates
[DEBUG] Table 20 hit → 5 candidates
[DEBUG] Table 21 hit → 5 candidates
[DEBUG] Table 22 hit → 5 candidates
[DEBUG] Table 23 hit → 5 candidates
[DEBUG] Table 24 hit → 5 candidates
[DEBUG]


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 56987.83it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  40%|███████▏          | 270/674 [00:38<00:38, 10.37it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.070 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.060 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 22 hit → 1 candidates
 - LSH returned 1 candidates in 0.038 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 12192.74it/s][A
🔍 Evaluating Queries:  40%|███████▎          | 272/674 [00:39<00:33, 11.87it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-32.txt → Score: 0.7959

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 5 hit → 2 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 2 candidates
[DEBUG] Table 8 hit → 2 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 2 candidates
[DEBUG] Table 11 hit → 3 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table 13 hit → 1 candidates
[DEBUG] Tabl


Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 38479.85it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CATOSL/CATOSL-3.txt → Score: 0.0000
  • CATOSL/CATOSL-2.txt → Score: 0.0719
  • CATOSL/CATOSL-1.txt → Score: 0.4917

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  41%|███████▍          | 278/674 [00:39<00:40,  9.82it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.581 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.031 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer

🔍 Evaluating Queries:  42%|███████▌          | 283/674 [00:40<00:38, 10.19it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.371 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 331 candidates
[DEBUG] Table 2 hit → 326 candidates
[DEBUG] Table 3 hit → 326 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 326 candidates
[DEBUG] Table 6 hit → 327 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 326


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 203951.06it/s][A
🔍 Evaluating Queries:  42%|███████▌          | 285/674 [00:40<00:41,  9.44it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2 candidates
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 2 hit → 2 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 5 hit → 2 candidates
[DEBUG] Table 6 hit → 2 candidates
[DEBUG] Table 7 hit → 4 candidates
[DEBUG] Table 8 hit → 2 candidates
[DEBUG] Table 9 hit → 2 candi


Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 42908.48it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-4.txt → Score: 0.0000
  • MITNFA/MITNFA-4_Split_SPDX_Licenses.txt → Score: 0.0000
  • MITNFA/MITNFA-2.txt → Score: 0.7830
  • MITNFA/MITNFA-1_Split_SPDX_Licenses.txt → Score: 0.7830

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1322 candidates
[DEBUG] Table 2 hit → 1323 candidates
[DEBUG] Table 3 hit → 1326 candidates
[DEBUG] Table 4 hit → 1322 candidates
[DEBUG] Table 5 hit → 1322 candidates
[DEBUG] Table 6 hit → 1326 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 1326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] Table 14 hit → 1322 candidates
[DEBUG] Table 15 hit → 132




  - Found 2889 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 210235.51it/s][A
🔍 Evaluating Queries:  43%|███████▋          | 287/674 [00:41<00:57,  6.75it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.045 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  43%|███████▋          | 289/674 [00:42<01:24,  4.54it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.842 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG] Table 14 hit → 5 candidates
[DEBUG] Table 15 hit → 5 candidates
[DEBUG] Table 16 hit → 5 candidates
[DEBUG] Table 17 hit → 5 candidates
[DEBUG] Tab


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 17050.02it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 9 hit → 26 candidates
 - LSH returned 28 candidates in 0.029 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 28 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████| 28/28 [00:00<00:00, 48791.24it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • W3C/W3C-11_Split_SPDX_Licenses.txt → Score: 0.4676
  • W3C/W3C-13.txt → Score: 0.4676
  • W3C/W3C-2.txt → Score: 0.4724
  • W3C/W3C-1_Split_SPDX_Licenses.txt → Score: 0.4724
  • W3C/W3C-1.txt → Score: 0.4907

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  43%|███████▊          | 292/674 [00:42<01:17,  4.96it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.406 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  44%|███████▉          | 295/674 [00:43<01:10,  5.35it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.459 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.078 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.041 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 11366.68it/s][A
🔍 Evaluating Queries:  45%|████████          | 301/674 [00:43<00:44,  8.32it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • libtiff/Libtiff-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • libtiff/libtiff-4.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.025 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.043 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.038 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding di


Scoring: 100%|████████████████████████████| 725/725 [00:00<00:00, 232891.97it/s][A
🔍 Evaluating Queries:  45%|████████          | 304/674 [00:43<00:34, 10.82it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.0000
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.0000
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.0000
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.0000
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG]


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 66576.25it/s][A
🔍 Evaluating Queries:  46%|████████▏         | 307/674 [00:43<00:27, 13.53it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.040 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.046 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.048 sec


🔍 Evaluating Queries:  46%|████████▎         | 310/674 [00:43<00:22, 15.88it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.041 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.020 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 25 hit → 1 candidates
 - LSH returned 3 candidates in 0.038 sec

 [STEP 3] Matching candidate filenames 


Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 41803.69it/s][A
🔍 Evaluating Queries:  46%|████████▎         | 313/674 [00:44<00:19, 18.46it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CPL-0.5/CPL-0.5-200.txt → Score: 0.7295
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-2.txt → Score: 0.8849
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-2_Split_SPDX_Licenses.txt → Score: 0.8849

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.034 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1233 candidates
[DEBUG] Table 2 hit → 1236 candidates
[DEBUG] Table 3 hit → 1236 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 1236 candidates
[DEBUG] Table 6 hit → 1236 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1236 candidates
[DEBUG] Table 9 hit


Scoring: 100%|██████████████████████████| 2900/2900 [00:00<00:00, 196052.38it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-265.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-363_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-188_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-276.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-276_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 25 hit → 479 candidates
 - LSH returned 479 candidates in 0.032 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 479 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 479/479 [00:00<00:00, 146519.23it/s][A
🔍 Evaluating Queries:  47%|████████▍         | 316/674 [00:44<00:27, 12.91it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.9301
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.9301
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.9301
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.9301
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.9301

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.398 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  47%|████████▍         | 318/674 [00:45<00:53,  6.70it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.390 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
 - LSH returned 3 candidates in 0.126 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 3 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 3/3 [00:00<00:00, 6462.72it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NLOD-1.0/NLOD-1.0-155.txt → Score: 1.0689
  • GFDL-1.2/GFDL-1.2-415.txt → Score: 1.0871
  • GFDL-1.3/GFDL-1.3-445.txt → Score: 1.0871

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 21 hit → 1 candidates
 - LSH returned 1 candidates in 0.023 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 5412.01it/s][A
🔍 Evaluating Queries:  47%|████████▌         | 320/674 [00:45<00:46,  7.57it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-369.txt → Score: 1.0359

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 25 hit → 1 candidates
 - LSH returned 1 candidates in 0.457 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 11244.78it/s][A
🔍 Evaluating Queries:  48%|████████▌         | 322/674 [00:45<00:56,  6.20it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.8/OLDAP-2.8-35.txt → Score: 0.5926

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.030 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.055 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 1 hit → 482 candidates
[DEBUG] Table 2 hit → 478 candidates
[DEBUG] Table 3 hit → 478 candidates
[DEBUG] Table 4 hit → 478 candidates
[DEBUG] Table 5 hit → 478 candidates
[DEBUG] Table 6 hit → 478 candidates


Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 131701.25it/s][A
🔍 Evaluating Queries:  48%|████████▋         | 326/674 [00:46<00:41,  8.30it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.061 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.057 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.02

🔍 Evaluating Queries:  49%|████████▊         | 328/674 [00:46<00:49,  6.92it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.383 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.048 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 20 hit → 1 candidates
 - LSH returned 1 candidates in 0.029 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 10010.27it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OpenMarket/OpenMarket-17.txt → Score: 0.8086

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 7 hit → 1 candidates
 - LSH returned 1 candidates in 0.080 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 11650.84it/s][A
🔍 Evaluating Queries:  49%|████████▊         | 331/674 [00:46<00:37,  9.06it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NCGL-UK-2.0/NCGL-UK-20-12.txt → Score: 1.0405

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  50%|████████▉         | 334/674 [00:46<00:29, 11.72it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.051 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.032 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.034 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 1 candidates
 - LSH returned 1 candidates in 0.035 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors fo


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 14122.24it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OASIS/OASIS-64.txt → Score: 1.0574

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  50%|█████████         | 337/674 [00:47<00:24, 13.98it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.047 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.047 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 153 candidates
[DEBUG] Table 3 hit → 153 candidates
[DEBUG] Table 7 hit → 153 candidates
[DEBUG] Table 9 hit → 153 candidates
[DEBUG] Table 10 hit → 153 candidates
[DEBUG] Table 13 hit → 153 candidates
[DEBUG] Table 14 hit → 153 candidates
[DEBUG] Table 16 hit → 153 candidates
[DEBUG] Table 19 hit → 153 candidates
[DEBUG] Table 21 hit → 153 candidates
[DEBUG] Table 23 hit → 15


Scoring: 100%|████████████████████████████| 153/153 [00:00<00:00, 204828.76it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CPL-0.5/CPL-0.5-52.txt → Score: 0.0586
  • CPL-0.5/CPL-0.5-151.txt → Score: 0.0586
  • CPL-0.5/CPL-0.5-84.txt → Score: 0.0586
  • CPL-0.5/CPL-0.5-156.txt → Score: 0.0586
  • CPL-0.5/CPL-0.5-11.txt → Score: 0.0586

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.020 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  51%|█████████         | 341/674 [00:47<00:18, 18.14it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 83 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 1 candidates
[DEBUG] Table 11 hit → 1 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table 13 hit → 1 candidates
[DEBUG] Table 14 hit → 1 candidates
[DEBUG] Table 15 hit → 1 candidates
[DEBUG] Table 16 hit → 1 candidates
[DEBUG] Table 17 hit → 1 candidates
[DEBUG] Ta


Scoring: 100%|██████████████████████████████| 83/83 [00:00<00:00, 126178.77it/s][A
🔍 Evaluating Queries:  51%|█████████▏        | 344/674 [00:47<00:28, 11.71it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-374.txt → Score: 0.0000
  • OLDAP-2.8/OLDAP-2.8-2.txt → Score: 1.1590
  • OLDAP-2.8/OLDAP-2.8-5_Split_SPDX_Licenses.txt → Score: 1.1590
  • OLDAP-2.8/OLDAP-2.8-14_Split_SPDX_Licenses.txt → Score: 1.1590
  • OLDAP-2.8/OLDAP-2.8-11.txt → Score: 1.1590

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.062 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates 

🔍 Evaluating Queries:  51%|█████████▏        | 346/674 [00:48<00:37,  8.81it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.376 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.035 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 2 candidates
 - LSH returned 2 candidates in 0.364 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors f


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 18850.80it/s][A
🔍 Evaluating Queries:  52%|█████████▍        | 353/674 [00:48<00:28, 11.13it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-1.txt → Score: 0.6987
  • MITNFA/MITNFA-2_Split_SPDX_Licenses.txt → Score: 0.6987

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.048 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.018 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimens


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 26296.58it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-245.txt → Score: 0.6348
  • CC-BY-3.0/CC-BY-3.0-157_Split_SPDX_Licenses.txt → Score: 0.6348

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 21 hit → 1 candidates
 - LSH returned 1 candidates in 0.018 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...




Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 16131.94it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-11.txt → Score: 0.6040

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
 - LSH returned 3 candidates in 0.018 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 3 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 30992.39it/s][A
🔍 Evaluating Queries:  53%|█████████▌        | 360/674 [00:48<00:18, 16.77it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 0.7146
  • PostgreSQL/PostgreSQL-1_Split_SPDX_Licenses.txt → Score: 0.8721
  • PostgreSQL/PostgreSQL-2.txt → Score: 0.8721

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.021 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.049 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embeddin

🔍 Evaluating Queries:  54%|█████████▋        | 363/674 [00:48<00:17, 18.27it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.021 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.035 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 210 candidates
[DEBUG] Table 1 hit → 213 candidates
[DEBUG] Table 2 hit → 204 candidates
[DEBUG] Table 3 hit → 201 candidates
[DEBU


Scoring: 100%|████████████████████████████| 219/219 [00:00<00:00, 180675.17it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • VIM/Vim-30_Split_SPDX_Licenses.txt → Score: 0.0000
  • VIM/VIM-44_Split_DB_Foss_Licenses.txt → Score: 0.0000
  • VIM/Vim-25_Split_SPDX_Licenses.txt → Score: 0.0000
  • VIM/VIM-12_Split_DB_Foss_Licenses.txt → Score: 0.0000
  • VIM/Vim-23_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 6 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG] Table 14 hit → 6 c


Scoring: 100%|█████████████████████████████████| 6/6 [00:00<00:00, 65879.12it/s][A
🔍 Evaluating Queries:  54%|█████████▊        | 366/674 [00:49<00:15, 19.49it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-20-32.txt → Score: 0.0000
  • ZPL-2.0/ZPL-2.0-38.txt → Score: 0.0000
  • ZPL-2.1/ZPL-2.1-25.txt → Score: 0.0000
  • ZPL-2.1/ZPL-21-32.txt → Score: 0.0000
  • Intel/Intel-22.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  55%|█████████▊        | 369/674 [00:49<00:15, 20.28it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.050 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.050 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 582 candidates
[DEBUG] Table 10 hit → 582 candidates
[DEBUG] Table 17 hit → 582 candidates
[DEBUG] Table 20 hit → 582 candidates
[DEBUG] Table 23 hit → 3 candidates
 - LSH returned 585 candidates in 0.037 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 585 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 585/585 [00:00<00:00, 192807.47it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-123_Split_SPDX_Licenses.txt → Score: 0.1466
  • OSL-2.1/OSL-2.1-85_Split_DB_Foss_Licenses.txt → Score: 0.1466
  • OSL-2.1/OSL-2.1-79_Split_DB_Foss_Licenses.txt → Score: 0.1466
  • OSL-2.1/OSL-2.1-152_Split_SPDX_Licenses.txt → Score: 0.1466
  • OSL-2.1/OSL-2.1-178.txt → Score: 0.1466

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  55%|█████████▉        | 372/674 [00:49<00:14, 21.10it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.061 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 23 hit → 3 candidates
[DEBUG] Table 27 hit → 2 candidates
 - LSH returned 5 candidates in 0.040 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 5 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 13626.72it/s][A
🔍 Evaluating Queries:  56%|██████████        | 375/674 [00:49<00:13, 22.76it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-3.txt → Score: 0.9515
  • Imlib2/Imlib2-17.txt → Score: 0.9515
  • MITNFA/MITNFA-3_Split_SPDX_Licenses.txt → Score: 0.9515
  • ZPL-2.1/ZPL-21-35.txt → Score: 0.9604
  • ZPL-2.1/ZPL-2.1-28.txt → Score: 0.9604

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.040 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.023 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 11 hit → 2 candidates
[DEBUG] Table 15 hit → 4 candidates
[DEBUG]


Scoring: 100%|██████████████████████████████| 80/80 [00:00<00:00, 176509.37it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27_Split_SPDX_Licenses.txt → Score: 0.4155
  • OLDAP-2.6/OLDAP-2.6-6.txt → Score: 0.4155
  • OLDAP-2.6/OLDAP-2.6-1_Split_SPDX_Licenses.txt → Score: 0.4310
  • OLDAP-2.6/OLDAP-2.6-22.txt → Score: 0.4310
  • OLDAP-2.6/OLDAP-2.6-13.txt → Score: 0.4439

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 23 hit → 2 candidates
 - LSH returned 2 candidates in 0.377 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 30727.50it/s][A
🔍 Evaluating Queries:  56%|██████████        | 378/674 [00:49<00:22, 13.30it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • gnu-javamail-exception/gnu-javamail-exception-2_Split_SPDX_Licenses.txt → Score: 1.0308
  • gnu-javamail-exception/gnu-javamail-exception-1.txt → Score: 1.0308

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.039 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.024 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
 - LSH returned 3 candidates in 0.371 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 


Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 29959.31it/s][A
🔍 Evaluating Queries:  56%|██████████▏       | 380/674 [00:50<00:30,  9.76it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 0.7531
  • PostgreSQL/PostgreSQL-1_Split_SPDX_Licenses.txt → Score: 0.9259
  • PostgreSQL/PostgreSQL-2.txt → Score: 0.9259

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 13 hit → 42 candidates
[DEBUG] Table 15 hit → 44 candidates
[DEBUG] Table 20 hit → 2 candidates
[DEBUG] Table 26 hit → 2 candidates
 - LSH returned 88 candidates in 0.046 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 88 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████| 88/88 [00:00<00:00, 156199.22it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27.txt → Score: 0.3601
  • OLDAP-2.6/OLDAP-2.6-3_Split_SPDX_Licenses.txt → Score: 0.3601
  • OLDAP-2.6/OLDAP-2.6-25.txt → Score: 0.3601
  • OLDAP-2.6/OLDAP-2.6-24.txt → Score: 0.3601
  • OLDAP-2.6/OLDAP-2.6-26.txt → Score: 0.3601

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 21 hit → 5 candidates
 - LSH returned 5 candidates in 0.374 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 5 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 43509.38it/s][A
🔍 Evaluating Queries:  57%|██████████▏       | 383/674 [00:50<00:34,  8.42it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-20-32.txt → Score: 0.5198
  • ZPL-2.0/ZPL-2.0-38.txt → Score: 0.5198
  • ZPL-2.1/ZPL-2.1-25.txt → Score: 0.5198
  • ZPL-2.1/ZPL-21-32.txt → Score: 0.5198
  • Intel/Intel-22.txt → Score: 0.5198

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 5 hit → 2 candidates
[DEBUG] Table 9 hit → 4 candidates
[DEBUG] Table 11 hit → 2 candidates
[DEBUG] Table 12 hit → 64 candidates
[DEBUG] Table 14 hit → 2 candidates
[DEBUG] Table 15 hit → 2 candidates
[DEBUG] Table 16 hit → 66 candidates
[DEBUG] Table 17 hit → 62 candidates
[DEBUG] Table 18 hit → 104 candidates
[DEBUG] Table 19 hit → 6 candidates
[DEBUG] Table 22 hit → 64 candidates
 - LSH returned 118 candidates in 0.030 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 118 vectors fo


Scoring: 100%|████████████████████████████| 118/118 [00:00<00:00, 125393.43it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.1/OLDAP-2.1-12_Split_SPDX_Licenses.txt → Score: 0.1477
  • OLDAP-2.1/OLDAP-2.1-30.txt → Score: 0.1477
  • OLDAP-2.1/OLDAP-2.1-24_Split_SPDX_Licenses.txt → Score: 0.1495
  • OLDAP-2.1/OLDAP-2.1-36_Split_SPDX_Licenses.txt → Score: 0.1495
  • OLDAP-2.1/OLDAP-2.1-24.txt → Score: 0.1495

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.048 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  57%|██████████▎       | 386/674 [00:51<00:37,  7.70it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.372 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.372 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 83 candidates
 - LSH returned 83 candidates in 0.359 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 83 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████| 83/83 [00:00<00:00, 163286.69it/s][A
🔍 Evaluating Queries:  58%|██████████▎       | 388/674 [00:51<00:52,  5.40it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27.txt → Score: 0.9259
  • OLDAP-2.6/OLDAP-2.6-3_Split_SPDX_Licenses.txt → Score: 0.9259
  • OLDAP-2.6/OLDAP-2.6-25.txt → Score: 0.9259
  • OLDAP-2.6/OLDAP-2.6-24.txt → Score: 0.9259
  • OLDAP-2.6/OLDAP-2.6-26.txt → Score: 0.9259

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.034 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.031 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 182536.86it/s][A
🔍 Evaluating Queries:  58%|██████████▍       | 393/674 [00:52<00:37,  7.40it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.063 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.067 sec
 No candidates found. Possibly due to hash size or vec


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 13486.51it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CPL-0.5/CPL-0.5-195.txt → Score: 1.1267

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.021 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  59%|██████████▋       | 398/674 [00:53<00:33,  8.22it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.381 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.103 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.051 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  60%|██████████▋       | 402/674 [00:53<00:35,  7.65it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.452 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.053 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.075 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.025 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer

🔍 Evaluating Queries:  60%|██████████▊       | 406/674 [00:54<00:32,  8.16it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.364 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
[DEBUG] Table 7 hit → 2 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 11 hit → 1 candidates
[DEBUG] Table 13 hit → 1 candidates
[DEBUG] Table 20 hit → 1 candidates
[DEBUG] Table 22 hit → 1 candidates
[DEBUG] Table 23 hit → 1 candidates
[DEBUG] Table 25 hit → 1 candidates
[DEBUG] Table 27 hit → 1 candidates
[DEBUG] Table 29 hit → 1 candidates
 - LSH returned 5 candidates in 0.377 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 5 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 45392.90it/s][A
🔍 Evaluating Queries:  61%|██████████▉       | 408/674 [00:54<00:38,  7.00it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 0.1403
  • Imlib2/Imlib2-20.txt → Score: 0.2742
  • ZPL-2.1/ZPL-2.1-32.txt → Score: 0.5665
  • PostgreSQL/PostgreSQL-2.txt → Score: 0.9387
  • PostgreSQL/PostgreSQL-1_Split_SPDX_Licenses.txt → Score: 0.9387

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.035 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No

🔍 Evaluating Queries:  61%|███████████       | 412/674 [00:54<00:33,  7.75it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.364 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  61%|███████████       | 413/674 [00:55<00:41,  6.27it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.370 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.057 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 26 hit → 1 candidates
 - LSH returned 1 candidates in 0.051 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 15196.75it/s][A
🔍 Evaluating Queries:  62%|███████████       | 415/674 [00:55<00:34,  7.58it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-412.txt → Score: 1.0317

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.032 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 1 candidates
[DEBUG] Table 11 hit → 1 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table 13 hit → 1 candidates
[DEBUG] T


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 11125.47it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-with-bison-exception/GPL-3.0-with-bison-exception-7.txt → Score: 0.0000

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 4 candidates
[DEBUG] Table 3 hit → 3 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 10 hit → 326 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] Table 14 hit → 326 candidates
[DEBUG] Table 15 hit → 327 candidates
[DEBUG] Table 16 hit → 1648 candidates
[DEBUG] Table 17 hit → 2889 candidates
[DEBUG] Table 19 hit → 1649 candidates
[DEBUG] Table 20 hit → 1653 candidates
[DEBUG] Table 21 hit → 4 candidates
[DEBUG] Table 22 hit → 4 candidates
[DEBUG] Table 23 hit → 1648 candidates
[DEBUG] Table 25 hit → 1246 candidates
[DEBUG] Table 28 hit → 1653 candidates
[DEBUG] Table 29 hit → 1563 candidates
 - LSH returned 2897 candidates in 0.034 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2897 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████| 2897/2897 [00:00<00:00, 201063.97it/s][A
🔍 Evaluating Queries:  62%|███████████▏      | 418/674 [00:55<00:31,  8.23it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.1170
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.1170
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.1170
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.1170
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.1170

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.080 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
 - LSH returned 3 candidates in 0.025 sec

 [STEP 3] Matching


Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 34007.87it/s][A
🔍 Evaluating Queries:  62%|███████████▏      | 420/674 [00:55<00:26,  9.60it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • SCEA/SCEA-41_Split_SPDX_Licenses.txt → Score: 1.0656
  • SCEA/SCEA-114.txt → Score: 1.0656
  • SCEA/SCEA-119.txt → Score: 1.1228

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 331 candidates
[DEBUG] Table 2 hit → 326 candidates
[DEBUG] Table 3 hit → 326 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 326 candidates
[DEBUG] Table 6 hit → 327 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] Table 14 hit → 326 candidates
[DEBUG] Table 15 hit → 327 candidates
[DEBUG] Table 16 hit → 1648 candidates
[DEBUG] Table 17 hit → 


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 198775.33it/s][A
🔍 Evaluating Queries:  63%|███████████▎      | 422/674 [00:56<00:27,  9.04it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vec

🔍 Evaluating Queries:  63%|███████████▍      | 426/674 [00:56<00:28,  8.67it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.401 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1322 candidates
[DEBUG] Table 2 hit → 1323 candidates
[DEBUG] Table 3 hit → 1326 candidates
[DEBUG] Table 4 hit → 1322 candidates
[DEBUG] Table 5 hit → 1322 candidates
[DEBUG] Table 6 hit → 1326 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 1326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] Table 14 hit → 1322 candidates
[DEBUG] Table 15 hit → 1326 candidates
[DEBUG] Table 16 hit → 1648 candida


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 221872.49it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 24 hit → 718 candidates
 - LSH returned 718 candidates in 0.047 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 718 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 718/718 [00:00<00:00, 221532.31it/s][A
🔍 Evaluating Queries:  64%|███████████▍      | 428/674 [00:56<00:31,  7.90it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.5917
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.5917
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.5917
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.5917
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.5917

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 26 hit → 718 candidates
 - LSH returned 720 candidates in 0.057 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 720 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 720/720 [00:00<00:00, 202827.52it/s][A
🔍 Evaluating Queries:  64%|███████████▍      | 429/674 [00:57<00:30,  8.11it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.3153
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.3153
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.3153
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.3153
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.3153

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 331 candidates
[DEBUG] Table 2 hit → 326 candidates
[DEBUG] Table 3 hit → 326 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 326 candidates
[DEBUG] Table 6 hit → 327 can


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 226078.29it/s][A
🔍 Evaluating Queries:  64%|███████████▌      | 431/674 [00:57<00:30,  7.96it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.049 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
 - LSH returned 1 candidates in 0.051 sec

 [STEP 3] Matching


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 16578.28it/s][A
🔍 Evaluating Queries:  64%|███████████▌      | 433/674 [00:57<00:25,  9.64it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 1.0845

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1233 candidates
[DEBUG] Table 2 hit → 1236 candidates
[DEBUG] Table 3 hit → 1236 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 1236 candidates
[DEBUG] Table 6 hit → 1236 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1236 candidates
[DEBUG] Table 9 hit → 1236 candidates
[DEBUG] Table 10 hit → 1236 candidates
[DEBUG] Table 11 hit → 1233 candidates
[DEBUG] Table 12 hit → 1233 candidates
[DEBUG] Table 13 hit → 1242 candidates
[DEBUG] Table 14 hit → 1236 candidates
[DEBUG] Table 15 hit → 1236 candidates
[DEBUG] Table 16 hit → 1236 candidates
[DEBUG] Table 17 hit → 2889 candidates
[DEBUG] Table 18 hit → 2555 candidates
[DEBUG] Ta


Scoring: 100%|██████████████████████████| 2900/2900 [00:00<00:00, 224592.52it/s][A
🔍 Evaluating Queries:  65%|███████████▌      | 435/674 [00:57<00:26,  9.19it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-265.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-363_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-188_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.3-invariants-or-later/GFDL-1.3-invariants-or-later-276.txt → Score: 0.0000
  • GFDL-1.3/GFDL-1.3-276_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 238 candidates
[DEBUG] Table 1 hit → 472 candidates
[DEBUG] Table 2 hit → 472 candidates
[DEBUG] Table 3 hit → 472 candidates
[DEBUG] Table 4 hit → 472 candidates
[DEBUG] Table 5 hit → 472 candidates
[


Scoring: 100%|████████████████████████████| 472/472 [00:00<00:00, 193520.18it/s][A
🔍 Evaluating Queries:  65%|███████████▋      | 437/674 [00:57<00:21, 10.86it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • YPL-1.0/YPL-1.0-23_Split_SPDX_Licenses.txt → Score: 0.0000
  • YPL-1.0/YPL-1.0-44_Split_SPDX_Licenses.txt → Score: 0.0000
  • YPL-1.0/YPL-1.0-19_Split_SPDX_Licenses.txt → Score: 0.0000
  • YPL-1.0/YPL-1.0-85.txt → Score: 0.0000
  • YPL-1.0/YPL-1.0-30.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.042 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 19 hit → 718 candidates
 - LSH returned 718 candidates in 0.054 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 718 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 718/718 [00:00<00:00, 218510.40it/s][A
🔍 Evaluating Queries:  65%|███████████▋      | 439/674 [00:57<00:19, 11.87it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.5631
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.5631
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.5631
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.5631
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.5631

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.031 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  66%|███████████▊      | 442/674 [00:57<00:15, 15.36it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.050 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.021 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.034 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 1.878 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 201814.47it/s][A
🔍 Evaluating Queries:  66%|███████████▉      | 447/674 [01:00<00:51,  4.42it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.039 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.046 sec
 No candidates found. Possibly due to hash size or vec


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 15650.39it/s][A
🔍 Evaluating Queries:  67%|████████████      | 454/674 [01:00<00:25,  8.67it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Ferris-1.2/Ferris-1.2-383.txt → Score: 1.0046

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.063 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.024 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH 

🔍 Evaluating Queries:  68%|████████████▏     | 457/674 [01:00<00:19, 10.96it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.061 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 21 candidates
[DEBUG] Table 1 hit → 51 candidates
[DEBUG] Table 2 hit → 21 candidates
[DEBUG] Table 3 hit → 50 candidates
[DEBUG] Table 4 hit → 47 candidates
[DEBUG] Table 5 hit → 20 candidates
[DEBUG] Table 6 hit → 49 candidates
[DEBUG] Table 7 hit → 47 candidates
[DEBUG] Table 8 hit → 87 candidates
[DEBUG] Table 9 hit → 49 candidates
[DEBUG] Table 10 hit → 22 candidates
[DEBUG] Table 11 hit → 53 candidates
[DEBUG] Table 12 hit → 50 candidates
[DEBUG] Table 13 hit → 23 candidates
[DEBUG] Table 14 hit → 49 candidates
[DEBUG] Table 15 hit → 21 candidates
[DEBUG] Table 16 hit → 23 candidates
[DEBUG] Table 17 hit → 21 cand


Scoring: 100%|██████████████████████████████| 92/92 [00:00<00:00, 193810.13it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-2.0-13_Split_SPDX_Licenses.txt → Score: 0.0000
  • ZPL-2.0/ZPL-2.0-16_Split_SPDX_Licenses.txt → Score: 0.0000
  • ZPL-2.0/ZPL-2.0-1_Split_SPDX_Licenses.txt → Score: 0.0000
  • ZPL-2.0/ZPL-2.0-27_Split_SPDX_Licenses.txt → Score: 0.0000
  • ZPL-2.0/ZPL-2.0-9_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 2 candidates
 - LSH returned 2 candidates in 0.019 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 25653.24it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • PostgreSQL/PostgreSQL-1_Split_SPDX_Licenses.txt → Score: 1.0706
  • PostgreSQL/PostgreSQL-2.txt → Score: 1.0706

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  68%|████████████▎     | 460/674 [01:00<00:22,  9.50it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.365 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2 candidates
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 2 hit → 2 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 5 hit → 2 candidates
[DEBUG] Table 6 hit → 2 candidates
[DEBUG] Table 7 hit → 2 candidates
[DEBUG] Table 8 hit → 2 candidates
[DEBUG] Table 9 hit → 2 candidates
[DEBUG] Table 10 hit → 2 candidates
[DEBUG] Table 11 hit → 2 candidates
[DEBUG] Table 12 hit → 2 candidates
[DEBUG] Table 13 hit → 2 candidates
[DEBUG] Table 14 hit → 2 candidates
[DEBUG] Table 15 hit → 2 candidates
[DEBUG] Table 16 hit → 2 candidates
[DEBUG] Table 17 hit → 2 candidates
[DEBUG] Tab


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 22429.43it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Zed/Zed-1.txt → Score: 0.0000
  • Zed/Zed-1_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  69%|████████████▎     | 462/674 [01:01<00:27,  7.81it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.365 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 3 candidates
[DEBUG] Table 1 hit → 4 candidates
[DEBUG] Table 2 hit → 3 candidates
[DEBUG] Table 3 hit → 3 candidates
[DEBUG] Table 4 hit → 3 candidates
[DEBUG] Table 5 hit → 3 candidates
[DEBUG] Table 6 hit → 3 candidates
[DEBUG] Table 7 hit → 3 candidates
[DEBUG] Table 8 hit → 3 candidates
[DEBUG] Table 9 hit → 3 candidates
[DEBUG] Table 10 hit → 3 candidates
[DEBUG] Table 11 hit → 3 candidates
[DEBUG] Table 12 hit → 3 candidates
[DEBUG] Table 13 hit → 3 candidates
[DEBUG] Table 14 hit → 3 candidates
[DEBUG] Table 15 hit → 3 candidates
[DEBUG] Table 16 hit → 3 candidates
[DEBUG] Table 17 hit → 3 candidates
[DEBUG] Tab


Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 45964.98it/s][A
🔍 Evaluating Queries:  69%|████████████▍     | 464/674 [01:01<00:31,  6.67it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Imlib2/Imlib2-17.txt → Score: 0.0000
  • MITNFA/MITNFA-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • MITNFA/MITNFA-3.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-498.txt → Score: 0.7797

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.057 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 20 hit → 479 candidates
 - LSH returned 479 candidates in 0.031 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 479 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 479/479 [00:00<00:00, 219810.90it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.3580
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.3580

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 3 candidates
[DEBUG] Table 7 hit → 1 candidates
 - LSH returned 4 candidates in 0.020 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 37365.74it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Imlib2/Imlib2-18.txt → Score: 0.6553
  • MITNFA/MITNFA-6_Split_SPDX_Licenses.txt → Score: 0.7721
  • PostgreSQL/PostgreSQL-1_Split_SPDX_Licenses.txt → Score: 0.9694
  • PostgreSQL/PostgreSQL-2.txt → Score: 0.9694

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
 - LSH returned 2 candidates in 0.020 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 25653.24it/s][A
🔍 Evaluating Queries:  69%|████████████▍     | 467/674 [01:01<00:23,  8.92it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • SCEA/SCEA-141.txt → Score: 0.7231
  • OLDAP-2.8/OLDAP-2.8-34.txt → Score: 0.8405

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 1 candidates
 - LSH returned 1 candidates in 0.019 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 14979.66it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Intel/Intel-26.txt → Score: 0.8983

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2 candidates
 - LSH returned 2 candidates in 0.020 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 26630.50it/s][A
🔍 Evaluating Queries:  70%|████████████▌     | 470/674 [01:02<00:17, 11.41it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-2.0-24_Split_SPDX_Licenses.txt → Score: 1.0452
  • SCEA/SCEA-130.txt → Score: 1.1600

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.070 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 24 hit → 6 candidates
 - LSH returned 484 candidates in 0.057 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 484 vectors for compari


Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 213868.85it/s][A
🔍 Evaluating Queries:  70%|████████████▌     | 472/674 [01:02<00:16, 11.97it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.4778

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.031 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.023 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 22 hit → 4 candidates



Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 31775.03it/s][A
🔍 Evaluating Queries:  70%|████████████▋     | 475/674 [01:02<00:13, 14.99it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-207.txt → Score: 0.9782
  • GFDL-1.2/GFDL-1.2-207_Split_SPDX_Licenses.txt → Score: 0.9782
  • GFDL-1.2-only/GFDL-1.2-only-207.txt → Score: 0.9782
  • GFDL-1.2/GFDL-1.2-205.txt → Score: 0.9782

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.058 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  71%|████████████▊     | 478/674 [01:02<00:20,  9.68it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.441 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.038 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 29 hit → 2 candidates
 - LSH returned 2 candidates in 0.055 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 11983.73it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-6.txt → Score: 0.4662
  • Intel/Intel-21.txt → Score: 0.4662

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 13 hit → 112 candidates
[DEBUG] Table 24 hit → 1 candidates
 - LSH returned 113 candidates in 0.054 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 113 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 113/113 [00:00<00:00, 192196.41it/s][A
🔍 Evaluating Queries:  71%|████████████▊     | 480/674 [01:02<00:18, 10.76it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-61.txt → Score: 0.6074
  • LiLiQ-P-1.1/LiLiQ-P-1.1-30.txt → Score: 0.6299
  • LiLiQ-P-1.1/LiLiQ-P-1.1-8_Split_SPDX_Licenses.txt → Score: 0.6299
  • LiLiQ-P-1.1/LiLiQ-P-1.1-41.txt → Score: 0.6299
  • LiLiQ-P-1.1/LiLiQ-P-1.1-49.txt → Score: 0.6299

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  72%|████████████▊     | 482/674 [01:03<00:23,  8.29it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.374 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.031 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.021 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|█████████████████████████████████| 3/3 [00:00<00:00, 16491.37it/s][A
🔍 Evaluating Queries:  72%|████████████▉     | 486/674 [01:03<00:22,  8.18it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.8/OLDAP-2.8-36.txt → Score: 0.0000
  • OLDAP-2.6/OLDAP-2.6-38.txt → Score: 0.1169
  • OLDAP-2.1/OLDAP-2.1-47.txt → Score: 0.5709

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 22 hit → 1 candidates
 - LSH returned 1 candidates in 0.035 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 12018.06it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-386.txt → Score: 1.0687

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.047 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 718 candidates
[DEBUG] Table 3 hit → 4 candidates
[DEBUG] Table 17 hit → 718 candidates
[DEBUG] Table 18 hit → 2 candidates
 - LSH returned 724 candidates in 0.051 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 724 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 724/724 [00:00<00:00, 157970.98it/s][A
🔍 Evaluating Queries:  73%|█████████████     | 489/674 [01:04<00:19,  9.63it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.2056
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.2056
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.2056
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.2056
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.2056

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 5 hit → 3 candidates
[DEBUG] Table 29 hit → 2 candidates
 - LSH returned 5 candidates in 0.030 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 5 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 24995.85it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • VIM/VIM-54.txt → Score: 0.9211
  • VIM/Vim-19_Split_SPDX_Licenses.txt → Score: 0.9211
  • VIM/VIM-54_Split_DB_Foss_Licenses.txt → Score: 0.9211
  • Intel/Intel-8.txt → Score: 0.9381
  • Intel/Intel-3_Split_SPDX_Licenses.txt → Score: 0.9388

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  73%|█████████████▏    | 492/674 [01:06<00:54,  3.35it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 2.158 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 582 candidates
[DEBUG] Table 1 hit → 582 candidates
[DEBUG] Table 19 hit → 582 candidates
 - LSH returned 582 candidates in 0.084 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 582 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████| 582/582 [00:00<00:00, 69574.33it/s][A
🔍 Evaluating Queries:  73%|█████████████▏    | 493/674 [01:06<00:49,  3.66it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-123_Split_SPDX_Licenses.txt → Score: 0.1937
  • OSL-2.1/OSL-2.1-85_Split_DB_Foss_Licenses.txt → Score: 0.1937
  • OSL-2.1/OSL-2.1-79_Split_DB_Foss_Licenses.txt → Score: 0.1937
  • OSL-2.1/OSL-2.1-152_Split_SPDX_Licenses.txt → Score: 0.1937
  • OSL-2.1/OSL-2.1-178.txt → Score: 0.1937

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  73%|█████████████▏    | 494/674 [01:06<00:45,  3.97it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.132 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 9 hit → 2 candidates
 - LSH returned 2 candidates in 0.042 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 12748.64it/s][A
🔍 Evaluating Queries:  74%|█████████████▎    | 497/674 [01:06<00:29,  5.90it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-4.txt → Score: 0.9760
  • MITNFA/MITNFA-4_Split_SPDX_Licenses.txt → Score: 0.9760

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.050 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1322 candidates
[DEBUG] Table 2 hit → 1323 candidates
[DEBUG] Table 3 hit → 1326 candidates
[DEBUG] Table 4 hit → 1322 candidates
[DEBUG] Table 5 hit → 1322 candidates
[DEBUG] Table 6 hit → 1326 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 1326 candidates
[DEBUG] Table 11 hit → 1653 candid


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 175400.15it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 17 hit → 1 candidates
[DEBUG] Table 20 hit → 6 candidates
[DEBUG] Table 21 hit → 1 candidates
[DEBUG] Table 24 hit → 2 candidates
 - LSH returned 10 candidates in 0.060 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 10 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████| 10/10 [00:00<00:00, 40485.56it/s][A
🔍 Evaluating Queries:  74%|█████████████▍    | 501/674 [01:07<00:25,  6.69it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27_Split_SPDX_Licenses.txt → Score: 0.8734
  • OLDAP-2.6/OLDAP-2.6-6.txt → Score: 0.8734
  • AAL/AAL-2.txt → Score: 0.8943
  • AAL/AAL-6_Split_SPDX_Licenses.txt → Score: 0.8943
  • CPL-0.5/CPL-0.5-172.txt → Score: 0.9228

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.046 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.059 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 1 hit → 4


Scoring: 100%|████████████████████████████| 959/959 [00:00<00:00, 234812.47it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-234_Split_SPDX_Licenses.txt → Score: 0.0000
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-301_Split_SPDX_Licenses.txt → Score: 0.0000
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-339_Split_SPDX_Licenses.txt → Score: 0.0000
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-271_Split_SPDX_Licenses.txt → Score: 0.0000
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-236_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  75%|█████████████▌    | 506/674 [01:07<00:21,  7.94it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.404 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.034 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.020 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.074 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|████████████████████████████| 722/722 [00:00<00:00, 166783.47it/s][A
🔍 Evaluating Queries:  76%|█████████████▋    | 511/674 [01:08<00:14, 10.90it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.2238
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.2238
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.2238
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.2238
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.2238

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.070 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.062 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBU


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 13315.25it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-11.txt → Score: 0.9711

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 1 candidates
 - LSH returned 1 candidates in 0.023 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 13706.88it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-11.txt → Score: 0.9711

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 18 candidates
[DEBUG] Table 1 hit → 18 candidates
[DEBUG] Table 2 hit → 18 candidates
[DEBUG] Table 3 hit → 20 candidates
[DEBUG] Table 4 hit → 19 candidates
[DEBUG] Table 5 hit → 36 candidates
[DEBUG] Table 6 hit → 18 candidates
[DEBUG] Table 7 hit → 37 candidates
[DEBUG] Table 8 hit → 18 candidates
[DEBUG] Table 9 hit → 20 candidates
[DEBUG] Table 10 hit → 36 candidates
[DEBUG] Table 11 hit → 36 candidates
[DEBUG] Table 12 hit → 36 candidates
[DEBUG] Table 13 hit → 19 candidates
[DEBUG] Table 14 hit → 37 candidates
[DEBUG] Table 15 hit → 18 candidates
[DEBUG] Table 16 hit → 19 candidates
[DEBUG] Table 17 hit → 19 candidates
[DEBUG] Table 18 hit → 18 candidates
[DEBUG] Table 19 hit → 18 candidates
[DEBUG


Scoring: 100%|███████████████████████████████| 38/38 [00:00<00:00, 90302.30it/s][A
🔍 Evaluating Queries:  76%|█████████████▊    | 515/674 [01:08<00:10, 15.62it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Intel/Intel-19.txt → Score: 0.0000
  • Intel/Intel-4.txt → Score: 0.0000
  • Intel/Intel-17.txt → Score: 0.0000
  • Intel/Intel-6.txt → Score: 0.0000
  • Intel/Intel-7.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.020 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.068 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  77%|█████████████▊    | 518/674 [01:08<00:16,  9.63it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.464 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.046 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.049 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|████████████████████████████| 153/153 [00:00<00:00, 159198.34it/s][A
🔍 Evaluating Queries:  77%|█████████████▉    | 521/674 [01:09<00:18,  8.31it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CPL-0.5/CPL-0.5-52.txt → Score: 0.7122
  • CPL-0.5/CPL-0.5-151.txt → Score: 0.7122
  • CPL-0.5/CPL-0.5-84.txt → Score: 0.7122
  • CPL-0.5/CPL-0.5-156.txt → Score: 0.7122
  • CPL-0.5/CPL-0.5-11.txt → Score: 0.7122

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 20 hit → 6 candidates
 - LSH returned 6 candidates in 0.028 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 6 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 6/6 [00:00<00:00, 48865.68it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27_Split_SPDX_Licenses.txt → Score: 0.8306
  • OLDAP-2.6/OLDAP-2.6-6.txt → Score: 0.8306
  • OLDAP-2.6/OLDAP-2.6-23.txt → Score: 0.8638
  • OLDAP-2.6/OLDAP-2.6-6_Split_SPDX_Licenses.txt → Score: 0.8638
  • AAL/AAL-2.txt → Score: 0.9023

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  78%|█████████████▉    | 523/674 [01:09<00:22,  6.67it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.473 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  78%|██████████████    | 525/674 [01:10<00:25,  5.76it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.453 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.039 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 13842.59it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-with-bison-exception/GPL-3.0-with-bison-exception-8.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  78%|██████████████    | 528/674 [01:10<00:25,  5.65it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.454 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.061 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 1322 candidates
[DEBUG] Table 2 hit → 1323 candidates
[DEBUG] Table 3 hit → 1326 candidates
[DEBUG] Table 4 hit → 1322 candidates
[DEBUG] Table 5 hit → 1322 candidates
[DEBUG] Table 6 hit → 1326 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit 


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 189097.13it/s][A
🔍 Evaluating Queries:  79%|██████████████▏   | 530/674 [01:11<00:24,  5.80it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 212 candidates
[DEBUG] Table 1 hit → 212 candidates
[DEBUG] Table 2 hit → 212 candidates
[DEBUG] Table 3 hit → 212 candidates
[DEBUG] Table 4 hit → 212 candidates
[DEBUG] Table 5 hit → 212 candidates
[DEBUG] Table 6 hit → 212 candidates
[DEBUG] Table 7 hit → 212 candidates
[DEBUG] Table 8 hit → 212 candidates
[DEBUG] Table 9 hit → 212 candidates
[DEBUG] Table 10 hit → 212 candidates
[DEBUG] Table 11 hit → 216 candidates
[DEBUG] Table 12 hit → 212 


Scoring: 100%|████████████████████████████| 216/216 [00:00<00:00, 176774.57it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • NLOD-1.0/NLOD-1.0-16.txt → Score: 0.0000
  • NLOD-1.0/NLOD-1.0-64_Split_SPDX_Licenses.txt → Score: 0.0000
  • NLOD-1.0/NLOD-1.0-97_Split_SPDX_Licenses.txt → Score: 0.0000
  • NLOD-1.0/NLOD-1.0-85.txt → Score: 0.0000
  • NLOD-1.0/NLOD-1.0-94.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.041 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  80%|██████████████▎   | 536/674 [01:11<00:17,  7.74it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.457 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.036 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.050 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.016 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Quer


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 25970.92it/s][A
🔍 Evaluating Queries:  80%|██████████████▎   | 538/674 [01:11<00:15,  8.88it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-216.txt → Score: 1.0927
  • OSL-2.1/OSL-2.1-216_Split_DB_Foss_Licenses.txt → Score: 1.0927

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 62 candidates
[DEBUG] Table 1 hit → 62 candidates
[DEBUG] Table 2 hit → 62 candidates
[DEBUG] Table 3 hit → 62 candidates
[DEBUG] Table 4 hit → 60 candidates
[DEBUG] Table 5 hit → 66 candidates
[DEBUG] Table 6 hit → 62 candidates
[DEBUG] Table 7 hit → 62 candidates
[DEBUG] Table 8 hit → 62 candidates
[DEBUG] Table 9 hit → 62 candidates
[DEBUG] Table 10 hit → 62 candidates
[DEBUG] Table 11 hit → 60 candidates
[DEBUG] Table 12 hit → 64 candidates
[DEBUG] Table 13 hit → 60 candidates
[DEBUG] Table 14 hit → 64 candidates
[DEBUG] Table 15 hit → 62 candidates
[DEBUG] Table 16 hit → 66 candidates
[DEBUG] Table 17 hit → 62 candidates
[DEBUG] Table 18 hit → 104 candida


Scoring: 100%|████████████████████████████| 110/110 [00:00<00:00, 173513.89it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.1/OLDAP-2.1-24_Split_SPDX_Licenses.txt → Score: 0.0000
  • OLDAP-2.1/OLDAP-2.1-36_Split_SPDX_Licenses.txt → Score: 0.0000
  • OLDAP-2.1/OLDAP-2.1-24.txt → Score: 0.0000
  • OLDAP-2.1/OLDAP-2.1-6.txt → Score: 0.0000
  • OLDAP-2.1/OLDAP-2.1-18.txt → Score: 0.0000

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  80%|██████████████▍   | 540/674 [01:12<00:19,  6.80it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.458 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.018 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.037 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  81%|██████████████▌   | 543/674 [01:12<00:20,  6.45it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.453 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.031 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.047 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 1 hit → 482 candidates
[DEBUG] Table 2 hit → 478 candidates
[DEBUG] Table 3 hit → 478 candidates
[DEBU


Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 187671.55it/s][A
🔍 Evaluating Queries:  81%|██████████████▌   | 546/674 [01:13<00:15,  8.17it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.0000
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 331 candidates
[DEBUG] Table 2 hit → 326 candidates
[DEBUG] Table 3 hit → 326 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 326 candidates
[DEBUG] Table 6 hit → 327 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[D


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 202326.67it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  81%|██████████████▋   | 548/674 [01:13<00:16,  7.69it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.059 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.053 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  82%|██████████████▋   | 551/674 [01:13<00:12,  9.93it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.050 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 24 hit → 6 candidates
 - LSH returned 484 candidates in 0.034 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 484 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 161178.49it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.4778

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 10 candidates
[DEBUG] Table 1 hit → 13 candidates
[DEBUG] Table 2 hit → 10 candidates
[DEBUG] Table 3 hit → 11 candidates
[DEBUG] Table 4 hit → 10 candidates
[DEBUG] Table 5 hit → 10 candidates
[DEBUG] Table 6 hit → 10 candidates
[DEBUG] Table 7 hit → 10 candidates
[DEBUG] Table 8 hit → 10 candidates
[DEBUG] Table 9 hit → 10 candidates
[DEBUG] Table 10 hit → 11 candidates
[DEBUG] Table 11 hit → 13 candidates
[DEBUG] Table 12 hit → 10 candidates
[DEBUG] Table 13 hit → 11 candidates
[DEBUG] Table 14 hit → 1


Scoring: 100%|███████████████████████████████| 16/16 [00:00<00:00, 98112.37it/s][A
🔍 Evaluating Queries:  82%|██████████████▊   | 553/674 [01:13<00:10, 11.14it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OpenMarket/OpenMarket-14.txt → Score: 0.0000
  • OpenMarket/OpenMarket-4.txt → Score: 0.0000
  • OpenMarket/OpenMarket-1.txt → Score: 0.0000
  • OpenMarket/OpenMarket-15.txt → Score: 0.0000
  • OpenMarket/OpenMarket-10.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.030 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.045 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  82%|██████████████▊   | 556/674 [01:13<00:08, 13.61it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.052 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.052 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  83%|██████████████▉   | 558/674 [01:13<00:08, 14.23it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.069 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.039 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.035 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 4 hit → 2 candidates
 - LSH returned 2 candidates in 0.086 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors fo


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 11831.61it/s][A
🔍 Evaluating Queries:  83%|██████████████▉   | 561/674 [01:14<00:07, 15.42it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-327.txt → Score: 0.9640
  • CC-BY-SA-3.0-AT/CC-BY-SA-3.0-AT-260_Split_SPDX_Licenses.txt → Score: 0.9640

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 12 hit → 718 candidates
[DEBUG] Table 17 hit → 4 candidates
 - LSH returned 722 candidates in 0.032 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 722 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 722/722 [00:00<00:00, 123041.10it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-216.txt → Score: 0.4306
  • CC-BY-3.0/CC-BY-3.0-337_Split_SPDX_Licenses.txt → Score: 0.4306
  • CC-BY-3.0/CC-BY-3.0-215.txt → Score: 0.4497
  • CC-BY-3.0/CC-BY-3.0-67_Split_SPDX_Licenses.txt → Score: 0.4497
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.4908

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  84%|███████████████   | 563/674 [01:14<00:09, 11.70it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.190 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  84%|███████████████   | 565/674 [01:14<00:16,  6.79it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.609 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 9 candidates
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 2 hit → 4 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 4 candidates
[DEBUG] Table 5 hit → 2 candidates
[DEBUG] Table 6 hit → 2 candidates
[DEBUG] Table 7 hit → 6 candidates
[DEBUG] Table 8 hit → 2 candidates
[DEBUG] Table 9 hit → 4 candidates
[DEBUG] Table 10 hit → 2 candidates
[DEBUG] Table 1


Scoring: 100%|███████████████████████████████| 14/14 [00:00<00:00, 99189.62it/s][A
🔍 Evaluating Queries:  84%|███████████████▏  | 567/674 [01:15<00:19,  5.40it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-1.txt → Score: 0.0000
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-4.txt → Score: 0.1697
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-3.txt → Score: 0.1697
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-1_Split_SPDX_Licenses.txt → Score: 0.1697

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.055 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2 candidates
 - LSH returned 2 candidates in 0.034 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing dista


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 24456.58it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-2.0-24_Split_SPDX_Licenses.txt → Score: 1.0452
  • SCEA/SCEA-130.txt → Score: 1.1600

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.056 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  85%|███████████████▏  | 570/674 [01:16<00:19,  5.33it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.482 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 2 candidates
[DEBUG] Table 29 hit → 2 candidates
 - LSH returned 4 candidates in 0.044 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 10076.41it/s][A
🔍 Evaluating Queries:  85%|███████████████▎  | 573/674 [01:16<00:13,  7.28it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • MITNFA/MITNFA-2.txt → Score: 0.6215
  • MITNFA/MITNFA-1_Split_SPDX_Licenses.txt → Score: 0.6215
  • MITNFA/MITNFA-1.txt → Score: 0.6346
  • MITNFA/MITNFA-2_Split_SPDX_Licenses.txt → Score: 0.6346

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.041 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.037 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.027 sec
 No candidates found. Possibly due to hash size or vector spars

🔍 Evaluating Queries:  85%|███████████████▍  | 576/674 [01:16<00:10,  9.59it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.060 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  86%|███████████████▍  | 579/674 [01:16<00:08, 11.79it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.022 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.061 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.046 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.073 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  86%|███████████████▌  | 581/674 [01:16<00:07, 11.87it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.091 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.069 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 230 candidates
[DEBUG] Table 1 hit → 232 candidates
[DEBUG] Table 2 hit → 230 candidates
[DEBUG] Table 3 hit → 232 candidates
[DEBUG] Table 4 hit → 230 candidates
[DEBUG] Table 5 hit → 230 candidates
[DEBUG] Table 6 hit → 230 candidates
[DEBUG] Table 7 hit → 230 candidates
[DEBUG] Table 8 hit → 230 candidates
[DEBUG] Table 9 hit → 230 candidates
[DEBUG] Table 10 hit → 230 cand


Scoring: 100%|████████████████████████████| 234/234 [00:00<00:00, 181989.09it/s][A
🔍 Evaluating Queries:  86%|███████████████▌  | 583/674 [01:16<00:07, 12.38it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • SCEA/SCEA-61.txt → Score: 0.0000
  • SCEA/SCEA-26_Split_SPDX_Licenses.txt → Score: 0.0000
  • SCEA/SCEA-101.txt → Score: 0.0000
  • SCEA/SCEA-40.txt → Score: 0.0000
  • SCEA/SCEA-108_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG] Table 14 hit → 5 candidates
[DEBUG] Table 15 hit → 5 candidates
[DEBUG] 


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 48657.82it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  87%|███████████████▌  | 585/674 [01:17<00:11,  7.43it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.505 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 42 candidates
[DEBUG] Table 1 hit → 41 candidates
[DEBUG] Table 2 hit → 42 candidates
[DEBUG] Table 3 hit → 41 candidates
[DEBUG] Table 4 hit → 41 candidates
[DEBUG] Table 5 hit → 41 candidates
[DEBUG] Table 6 hit → 42 candidates
[DEBUG] Table 7 hit → 42 candidates
[DEBUG] Table 8 hit → 42 candidates
[DEBUG] Table 9 hit → 42 candidates
[DEBUG] Table 10 hit → 41 candidates
[DEBUG] Table 11 hit → 41 candidates
[DEBUG] Table 12 hit → 42 candidates
[DEBUG] Table 13 hit → 42 candidates
[DEBUG] Table 14 hit → 42 candidates
[DEBUG] Table 15 hit → 41 candidates
[DEBUG] Table 16 hit → 41 candidates
[DEBUG] Table 17 hit → 42 cand


Scoring: 100%|██████████████████████████████| 42/42 [00:00<00:00, 143922.20it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OASIS/OASIS-14.txt → Score: 0.0000
  • OASIS/OASIS-29.txt → Score: 0.0000
  • OASIS/OASIS-18.txt → Score: 0.0000
  • OASIS/OASIS-10.txt → Score: 0.0000
  • OASIS/OASIS-7.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.034 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 22 candidates
[DEBUG] Table 1 hit → 22 candidates
[DEBUG] Table 2 hit → 20 candidates
[DEBUG] Table 3 hit → 22 candidates
[DEBUG] Table 4 hit → 22 candidates
[DEBUG] Table 5 hit → 20 candidates
[DEBUG] Table 6 hit → 24 candidates
[DEBUG] Table 7 hit → 22 candidates
[DEBUG] Table 8 hit → 22 candidates
[DEBUG] Table 9 hit → 22 candidates


Scoring: 100%|██████████████████████████████| 28/28 [00:00<00:00, 144988.29it/s][A
🔍 Evaluating Queries:  87%|███████████████▋  | 588/674 [01:17<00:08, 10.19it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Imlib2/Imlib2-3.txt → Score: 0.0000
  • Imlib2/Imlib2-12.txt → Score: 0.0000
  • Imlib2/Imlib2-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • Imlib2/Imlib2-1_Split_SPDX_Licenses.txt → Score: 0.0000
  • Imlib2/Imlib2-13_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.019 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
 - LSH returned 2 candidates in 0.050 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 2 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 17189.77it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-212.txt → Score: 1.1576
  • OSL-2.1/OSL-2.1-212_Split_DB_Foss_Licenses.txt → Score: 1.1576

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 4 candidates
 - LSH returned 4 candidates in 0.020 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 4 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 4/4 [00:00<00:00, 41630.81it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.1/ZPL-21-33.txt → Score: 0.7629
  • ZPL-2.1/ZPL-2.1-26.txt → Score: 0.7629
  • ZPL-2.0/ZPL-2.0-39.txt → Score: 0.7747
  • ZPL-2.0/ZPL-20-33.txt → Score: 0.7747

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  88%|███████████████▊  | 592/674 [01:17<00:06, 13.48it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.063 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.442 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 1 candidates
 - LSH returned 1 candidates in 0.027 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 10866.07it/s][A
🔍 Evaluating Queries:  89%|███████████████▉  | 597/674 [01:18<00:06, 11.51it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.8/OLDAP-2.8-30.txt → Score: 0.7790

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.059 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.021 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Ta


Scoring: 100%|██████████████████████████████| 54/54 [00:00<00:00, 172106.70it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • info-zip/info-zip-20.txt → Score: 0.0000
  • info-zip/Info-ZIP-20_Split_SPDX_Licenses.txt → Score: 0.0000
  • info-zip/info-zip-5.txt → Score: 0.0000
  • info-zip/Info-ZIP-4_Split_SPDX_Licenses.txt → Score: 0.0000
  • info-zip/info-zip-17.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 2885 candidates
[DEBUG] Table 1 hit → 331 candidates
[DEBUG] Table 2 hit → 326 candidates
[DEBUG] Table 3 hit → 326 candidates
[DEBUG] Table 4 hit → 1567 candidates
[DEBUG] Table 5 hit → 326 candidates
[DEBUG] Table 6 hit → 327 candidates
[DEBUG] Table 7 hit → 2885 candidates
[DEBUG] Table 8 hit → 1648 candidates
[DEBUG] Table 9 hit → 1649 candidates
[DEBUG] Table 10 hit → 326 candidates
[DEBUG] Table 11 hit → 1653 candidates
[DEBUG] Table 12 hit → 1656 candidates
[DEBUG] Table 13 hit → 1652 candidates
[DEBUG] 




  - Found 2889 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 156627.69it/s][A
🔍 Evaluating Queries:  89%|███████████████▉  | 599/674 [01:18<00:07, 10.32it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-7.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-190.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-40.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-110.txt → Score: 0.0000
  • GFDL-1.1-no-invariants-or-later/GFDL-1.1-no-invariants-or-later-270.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.026 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.048 sec
 No candidates found. Possibly due to hash size or vec


Scoring: 100%|██████████████████████████| 2889/2889 [00:00<00:00, 222606.17it/s][A
🔍 Evaluating Queries:  89%|████████████████  | 602/674 [01:18<00:07,  9.91it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.2-invariants-or-later/GFDL-1.2-invariants-or-later-365.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-170.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-141_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • GFDL-1.2/GFDL-1.2-57_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 3 hit → 582 candidates
[DEBUG] Table 4 hit → 582 candidates
[DEBUG] Table 19 hit → 582 candidates
[DEBUG] Table 20 hit → 582 candidates
[DEBUG] Table 22 hit → 582 candidates
[DEBUG] Table 23 hit → 3 candidates
[DEBUG] Table 25 hit → 585 candidates
 - LSH returned 585 candidates in 0.035 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 585 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|████████████████████████████| 585/585 [00:00<00:00, 180390.22it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OSL-2.1/OSL-2.1-123_Split_SPDX_Licenses.txt → Score: 0.1543
  • OSL-2.1/OSL-2.1-85_Split_DB_Foss_Licenses.txt → Score: 0.1543
  • OSL-2.1/OSL-2.1-79_Split_DB_Foss_Licenses.txt → Score: 0.1543
  • OSL-2.1/OSL-2.1-152_Split_SPDX_Licenses.txt → Score: 0.1543
  • OSL-2.1/OSL-2.1-178.txt → Score: 0.1543

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DE


Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 50051.36it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • LiLiQ-P-1.1/LiLiQ-P-1.1-109.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-78.txt → Score: 0.0000
  • ZoneAlarm-EULA/ZoneAlarm-EULA-510.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-83.txt → Score: 0.0000
  • LiLiQ-P-1.1/LiLiQ-P-1.1-87.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 24 candidates
[DEBUG] Table 1 hit → 24 candidates
[DEBUG] Table 2 hit → 24 candidates
[DEBUG] Table 3 hit → 24 candidates
[DEBUG] Table 4 hit → 24 candidates
[DEBUG] Table 5 hit → 24 candidates
[DEBUG] Table 6 hit → 26 candidates
[DEBUG] Table 7 hit → 24 candidates
[DEBUG] Table 8 hit → 24 candidates
[DEBUG] Table 9 hit → 24 candidates
[DEBUG] Table 10 hit → 24 candidates
[DEBUG] Table 11 hit → 24 candidates
[DEBUG] Table 12 hit → 24 candidates
[DEBUG] Table 13 hit → 24 candidates
[DEBUG] Table 14 hit → 24 cand


Scoring: 100%|██████████████████████████████| 26/26 [00:00<00:00, 132505.35it/s][A
🔍 Evaluating Queries:  90%|████████████████▏ | 605/674 [01:18<00:05, 12.34it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • AAL/AAL-9.txt → Score: 0.0000
  • AAL/AAL-11_Split_SPDX_Licenses.txt → Score: 0.0000
  • AAL/AAL-4_Split_SPDX_Licenses.txt → Score: 0.0000
  • AAL/AAL-9_Split_SPDX_Licenses.txt → Score: 0.0000
  • AAL/AAL-3_Split_SPDX_Licenses.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 28 hit → 1 candidates
 - LSH returned 1 candidates in 0.029 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 12633.45it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-11.txt → Score: 0.9814

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.024 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...





  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 15 hit → 1 candidates
[DEBUG] Table 17 hit → 1 candidates
[DEBUG] Table 18 hit → 2 candidates
[DEBUG] Table 21 hit → 3 candidates
[DEBUG] Table 22 hit → 1 candidates
[DEBUG] Table 23 hit → 1 candidates
[DEBUG] Table 28 hit → 2 candidates
 - LSH returned 9 candidates in 0.073 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 9 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 9/9 [00:00<00:00, 26471.76it/s][A
🔍 Evaluating Queries:  90%|████████████████▏ | 608/674 [01:19<00:04, 14.42it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.1/ZPL-2.1-32.txt → Score: 0.1617
  • Intel/Intel-26.txt → Score: 0.2512
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-11.txt → Score: 0.3675
  • ZPL-2.1/ZPL-21-39.txt → Score: 0.3827
  • AAL/AAL-26.txt → Score: 0.4114

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.024 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 


Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 6543.38it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • Parity-7.0.0/Parity-7.0.0-71.txt → Score: 0.0000

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  91%|████████████████▎ | 611/674 [01:19<00:03, 16.58it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.056 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.061 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 22 hit → 1 candidates
 - LSH returned 1 candidates in 0.038 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors f


Scoring: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 11366.68it/s][A
🔍 Evaluating Queries:  91%|████████████████▍ | 614/674 [01:19<00:03, 18.12it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-386.txt → Score: 1.0804

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.052 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 6 hit → 1 candidates
 - LSH returned 1 candidates in 0.045 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 3146.51it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • libtiff/libtiff-1.txt → Score: 0.8115

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  92%|████████████████▍ | 617/674 [01:19<00:03, 16.41it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.121 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.055 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  92%|████████████████▌ | 619/674 [01:19<00:03, 15.37it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.102 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.097 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  92%|████████████████▌ | 621/674 [01:20<00:06,  8.07it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.515 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 14 candidates
[DEBUG] Table 10 hit → 8 candidates
[DEBUG] Table 13 hit → 2 candidates
[DEBUG] Table 14 hit → 2 candidates
[DEBUG] Table 16 hit → 2 candidates
[DEBUG] Table 18 hit → 14 candidates
[DEBUG] Table 21 hit → 2 candidates
[DEBUG] Table 22 hit → 2 candidates
[DEBUG] Table 23 hit → 2 candidates
[DEBUG] Table 26 hit → 2 candidates
[DEBUG] Table 27 hit → 2 candidates
[DEBUG] Table 29 hit → 2 candidates
 - LSH returned 24 candidates in 0.511 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 24 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████| 24/24 [00:00<00:00, 52292.62it/s][A
🔍 Evaluating Queries:  92%|████████████████▋ | 623/674 [01:20<00:08,  6.13it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.1/OLDAP-2.1-13_Split_SPDX_Licenses.txt → Score: 0.1143
  • OLDAP-2.1/OLDAP-2.1-37.txt → Score: 0.1143
  • OLDAP-2.1/OLDAP-2.1-27_Split_SPDX_Licenses.txt → Score: 0.2490
  • OLDAP-2.1/OLDAP-2.1-33.txt → Score: 0.2490
  • OLDAP-2.1/OLDAP-2.1-3_Split_SPDX_Licenses.txt → Score: 0.2690

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 17549.39it/s][A
🔍 Evaluating Queries:  93%|████████████████▋ | 625/674 [01:21<00:06,  7.23it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • PostgreSQL/PostgreSQL-3_Split_SPDX_Licenses.txt → Score: 0.0000
  • PostgreSQL/PostgreSQL-3.txt → Score: 0.7445

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.069 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 478 candidates
[DEBUG] Table 24 hit → 6 candidates
 - LSH returned 484 candidates in 0.153 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 484 vect


Scoring: 100%|████████████████████████████| 484/484 [00:00<00:00, 128695.52it/s][A
🔍 Evaluating Queries:  93%|████████████████▊ | 629/674 [01:21<00:05,  8.57it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-only/GPL-3.0-only-87.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-317.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-262.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-375.txt → Score: 0.4778
  • GPL-3.0-only/GPL-3.0-only-216.txt → Score: 0.4778

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.037 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.091 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[


Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 8542.37it/s][A
🔍 Evaluating Queries:  94%|████████████████▉ | 633/674 [01:21<00:03, 10.96it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GPL-3.0-with-bison-exception/GPL-3.0-with-bison-exception-5.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.033 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.106 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.029 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STE


Scoring: 100%|█████████████████████████████████| 2/2 [00:00<00:00, 10046.24it/s][A
🔍 Evaluating Queries:  94%|████████████████▉ | 635/674 [01:22<00:06,  5.95it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • GFDL-1.3/GFDL-1.3-457.txt → Score: 1.0854
  • GFDL-1.2/GFDL-1.2-427.txt → Score: 1.0854

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 1 candidates
[DEBUG] Table 1 hit → 1 candidates
[DEBUG] Table 2 hit → 1 candidates
[DEBUG] Table 3 hit → 1 candidates
[DEBUG] Table 4 hit → 1 candidates
[DEBUG] Table 5 hit → 1 candidates
[DEBUG] Table 6 hit → 1 candidates
[DEBUG] Table 7 hit → 1 candidates
[DEBUG] Table 8 hit → 1 candidates
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 10 hit → 1 candidates
[DEBUG] Table 11 hit → 1 candidates
[DEBUG] Table 12 hit → 1 candidates
[DEBUG] Table 13 hit → 1 candidates
[DEBUG] Table 14 hit → 1 candidates
[DEBUG] Table 15 hit → 1 candidates
[DEBUG] Table 16 hit → 1 candidates
[DEBUG] Table 17 hit → 1 candidates
[DEBUG] Table 18 hit → 1 candidates
[DEBUG] Table 19 hit → 1 candidates


Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 1295.34it/s][A


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • W3C/W3C-21.txt → Score: 0.0000

 [STEP 1] Embedding query text...



🔍 Evaluating Queries:  95%|█████████████████ | 637/674 [01:22<00:05,  7.04it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.090 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  95%|█████████████████ | 639/674 [01:23<00:08,  4.05it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.900 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.088 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  95%|█████████████████ | 640/674 [01:24<00:10,  3.25it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.593 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 1 candidates
 - LSH returned 1 candidates in 0.031 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|███████████████████████████████████| 1/1 [00:00<00:00, 454.57it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • WashU/WashU-8.txt → Score: 0.9632

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.054 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 5 candidates
[DEBUG] Table 1 hit → 5 candidates
[DEBUG] Table 2 hit → 6 candidates
[DEBUG] Table 3 hit → 5 candidates
[DEBUG] Table 4 hit → 5 candidates
[DEBUG] Table 5 hit → 5 candidates
[DEBUG] Table 6 hit → 5 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 5 candidates
[DEBUG] Table 9 hit → 5 candidates
[DEBUG] Table 10 hit → 5 candidates
[DEBUG] Table 11 hit → 5 candidates
[DEBUG] Table 12 hit → 5 candidates
[DEBUG] Table 13 hit → 5 candidates
[DEBUG] Table 14 hit 


Scoring: 100%|█████████████████████████████████| 6/6 [00:00<00:00, 63072.24it/s][A
🔍 Evaluating Queries:  95%|█████████████████▏| 643/674 [01:24<00:06,  5.13it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • ZPL-2.0/ZPL-20-32.txt → Score: 0.0000
  • ZPL-2.0/ZPL-2.0-38.txt → Score: 0.0000
  • ZPL-2.1/ZPL-2.1-25.txt → Score: 0.0000
  • ZPL-2.1/ZPL-21-32.txt → Score: 0.0000
  • Intel/Intel-22.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.037 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.056 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  96%|█████████████████▎| 646/674 [01:24<00:03,  7.03it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.076 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.095 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  96%|█████████████████▎| 648/674 [01:24<00:03,  7.73it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.092 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  96%|█████████████████▎| 650/674 [01:25<00:06,  3.94it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 1.119 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 5 candidates
 - LSH returned 5 candidates in 0.790 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 5 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████████| 5/5 [00:00<00:00, 15709.00it/s][A
🔍 Evaluating Queries:  97%|█████████████████▍| 651/674 [01:26<00:07,  2.91it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • WashU/WashU-5.txt → Score: 0.8599
  • WashU/WashU-3.txt → Score: 0.9465
  • BSD-3-Clause-Clear/BSD-3-Clause-Clear-11.txt → Score: 0.9526
  • PostgreSQL/PostgreSQL-2_Split_SPDX_Licenses.txt → Score: 1.1160
  • PostgreSQL/PostgreSQL-1.txt → Score: 1.1160

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 4 candidates
[DEBUG] Table 1 hit → 4 candidates
[DEBUG] Table 2 hit → 5 candidates
[DEBUG] Table 3 hit → 4 candidates
[DEBUG] Table 4 hit → 4 candidates
[DEBUG] Table 5 hit → 4 candidates
[DEBUG] Table 6 hit → 4 candidates
[DEBUG] Table 7 hit → 5 candidates
[DEBUG] Table 8 hit → 4 candidates
[DEBUG] Table 9 hit → 4 candidates
[DEBUG] Table 10 hit → 4 candidates
[DEBUG] Table 11 hit → 4 candidates
[DEBUG] Table 12 hit → 4 candidates
[DEBUG] Table 13 hit → 4 candidates
[DEBUG] Table 14 hit → 4 candidates
[DEBUG] Table 


Scoring: 100%|█████████████████████████████████| 8/8 [00:00<00:00, 20152.81it/s][A



 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • WashU/WashU-7.txt → Score: 0.0000
  • WashU/WashU-1.txt → Score: 0.0000
  • WashU/WashU-4.txt → Score: 0.0000
  • WashU/WashU-6.txt → Score: 0.0000
  • WashU/WashU-2.txt → Score: 0.3740

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 14 candidates
[DEBUG] Table 3 hit → 2 candidates
[DEBUG] Table 4 hit → 2 candidates
[DEBUG] Table 5 hit → 6 candidates
[DEBUG] Table 7 hit → 4 candidates
[DEBUG] Table 8 hit → 46 candidates
[DEBUG] Table 9 hit → 10 candidates
[DEBUG] Table 10 hit → 6 candidates
[DEBUG] Table 12 hit → 6 candidates
[DEBUG] Table 15 hit → 2 candidates
[DEBUG] Table 17 hit → 8 candidates
[DEBUG] Table 19 hit → 12 candidates
[DEBUG] Table 20 hit → 2 candidates
[DEBUG] Table 21 hit → 6 candidates
[DEBUG] Table 22 hit → 6 candidates
[DEBUG] Table 24 hit → 8 candidates
[DEBUG] Table 25 hit → 6 candidates
[D


Scoring: 100%|███████████████████████████████| 70/70 [00:00<00:00, 99156.12it/s][A
🔍 Evaluating Queries:  97%|█████████████████▍| 653/674 [01:26<00:05,  3.82it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-2.txt → Score: 0.1327
  • OLDAP-2.6/OLDAP-2.6-4_Split_SPDX_Licenses.txt → Score: 0.1327
  • OLDAP-2.6/OLDAP-2.6-21.txt → Score: 0.1463
  • OLDAP-2.6/OLDAP-2.6-9_Split_SPDX_Licenses.txt → Score: 0.1463
  • OLDAP-2.8/OLDAP-2.8-7_Split_SPDX_Licenses.txt → Score: 0.1890

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 9 hit → 1 candidates
[DEBUG] Table 27 hit → 718 candidates
[DEBUG] Table 28 hit → 718 candidates
 - LSH returned 719 candidates in 0.085 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 719 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|█████████████████████████████| 719/719 [00:00<00:00, 84860.98it/s][A
🔍 Evaluating Queries:  97%|█████████████████▍| 654/674 [01:26<00:05,  3.98it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.3430
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.3430
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.3430
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.3430
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.3430

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 0 hit → 111 candidates
[DEBUG] Table 1 hit → 111 candidates
[DEBUG] Table 2 hit → 111 candidates
[DEBUG] Table 3 hit → 111 candidates
[DEBUG] Table 4 hit → 111 candidates
[DEBUG] Table 5 hit → 111 candidates
[DEBUG] Table 6 hit → 111 candidates
[DEBUG] Table 7 hit → 111 candidates
[DEBUG] Table 8 hit → 111 candidates
[DEBUG] Table 9 hit → 111 candidates
[DEBUG] Table 10 hit → 112 candidates
[DEBUG] Table 11 hit → 112 candidates
[DEBUG] Table 12 hit → 111 candidates
[DEBUG] Table 13 


Scoring: 100%|█████████████████████████████| 112/112 [00:00<00:00, 53491.47it/s][A
🔍 Evaluating Queries:  97%|█████████████████▍| 655/674 [01:27<00:04,  4.46it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • RealNetworks-EULA/RealNetworks-EULA-41.txt → Score: 0.0000
  • RealNetworks-EULA/RealNetworks-EULA-5.txt → Score: 0.0000
  • RealNetworks-EULA/RealNetworks-EULA-57.txt → Score: 0.0000
  • RealNetworks-EULA/RealNetworks-EULA-84.txt → Score: 0.0000
  • RealNetworks-EULA/RealNetworks-EULA-46.txt → Score: 0.0000

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.088 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  97%|█████████████████▌| 657/674 [01:27<00:03,  5.59it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.119 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.073 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 8 hit → 1 candidates
 - LSH returned 1 candidates in 0.700 sec

 [STEP 3] Matching candidate filenames to vectors...
  - Found 1 vectors for comparison.

 [STEP 4] Computing distances...



Scoring: 100%|██████████████████████████████████| 1/1 [00:00<00:00, 4609.13it/s][A
🔍 Evaluating Queries:  98%|█████████████████▋| 661/674 [01:28<00:02,  5.38it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-33.txt → Score: 0.6393

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.045 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.059 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.057 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Ta


Scoring: 100%|████████████████████████████| 718/718 [00:00<00:00, 192023.86it/s][A
🔍 Evaluating Queries:  98%|█████████████████▋| 663/674 [01:28<00:01,  6.51it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.5772
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.5772
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.5772
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.5772
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.5772

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  99%|█████████████████▊| 665/674 [01:28<00:01,  5.35it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.454 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.064 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.025 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 24 hit → 718 candidates
[DEBUG] Table 28 hit → 2 candidates
 - LSH returned 720 candidates in 0.044 sec

 [STEP 3] Matching candidate filen


Scoring: 100%|████████████████████████████| 720/720 [00:00<00:00, 210901.52it/s][A
🔍 Evaluating Queries:  99%|█████████████████▊| 667/674 [01:28<00:01,  6.81it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • CC-BY-3.0/CC-BY-3.0-361_Split_SPDX_Licenses.txt → Score: 0.5685
  • CC-BY-3.0/CC-BY-3.0-318.txt → Score: 0.5685
  • CC-BY-3.0/CC-BY-3.0-225.txt → Score: 0.5685
  • CC-BY-3.0/CC-BY-3.0-211_Split_SPDX_Licenses.txt → Score: 0.5685
  • CC-BY-3.0/CC-BY-3.0-215_Split_SPDX_Licenses.txt → Score: 0.5685

 [STEP 1] Embedding query text...


🔍 Evaluating Queries:  99%|█████████████████▉| 670/674 [01:29<00:00,  9.47it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.055 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.044 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.030 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...


🔍 Evaluating Queries: 100%|█████████████████▉| 672/674 [01:29<00:00,  7.05it/s]

  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.431 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.042 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
 - LSH returned 0 candidates in 0.056 sec
 No candidates found. Possibly due to hash size or vector sparsity.

 [STEP 1] Embedding query text...
  - Query vector shape: (384,)
  - Non-zero embedding dimensions: 384

 [STEP 2] Querying LSH index...
[DEBUG] Table 1 hit → 2 candidates
[DEBUG] Table 5 hit → 42 candidates
[DEBUG] Table 9 hit → 42 candidates
[DEBUG] Table 13 hit → 40 candidates
[DEBUG] T


Scoring: 100%|██████████████████████████████| 96/96 [00:00<00:00, 202135.13it/s][A
🔍 Evaluating Queries: 100%|██████████████████| 674/674 [01:29<00:00,  7.52it/s]


 [STEP 5] Selecting top matches...

 Top-5 nearest results:
  • OLDAP-2.6/OLDAP-2.6-27_Split_SPDX_Licenses.txt → Score: 0.2350
  • OLDAP-2.6/OLDAP-2.6-6.txt → Score: 0.2350
  • OLDAP-2.6/OLDAP-2.6-27.txt → Score: 0.2382
  • OLDAP-2.6/OLDAP-2.6-3_Split_SPDX_Licenses.txt → Score: 0.2382
  • OLDAP-2.6/OLDAP-2.6-25.txt → Score: 0.2382

✅ Overall accuracy: 36.94% (249/674)





In [39]:
df_eval

Unnamed: 0,query_index,text_preview,true_label,matched_files,correct,license_folder
0,0,INTERBASE PUBLIC LICENSE Version 1.0 1 Definit...,1,[],False,Interbase
1,1,3DFX GLIDE Source Code General Public License ...,1,"[CC-BY-3.0/CC-BY-3.0-371.txt, SCEA/SCEA-41_Spl...",True,Glide
2,2,"As a special exception, if you create a docume...",1,[gnu-javamail-exception/gnu-javamail-exception...,True,Font-exception-2.0
3,3,Lucent disclaims all warranties with regard to...,1,[],False,StandardML-NJ
4,4,\xe3\x82\xa2\xe3\x83\x88\xe3\x83\xaa\xe3\x83\x...,1,[],False,CC-BY-SA-2.1-JP
...,...,...,...,...,...,...
669,669,Common Documentation License Version 1.0 - Feb...,1,[],False,CDL-1.0
670,670,Sybase Open Watcom Public License version 1.0 ...,1,[],False,Watcom-1.0
671,671,Open CASCADE Technology Public License License...,1,[],False,OCCT
672,672,8. Indemnification...,1,[],False,Wintertree


In [43]:
# Filter only license-labeled samples
license_results = df_eval[df_eval['true_label'] == 1]

# Check which license folders were correctly identified
correctly_detected = license_results[license_results['correct'] == True]['license_folder'].unique()

# Cross-check against your list
correct_licenses = sorted(set(correctly_detected).intersection(set(unique_licenses)))
missing_licenses = sorted(set(unique_licenses) - set(correctly_detected))

print(f"\n Detected {len(correct_licenses)}/{len(unique_licenses)} target licenses correctly.\n")
print(" Correctly Detected Licenses:")
print("\n".join(correct_licenses))

print("\n Missing Licenses (not detected correctly):")
print("\n".join(missing_licenses))



 Detected 46/46 target licenses correctly.

🎯 Correctly Detected Licenses:
AAL
BSD-3-Clause-Clear
CATOSL
CC-BY-3.0
CC-BY-SA-3.0-AT
CPL-0.5
CrystalStacker
FTL
Ferris-1.2
GFDL-1.1-no-invariants-or-later
GFDL-1.2
GFDL-1.2-invariants-or-later
GFDL-1.2-only
GFDL-1.3
GFDL-1.3-invariants-or-later
GPL-3.0-only
GPL-3.0-with-bison-exception
Imlib2
Intel
LiLiQ-P-1.1
MITNFA
MX4J
NCGL-UK-2.0
NLOD-1.0
OASIS
OLDAP-2.1
OLDAP-2.6
OLDAP-2.8
OSL-2.1
OpenMarket
Parity-7.0.0
PostgreSQL
RealNetworks-EULA
SCEA
VIM
W3C
WashU
YPL-1.0
YPL-1.1
ZPL-2.0
ZPL-2.1
Zed
ZoneAlarm-EULA
gnu-javamail-exception
info-zip
libtiff

 Missing Licenses (not detected correctly):



In [45]:
non_license_results = df_eval[df_eval['true_label'] == 0]

correct_nonlicenses = non_license_results[non_license_results['correct'] == True]
incorrect_nonlicenses = non_license_results[non_license_results['correct'] == False]

print(f"\n Correctly identified {len(correct_nonlicenses)}/{len(non_license_results)} non-license texts.\n")

print(" Incorrectly identified these non-license texts as licenses:")
for i, row in incorrect_nonlicenses.iterrows():
    print(f" - Preview: {row['text_preview']}")
    print(f"   Matched files: {row['matched_files']}\n")


 Correctly identified 20/20 non-license texts.

 Incorrectly identified these non-license texts as licenses:


# Points to note:
- Identified all the licenses that were indexed correctly. (46 licenses in 10000 file samples)
- They were (46/654) licenses that were indexed into LSH.
- Identified all the non license text correclty. (20/674) samples.
- 608 licenses were not indexed, that is they were not part of the search space. Even then, it identified some statements that it had not seen previously. (203/608)
- Overall accuracy even with only (10000/162833) files indexed.

In [42]:
df_eval.to_csv("license_detection_results.csv", index=False)