## Create embeddings with dimension max_length x 1024

### Train Dataset

#### Create .npz batches

In [None]:
import pandas as pd
import numpy as np

# === Load the training split ===
train_path = '../../../data/splitted_datasets/allele/beta/train.tsv'
train_data = pd.read_csv(train_path, sep='\t', low_memory=False)
print("Length train data:", len(train_data))

# Embedding archives: one array per sequence, stored under the sequence string as key.
epitope_embeddings_path = '../../../data/embeddings/beta/allele/Epitope_beta_embeddings.npz'
tcr_embeddings_path = '../../../data/embeddings/beta/allele/TRB_beta_embeddings.npz'

# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code while loading. Only use it with archives from a trusted source.
tcr_data = np.load(tcr_embeddings_path, allow_pickle=True)
epitope_data = np.load(epitope_embeddings_path, allow_pickle=True)

# === Sequence keys referenced by the training split (one entry per row, NaN dropped) ===
tcr_keys = train_data['TRB_CDR3'].dropna().tolist()
epitope_keys = train_data['Epitope'].dropna().tolist()

# === Keep only keys that have an embedding ===
# Membership is tested against a set of the archive's key names; this replaced an
# earlier, slower per-row `key in tcr_data` / `key in epitope_data` check.
tcr_keys_set = set(tcr_data.files)
print("length tcr_keys_set: ", len(tcr_keys_set))
epitope_keys_set = set(epitope_data.files)
print("length epitope_keys_set: ", len(epitope_keys_set))

tcr_keys = [key for key in tcr_keys if key in tcr_keys_set]
epitope_keys = [key for key in epitope_keys if key in epitope_keys_set]

# === Build the training embedding dictionaries ===
# The key lists contain one entry per dataset row, so most sequences appear many times.
# Indexing an .npz archive reads the array from disk on every access, therefore each
# distinct key is loaded exactly once. dict.fromkeys() removes duplicates while keeping
# first-occurrence order, so the resulting dictionaries are identical to the ones built
# from the full lists.
tcr_train_dict = {key: tcr_data[key] for key in dict.fromkeys(tcr_keys)}
epitope_train_dict = {key: epitope_data[key] for key in dict.fromkeys(epitope_keys)}

print(f"Keys in tcr_data.files: {list(tcr_data.files)[:5]}")  # Sample of the archive keys
print(f"First 5 TCR keys from train_data: {tcr_keys[:5]}")  # Sample of the filtered row keys
print(f"len TCR keys: {len(tcr_keys)}")  # Number of rows whose TCR has an embedding

# 76m


Length train data: 755758
length tcr_keys_set:  211294
length epitope_keys_set:  1896
Keys in tcr_data.files: ['CASSWRDGATGELFF', 'CASSPYWGEAGYTF', 'CASSILAGSNTEAFF', 'CSARDPGQGAGELFF', 'CASSYWGPQDTQYF']
First 5 TCR keys from train_data: ['CSARDLYRQSSYEQYF', 'CASSSEPGQGSYEQYF', 'CASSLDGRPEQFF', 'CASSQGTSNNEQFF', 'CSARVQGTQETQYF']
len TCR keys: 755758


In [None]:
import os
# === Determine the padding length ===
# Largest first-axis size found among the training embeddings, per modality.
max_tcr_length = max(emb.shape[0] for emb in tcr_train_dict.values())
max_epitope_length = max(emb.shape[0] for emb in epitope_train_dict.values())

# A single shared length so TCR and epitope inputs have the same shape for the transformer.
max_length = max(max_epitope_length, max_tcr_length)

print(f"📌 Max Length: {max_length} (TCR: {max_tcr_length}, Epitope: {max_epitope_length})")

# === Padding helper ===
def pad_embedding(embedding, max_length):
    """
    Zero-pad a single 2-D embedding along its first axis to ``max_length`` rows.

    Parameters
    ----------
    embedding : np.ndarray
        Array whose first axis is padded; ``embedding.shape[1]`` is kept as the
        width of the result.
    max_length : int
        Number of rows of the result. Must be at least ``embedding.shape[0]``.

    Returns
    -------
    np.ndarray
        New array of shape ``(max_length, embedding.shape[1])`` with the dtype of
        ``embedding``. The leading rows hold the original values, the rest are zeros.

    Raises
    ------
    ValueError
        If ``embedding`` has more rows than ``max_length``. The function never
        truncates, so the caller must choose a large enough ``max_length``.
    """
    seq_len = embedding.shape[0]
    if seq_len > max_length:
        # Without this check NumPy raises an opaque broadcasting error on assignment.
        raise ValueError(
            f"Embedding has {seq_len} rows, which exceeds max_length={max_length}; "
            "increase max_length so that it covers every split."
        )

    padded = np.zeros((max_length, embedding.shape[1]), dtype=embedding.dtype)
    padded[:seq_len, :] = embedding  # keep the original values, the remainder stays zero
    return padded

# === Output directories for the padded training batches ===
# Each directory is created right after its path is defined; existing ones are reused.
train_tcr_padded_path = '../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches'
os.makedirs(train_tcr_padded_path, exist_ok=True)

train_epitope_padded_path = '../../../data/embeddings/beta/allele/dimension_1024/train_epitope_padded_batches'
os.makedirs(train_epitope_padded_path, exist_ok=True)

# === Batched save helper ===
def save_padded_embeddings_in_batches(embeddings_dict, save_dir, batch_size=5000, pad_length=None):
    """
    Pad every embedding to a common length and write them as compressed .npz batches.

    Batch ``i`` is written to ``<save_dir>/batch_<i>.npz`` and contains up to
    ``batch_size`` arrays, each stored under its dictionary key.

    Parameters
    ----------
    embeddings_dict : dict
        Maps a key (used as the array name inside the archive) to a 2-D array.
    save_dir : str
        Directory that receives the batch files. It is created if missing.
    batch_size : int, optional
        Maximum number of embeddings per batch file. Must be positive.
    pad_length : int, optional
        Number of rows every embedding is padded to. When omitted, the
        notebook-level ``max_length`` is used, so that cell must have been run.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive, or if any embedding has more rows than
        the padding length. The length check runs before anything is written, so
        a failure never leaves a partially written set of batches behind.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    if pad_length is None:
        pad_length = max_length  # shared length computed from the training embeddings

    # Fail fast: padding cannot shrink an embedding, so check all lengths up front.
    longest = max((emb.shape[0] for emb in embeddings_dict.values()), default=0)
    if longest > pad_length:
        raise ValueError(
            f"Longest embedding has {longest} rows but the padding length is {pad_length}; "
            "recompute the padding length over all splits."
        )

    os.makedirs(save_dir, exist_ok=True)

    keys = list(embeddings_dict.keys())
    num_batches = (len(keys) + batch_size - 1) // batch_size  # ceiling division

    for i in range(num_batches):
        batch_keys = keys[i * batch_size: (i + 1) * batch_size]
        padded_batch = {key: pad_embedding(embeddings_dict[key], pad_length) for key in batch_keys}

        batch_save_path = os.path.join(save_dir, f"batch_{i}.npz")
        np.savez_compressed(batch_save_path, **padded_batch)
        print(f"✅ Saved batch {i + 1}/{num_batches} to {batch_save_path}")

    print("✅ All batches saved successfully!")

# === Pad the training embeddings and write them to disk (TCRs first, then epitopes) ===
save_padded_embeddings_in_batches(
    embeddings_dict=tcr_train_dict,
    save_dir=train_tcr_padded_path,
    batch_size=5000,
)
save_padded_embeddings_in_batches(
    embeddings_dict=epitope_train_dict,
    save_dir=train_epitope_padded_path,
    batch_size=5000,
)

# 14m

📌 Max Length: 43 (TCR: 38, Epitope: 43)
✅ Saved batch 1/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_0.npz
✅ Saved batch 2/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_1.npz
✅ Saved batch 3/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_2.npz
✅ Saved batch 4/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_3.npz
✅ Saved batch 5/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_4.npz
✅ Saved batch 6/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_5.npz
✅ Saved batch 7/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_6.npz
✅ Saved batch 8/40 to ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_7.npz
✅ Saved batch 9/40 to ../../../data/embeddings/beta/allele/dimension_102

#### Convert to HDF5

In [3]:
import h5py
import pandas as pd
import numpy as np
import os

# === Build the final HDF5 files from the padded batches ===
def combine_selected_batches_to_hdf5(batch_files, output_path):
    """
    Merge a given list of .npz batch files into a single HDF5 file.

    Every array of every batch becomes one gzip-compressed dataset named after
    its key. If a key was already written by an earlier batch, the later array
    is skipped and a warning is printed.

    Parameters
    ----------
    batch_files : list of str
        Paths of the .npz batches, processed in the given order.
    output_path : str
        Path of the HDF5 file to create; an existing file is overwritten.

    Returns
    -------
    None
        When ``batch_files`` is empty a message is printed and no file is created.
    """
    if not batch_files:
        print("❌ Keine Batch-Dateien in der Liste gefunden.")
        return

    with h5py.File(output_path, 'w') as hdf5_file:
        for i, batch_file in enumerate(batch_files):
            # The context manager closes each archive as soon as it has been copied;
            # otherwise one file handle per batch stays open.
            # NOTE(review): allow_pickle=True can execute code from untrusted archives.
            with np.load(batch_file, allow_pickle=True) as batch:
                for key in batch.files:
                    if key not in hdf5_file:
                        hdf5_file.create_dataset(key, data=batch[key], compression="gzip")
                    else:
                        print(f"⚠️ Duplikat-Key übersprungen: {key}")

            print(f"🔄 Batch {i+1}/{len(batch_files)} verarbeitet: {batch_file}")

    print(f"✅ Finale gepaddete Embeddings gespeichert unter: {output_path}")


def get_num_batches(directory, prefix="batch_", suffix=".npz"):
    """Return how many entries of ``directory`` start with ``prefix`` and end with ``suffix``."""
    return sum(
        1
        for name in os.listdir(directory)
        if name.startswith(prefix) and name.endswith(suffix)
    )

# === Merge the TCR and epitope batches of the training split ===
# The batch paths are rebuilt as batch_0 ... batch_{n-1} from the number of matching files.

# TCR
num_batches = get_num_batches(train_tcr_padded_path)
print(f"Number of batches in {train_tcr_padded_path}: {num_batches}")
train_tcr_batches = [
    os.path.join(train_tcr_padded_path, f"batch_{i}.npz")
    for i in range(num_batches)
]

# Epitope
num_batches = get_num_batches(train_epitope_padded_path)
print(f"Number of batches in {train_epitope_padded_path}: {num_batches}")
train_epitope_batches = [
    os.path.join(train_epitope_padded_path, f"batch_{i}.npz")
    for i in range(num_batches)
]

# One HDF5 file per modality.
combine_selected_batches_to_hdf5(
    train_tcr_batches,
    '../../../data/embeddings/beta/allele/dimension_1024/padded_train_tcr_embeddings_final.h5',
)
combine_selected_batches_to_hdf5(
    train_epitope_batches,
    '../../../data/embeddings/beta/allele/dimension_1024/padded_train_epitope_embeddings_final.h5',
)

# === Sanity-check an HDF5 file ===
def check_hdf5_file(file_path):
    """Open ``file_path`` read-only and print its path, its key count and its first five keys."""
    with h5py.File(file_path, 'r') as h5:
        top_level_keys = list(h5.keys())
        key_count = len(top_level_keys)
        sample_keys = top_level_keys[:5]
        print(f"✅ HDF5-Datei geladen: {file_path}")
        print(f"Anzahl Keys: {key_count}")
        print(f"Beispiel-Keys: {sample_keys}")

# Inspect both training HDF5 files: TCR first, then epitope.
for _h5_path in (
    '../../../data/embeddings/beta/allele/dimension_1024/padded_train_tcr_embeddings_final.h5',
    '../../../data/embeddings/beta/allele/dimension_1024/padded_train_epitope_embeddings_final.h5',
):
    check_hdf5_file(_h5_path)

# 11m

Number of batches in ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches: 40
Number of batches in ../../../data/embeddings/beta/allele/dimension_1024/train_epitope_padded_batches: 1
🔄 Batch 1/40 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_0.npz
🔄 Batch 2/40 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_1.npz
🔄 Batch 3/40 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_2.npz
🔄 Batch 4/40 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_3.npz
🔄 Batch 5/40 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_4.npz
🔄 Batch 6/40 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/batch_5.npz
🔄 Batch 7/40 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/train_tcr_padded_batches/

### Validation Dataset

#### Create .npz batches

In [4]:
# === Load the validation split ===
validation_path = '../../../data/splitted_datasets/allele/beta/validation.tsv'
valid_data = pd.read_csv(validation_path, sep='\t', low_memory=False)

# === Sequence keys referenced by the validation split (one entry per row, NaN dropped) ===
valid_tcr_keys = valid_data['TRB_CDR3'].dropna().tolist()
valid_epitope_keys = valid_data['Epitope'].dropna().tolist()

epitope_embeddings_path = '../../../data/embeddings/beta/allele/Epitope_beta_embeddings.npz'
tcr_embeddings_path = '../../../data/embeddings/beta/allele/TRB_beta_embeddings.npz'

# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code while loading. Only use it with archives from a trusted source.
tcr_data = np.load(tcr_embeddings_path, allow_pickle=True)
epitope_data = np.load(epitope_embeddings_path, allow_pickle=True)

# === Fast membership test against the archive key names ===
tcr_keys_set = set(tcr_data.files)
epitope_keys_set = set(epitope_data.files)

print("len of valid_tcr_keys: ", len(valid_tcr_keys))
print("len of valid_epitope_keys: ", len(valid_epitope_keys))

valid_tcr_keys = [key for key in valid_tcr_keys if key in tcr_keys_set]
valid_epitope_keys = [key for key in valid_epitope_keys if key in epitope_keys_set]

print("len of valid_tcr_keys after getting emnedding: ", len(valid_tcr_keys))
print("len of valid_epitope_keys after getting embedding: ", len(valid_epitope_keys))

# === Build the validation embedding dictionaries ===
# The key lists hold one entry per dataset row, and indexing an .npz archive reads the
# array from disk on every access. Each distinct key is therefore loaded only once;
# dict.fromkeys() removes duplicates while keeping first-occurrence order, so the
# dictionaries are identical to the ones built from the full lists.
valid_tcr_embeddings_dict = {key: tcr_data[key] for key in dict.fromkeys(valid_tcr_keys)}
valid_epitope_embeddings_dict = {key: epitope_data[key] for key in dict.fromkeys(valid_epitope_keys)}

# === Output directories for the padded validation batches ===
valid_tcr_padded_path = '../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches'
valid_epitope_padded_path = '../../../data/embeddings/beta/allele/dimension_1024/valid_epitope_padded_batches'

os.makedirs(valid_tcr_padded_path, exist_ok=True)
os.makedirs(valid_epitope_padded_path, exist_ok=True)

# === Pad the validation embeddings and write them to disk ===
# save_padded_embeddings_in_batches pads to the notebook-level max_length, which is
# computed from the training embeddings, so the training cells must have run first.
save_padded_embeddings_in_batches(valid_tcr_embeddings_dict, valid_tcr_padded_path, batch_size=5000)
save_padded_embeddings_in_batches(valid_epitope_embeddings_dict, valid_epitope_padded_path, batch_size=5000)

# 17m

len of valid_tcr_keys:  169029
len of valid_epitope_keys:  169029
len of valid_tcr_keys after getting emnedding:  169029
len of valid_epitope_keys after getting embedding:  169029
✅ Saved batch 1/14 to ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_0.npz
✅ Saved batch 2/14 to ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_1.npz
✅ Saved batch 3/14 to ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_2.npz
✅ Saved batch 4/14 to ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_3.npz
✅ Saved batch 5/14 to ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_4.npz
✅ Saved batch 6/14 to ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_5.npz
✅ Saved batch 7/14 to ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_6.npz
✅ Saved batch 8/14 to ../../../data/embeddi

#### Create HDF5 files for validation

In [5]:

def get_num_batches(directory, prefix="batch_", suffix=".npz"):
    """Return how many entries of ``directory`` start with ``prefix`` and end with ``suffix``."""
    return sum(
        1
        for name in os.listdir(directory)
        if name.startswith(prefix) and name.endswith(suffix)
    )

# === Merge the TCR and epitope batches of the validation split ===
# The batch paths are rebuilt as batch_0 ... batch_{n-1} from the number of matching files.

# TCR
num_batches = get_num_batches(valid_tcr_padded_path)
print(f"Number of batches in {valid_tcr_padded_path}: {num_batches}")
valid_tcr_batches = [
    os.path.join(valid_tcr_padded_path, f"batch_{i}.npz")
    for i in range(num_batches)
]

# Epitope
num_batches = get_num_batches(valid_epitope_padded_path)
print(f"Number of batches in {valid_epitope_padded_path}: {num_batches}")
valid_epitope_batches = [
    os.path.join(valid_epitope_padded_path, f"batch_{i}.npz")
    for i in range(num_batches)
]

# One HDF5 file per modality.
combine_selected_batches_to_hdf5(
    valid_tcr_batches,
    '../../../data/embeddings/beta/allele/dimension_1024/padded_valid_tcr_embeddings_final.h5',
)
combine_selected_batches_to_hdf5(
    valid_epitope_batches,
    '../../../data/embeddings/beta/allele/dimension_1024/padded_valid_epitope_embeddings_final.h5',
)

# 3.5 m

Number of batches in ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches: 14
Number of batches in ../../../data/embeddings/beta/allele/dimension_1024/valid_epitope_padded_batches: 1
🔄 Batch 1/14 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_0.npz
🔄 Batch 2/14 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_1.npz
🔄 Batch 3/14 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_2.npz
🔄 Batch 4/14 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_3.npz
🔄 Batch 5/14 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_4.npz
🔄 Batch 6/14 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/batch_5.npz
🔄 Batch 7/14 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/valid_tcr_padded_batches/

### Test Dataset

In [6]:
import pandas as pd
import numpy as np

# === Load the test split ===
test_path = '../../../data/splitted_datasets/allele/beta/test.tsv'
test_data = pd.read_csv(test_path, sep='\t', low_memory=False)
print("Length test data:", len(test_data))
print("Test data from: ", test_path)

# Embedding archives: one array per sequence, stored under the sequence string as key.
epitope_embeddings_path = '../../../data/embeddings/beta/allele/Epitope_beta_embeddings.npz'
tcr_embeddings_path = '../../../data/embeddings/beta/allele/TRB_beta_embeddings.npz'

# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code while loading. Only use it with archives from a trusted source.
tcr_data = np.load(tcr_embeddings_path, allow_pickle=True)
epitope_data = np.load(epitope_embeddings_path, allow_pickle=True)

# === Sequence keys referenced by the test split (one entry per row, NaN dropped) ===
tcr_keys = test_data['TRB_CDR3'].dropna().tolist()
epitope_keys = test_data['Epitope'].dropna().tolist()

# === Keep only keys that have an embedding ===
# Membership is tested against a set of the archive's key names; this replaced an
# earlier, slower per-row `key in tcr_data` / `key in epitope_data` check.
tcr_keys_set = set(tcr_data.files)
print("length tcr_keys_set: ", len(tcr_keys_set))
epitope_keys_set = set(epitope_data.files)
print("length epitope_keys_set: ", len(epitope_keys_set))

print("len of test_tcr_keys: ", len(tcr_keys))
print("len of test_epitope_keys: ", len(epitope_keys))

tcr_keys = [key for key in tcr_keys if key in tcr_keys_set]
epitope_keys = [key for key in epitope_keys if key in epitope_keys_set]

print("len of test_tcr_keys after getting emnedding: ", len(tcr_keys))
print("len of test_epitope_keys after getting embedding: ", len(epitope_keys))

# === Build the test embedding dictionaries ===
# The key lists hold one entry per dataset row, and indexing an .npz archive reads the
# array from disk on every access. Each distinct key is therefore loaded only once;
# dict.fromkeys() removes duplicates while keeping first-occurrence order, so the
# dictionaries are identical to the ones built from the full lists.
tcr_test_dict = {key: tcr_data[key] for key in dict.fromkeys(tcr_keys)}
epitope_test_dict = {key: epitope_data[key] for key in dict.fromkeys(epitope_keys)}

print(f"Keys in tcr_data.files: {list(tcr_data.files)[:5]}")  # Sample of the archive keys
print(f"First 5 TCR keys from test_data: {tcr_keys[:5]}")  # Sample of the filtered row keys
print(f"len TCR keys: {len(tcr_keys)}")  # Number of rows whose TCR has an embedding

# 4m


Length test data: 54126
Test data from:  ../../../data/splitted_datasets/allele/beta/test.tsv
length tcr_keys_set:  211294
length epitope_keys_set:  1896
len of test_tcr_keys:  54126
len of test_epitope_keys:  54126
len of test_tcr_keys after getting emnedding:  54126
len of test_epitope_keys after getting embedding:  54126
Keys in tcr_data.files: ['CASSWRDGATGELFF', 'CASSPYWGEAGYTF', 'CASSILAGSNTEAFF', 'CSARDPGQGAGELFF', 'CASSYWGPQDTQYF']
First 5 TCR keys from test_data: ['CASSPSQGMNTEAFF', 'CASSSTRDSTDTQYF', 'CASSRGWGDNEQFF', 'CASRPWLWREGLNEQFF', 'CASSLSWGDTEAFF']
len TCR keys: 54126


In [7]:
# === Output directories for the padded test batches ===
# Each directory is created right after its path is defined; existing ones are reused.
test_tcr_padded_path = '../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches'
os.makedirs(test_tcr_padded_path, exist_ok=True)

test_epitope_padded_path = '../../../data/embeddings/beta/allele/dimension_1024/test_epitope_padded_batches'
os.makedirs(test_epitope_padded_path, exist_ok=True)

# === Pad the test embeddings and write them to disk (TCRs first, then epitopes) ===
save_padded_embeddings_in_batches(
    embeddings_dict=tcr_test_dict,
    save_dir=test_tcr_padded_path,
    batch_size=5000,
)
save_padded_embeddings_in_batches(
    embeddings_dict=epitope_test_dict,
    save_dir=test_epitope_padded_path,
    batch_size=5000,
)

# 2m


✅ Saved batch 1/5 to ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_0.npz
✅ Saved batch 2/5 to ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_1.npz
✅ Saved batch 3/5 to ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_2.npz
✅ Saved batch 4/5 to ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_3.npz
✅ Saved batch 5/5 to ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_4.npz
✅ All batches saved successfully!
✅ Saved batch 1/1 to ../../../data/embeddings/beta/allele/dimension_1024/test_epitope_padded_batches/batch_0.npz
✅ All batches saved successfully!


#### Create HDF5 files

In [8]:
def get_num_batches(directory, prefix="batch_", suffix=".npz"):
    """Return how many entries of ``directory`` start with ``prefix`` and end with ``suffix``."""
    return sum(
        1
        for name in os.listdir(directory)
        if name.startswith(prefix) and name.endswith(suffix)
    )

# === Merge the TCR and epitope batches of the test split ===
# The batch paths are rebuilt as batch_0 ... batch_{n-1} from the number of matching files.

# TCR
num_batches = get_num_batches(test_tcr_padded_path)
print(f"Number of batches in {test_tcr_padded_path}: {num_batches}")
test_tcr_batches = [
    os.path.join(test_tcr_padded_path, f"batch_{i}.npz")
    for i in range(num_batches)
]

# Epitope
num_batches = get_num_batches(test_epitope_padded_path)
print(f"Number of batches in {test_epitope_padded_path}: {num_batches}")
test_epitope_batches = [
    os.path.join(test_epitope_padded_path, f"batch_{i}.npz")
    for i in range(num_batches)
]

# One HDF5 file per modality.
combine_selected_batches_to_hdf5(
    test_tcr_batches,
    '../../../data/embeddings/beta/allele/dimension_1024/padded_test_tcr_embeddings_final.h5',
)
combine_selected_batches_to_hdf5(
    test_epitope_batches,
    '../../../data/embeddings/beta/allele/dimension_1024/padded_test_epitope_embeddings_final.h5',
)

Number of batches in ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches: 5
Number of batches in ../../../data/embeddings/beta/allele/dimension_1024/test_epitope_padded_batches: 1
🔄 Batch 1/5 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_0.npz
🔄 Batch 2/5 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_1.npz
🔄 Batch 3/5 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_2.npz
🔄 Batch 4/5 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_3.npz
🔄 Batch 5/5 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/test_tcr_padded_batches/batch_4.npz
✅ Finale gepaddete Embeddings gespeichert unter: ../../../data/embeddings/beta/allele/dimension_1024/padded_test_tcr_embeddings_final.h5
🔄 Batch 1/1 verarbeitet: ../../../data/embeddings/beta/allele/dimension_1024/test_epitope_padd