In [None]:
import pandas as pd
import os

In [None]:
# Maps a display label to the input file that should be analysed under it.
# The *_read_path variables are not assigned in the cells shown here; they are
# expected to already exist in the kernel when this cell runs.
file_paths = dict(
    # "*_beta" entries
    VDJdb_beta=vdjdb_beta_read_path,
    McPAS_beta=mcpastcr_beta_read_path,
    IEDB_beta=iedb_beta_read_path,
    pMTnet_beta=pmtnet_beta_read_path,
    # "*_paired" entries
    VDJdb_paired=vdjdb_paired_read_path,
    McPAS_paired=mcpastcr_paired_read_path,
    IEDB_paired=iedb_paired_read_path,
)

In [None]:
def _count_distinct_and_unique(values):
    """Return ``(distinct, unique)`` counts for a pandas Series.

    ``distinct`` is the number of different non-missing values and ``unique``
    is the number of non-missing values that occur exactly once.
    """
    # value_counts() drops missing values by default, so NaN entries are
    # excluded from both figures.
    occurrences = values.value_counts()
    return len(occurrences), int((occurrences == 1).sum())


def analyze_file(file_path, file_name) -> None:
    """Print distinct/unique counts of TRB_CDR3 and Epitope for one file.

    The file is read with ``pd.read_csv(..., sep=None, engine="python")`` so
    the delimiter is detected automatically. For each of the two columns the
    function reports:

    * "Distinct": how many different non-missing values the column holds.
    * "Unique": how many non-missing values appear in exactly one row.

    Parameters
    ----------
    file_path
        Location of the delimited text file, passed straight to
        ``pd.read_csv``.
    file_name
        Label used in the printed report and in any printed message.

    Returns
    -------
    None
        Results are printed, not returned. If ``TRB_CDR3`` or ``Epitope`` is
        missing, a message naming the missing column(s) is printed instead.
        Any exception raised while reading or analysing the file is caught
        and reported as a printed message rather than propagated.
    """
    try:
        # sep=None lets the python parsing engine detect the delimiter.
        df = pd.read_csv(file_path, sep=None, engine="python")

        required_columns = {"TRB_CDR3", "Epitope"}
        missing_columns = required_columns - set(df.columns)
        if missing_columns:
            print(f"Fehlende Spalten in {file_name}: {missing_columns}")
            return

        distinct_tcrs, unique_tcrs = _count_distinct_and_unique(df["TRB_CDR3"])
        distinct_epitopes, unique_epitopes = _count_distinct_and_unique(df["Epitope"])

        print(f"**{file_name}**")
        print(f"  - Distinct TCRs: {distinct_tcrs}")
        print(f"  - Unique TCRs: {unique_tcrs}")
        print(f"  - Distinct Epitopes: {distinct_epitopes}")
        print(f"  - Unique Epitopes: {unique_epitopes}\n")

    except Exception as e:
        # Best-effort by design: one unreadable file is reported and must not
        # abort the analysis of the remaining files.
        print(f"Fehler beim Verarbeiten der Datei {file_name}: {e}")

# Analyse every configured file; entries whose path does not exist are only
# reported and then skipped.
for file_name, path in file_paths.items():
    if not os.path.exists(path):
        print(f"Datei nicht gefunden: {file_name}")
        continue
    analyze_file(path, file_name)