In [1]:
import pandas as pd

CARBAMIDOMETHYL = cm = 57.021464
OXIDATION = ox = 15.994915
MOD_MAPPING = mm = {"C": str(cm), "M": str(ox)}

In [2]:
def get_athena_peptidoform(amanda_row: pd.Series) -> str:
    seq = amanda_row["Annotated Sequence"]
    mods = amanda_row["Modifications"]
    if pd.isna(mods):
        return seq + "[]"
    mods = [str(mod).split("(")[0].strip() for mod in mods.split(";")]
    return f"{seq.upper()}[{"+".join([str(int(mod[1:]) - 1) + ":" + mm[mod[0]] for mod in mods])}]"

In [3]:
ground_truth = pd.read_excel("amanda/PSMs_rep2_deconv.xlsx")
ground_truth["Athena Sequence"] = ground_truth.apply(lambda row: get_athena_peptidoform(row), axis = 1)
athena_candidates = pd.read_csv("results/rep2_candidates.csv", sep = ";")
athena_psms = pd.read_csv("results/rep2_psms.csv", sep = ";")

In [4]:
len(ground_truth["First Scan"].unique().tolist())

14162

In [5]:
ground_truth_index = dict()
for i, row in ground_truth.iterrows():
    scan = int(row["First Scan"])
    psm = str(row["Athena Sequence"])
    if scan in ground_truth_index:
        ground_truth_index[scan].append(psm)
    else:
        ground_truth_index[scan] = [psm]

In [6]:
len(ground_truth_index)

14162

In [7]:
athena_psms[athena_psms["Label"]=="true-target"].shape[0]

11602

In [8]:
nr_psms = 0
nr_not_in_index = 0
nr_fp = 0
nr_tp = 0
for i, row in athena_psms[athena_psms["Label"]=="true-target"].iterrows():
    scan = int(row["ScanNumber"])
    psm = str(row["Peptidoform"])
    if scan in ground_truth_index:
        if psm in ground_truth_index[scan]:
            nr_tp += 1
        else:
            nr_fp += 1
    else:
        nr_not_in_index += 1
    nr_psms += 1

In [9]:
print(f"Total checked psms: {nr_psms}")
print(f"Total scans not in ground truth: {nr_not_in_index}")
print(f"Total FP: {nr_fp}")
print(f"Total TP: {nr_tp}")
print(f"FDR strict: {(nr_not_in_index + nr_fp) / nr_tp}")
print(f"FDR relaxed: {nr_fp / nr_tp}")

Total checked psms: 11602
Total scans not in ground truth: 330
Total FP: 141
Total TP: 11131
FDR strict: 0.04231425747911239
FDR relaxed: 0.012667325487377593
