In [3]:
import pandas as pd

# Peak list as provided: two parallel lists, where the n-th m/z value belongs
# to the n-th intensity value (45 peaks in total).
data = {
    "m/z": [
        130.09, 147.11, 183.66, 204.13, 221.10, 239.11, 249.10, 267.11, 291.16,
        296.48, 304.88, 322.19, 362.18, 380.19, 402.20, 419.23, 439.16, 463.23,
        481.24, 520.28, 592.27, 610.28, 649.31, 695.37, 723.37, 748.39, 810.41,
        863.41, 924.45, 926.45, 934.46, 962.47, 1023.51, 1076.52, 1138.55, 1163.55,
        1237.60, 1276.64, 1298.62, 1405.68, 1450.74, 1467.70, 1506.72, 1508.72, 1619.80,
    ],
    "Intensity": [
        4977.0, 5816.1, 4660.0, 9940.3, 5604.4, 31797.1, 21872.5, 128489.5, 34083.3,
        5283.5, 10479.2, 8743.9, 46755.3, 39560.5, 5182.9, 7716.1, 5017.9, 11143.2,
        10153.8, 45374.6, 9498.7, 53161.8, 42469.9, 7337.5, 48555.8, 29821.2, 10799.5,
        38602.8, 7684.1, 21049.9, 39395.7, 11133.6, 19690.2, 25272.7, 27930.7, 74982.1,
        10679.8, 37540.7, 5738.4, 21377.9, 5497.0, 9944.6, 36270.1, 7378.1, 40099.0,
    ],
}

# Tabulate the peaks: one row per peak, columns "m/z" and "Intensity".
df = pd.DataFrame(data)

# Keep only the peaks whose intensity is strictly greater than 10,000
# (a peak at exactly 10,000 would be dropped). The original row labels are
# retained, so the filtered frame's index is not contiguous.
filtered_df = df.loc[df["Intensity"].gt(10000)]

# One-letter amino acid code -> monoisotopic mass used for matching.
# Insertion order is significant: it determines the order in which matching
# letters are reported (e.g. "L" before "I", which share the same mass).
# NOTE(review): "C" is listed as 160.06, which is well above the other small
# residues; presumably a modified cysteine -- confirm against the provided table.
amino_acids = {
    "G": 57.02, "A": 71.04, "S": 87.03, "P": 97.05,
    "V": 99.07, "T": 101.05, "L": 113.08, "I": 113.08,
    "N": 114.04, "D": 115.03, "Q": 128.06, "K": 128.09,
    "E": 129.04, "M": 131.04, "H": 137.06, "F": 147.07,
    "R": 156.10, "C": 160.06, "Y": 163.06, "W": 186.08,
}

# Maximum allowed absolute deviation between an observed m/z difference and
# an amino acid mass for the two to count as a match (value as given).
tolerance = 0.015

# Look up which amino acids could explain a given mass difference
def find_matching_amino_acids(mass_diff, amino_acids, tolerance):
    """Return the amino acid codes whose mass is within tolerance of mass_diff.

    Args:
        mass_diff: Mass difference to explain.
        amino_acids: Mapping of amino acid code to its mass.
        tolerance: Largest absolute deviation (inclusive) still accepted
            as a match.

    Returns:
        A list of the matching codes in the mapping's iteration order;
        empty when nothing matches.
    """
    return [
        code
        for code, residue_mass in amino_acids.items()
        if abs(mass_diff - residue_mass) <= tolerance
    ]

# Calculate mass differences and find matches.
# For every retained peak, compare it with the next few peaks that follow it
# in the filtered table and record each pair whose m/z difference matches at
# least one amino acid mass within the tolerance.

# Pull the m/z column out once as a plain list; this avoids building a row
# object via .iloc for every single pair comparison.
mz_values = filtered_df["m/z"].tolist()

# Checking up to four neighboring peaks after the current one
max_neighbors = 4

results = []
for i, current_mz in enumerate(mz_values):
    # Slicing clips at the end of the list, so the last few peaks simply
    # get a shorter window and no explicit bounds check is needed.
    for neighbor_mz in mz_values[i + 1 : i + 1 + max_neighbors]:
        mass_diff = abs(current_mz - neighbor_mz)
        matching_aas = find_matching_amino_acids(mass_diff, amino_acids, tolerance)
        if matching_aas:
            results.append((current_mz, neighbor_mz, mass_diff, matching_aas))

# One row per matching peak pair.
results_df = pd.DataFrame(results, columns=["Peak m/z", "Neighbor m/z", "Mass Difference", "Matching Amino Acids"])

# Bare expression so the notebook renders the table as the cell output.
results_df


Unnamed: 0,Peak m/z,Neighbor m/z,Mass Difference,Matching Amino Acids
0,249.1,362.18,113.08,"[L, I]"
1,267.11,380.19,113.08,"[L, I]"
2,362.18,463.23,101.05,[T]
3,380.19,481.24,101.05,[T]
4,463.23,649.31,186.08,[W]
5,481.24,610.28,129.04,[E]
6,520.28,649.31,129.03,[E]
7,610.28,723.37,113.09,"[L, I]"
8,649.31,748.39,99.08,[V]
9,723.37,810.41,87.04,[S]
