In [23]:

! pip install pyteomics



In [24]:
from pyteomics import mass as pyteo_mass 
from collections import defaultdict

In [25]:
# Function to compute the monoisotopic mass of a peptide
def compute_peptide_mass(sequence):
    return pyteo_mass.calculate_mass(sequence=sequence)  # Use the renamed module

# Function to digest the protein into peptides using a cleavage rule (e.g., trypsin rule)
def digest_protein(protein_sequence, cleavage_rule):
    peptides = []
    start = 0
    for i in range(1, len(protein_sequence)):
        if protein_sequence[i] in cleavage_rule and protein_sequence[i-1] not in 'P':
            peptides.append(protein_sequence[start:i])
            start = i
    peptides.append(protein_sequence[start:])
    return peptides

# Function to map peptides to their masses
def map_peptides_to_masses(peptides):
    mass_to_peptides = defaultdict(list)
    for peptide in peptides:
        peptide_mass = compute_peptide_mass(peptide)
        mass_to_peptides[peptide_mass].append(peptide)
    return mass_to_peptides

# Function to find isobaric peptides (peptides with the same mass but different sequences)
def find_isobaric_peptides(mass_to_peptides):
    isobaric_peptides = {}
    for mass, peptides in mass_to_peptides.items():
        if len(peptides) > 1:  # If there are multiple peptides with the same mass
            unique_peptides = set(peptides)  # Remove duplicates
            if len(unique_peptides) > 1:  # If there are peptides with different sequences but the same mass
                isobaric_peptides[mass] = list(unique_peptides)
    return isobaric_peptides
# Function to read the protein sequence from a FASTA file
def read_protein_from_fasta(fasta_file):
    with open(fasta_file, 'r') as file:
        lines = file.readlines()
        protein_sequence = ''.join(lines[1:]).replace('\n', '')  # Combine lines after the first one (ignoring the header)
    return protein_sequence


In [30]:
# Main part of the code
if __name__ == "__main__":
    fasta_file = fasta_file = "C:/Users/REEM/Desktop/DIA.txt"  # Path to the FASTA file
 # Path to the FASTA file
    protein_sequence = read_protein_from_fasta(fasta_file)
    print(f"Loaded Protein Sequence:\n{protein_sequence}\n")

    # Define the cleavage rule for trypsin (cut after K and R, unless followed by P)
    trypsin_rule = ['K', 'R']

    # Digest the protein sequence into peptides
    digested_peptides = digest_protein(protein_sequence, trypsin_rule)

    # Map peptides to their masses
    peptide_mass_map = map_peptides_to_masses(digested_peptides)
    print("Peptide Masses and Sequences:")
    for mass, peptides in peptide_mass_map.items():
        print(f"Mass: {mass:.4f} Da => Peptides: {', '.join(peptides)}")

    # Find isobaric peptides (same mass, different sequences)
    isobaric_peptides = find_isobaric_peptides(peptide_mass_map)
    if isobaric_peptides:
        print("\nIsobaric Peptides Found:")
        for mass, seqs in isobaric_peptides.items():
            print(f"Mass: {mass:.4f} Da => Sequences: {', '.join(seqs)}")
    else:
        print("\nNo isobaric peptides detected.")

Loaded Protein Sequence:
MTLNGGGSGAGGSRGGGQERERRRGSTPWGPAPPLHRRSMPVDERDLQAALTPGALTAAAAGTGTQGPRLDWPEDSEDSLSSGGSDSDESVYKVLLLGAPGVGKSALARIFGGVEDGPEAEAAGHTYDRSIVVDGEEASLMVYDIWEQDGGRWLPGHCMAMGDAYVIVYSVTDKGSFEKASELRVQLRRARQTDDVPIILVGNKSDLVRSREVSVDEGRACAVVFDCKFIETSAALHHNVQALFEGVVRQIRLRRDSKEANARRQAGTRRRESLGKKAKRFLGRIVARNSRKMAFRAKSKSCHDLSVL

Peptide Masses and Sequences:
Mass: 1064.4557 Da => Peptides: MTLNGGGSGAGGS
Mass: 602.2772 Da => Peptides: RGGGQE
Mass: 303.1543 Da => Peptides: RE
Mass: 174.1117 Da => Peptides: R, R, R, R, R, R, R, R, R
Mass: 1371.7048 Da => Peptides: RGSTPWGPAPPLH
Mass: 832.3749 Da => Peptides: RSMPVDE
Mass: 4821.2125 Da => Peptides: RDLQAALTPGALTAAAAGTGTQGPRLDWPEDSEDSLSSGGSDSDESVY
Mass: 1022.6488 Da => Peptides: KVLLLGAPGVG
Mass: 488.2958 Da => Peptides: KSALA
Mass: 2089.9341 Da => Peptides: RIFGGVEDGPEAEAAGHTYD
Mass: 2567.1850 Da => Peptides: RSIVVDGEEASLMVYDIWEQDGG
Mass: 2483.1436 Da => Peptides: RWLPGHCMAMGDAYVIVYSVTD
Mass: 566.2700 Da => Peptides: KGSFE
Mass: 546.30