In [1]:
! pip install pyteomics

Defaulting to user installation because normal site-packages is not writeable


In [13]:
from pyteomics import mass as pyteo_mass 
from collections import defaultdict


In [15]:

# Function to compute the monoisotopic mass of a peptide
def compute_peptide_mass(sequence):
    return pyteo_mass.calculate_mass(sequence=sequence)  # Use the renamed module

# Function to digest the protein into peptides using a cleavage rule (e.g., trypsin rule)
def digest_protein(protein_sequence, cleavage_rule):
    peptides = []
    start = 0
    for i in range(1, len(protein_sequence)):
        if protein_sequence[i] in cleavage_rule and protein_sequence[i-1] not in 'P':
            peptides.append(protein_sequence[start:i])
            start = i
    peptides.append(protein_sequence[start:])
    return peptides

# Function to map peptides to their masses
def map_peptides_to_masses(peptides):
    mass_to_peptides = defaultdict(list)
    for peptide in peptides:
        peptide_mass = compute_peptide_mass(peptide)
        mass_to_peptides[peptide_mass].append(peptide)
    return mass_to_peptides

# Function to find isobaric peptides (peptides with the same mass but different sequences)
def find_isobaric_peptides(mass_to_peptides):
    isobaric_peptides = {}
    for mass, peptides in mass_to_peptides.items():
        if len(peptides) > 1:  # If there are multiple peptides with the same mass
            unique_peptides = set(peptides)  # Remove duplicates
            if len(unique_peptides) > 1:  # If there are peptides with different sequences but the same mass
                isobaric_peptides[mass] = list(unique_peptides)
    return isobaric_peptides

# Function to read the protein sequence from a FASTA file
def read_protein_from_fasta(fasta_file):
    with open(fasta_file, 'r') as file:
        lines = file.readlines()
        protein_sequence = ''.join(lines[1:]).replace('\n', '')  # Combine lines after the first one (ignoring the header)
    return protein_sequence



In [16]:
# Main part of the code
if __name__ == "__main__":
    fasta_file = "Hemoglobin.fasta"  # Path to the FASTA file
    protein_sequence = read_protein_from_fasta(fasta_file)
    print(f"Loaded Protein Sequence:\n{protein_sequence}\n")

    # Define the cleavage rule for trypsin (cut after K and R, unless followed by P)
    trypsin_rule = ['K', 'R']

    # Digest the protein sequence into peptides
    digested_peptides = digest_protein(protein_sequence, trypsin_rule)

    # Map peptides to their masses
    peptide_mass_map = map_peptides_to_masses(digested_peptides)
    print("Peptide Masses and Sequences:")
    for mass, peptides in peptide_mass_map.items():
        print(f"Mass: {mass:.4f} Da => Peptides: {', '.join(peptides)}")

    # Find isobaric peptides (same mass, different sequences)
    isobaric_peptides = find_isobaric_peptides(peptide_mass_map)
    if isobaric_peptides:
        print("\nIsobaric Peptides Found:")
        for mass, seqs in isobaric_peptides.items():
            print(f"Mass: {mass:.4f} Da => Sequences: {', '.join(seqs)}")
    else:
        print("\nNo isobaric peptides detected.")


Loaded Protein Sequence:
MDAIQDKIEEIESDFLGNLKSSTLQEQLEAFTLGIPTEKMSIPPEVKFNKPFVFNVTTDMTKTFTAILLQKRSGFQELLSEYLKENGITGMAMKYIHIPSEKGTEAAFVRQFNEKEDGRVSAHVLHSFRRHFNDLRMQTLGIILFPADEVAHTVLVANLPKFEKTERLYAQKIVATSAILRVYFIYMIYFKGNWYSP

Peptide Masses and Sequences:
Mass: 691.2847 Da => Peptides: MDAIQD
Mass: 1505.7613 Da => Peptides: KIEEIESDFLGNL
Mass: 2091.0736 Da => Peptides: KSSTLQEQLEAFTLGIPTE
Mass: 899.4786 Da => Peptides: KMSIPPEV
Mass: 407.2169 Da => Peptides: KFN
Mass: 1398.6853 Da => Peptides: KPFVFNVTTDMT
Mass: 1033.6172 Da => Peptides: KTFTAILLQ
Mass: 146.1055 Da => Peptides: K
Mass: 1440.7249 Da => Peptides: RSGFQELLSEYL
Mass: 1050.4838 Da => Peptides: KENGITGMAM
Mass: 985.5233 Da => Peptides: KYIHIPSE
Mass: 821.4283 Da => Peptides: KGTEAAFV
Mass: 692.3242 Da => Peptides: RQFNE
Mass: 447.1965 Da => Peptides: KEDG
Mass: 1151.6200 Da => Peptides: RVSAHVLHSF
Mass: 174.1117 Da => Peptides: R
Mass: 800.3929 Da => Peptides: RHFNDL
Mass: 3121.6998 Da => Peptides: RMQTLGIILFPADEVAHTVLVANLPKFE