In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import structuremap.utils
structuremap.utils.set_logger()
from structuremap.processing import download_alphafold_cif, download_alphafold_pae, format_alphafold_data, annotate_accessibility, get_smooth_score, annotate_proteins_with_idr_pattern, get_extended_flexible_pattern, get_proximity_pvals, perform_enrichment_analysis, perform_enrichment_analysis_per_protein, evaluate_ptm_colocalization, extract_motifs_in_proteome
from structuremap.plotting import plot_enrichment, plot_ptm_colocalization
from HelperFunctions import get_protein_seq
from HelperFunctions import find_the_index_of_the_modification
import re

In [2]:
# Read sheet 'Sheet2' of the workbook into a DataFrame. The path is relative,
# so the file must be in the notebook's working directory.
raw_data = pd.read_excel("RvsS_DataSet.xlsx", sheet_name='Sheet2')


In [3]:
raw_data

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,...,Protein,Protein ID,Entry Name,Gene,Protein Description,p-value,neglogp,Log2HL avg,Unnamed: 24,Protein ID.1
0,AADTIGYPVMIR,AADTIGYPVMIR,AADTIGYPVM[649.3660]IR,AADTIGYPVM[655.3735]IR,,,3.269016,,,,...,sp|Q8C196|CPSM_MOUSE,Q8C196,CPSM_MOUSE,Cps1,"Carbamoyl-phosphate synthase [ammonia], mitoch...",4.058191e-03,2.391668,3.289988,,Q8C196
1,IAMQTLDMGR,IAMQTLDMGR,IAM[649.3660]QTLDMGR,IAM[655.3735]QTLDMGR,,2.783695,3.114945,2.697822,,,...,sp|Q07417|ACADS_MOUSE,Q07417,ACADS_MOUSE,Acads,"Short-chain specific acyl-CoA dehydrogenase, m...",9.647931e-06,5.015566,2.789195,,Q07417
2,FVGAVDPIMEK,FVGAVDPIMEK,FVGAVDPIM[649.3660]EK,FVGAVDPIM[655.3735]EK,,,,,,2.383482,...,sp|Q91YI0|ARLY_MOUSE,Q91YI0,ARLY_MOUSE,Asl,Argininosuccinate lyase,4.283587e-02,1.368192,2.555706,,Q91YI0
3,QAQYLGMPINGPFKPDHYRY,QAQYLGMPINGPFKPDHYRY,QAQYLGM[649.3660]PINGPFKPDHYRY,QAQYLGM[655.3735]PINGPFKPDHYRY,2.394458,2.380664,2.682897,,2.435014,2.412394,...,sp|P50247|SAHH_MOUSE,P50247,SAHH_MOUSE,Ahcy,Adenosylhomocysteinase,8.561078e-10,9.067472,2.538998,,P50247
4,FADVIPMNLPHR,FADVIPMNLPHR,FADVIPM[649.3660]NLPHR,FADVIPM[655.3735]NLPHR,,,2.580647,,2.446614,2.450718,...,sp|P33267|CP2F2_MOUSE,P33267,CP2F2_MOUSE,Cyp2f2,Cytochrome P450 2F2,2.032232e-04,3.692027,2.389057,,P33267
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,AHMVTLDYTVQVPGTGR,AHMVTLDYTVQVPGTGR,AHM[649.3660]VTLDYTVQVPGTGR,AHM[655.3735]VTLDYTVQVPGTGR,-1.822067,-1.687501,-1.706146,-1.647231,-1.793966,-1.407199,...,sp|Q9QXF8|GNMT_MOUSE,Q9QXF8,GNMT_MOUSE,Gnmt,Glycine N-methyltransferase,1.880219e-08,7.725792,-1.857135,,Q9QXF8
199,KEQESEVDMK,KEQESEVDMK,KEQESEVDM[649.3660]K,KEQESEVDM[655.3735]K,-1.759694,,-1.821773,-1.960428,-1.781592,-1.988603,...,sp|Q8K3J1|NDUS8_MOUSE,Q8K3J1,NDUS8_MOUSE,Ndufs8,NADH dehydrogenase [ubiquinone] iron-sulfur pr...,7.034064e-14,13.152794,-1.871741,,Q8K3J1
200,RGVMLAVDAVIAELK,RGVMLAVDAVIAELK,RGVM[649.3660]LAVDAVIAELK,RGVM[655.3735]LAVDAVIAELK,-1.812349,-1.980371,-2.307386,-2.492858,-1.405837,-2.094407,...,sp|P63038|CH60_MOUSE,P63038,CH60_MOUSE,Hspd1,"60 kDa heat shock protein, mitochondrial",7.289026e-11,10.137330,-1.950460,,P63038
201,MQLLEIITTDK,MQLLEIITTDK,M[649.3660]QLLEIITTDK,M[655.3735]QLLEIITTDK,-2.286954,-2.700322,-1.941018,-1.860306,-2.069488,,...,sp|Q8BMS1|ECHA_MOUSE,Q8BMS1,ECHA_MOUSE,Hadha,"Trifunctional enzyme subunit alpha, mitochondrial",5.582467e-09,8.253174,-2.270072,,Q8BMS1


# Obtaining Complete Protein Sequences
In the cells below we add a new column, 'Protein Sequence', to a copy of the raw_data data frame; it contains the complete sequence of the protein that each peptide belongs to.

In [4]:
# Build a new frame instead of mutating raw_data: get_protein_seq is called on
# every row's 'Protein ID' and the result is attached as 'Protein Sequence'.
raw_data_with_complete_protein_seq = raw_data.assign(
    **{'Protein Sequence': raw_data['Protein ID'].apply(get_protein_seq)}
)

In [5]:
raw_data_with_complete_protein_seq

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,...,Protein ID,Entry Name,Gene,Protein Description,p-value,neglogp,Log2HL avg,Unnamed: 24,Protein ID.1,Protein Sequence
0,AADTIGYPVMIR,AADTIGYPVMIR,AADTIGYPVM[649.3660]IR,AADTIGYPVM[655.3735]IR,,,3.269016,,,,...,Q8C196,CPSM_MOUSE,Cps1,"Carbamoyl-phosphate synthase [ammonia], mitoch...",4.058191e-03,2.391668,3.289988,,Q8C196,MTRILTACKVVKTLKSGFGFANVTTKRQWDFSRPGIRLLSVKAKTA...
1,IAMQTLDMGR,IAMQTLDMGR,IAM[649.3660]QTLDMGR,IAM[655.3735]QTLDMGR,,2.783695,3.114945,2.697822,,,...,Q07417,ACADS_MOUSE,Acads,"Short-chain specific acyl-CoA dehydrogenase, m...",9.647931e-06,5.015566,2.789195,,Q07417,MAAALLARARGPLRRALGVRDWRRLHTVYQSVELPETHQMLRQTCR...
2,FVGAVDPIMEK,FVGAVDPIMEK,FVGAVDPIM[649.3660]EK,FVGAVDPIM[655.3735]EK,,,,,,2.383482,...,Q91YI0,ARLY_MOUSE,Asl,Argininosuccinate lyase,4.283587e-02,1.368192,2.555706,,Q91YI0,MASESGKLWGGRFVGAVDPIMEKFNSSISYDRHLWNVDVQGSKAYS...
3,QAQYLGMPINGPFKPDHYRY,QAQYLGMPINGPFKPDHYRY,QAQYLGM[649.3660]PINGPFKPDHYRY,QAQYLGM[655.3735]PINGPFKPDHYRY,2.394458,2.380664,2.682897,,2.435014,2.412394,...,P50247,SAHH_MOUSE,Ahcy,Adenosylhomocysteinase,8.561078e-10,9.067472,2.538998,,P50247,MSDKLPYKVADIGLAAWGRKALDIAENEMPGLMRMREMYSASKPLK...
4,FADVIPMNLPHR,FADVIPMNLPHR,FADVIPM[649.3660]NLPHR,FADVIPM[655.3735]NLPHR,,,2.580647,,2.446614,2.450718,...,P33267,CP2F2_MOUSE,Cyp2f2,Cytochrome P450 2F2,2.032232e-04,3.692027,2.389057,,P33267,MDGVSTAILLLLLAVISLSLTFSSRGKGQLPPGPKPLPILGNLLQL...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,AHMVTLDYTVQVPGTGR,AHMVTLDYTVQVPGTGR,AHM[649.3660]VTLDYTVQVPGTGR,AHM[655.3735]VTLDYTVQVPGTGR,-1.822067,-1.687501,-1.706146,-1.647231,-1.793966,-1.407199,...,Q9QXF8,GNMT_MOUSE,Gnmt,Glycine N-methyltransferase,1.880219e-08,7.725792,-1.857135,,Q9QXF8,MVDSVYRTRSLGVAAEGLPDQYADGEAARVWQLYIGDTRSRTAEYK...
199,KEQESEVDMK,KEQESEVDMK,KEQESEVDM[649.3660]K,KEQESEVDM[655.3735]K,-1.759694,,-1.821773,-1.960428,-1.781592,-1.988603,...,Q8K3J1,NDUS8_MOUSE,Ndufs8,NADH dehydrogenase [ubiquinone] iron-sulfur pr...,7.034064e-14,13.152794,-1.871741,,Q8K3J1,MYRLSSSMLPRALAQAMRTGHLNGQSLHSSAVAATYKYVNKKEQES...
200,RGVMLAVDAVIAELK,RGVMLAVDAVIAELK,RGVM[649.3660]LAVDAVIAELK,RGVM[655.3735]LAVDAVIAELK,-1.812349,-1.980371,-2.307386,-2.492858,-1.405837,-2.094407,...,P63038,CH60_MOUSE,Hspd1,"60 kDa heat shock protein, mitochondrial",7.289026e-11,10.137330,-1.950460,,P63038,MLRLPTVLRQMRPVSRALAPHLTRAYAKDVKFGADARALMLQGVDL...
201,MQLLEIITTDK,MQLLEIITTDK,M[649.3660]QLLEIITTDK,M[655.3735]QLLEIITTDK,-2.286954,-2.700322,-1.941018,-1.860306,-2.069488,,...,Q8BMS1,ECHA_MOUSE,Hadha,"Trifunctional enzyme subunit alpha, mitochondrial",5.582467e-09,8.253174,-2.270072,,Q8BMS1,MVASRAIGSLSRFSAFRILRSRGCICRSFTTSSALLTRTHINYGVK...


In [47]:
# Save the table that includes the 'Protein Sequence' column. to_csv writes the
# row index as an unnamed first column by default, which shows up as
# 'Unnamed: 0' if the file is read back without index_col.
raw_data_with_complete_protein_seq.to_csv('with_complete_protein_seq.csv')

# Counting how many modifications appear in each peptide

In [14]:
def count_number_of_modifications(modified_peptide_seq, amino_acid):
    """Count the bracketed tags that directly follow `amino_acid` in a peptide.

    Parameters
    ----------
    modified_peptide_seq : str
        Annotated peptide, e.g. ``"IAM[649.3660]QTLDMGR"``.
    amino_acid : str
        Residue letter whose tags are counted. It is interpolated into the
        regular expression as-is (not escaped).

    Returns
    -------
    int
        Number of ``<amino_acid>[...]`` occurrences, or ``-1`` (not ``0``)
        when there are none.
    """
    tag_pattern = fr'{amino_acid}\[.*?\]'
    hit_count = len(re.findall(tag_pattern, modified_peptide_seq))
    # -1 is the "no tagged residue" sentinel that callers see.
    return hit_count if hit_count else -1
        

In [15]:
# Work on a copy, then add one count column per label channel. Each value is the
# number of bracketed tags attached to an M in that row's peptide string
# (count_number_of_modifications yields -1 when there is none).
with_modification_counts = raw_data_with_complete_protein_seq.copy()
with_modification_counts['Number of Light Modifications'] = (
    with_modification_counts['Light Modified Peptide']
    .map(lambda seq: count_number_of_modifications(seq, 'M'))
)
with_modification_counts['Number of Heavy Modifications'] = (
    with_modification_counts['Heavy Modified Peptide']
    .map(lambda seq: count_number_of_modifications(seq, 'M'))
)

In [16]:
with_modification_counts

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,...,Gene,Protein Description,p-value,neglogp,Log2HL avg,Unnamed: 24,Protein ID.1,Protein Sequence,Number of Light Modifications,Number of Heavy Modifications
0,AADTIGYPVMIR,AADTIGYPVMIR,AADTIGYPVM[649.3660]IR,AADTIGYPVM[655.3735]IR,,,3.269016,,,,...,Cps1,"Carbamoyl-phosphate synthase [ammonia], mitoch...",4.058191e-03,2.391668,3.289988,,Q8C196,MTRILTACKVVKTLKSGFGFANVTTKRQWDFSRPGIRLLSVKAKTA...,1,1
1,IAMQTLDMGR,IAMQTLDMGR,IAM[649.3660]QTLDMGR,IAM[655.3735]QTLDMGR,,2.783695,3.114945,2.697822,,,...,Acads,"Short-chain specific acyl-CoA dehydrogenase, m...",9.647931e-06,5.015566,2.789195,,Q07417,MAAALLARARGPLRRALGVRDWRRLHTVYQSVELPETHQMLRQTCR...,1,1
2,FVGAVDPIMEK,FVGAVDPIMEK,FVGAVDPIM[649.3660]EK,FVGAVDPIM[655.3735]EK,,,,,,2.383482,...,Asl,Argininosuccinate lyase,4.283587e-02,1.368192,2.555706,,Q91YI0,MASESGKLWGGRFVGAVDPIMEKFNSSISYDRHLWNVDVQGSKAYS...,1,1
3,QAQYLGMPINGPFKPDHYRY,QAQYLGMPINGPFKPDHYRY,QAQYLGM[649.3660]PINGPFKPDHYRY,QAQYLGM[655.3735]PINGPFKPDHYRY,2.394458,2.380664,2.682897,,2.435014,2.412394,...,Ahcy,Adenosylhomocysteinase,8.561078e-10,9.067472,2.538998,,P50247,MSDKLPYKVADIGLAAWGRKALDIAENEMPGLMRMREMYSASKPLK...,1,1
4,FADVIPMNLPHR,FADVIPMNLPHR,FADVIPM[649.3660]NLPHR,FADVIPM[655.3735]NLPHR,,,2.580647,,2.446614,2.450718,...,Cyp2f2,Cytochrome P450 2F2,2.032232e-04,3.692027,2.389057,,P33267,MDGVSTAILLLLLAVISLSLTFSSRGKGQLPPGPKPLPILGNLLQL...,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,AHMVTLDYTVQVPGTGR,AHMVTLDYTVQVPGTGR,AHM[649.3660]VTLDYTVQVPGTGR,AHM[655.3735]VTLDYTVQVPGTGR,-1.822067,-1.687501,-1.706146,-1.647231,-1.793966,-1.407199,...,Gnmt,Glycine N-methyltransferase,1.880219e-08,7.725792,-1.857135,,Q9QXF8,MVDSVYRTRSLGVAAEGLPDQYADGEAARVWQLYIGDTRSRTAEYK...,1,1
199,KEQESEVDMK,KEQESEVDMK,KEQESEVDM[649.3660]K,KEQESEVDM[655.3735]K,-1.759694,,-1.821773,-1.960428,-1.781592,-1.988603,...,Ndufs8,NADH dehydrogenase [ubiquinone] iron-sulfur pr...,7.034064e-14,13.152794,-1.871741,,Q8K3J1,MYRLSSSMLPRALAQAMRTGHLNGQSLHSSAVAATYKYVNKKEQES...,1,1
200,RGVMLAVDAVIAELK,RGVMLAVDAVIAELK,RGVM[649.3660]LAVDAVIAELK,RGVM[655.3735]LAVDAVIAELK,-1.812349,-1.980371,-2.307386,-2.492858,-1.405837,-2.094407,...,Hspd1,"60 kDa heat shock protein, mitochondrial",7.289026e-11,10.137330,-1.950460,,P63038,MLRLPTVLRQMRPVSRALAPHLTRAYAKDVKFGADARALMLQGVDL...,1,1
201,MQLLEIITTDK,MQLLEIITTDK,M[649.3660]QLLEIITTDK,M[655.3735]QLLEIITTDK,-2.286954,-2.700322,-1.941018,-1.860306,-2.069488,,...,Hadha,"Trifunctional enzyme subunit alpha, mitochondrial",5.582467e-09,8.253174,-2.270072,,Q8BMS1,MVASRAIGSLSRFSAFRILRSRGCICRSFTTSSALLTRTHINYGVK...,1,1


In [17]:
with_modification_counts['Number of Heavy Modifications'].value_counts()


Number of Heavy Modifications
1    188
2     15
Name: count, dtype: int64

In [18]:
with_modification_counts['Number of Light Modifications'].value_counts()


Number of Light Modifications
1    188
2     15
Name: count, dtype: int64

In [51]:
with_modification_counts.iloc[0]['Protein Sequence']

'MTRILTACKVVKTLKSGFGFANVTTKRQWDFSRPGIRLLSVKAKTAHIVLEDGTKMKGYSFGHPSSVAGEVVFNTGLGGYPEALTDPAYKGQILTMANPIIGNGGAPDTTARDELGLNKYMESDGIKVAGLLVLNYSNDYNHWLATKSLGQWLQEEKVPAIYGVDTRMLTKIIRDKGTMLGKIEFEGQSVDFVDPNKQNLIAEVSTKDVKVFGKGNPTKVVAVDCGIKNNVIRLLVKRGAEVHLVPWNHDFTQMEYDGLLIAGGPGNPALAQPLIQNVKKILESDRKEPLFGISTGNIITGLAAGAKSYKMSMANRGQNQPVLNITNRQAFITAQNHGYALDNTLPAGWKPLFVNVNDQTNEGIMHESKPFFAVQFHPEVSPGPTDTEYLFDSFFSLIKKGKGTTITSVLPKPALVASRVEVSKVLILGSGGLSIGQAGEFDYSGSQAVKAMKEENVKTVLMNPNIASVQTNEVGLKQADAVYFLPITPQFVTEVIKAERPDGLILGMGGQTALNCGVELFKRGVLKEYGVKVLGTSVESIMATEDRQLFSDKLNEINEKIAPSFAVESMEDALKAADTIGYPVMIRSAYALGGLGSGICPNKETLIDLGTKAFAMTNQILVERSVTGWKEIEYEVVRDADDNCVTVCNMENVDAMGVHTGDSVVVAPAQTLSNAEFQMLRRTSVNVVRHLGIVGECNIQFALHPTSMEYCIIEVNARLSRSSALASKATGYPLAFIAAKIALGIPLPEIKNVVSGKTSACFEPSLDYMVTKIPRWDLDRFHGTSSRIGSSMKSVGEVMAIGRTFEESFQKALRMCHPSVDGFTPRLPMNKEWPANLDLKKELSEPSSTRIYAIAKALENNMSLDEIVRLTSIDKWFLYKMRDILNMDKTLKGLNSDSVTEETLRKAKEIGFSDKQISKCLGLTEAQTRELRLKKNIHPWVKQIDTLAAEYPSVTNYLYVTYNGQEHDIKFDEHGIMVLGCGPYHIGSSVEFDWCAVSS

# Finding the index at which modifications occur

In [52]:
def find_modifications(peptide, modification, aa, protein_sequence):
    """Locate, within the full protein, the peptide residue carrying a given tag.

    The annotated peptide is scanned left to right for the first ``[`` that
    (a) directly follows the residue ``aa`` and (b) whose bracket content starts
    with ``modification``. The peptide, stripped of its numeric bracket tags, is
    then searched for in ``protein_sequence``.

    Parameters
    ----------
    peptide : str
        Annotated peptide, e.g. ``"AADTIGYPVM[649.3660]IR"``.
    modification : str
        Text expected at the start of the tag, e.g. ``"649.3660"``.
    aa : str
        Single residue letter the tag must be attached to, e.g. ``"M"``.
    protein_sequence : str
        Full protein sequence to search in.

    Returns
    -------
    int or None
        * 0-based index in ``protein_sequence`` of the tagged residue when the
          stripped peptide occurs exactly once;
        * ``-1`` when the stripped peptide occurs more than once (ambiguous);
        * ``None`` when no matching tag exists, the peptide is not found in the
          protein, or either ``peptide`` or ``protein_sequence`` is not a string
          (e.g. a missing value in a DataFrame column).
    """
    # Missing cells (None / NaN) cannot be located; report "not found" rather
    # than raising from enumerate()/len().
    if not isinstance(peptide, str) or not isinstance(protein_sequence, str):
        return None

    amino_acids = frozenset('ACDEFGHIKLMNPQRSTVWY')

    # Count residues seen so far; when the wanted tag is reached this equals the
    # 1-based position of the tagged residue inside the peptide.
    residues_seen = 0
    found_match = False
    for i, char in enumerate(peptide):
        if char == '[':
            # i > 0: a tag at the very start has no preceding residue. Without
            # this guard peptide[i - 1] would wrap around to the LAST character
            # and could produce a false match with a wrong index.
            if (
                i > 0
                and peptide[i - 1] == aa
                and modification == peptide[i + 1: i + len(modification) + 1]
            ):
                found_match = True
                break
        if char in amino_acids:
            residues_seen += 1
    if not found_match:
        return None

    # Strip numeric tags such as "[649.3660]" to obtain the plain residue string.
    cleaned_peptide = re.sub(r'\[\d+(\.\d+)?\]', '', peptide)

    first = protein_sequence.find(cleaned_peptide)
    if first == -1:
        return None
    # Searching again from first + 1 also detects overlapping repeats.
    if protein_sequence.find(cleaned_peptide, first + 1) != -1:
        return -1
    return first + residues_seen - 1


# Example usage:
peptide = "AADTIGYPVM[649.3660]IR"
protein = 'MTRILTACKVVKTLKSGFGFANVTTKRQWDFSRPGIRLLSVKAKTAHIVLEDGTKMKGYSFGHPSSVAGEVVFNTGLGGYPEALTDPAYKGQILTMANPIIGNGGAPDTTARDELGLNKYMESDGIKVAGLLVLNYSNDYNHWLATKSLGQWLQEEKVPAIYGVDTRMLTKIIRDKGTMLGKIEFEGQSVDFVDPNKQNLIAEVSTKDVKVFGKGNPTKVVAVDCGIKNNVIRLLVKRGAEVHLVPWNHDFTQMEYDGLLIAGGPGNPALAQPLIQNVKKILESDRKEPLFGISTGNIITGLAAGAKSYKMSMANRGQNQPVLNITNRQAFITAQNHGYALDNTLPAGWKPLFVNVNDQTNEGIMHESKPFFAVQFHPEVSPGPTDTEYLFDSFFSLIKKGKGTTITSVLPKPALVASRVEVSKVLILGSGGLSIGQAGEFDYSGSQAVKAMKEENVKTVLMNPNIASVQTNEVGLKQADAVYFLPITPQFVTEVIKAERPDGLILGMGGQTALNCGVELFKRGVLKEYGVKVLGTSVESIMATEDRQLFSDKLNEINEKIAPSFAVESMEDALKAADTIGYPVMIRSAYALGGLGSGICPNKETLIDLGTKAFAMTNQILVERSVTGWKEIEYEVVRDADDNCVTVCNMENVDAMGVHTGDSVVVAPAQTLSNAEFQMLRRTSVNVVRHLGIVGECNIQFALHPTSMEYCIIEVNARLSRSSALASKATGYPLAFIAAKIALGIPLPEIKNVVSGKTSACFEPSLDYMVTKIPRWDLDRFHGTSSRIGSSMKSVGEVMAIGRTFEESFQKALRMCHPSVDGFTPRLPMNKEWPANLDLKKELSEPSSTRIYAIAKALENNMSLDEIVRLTSIDKWFLYKMRDILNMDKTLKGLNSDSVTEETLRKAKEIGFSDKQISKCLGLTEAQTRELRLKKNIHPWVKQIDTLAAEYPSVTNYLYVTYNGQEHDIKFDEHGIMVLGCGPYHIGSSVEFDWCAVSSIRTLRQLGKKTVVVNCNPETVSTDFDECDKLYFEELSLERILDIYHQEACNGCIISVGGQIPNNLAVPLYKNGVKIMGTSPLQIDRAEDRSIFSAVLDELKVAQAPWKAVNTLNEALEFANSVGYPCLLRPSYVLSGSAMNVVFSEDEMKRFLEEATRVSQEHPVVLTKFVEGAREVEMDAVGKEGRVISHAISEHVEDAGVHSGDATLMLPTQTISQGAIEKVKDATRKIAKAFAISGPFNVQFLVKGNDVLVIECNLRASRSFPFVSKTLGVDFIDVATKVMIGESIDEKRLPTLEQPIIPSDYVAIKAPMFSWPRLRDADPILRCEMASTGEVACFGEGIHTAFLKAMLSTGFKIPQKGILIGIQQSFRPRFLGVAEQLHNEGFKLFATEATSDWLNANNVPATPVAWPSQEGQNPSLSSIRKLIRDGSIDLVINLPNNNTKFVHDNYVIRRTAVDSGIALLTNFQVTKLFAEAVQKSRTVDSKSLFHYRQYSAGKAA'

# Should give 584: the 0-based offset within `protein` of the M that carries
# the 649.3660 tag in `peptide`.
find_modifications(peptide, '649.3660', 'M', protein)

584

In [54]:
# Sanity check: this slice ends at index 584, so its last residue should be the
# tagged M of the peptide.
protein[574 : 585]

'KAADTIGYPVM'

In [None]:
# NOTE(review): unfinished expression. `with_modification_` is not a name
# defined in the visible cells, so this cell would raise NameError under
# Restart & Run All. TODO: complete it or delete the cell.
with_modification_

In [45]:

# Demo input: a peptide with two bracketed numeric tags on its M residues.
peptide_sequence = "LM[649.3660]TQM[15.9949]NSR"

# Delete every "[<digits>]" / "[<digits>.<digits>]" tag, leaving only the
# residue letters. Tags with non-numeric content would not match this pattern.
cleaned_peptide = re.sub(r'\[\d+(\.\d+)?\]', '', peptide_sequence)

print("Peptide sequence without modifications:", cleaned_peptide)

Peptide sequence without modifications: LMTQMNSR


In [46]:
def find_subset_position(protein_sequence, subset):
    """Return every 0-based start offset at which `subset` occurs in `protein_sequence`.

    Overlapping occurrences are all reported. The result is an empty list when
    `subset` does not occur or is longer than `protein_sequence`.
    """
    window = len(subset)
    last_start = len(protein_sequence) - window
    return [
        start
        for start in range(last_start + 1)
        if protein_sequence[start:start + window] == subset
    ]

# Example usage: "AGCT" occurs twice in this toy sequence, so two 0-based start
# offsets are printed.
protein_sequence = "MAAGCTGAAAAGTGCTAGCT"
subset = "AGCT"
positions = find_subset_position(protein_sequence, subset)
print("Subset positions:", positions)

Subset positions: [2, 16]


In [31]:
# Select rows by position: the first 39 (0-38) and rows 182-202 of raw_data.
# NOTE(review): presumably the most positive and most negative 'Log2HL avg'
# peptides, given the apparent sort order of the sheet; confirm the cut-offs.
peptides_of_interest = raw_data.iloc[[*range(39), *range(182, 203)]]
peptides_of_interest

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,...,Protein,Protein ID,Entry Name,Gene,Protein Description,p-value,neglogp,Log2HL avg,Unnamed: 24,Protein ID.1
0,AADTIGYPVMIR,AADTIGYPVMIR,AADTIGYPVM[649.3660]IR,AADTIGYPVM[655.3735]IR,,,3.269016,,,,...,sp|Q8C196|CPSM_MOUSE,Q8C196,CPSM_MOUSE,Cps1,"Carbamoyl-phosphate synthase [ammonia], mitoch...",0.004058191,2.391668,3.289988,,Q8C196
1,IAMQTLDMGR,IAMQTLDMGR,IAM[649.3660]QTLDMGR,IAM[655.3735]QTLDMGR,,2.783695,3.114945,2.697822,,,...,sp|Q07417|ACADS_MOUSE,Q07417,ACADS_MOUSE,Acads,"Short-chain specific acyl-CoA dehydrogenase, m...",9.647931e-06,5.015566,2.789195,,Q07417
2,FVGAVDPIMEK,FVGAVDPIMEK,FVGAVDPIM[649.3660]EK,FVGAVDPIM[655.3735]EK,,,,,,2.383482,...,sp|Q91YI0|ARLY_MOUSE,Q91YI0,ARLY_MOUSE,Asl,Argininosuccinate lyase,0.04283587,1.368192,2.555706,,Q91YI0
3,QAQYLGMPINGPFKPDHYRY,QAQYLGMPINGPFKPDHYRY,QAQYLGM[649.3660]PINGPFKPDHYRY,QAQYLGM[655.3735]PINGPFKPDHYRY,2.394458,2.380664,2.682897,,2.435014,2.412394,...,sp|P50247|SAHH_MOUSE,P50247,SAHH_MOUSE,Ahcy,Adenosylhomocysteinase,8.561078e-10,9.067472,2.538998,,P50247
4,FADVIPMNLPHR,FADVIPMNLPHR,FADVIPM[649.3660]NLPHR,FADVIPM[655.3735]NLPHR,,,2.580647,,2.446614,2.450718,...,sp|P33267|CP2F2_MOUSE,P33267,CP2F2_MOUSE,Cyp2f2,Cytochrome P450 2F2,0.0002032232,3.692027,2.389057,,P33267
5,AREEMAK,AREEMAK,AREEM[649.3660]AK,AREEM[655.3735]AK,,,,2.537996,,,...,sp|P97872|FMO5_MOUSE,P97872,FMO5_MOUSE,Fmo5,Flavin-containing monooxygenase 5,6.484628e-05,4.188115,2.073924,,P97872
6,ISDHLEAAMEMIPVLK,ISDHLEAAMEMIPVLK,ISDHLEAAM[649.3660]EMIPVLK,ISDHLEAAM[655.3735]EMIPVLK,1.710849,2.320842,1.775574,1.099943,1.715956,1.722794,...,sp|Q9DBT9|M2GD_MOUSE,Q9DBT9,M2GD_MOUSE,Dmgdh,"Dimethylglycine dehydrogenase, mitochondrial",1.742137e-09,8.758918,1.920887,,Q9DBT9
7,RVFVVGVGMTKFMKPGGENSR,RVFVVGVGMTKFM[15.9949]KPGGENSR,RVFVVGVGM[649.3660]TKFM[15.9949]KPGGENSR,RVFVVGVGM[655.3735]TKFM[15.9949]KPGGENSR,,,,,,,...,sp|P32020|SCP2_MOUSE,P32020,SCP2_MOUSE,Scp2,Sterol carrier protein 2,0.01019022,1.991816,1.818755,,P32020
8,MNPQSAFFQGK,MNPQSAFFQGK,M[649.3660]NPQSAFFQGK,M[655.3735]NPQSAFFQGK,1.62896,1.601815,1.487507,1.513568,1.541146,1.413658,...,sp|P32020|SCP2_MOUSE,P32020,SCP2_MOUSE,Scp2,Sterol carrier protein 2,1.530212e-17,16.815248,1.527764,,P32020
9,KVMEAFEQAERK,KVMEAFEQAERK,KVM[649.3660]EAFEQAERK,KVM[655.3735]EAFEQAERK,1.582578,,1.422883,1.338301,1.239502,1.412158,...,sp|P50136|ODBA_MOUSE,P50136,ODBA_MOUSE,Bckdha,"2-oxoisovalerate dehydrogenase subunit alpha, ...",1.355653e-08,7.867852,1.470137,,P50136


In [32]:
# Renumber the selected rows 0..n-1; drop=True discards the old index instead of
# keeping it as a column.
peptides_of_interest = peptides_of_interest.reset_index(drop=True)
peptides_of_interest

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,...,Protein,Protein ID,Entry Name,Gene,Protein Description,p-value,neglogp,Log2HL avg,Unnamed: 24,Protein ID.1
0,AADTIGYPVMIR,AADTIGYPVMIR,AADTIGYPVM[649.3660]IR,AADTIGYPVM[655.3735]IR,,,3.269016,,,,...,sp|Q8C196|CPSM_MOUSE,Q8C196,CPSM_MOUSE,Cps1,"Carbamoyl-phosphate synthase [ammonia], mitoch...",0.004058191,2.391668,3.289988,,Q8C196
1,IAMQTLDMGR,IAMQTLDMGR,IAM[649.3660]QTLDMGR,IAM[655.3735]QTLDMGR,,2.783695,3.114945,2.697822,,,...,sp|Q07417|ACADS_MOUSE,Q07417,ACADS_MOUSE,Acads,"Short-chain specific acyl-CoA dehydrogenase, m...",9.647931e-06,5.015566,2.789195,,Q07417
2,FVGAVDPIMEK,FVGAVDPIMEK,FVGAVDPIM[649.3660]EK,FVGAVDPIM[655.3735]EK,,,,,,2.383482,...,sp|Q91YI0|ARLY_MOUSE,Q91YI0,ARLY_MOUSE,Asl,Argininosuccinate lyase,0.04283587,1.368192,2.555706,,Q91YI0
3,QAQYLGMPINGPFKPDHYRY,QAQYLGMPINGPFKPDHYRY,QAQYLGM[649.3660]PINGPFKPDHYRY,QAQYLGM[655.3735]PINGPFKPDHYRY,2.394458,2.380664,2.682897,,2.435014,2.412394,...,sp|P50247|SAHH_MOUSE,P50247,SAHH_MOUSE,Ahcy,Adenosylhomocysteinase,8.561078e-10,9.067472,2.538998,,P50247
4,FADVIPMNLPHR,FADVIPMNLPHR,FADVIPM[649.3660]NLPHR,FADVIPM[655.3735]NLPHR,,,2.580647,,2.446614,2.450718,...,sp|P33267|CP2F2_MOUSE,P33267,CP2F2_MOUSE,Cyp2f2,Cytochrome P450 2F2,0.0002032232,3.692027,2.389057,,P33267
5,AREEMAK,AREEMAK,AREEM[649.3660]AK,AREEM[655.3735]AK,,,,2.537996,,,...,sp|P97872|FMO5_MOUSE,P97872,FMO5_MOUSE,Fmo5,Flavin-containing monooxygenase 5,6.484628e-05,4.188115,2.073924,,P97872
6,ISDHLEAAMEMIPVLK,ISDHLEAAMEMIPVLK,ISDHLEAAM[649.3660]EMIPVLK,ISDHLEAAM[655.3735]EMIPVLK,1.710849,2.320842,1.775574,1.099943,1.715956,1.722794,...,sp|Q9DBT9|M2GD_MOUSE,Q9DBT9,M2GD_MOUSE,Dmgdh,"Dimethylglycine dehydrogenase, mitochondrial",1.742137e-09,8.758918,1.920887,,Q9DBT9
7,RVFVVGVGMTKFMKPGGENSR,RVFVVGVGMTKFM[15.9949]KPGGENSR,RVFVVGVGM[649.3660]TKFM[15.9949]KPGGENSR,RVFVVGVGM[655.3735]TKFM[15.9949]KPGGENSR,,,,,,,...,sp|P32020|SCP2_MOUSE,P32020,SCP2_MOUSE,Scp2,Sterol carrier protein 2,0.01019022,1.991816,1.818755,,P32020
8,MNPQSAFFQGK,MNPQSAFFQGK,M[649.3660]NPQSAFFQGK,M[655.3735]NPQSAFFQGK,1.62896,1.601815,1.487507,1.513568,1.541146,1.413658,...,sp|P32020|SCP2_MOUSE,P32020,SCP2_MOUSE,Scp2,Sterol carrier protein 2,1.530212e-17,16.815248,1.527764,,P32020
9,KVMEAFEQAERK,KVMEAFEQAERK,KVM[649.3660]EAFEQAERK,KVM[655.3735]EAFEQAERK,1.582578,,1.422883,1.338301,1.239502,1.412158,...,sp|P50136|ODBA_MOUSE,P50136,ODBA_MOUSE,Bckdha,"2-oxoisovalerate dehydrogenase subunit alpha, ...",1.355653e-08,7.867852,1.470137,,P50136


In [None]:
raw_data[['Protein ID']]