In [12]:
import Bio.PDB
import pandas as pd
import os
import re
import math
import requests as r
from io import StringIO
from Bio import SeqIO




In [4]:
def get_phi_and_psi(Protein_ID, CIF_file_path):
    df = pd.DataFrame()
    for model in Bio.PDB.MMCIFParser().get_structure(Protein_ID, CIF_file_path):
        for chain in model :
            polypeptides = Bio.PDB.PPBuilder().build_peptides(chain)
            res_index_list = []
            res_name_list = []
            phi_list = []
            psi_list = []
            for poly_index, poly in enumerate(polypeptides) :
                print("Model %s Chain %s" % (str(model.id), str(chain.id)))
                print ("(part %i of %i)" % (poly_index+1, len(polypeptides)))
                print ("length %i" % (len(poly)))
                print ("from %s%i" % (poly[0].resname, poly[0].id[1]))
                print ("to %s%i" % (poly[-1].resname, poly[-1].id[1]))
                phi_psi = poly.get_phi_psi_list()
                res_index_sublist = []
                res_name_sublist = []
                phi_sublist = []
                psi_sublist = []
                for res_index, residue in enumerate(poly) :
                    res_name = "%s%i" % (residue.resname, residue.id[1])
                    # print(res_name, phi_psi[res_index])
                    res_index_sublist.append(residue.id[1])
                    res_name_sublist.append(residue.resname)
                    phi_sublist.append(phi_psi[res_index][0])
                    psi_sublist.append(phi_psi[res_index][1])
                res_index_list.extend(res_index_sublist)
                res_name_list.extend(res_name_sublist)
                phi_list.extend(phi_sublist)
                psi_list.extend(psi_sublist)
            df['Protein ID'] = [Protein_ID] * len(res_index_list)
            df['Residue Name'] = res_name_list
            df['Residue Position'] = res_index_list
            df['PHI'] = phi_list
            df['PSI'] = psi_list
    return df
    


In [5]:
CIF_file_path = '../alphafold_data/cif/A0MZ66.cif'
Protein_ID = 'A0MZ66'

In [6]:
example_df = get_phi_and_psi(Protein_ID, CIF_file_path)

Model 0 Chain A
(part 1 of 1)
length 631
from MET1
to CYS631


In [7]:
example_df

Unnamed: 0,Protein ID,Residue Name,Residue Position,PHI,PSI
0,A0MZ66,MET,1,,-0.419242
1,A0MZ66,ASN,2,-1.256368,-0.347814
2,A0MZ66,SER,3,-1.130119,-0.431790
3,A0MZ66,SER,4,-1.393241,-0.556930
4,A0MZ66,ASP,5,-1.399088,-0.615379
...,...,...,...,...,...
626,A0MZ66,ASP,627,-2.789701,1.913404
627,A0MZ66,SER,628,0.573859,2.097280
628,A0MZ66,SER,629,1.593444,2.238103
629,A0MZ66,ASN,630,1.072666,1.765689


In [8]:
directory = '../alphafold_data/cif'

# Initialize an empty list to store file paths
file_paths = []

# Walk through the directory and its subdirectories
for root, directories, files in os.walk(directory):
    # Iterate over each file in the current directory
    for file in files:
        # Construct the full path of the file
        file_path = os.path.join(root, file)
        # Append the file path to the list
        file_paths.append(file_path)

# Now, file_paths contains the paths of all files in the directory and its subdirectories
print(file_paths)

['../alphafold_data/cif/O14617.cif', '../alphafold_data/cif/Q9D404.cif', '../alphafold_data/cif/P19525.cif', '../alphafold_data/cif/P62829.cif', '../alphafold_data/cif/Q96PK6.cif', '../alphafold_data/cif/Q9Z0X1.cif', '../alphafold_data/cif/O60814.cif', '../alphafold_data/cif/Q99MB2.cif', '../alphafold_data/cif/Q6PDF3.cif', '../alphafold_data/cif/P07900.cif', '../alphafold_data/cif/Q8C6I2.cif', '../alphafold_data/cif/Q86YV0.cif', '../alphafold_data/cif/Q5JQC4.cif', '../alphafold_data/cif/Q8QZT1.cif', '../alphafold_data/cif/Q9CQ92.cif', '../alphafold_data/cif/P47897.cif', '../alphafold_data/cif/Q14203.cif', '../alphafold_data/cif/O75955.cif', '../alphafold_data/cif/Q9Y5B6.cif', '../alphafold_data/cif/P41216.cif', '../alphafold_data/cif/Q9CQN1.cif', '../alphafold_data/cif/P15121.cif', '../alphafold_data/cif/P38919.cif', '../alphafold_data/cif/Q920A5.cif', '../alphafold_data/cif/Q9NXV6.cif', '../alphafold_data/cif/Q9P2K3.cif', '../alphafold_data/cif/Q53HL2.cif', '../alphafold_data/cif/Q96E

In [9]:
len(file_paths)

1696

In [10]:
pattern = r'/([^/]+)\.cif$'
protein_ids =[]
for file_path in file_paths:
# Search for the pattern in the string
    match = re.search(pattern, file_path)

    # Extract the desired substring from the matched pattern
    desired_substring = match.group(1)
    protein_ids.append(desired_substring)


In [11]:
protein_ids

['O14617',
 'Q9D404',
 'P19525',
 'P62829',
 'Q96PK6',
 'Q9Z0X1',
 'O60814',
 'Q99MB2',
 'Q6PDF3',
 'P07900',
 'Q8C6I2',
 'Q86YV0',
 'Q5JQC4',
 'Q8QZT1',
 'Q9CQ92',
 'P47897',
 'Q14203',
 'O75955',
 'Q9Y5B6',
 'P41216',
 'Q9CQN1',
 'P15121',
 'P38919',
 'Q920A5',
 'Q9NXV6',
 'Q9P2K3',
 'Q53HL2',
 'Q96E09',
 'Q8C1W2',
 'Q1ED39',
 'P27144',
 'P42125',
 'Q13418',
 'P50247',
 'P12074',
 'O75821',
 'Q9Y6Q5',
 'Q08945',
 'Q5HZI9',
 'P21333',
 'P24539',
 'O14777',
 'Q9CPQ3',
 'Q9UN86',
 'Q8IUD2',
 'P56391',
 'Q923K4',
 'Q9CRD0',
 'O14950',
 'P35637',
 'Q9Y3U8',
 'A2ATU0',
 'P39019',
 'P62753',
 'Q9HD42',
 'P54578',
 'Q9CWV0',
 'O14776',
 'P14174',
 'Q16777',
 'P07108',
 'Q14160',
 'Q6YN16',
 'O75175',
 'P17858',
 'Q99LP6',
 'P09496',
 'Q8WXI9',
 'Q9CW42',
 'Q9UNZ5',
 'O00267',
 'Q8BHE8',
 'Q8C2E4',
 'Q9UL46',
 'Q9D773',
 'Q9CQC7',
 'Q8BWF0',
 'Q9CZS1',
 'Q62425',
 'Q9CXJ1',
 'P52294',
 'P05455',
 'P50454',
 'Q60597',
 'Q7L4I2',
 'O75940',
 'P28074',
 'P85094',
 'Q9CQZ5',
 'Q99M87',
 'Q9BYJ9',

In [12]:
list_of_dfs = []
for cif_file, prot_id in zip(file_paths, protein_ids):
    list_of_dfs.append(get_phi_and_psi(prot_id, cif_file))


Model 0 Chain A
(part 1 of 1)
length 1153
from MET1
to CYS1153
Model 0 Chain A
(part 1 of 1)
length 459
from MET1
to MET459
Model 0 Chain A
(part 1 of 1)
length 551
from MET1
to CYS551
Model 0 Chain A
(part 1 of 1)
length 140
from MET1
to ALA140
Model 0 Chain A
(part 1 of 1)
length 669
from MET1
to MET669
Model 0 Chain A
(part 1 of 1)
length 612
from MET1
to ASP612
Model 0 Chain A
(part 1 of 1)
length 126
from MET1
to LYS126
Model 0 Chain A
(part 1 of 1)
length 328
from MET1
to SER328
Model 0 Chain A
(part 1 of 1)
length 494
from MET1
to LYS494
Model 0 Chain A
(part 1 of 1)
length 732
from MET1
to ASP732
Model 0 Chain A
(part 1 of 1)
length 164
from MET1
to HIS164
Model 0 Chain A
(part 1 of 1)
length 1011
from MET1
to THR1011
Model 0 Chain A
(part 1 of 1)
length 288
from MET1
to THR288
Model 0 Chain A
(part 1 of 1)
length 424
from MET1
to LEU424
Model 0 Chain A
(part 1 of 1)
length 152
from MET1
to SER152
Model 0 Chain A
(part 1 of 1)
length 775
from MET1
to VAL775
Model 0 Chain A
(par

In [13]:
concat_dihedrals = pd.concat(list_of_dfs)


In [14]:
concat_dihedrals

Unnamed: 0,Protein ID,Residue Name,Residue Position,PHI,PSI
0,O14617,MET,1,,-1.015463
1,O14617,ALA,2,-0.954294,-0.783044
2,O14617,LEU,3,-1.047323,-0.604341
3,O14617,LYS,4,-1.189739,-0.662262
4,O14617,MET,5,-1.213749,-0.669750
...,...,...,...,...,...
251,Q8C3X2,PHE,252,-1.180788,-0.653133
252,Q8C3X2,TRP,253,-1.329307,-0.276255
253,Q8C3X2,LYS,254,-1.518433,-0.312795
254,Q8C3X2,GLU,255,-1.663209,0.034239


In [30]:
#concat_dihedrals.to_csv('dihedral_angles.csv', index=False)

# Below we verify that the amino acid sequences extracted from the the alpha fold structures match those found in the uniprot database

In [5]:
concat_dihedrals = pd.read_csv('dihedral_angles.csv')

In [6]:
concat_dihedrals

Unnamed: 0,Protein ID,Residue Name,Residue Position,PHI,PSI,PSI degrees,PHI degrees
0,O14617,MET,1,,-1.015463,-58.181772,
1,O14617,ALA,2,-0.954294,-0.783044,-44.865122,-54.677005
2,O14617,LEU,3,-1.047323,-0.604341,-34.626186,-60.007208
3,O14617,LYS,4,-1.189739,-0.662262,-37.944790,-68.167023
4,O14617,MET,5,-1.213749,-0.669750,-38.373822,-69.542706
...,...,...,...,...,...,...,...
923460,Q8C3X2,PHE,252,-1.180788,-0.653133,-37.421783,-67.654193
923461,Q8C3X2,TRP,253,-1.329307,-0.276255,-15.828235,-76.163670
923462,Q8C3X2,LYS,254,-1.518433,-0.312795,-17.921836,-86.999812
923463,Q8C3X2,GLU,255,-1.663209,0.034239,1.961770,-95.294845


In [7]:
len(concat_dihedrals['Protein ID'].unique())

1696

In [13]:
def get_protein_seq(cID):
    baseUrl="http://www.uniprot.org/uniprot/"
    currentUrl=baseUrl+cID+".fasta"
    response = r.post(currentUrl)
    cData=''.join(response.text)
    
    Seq=StringIO(cData)
    pSeq=list(SeqIO.parse(Seq,'fasta'))

    return str(pSeq[0].seq)

In [16]:
uniprot_protein_sequences = pd.DataFrame({'Protein ID':concat_dihedrals['Protein ID'].unique()})

In [18]:
#uniprot_protein_sequences = pd.read_csv('../global_data/uniprotID_to_complete_sequence_mapping.csv').drop(columns=['Unnamed: 0'])
uniprot_protein_sequences['Complete Sequence'] = uniprot_protein_sequences['Protein ID'].apply(get_protein_seq)

In [19]:
uniprot_protein_sequences

Unnamed: 0,Protein ID,Complete Sequence
0,O14617,MALKMVKGSIDRMFDKNLQDLVRGIRNHKEDEAKYISQCIDEIKQE...
1,Q9D404,MLSKCLQHFLKATISHPYPASYSWLISKHRFYGTVPAAMLRRRVVI...
2,P19525,MAGDLSAGFFMEELNTYRQKQGVVLKYQELPNSGPPHDRRFTFQVI...
3,P62829,MSKRGRGGSSGAKFRISLGLPVGAVINCADNTGAKNLYIISVKGIK...
4,Q96PK6,MKIFVGNVDGADTTPEELAALFAPYGTVMSCAVMKQFAFVHMRENA...
...,...,...
1691,Q5JVF3,MAHITINQYLQQVYEAIDSRDGASCAELVSFKHPHVANPRLQMASP...
1692,Q9JJL8,MAASMARLWWPFLARQGLRSRGRCVCSQNPRRSFATEKRVRNLLYE...
1693,O09111,MAARLLSLYGRCLSAAGAMRGLPAARVRWESSRAVIAPSGVEKKRQ...
1694,Q8WUM0,MFPAAPSPRTPGTGSRRGPLAGLGPGSTPRTASRKGLPLGSAVSSP...


In [59]:
uniprot_protein_sequences.to_csv('uniprot_protein_sequences.csv', index=False)

In [20]:
amino_acid_map = {
    "ALA": "A",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "CYS": "C",
    "GLU": "E",
    "GLN": "Q",
    "GLY": "G",
    "HIS": "H",
    "ILE": "I",
    "LEU": "L",
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYR": "Y",
    "VAL": "V",
    "SEC": "U",
    "PYL": "O"
}

In [38]:
# Function to verify sequences
def verify_sequences(df_residues, df_sequences):
    mismatches = pd.DataFrame(
        columns=['Protein ID', 'Residue Position', 'AlphaFold Residue', 'UniProt Residue'])

    missing_sequences = 0
    for protein_id in df_residues['Protein ID'].unique():
        # Get the complete sequence for the current Protein ID
        if protein_id in df_sequences['Protein ID'].values:
            complete_seq = df_sequences[df_sequences['Protein ID'] == protein_id]['Complete Sequence'].values[0]
        else:
            missing_sequences += 1
            print(f'{protein_id} is not one of the completed sequences we queried from UniProt', missing_sequences)
            continue

        # Filter residues for the current Protein ID
        residues = df_residues[df_residues['Protein ID'] == protein_id]

        for _, row in residues.iterrows():
            residue_name = row['Residue Name']
            residue_position = row['Residue Position']
            
            # Get the expected residue from the complete sequence
            if (residue_position - 1) <  len(complete_seq):
                expected_residue = complete_seq[residue_position - 1]  # position - 1 for zero-based indexing
                actual_residue = amino_acid_map[residue_name]
            else:
                expected_residue = 'X'
                actual_residue = amino_acid_map[residue_name]
            
            if expected_residue != actual_residue:
                print(protein_id)
                mismatches = mismatches._append({'Protein ID':protein_id,
                        'Residue Position':residue_position,
                        'AlphaFold Residue':actual_residue,
                        'UniProt Residue': expected_residue
                        }, ignore_index = True)
                #mismatches.loc[len(mismatches.index)] = [protein_id, residue_position, actual_residue, expected_residue]
                #mismatches.append((protein_id, residue_position, actual_residue, expected_residue))

    return mismatches


In [39]:
mismatches = verify_sequences(concat_dihedrals, uniprot_protein_sequences)


Q14160
P27635
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5
Q8NFD5

In [40]:
mismatches

Unnamed: 0,Protein ID,Residue Position,AlphaFold Residue,UniProt Residue
0,Q14160,674,V,E
1,P27635,202,N,S
2,Q8NFD5,3,H,A
3,Q8NFD5,4,N,R
4,Q8NFD5,6,G,A
...,...,...,...,...
4498,Q8R0F8,224,R,X
4499,Q8R0F8,225,S,X
4500,Q8R0F8,226,E,X
4501,Q8R0F8,227,Y,X


In [42]:
unique_mismatches = mismatches['Protein ID'].unique()

In [43]:
unique_mismatches

array(['Q14160', 'P27635', 'Q8NFD5', 'Q9NX55', 'Q9Y2D5', 'Q9ULT8',
       'P22315', 'Q92616', 'P62861', 'P49411', 'O94851', 'Q00341',
       'O09167', 'O75396', 'Q9UJX3', 'Q8R0F8', 'Q96BZ8'], dtype=object)

In [44]:
len(unique_mismatches)

17

# Next we append the the PSI and PHI angles to our RvsS dataset

In [50]:
pd.set_option('display.max_columns', None)

In [56]:
RvsS = pd.read_csv('../RvsS/RvsS_peptides_with_alphafold.csv').drop(columns = ['Unnamed: 0'])

In [57]:
RvsS

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,4 Log2 Ratio HL,5 Log2 Ratio HL,6 Log2 Ratio HL,7 Log2 Ratio HL,8 Log2 Ratio HL,9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,p-value,neglogp,Log2HL avg,label,Complete Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,AADTIGYPVMIR,AADTIGYPVMIR,AADTIGYPVM[649.3660]IR,AADTIGYPVM[655.3735]IR,,,3.269016,,,,3.310961,,,,,,sp|Q8C196|CPSM_MOUSE,Q8C196,CPSM_MOUSE,Cps1,"Carbamoyl-phosphate synthase [ammonia], mitoch...",4.058191e-03,2.391668,3.289988,green,MTRILTACKVVKTLKSGFGFANVTTKRQWDFSRPGIRLLSVKAKTA...,575,12,AADTIGYPV,9,584,FAVESMEDALKAADTIGYPV,IRSAYALGGLGSGICPNKET,Q8C196,66,M,584,96.55,-14.238,-12.741,-11.922,-12.356,-21.547,-21.776,-20.583,-22.969,-8.140,-7.953,-8.470,-8.702,STRN,STRN,0,0,1,0,0,0,0,0,0,2,3,3,4,7,8,11,31,85,144,5,0.0,0.0,0.095238,0.095238,2.000000,2.142857,2.380952,3.666667,4.857143,6.190476,7.857143,22.190476,59.952381,120.857143,6.285714,0
1,IAMQTLDMGR,IAMQTLDMGR,IAM[649.3660]QTLDMGR,IAM[655.3735]QTLDMGR,,2.783695,3.114945,2.697822,,,,3.309030,2.577856,2.251824,,,sp|Q07417|ACADS_MOUSE,Q07417,ACADS_MOUSE,Acads,"Short-chain specific acyl-CoA dehydrogenase, m...",9.647931e-06,5.015566,2.789195,green,MAAALLARARGPLRRALGVRDWRRLHTVYQSVELPETHQMLRQTCR...,262,10,IA,2,264,DCRIPKENLLGEPGMGFKIA,QTLDMGRIGIASQALGIAQA,Q07417,47,M,264,98.03,-3.998,-3.218,-3.663,-1.765,5.685,7.004,7.867,6.799,-16.050,-16.071,-17.261,-16.097,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,5,6,8,8,19,90,173,1,0.0,0.0,0.000000,0.000000,2.047619,2.047619,2.761905,4.857143,6.285714,8.047619,9.047619,30.571429,90.857143,176.571429,8.285714,0
2,FVGAVDPIMEK,FVGAVDPIMEK,FVGAVDPIM[649.3660]EK,FVGAVDPIM[655.3735]EK,,,,,,2.383482,2.727931,,,,,,sp|Q91YI0|ARLY_MOUSE,Q91YI0,ARLY_MOUSE,Asl,Argininosuccinate lyase,4.283587e-02,1.368192,2.555706,green,MASESGKLWGGRFVGAVDPIMEKFNSSISYDRHLWNVDVQGSKAYS...,12,11,FVGAVDPI,8,20,MASESGKLWGGRFVGAVDPI,EKFNSSISYDRHLWNVDVQG,Q91YI0,79,M,20,93.06,0.281,1.383,0.792,2.350,4.013,3.236,2.284,4.136,34.874,35.596,36.649,36.227,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,6,7,7,7,9,24,52,2,0.0,0.0,0.000000,0.000000,1.952381,2.000000,2.238095,3.285714,4.380952,5.238095,5.523810,11.238095,29.571429,61.952381,2.380952,0
3,QAQYLGMPINGPFKPDHYRY,QAQYLGMPINGPFKPDHYRY,QAQYLGM[649.3660]PINGPFKPDHYRY,QAQYLGM[655.3735]PINGPFKPDHYRY,2.394458,2.380664,2.682897,,2.435014,2.412394,,,2.559564,,2.839492,2.607501,sp|P50247|SAHH_MOUSE,P50247,SAHH_MOUSE,Ahcy,Adenosylhomocysteinase,8.561078e-10,9.067472,2.538998,green,MSDKLPYKVADIGLAAWGRKALDIAENEMPGLMRMREMYSASKPLK...,412,20,QAQYLG,6,418,LGKLNVKLTKLTEKQAQYLG,PINGPFKPDHYRY,P50247,26,M,418,96.03,-31.883,-31.393,-30.160,-32.451,-1.725,-2.820,-2.363,-3.286,-2.198,-3.157,-3.955,-4.067,BEND,BEND,1,0,0,0,0,0,0,0,0,2,2,2,3,4,5,6,15,24,39,5,0.0,0.0,0.095238,0.095238,2.000000,2.095238,2.095238,3.238095,4.428571,5.380952,5.952381,13.380952,27.047619,48.190476,3.047619,0
4,FADVIPMNLPHR,FADVIPMNLPHR,FADVIPM[649.3660]NLPHR,FADVIPM[655.3735]NLPHR,,,2.580647,,2.446614,2.450718,2.078252,,,,,,sp|P33267|CP2F2_MOUSE,P33267,CP2F2_MOUSE,Cyp2f2,Cytochrome P450 2F2,2.032232e-04,3.692027,2.389057,green,MDGVSTAILLLLLAVISLSLTFSSRGKGQLPPGPKPLPILGNLLQL...,358,12,FADVIP,6,364,SMPYTDAVIHEVQRFADVIP,NLPHRVTRDTPFRGFLIPKG,P33267,20,M,364,96.29,1.166,0.040,0.595,-0.973,1.621,2.637,4.050,2.668,-9.734,-9.982,-10.213,-8.915,TURN_TY1_P,TURN,0,0,0,1,0,0,0,0,0,2,2,3,3,6,7,9,27,104,207,6,0.0,0.0,0.000000,0.000000,2.000000,2.285714,2.857143,4.238095,6.095238,7.380952,8.904762,27.523810,85.285714,171.619048,7.809524,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,AHMVTLDYTVQVPGTGR,AHMVTLDYTVQVPGTGR,AHM[649.3660]VTLDYTVQVPGTGR,AHM[655.3735]VTLDYTVQVPGTGR,-1.822067,-1.687501,-1.706146,-1.647231,-1.793966,-1.407199,-3.203481,-2.025144,-1.873531,-1.746197,-1.613004,-1.760150,sp|Q9QXF8|GNMT_MOUSE,Q9QXF8,GNMT_MOUSE,Gnmt,Glycine N-methyltransferase,1.880219e-08,7.725792,-1.857135,yellow,MVDSVYRTRSLGVAAEGLPDQYADGEAARVWQLYIGDTRSRTAEYK...,213,17,AH,2,215,KSDLTKDITTSVLTVNNKAH,VTLDYTVQVPGTGRDGSPGF,Q9QXF8,104,M,215,95.79,17.127,17.414,18.275,18.120,-2.387,-2.142,-0.893,-3.274,-3.437,-1.957,-1.736,-1.371,STRN,STRN,0,0,1,0,0,0,0,0,0,2,2,3,4,6,9,10,25,60,104,2,0.0,0.0,0.000000,0.000000,2.000000,2.142857,2.523810,3.380952,4.428571,6.095238,7.666667,16.476190,42.238095,79.904762,2.380952,0
199,KEQESEVDMK,KEQESEVDMK,KEQESEVDM[649.3660]K,KEQESEVDM[655.3735]K,-1.759694,,-1.821773,-1.960428,-1.781592,-1.988603,,-1.791248,-1.912515,-1.928952,-1.891596,-1.881011,sp|Q8K3J1|NDUS8_MOUSE,Q8K3J1,NDUS8_MOUSE,Ndufs8,NADH dehydrogenase [ubiquinone] iron-sulfur pr...,7.034064e-14,13.152794,-1.871741,yellow,MYRLSSSMLPRALAQAMRTGHLNGQSLHSSAVAATYKYVNKKEQES...,41,10,KEQESEVD,8,49,SAVAATYKYVNKKEQESEVD,KSATDNAARILMWTELIRGL,Q8K3J1,72,M,49,82.35,-36.502,-36.519,-37.547,-36.824,10.719,11.823,11.524,13.124,50.492,51.559,52.662,50.954,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,1,1,2,2,2,2,2,7,11,19,0,0.0,0.0,0.000000,0.000000,1.333333,1.333333,2.000000,2.904762,3.238095,4.000000,4.238095,7.190476,12.523810,19.428571,0.857143,1
200,RGVMLAVDAVIAELK,RGVMLAVDAVIAELK,RGVM[649.3660]LAVDAVIAELK,RGVM[655.3735]LAVDAVIAELK,-1.812349,-1.980371,-2.307386,-2.492858,-1.405837,-2.094407,-1.994187,-1.873064,-2.015018,-1.664986,-1.829820,-1.935236,sp|P63038|CH60_MOUSE,P63038,CH60_MOUSE,Hspd1,"60 kDa heat shock protein, mitochondrial",7.289026e-11,10.137330,-1.950460,yellow,MLRLPTVLRQMRPVSRALAPHLTRAYAKDVKFGADARALMLQGVDL...,141,15,RGV,3,144,KEGFEKISKGANPVEIRRGV,LAVDAVIAELKKQSKPVTTP,P63038,38,M,144,97.14,19.270,18.162,17.695,18.579,-5.132,-5.161,-6.600,-4.520,-13.015,-14.075,-14.333,-15.329,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,5,6,8,8,26,71,128,2,0.0,0.0,0.000000,0.000000,2.047619,2.142857,2.857143,5.285714,6.238095,8.476190,8.952381,26.095238,62.571429,115.380952,7.571429,0
201,MQLLEIITTDK,MQLLEIITTDK,M[649.3660]QLLEIITTDK,M[655.3735]QLLEIITTDK,-2.286954,-2.700322,-1.941018,-1.860306,-2.069488,,-2.131793,-2.980788,-2.198685,,-2.326194,-2.205174,sp|Q8BMS1|ECHA_MOUSE,Q8BMS1,ECHA_MOUSE,Hadha,"Trifunctional enzyme subunit alpha, mitochondrial",5.582467e-09,8.253174,-2.270072,yellow,MVASRAIGSLSRFSAFRILRSRGCICRSFTTSSALLTRTHINYGVK...,505,11,,0,505,AVSKRPEKVIGMHYFSPVDK,QLLEIITTDKTSKDTTASAV,Q8BMS1,65,M,505,95.41,2.614,3.983,5.051,3.903,-3.420,-4.006,-3.584,-5.460,-5.193,-4.857,-5.870,-4.770,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,2,2,5,6,7,15,55,158,8,0.0,0.0,0.000000,0.095238,2.000000,2.095238,2.571429,3.428571,4.904762,6.761905,8.190476,25.952381,80.380952,173.428571,7.190476,0


In [58]:
RvsS[RvsS['Protein ID'].isin(unique_mismatches)]

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,4 Log2 Ratio HL,5 Log2 Ratio HL,6 Log2 Ratio HL,7 Log2 Ratio HL,8 Log2 Ratio HL,9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,p-value,neglogp,Log2HL avg,label,Complete Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR


In [67]:
def extract_PSI_and_PHI(residue_df, psi_and_phi_df):
    df = pd.DataFrame(columns=['PSI Radians', 'PHI Radians', 'PSI Degrees', 'PHI Degrees'])
    for _, row in residue_df.iterrows():
        protein_id = row['Protein ID']
        methionine_posn = row['Methionine Location'] + 1
        row_of_interest = psi_and_phi_df.loc[(psi_and_phi_df['Protein ID'] == protein_id) & (psi_and_phi_df['Residue Position'] == methionine_posn)]
        print(row_of_interest)
        new_row = {'PSI Radians': float('nan'), 'PHI Radians':float('nan'), 'PSI Degrees': float('nan'), 'PHI Degrees': float('nan')}
        if row_of_interest.shape[0] == 1 and row_of_interest['Residue Name'].iloc[0] == 'MET':
            new_row = {'PSI Radians': row_of_interest['PSI'].iloc[0], 'PHI Radians':row_of_interest['PHI'].iloc[0], 'PSI Degrees': row_of_interest['PSI degrees'].iloc[0], 'PHI Degrees': row_of_interest['PHI degrees'].iloc[0]}
        df = df._append(new_row, ignore_index = True)
    return df
        


In [68]:
RvsS_psi_phi_map = extract_PSI_and_PHI(RvsS, concat_dihedrals)

       Protein ID Residue Name  Residue Position      PHI       PSI  \
172607     Q8C196          MET               585 -1.70319  2.417496   

        PSI degrees  PHI degrees  
172607   138.512335   -97.585598  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
689465     Q07417          MET               265 -1.115351 -0.760387   

        PSI degrees  PHI degrees  
689465   -43.566981   -63.904919  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
699164     Q91YI0          MET                21 -1.154374 -0.700934   

        PSI degrees  PHI degrees  
699164   -40.160542    -66.14074  
      Protein ID Residue Name  Residue Position       PHI       PSI  \
17296     P50247          MET               419 -2.345182  2.752273   

       PSI degrees  PHI degrees  
17296   157.693617  -134.369058  


  df = df._append(new_row, ignore_index = True)


       Protein ID Residue Name  Residue Position       PHI       PSI  \
738405     P33267          MET               365 -1.695822  0.106409   

        PSI degrees  PHI degrees  
738405      6.09681   -97.163466  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
183757     P97872          MET               416 -1.139703 -0.730922   

        PSI degrees  PHI degrees  
183757   -41.878743   -65.300181  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
108423     Q9DBT9          MET               346 -1.120107 -0.616198   

        PSI degrees  PHI degrees  
108423   -35.305535   -64.177393  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
918880     P32020          MET                21 -2.587119  2.772957   

        PSI degrees  PHI degrees  
918880   158.878716   -148.23101  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
919371     P32020          MET               512 -2.181015  2.282071

In [69]:
RvsS_psi_phi_map

Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,2.417496,-1.703190,138.512335,-97.585598
1,-0.760387,-1.115351,-43.566981,-63.904919
2,-0.700934,-1.154374,-40.160542,-66.140740
3,2.752273,-2.345182,157.693617,-134.369058
4,0.106409,-1.695822,6.096810,-97.163466
...,...,...,...,...
198,2.473794,-2.763199,141.737969,-158.319617
199,-0.581368,-0.945924,-33.309960,-54.197445
200,-0.701559,-1.121504,-40.196397,-64.257462
201,2.377333,-1.234595,136.211150,-70.737087


In [71]:
sum(RvsS_psi_phi_map['PHI Degrees'].isna())

0

In [72]:
RvsS_with_PSI_and_PHI = pd.concat([RvsS, RvsS_psi_phi_map], axis=1)

In [73]:
RvsS_with_PSI_and_PHI

Unnamed: 0,Peptide Sequence,Modified Peptide,Light Modified Peptide,Heavy Modified Peptide,1 Log2 Ratio HL,10 Log2 Ratio HL,11 Log2 Ratio HL,12 Log2 Ratio HL,2 Log2 Ratio HL,3 Log2 Ratio HL,4 Log2 Ratio HL,5 Log2 Ratio HL,6 Log2 Ratio HL,7 Log2 Ratio HL,8 Log2 Ratio HL,9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,p-value,neglogp,Log2HL avg,label,Complete Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,AADTIGYPVMIR,AADTIGYPVMIR,AADTIGYPVM[649.3660]IR,AADTIGYPVM[655.3735]IR,,,3.269016,,,,3.310961,,,,,,sp|Q8C196|CPSM_MOUSE,Q8C196,CPSM_MOUSE,Cps1,"Carbamoyl-phosphate synthase [ammonia], mitoch...",4.058191e-03,2.391668,3.289988,green,MTRILTACKVVKTLKSGFGFANVTTKRQWDFSRPGIRLLSVKAKTA...,575,12,AADTIGYPV,9,584,FAVESMEDALKAADTIGYPV,IRSAYALGGLGSGICPNKET,Q8C196,66,M,584,96.55,-14.238,-12.741,-11.922,-12.356,-21.547,-21.776,-20.583,-22.969,-8.140,-7.953,-8.470,-8.702,STRN,STRN,0,0,1,0,0,0,0,0,0,2,3,3,4,7,8,11,31,85,144,5,0.0,0.0,0.095238,0.095238,2.000000,2.142857,2.380952,3.666667,4.857143,6.190476,7.857143,22.190476,59.952381,120.857143,6.285714,0,2.417496,-1.703190,138.512335,-97.585598
1,IAMQTLDMGR,IAMQTLDMGR,IAM[649.3660]QTLDMGR,IAM[655.3735]QTLDMGR,,2.783695,3.114945,2.697822,,,,3.309030,2.577856,2.251824,,,sp|Q07417|ACADS_MOUSE,Q07417,ACADS_MOUSE,Acads,"Short-chain specific acyl-CoA dehydrogenase, m...",9.647931e-06,5.015566,2.789195,green,MAAALLARARGPLRRALGVRDWRRLHTVYQSVELPETHQMLRQTCR...,262,10,IA,2,264,DCRIPKENLLGEPGMGFKIA,QTLDMGRIGIASQALGIAQA,Q07417,47,M,264,98.03,-3.998,-3.218,-3.663,-1.765,5.685,7.004,7.867,6.799,-16.050,-16.071,-17.261,-16.097,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,5,6,8,8,19,90,173,1,0.0,0.0,0.000000,0.000000,2.047619,2.047619,2.761905,4.857143,6.285714,8.047619,9.047619,30.571429,90.857143,176.571429,8.285714,0,-0.760387,-1.115351,-43.566981,-63.904919
2,FVGAVDPIMEK,FVGAVDPIMEK,FVGAVDPIM[649.3660]EK,FVGAVDPIM[655.3735]EK,,,,,,2.383482,2.727931,,,,,,sp|Q91YI0|ARLY_MOUSE,Q91YI0,ARLY_MOUSE,Asl,Argininosuccinate lyase,4.283587e-02,1.368192,2.555706,green,MASESGKLWGGRFVGAVDPIMEKFNSSISYDRHLWNVDVQGSKAYS...,12,11,FVGAVDPI,8,20,MASESGKLWGGRFVGAVDPI,EKFNSSISYDRHLWNVDVQG,Q91YI0,79,M,20,93.06,0.281,1.383,0.792,2.350,4.013,3.236,2.284,4.136,34.874,35.596,36.649,36.227,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,6,7,7,7,9,24,52,2,0.0,0.0,0.000000,0.000000,1.952381,2.000000,2.238095,3.285714,4.380952,5.238095,5.523810,11.238095,29.571429,61.952381,2.380952,0,-0.700934,-1.154374,-40.160542,-66.140740
3,QAQYLGMPINGPFKPDHYRY,QAQYLGMPINGPFKPDHYRY,QAQYLGM[649.3660]PINGPFKPDHYRY,QAQYLGM[655.3735]PINGPFKPDHYRY,2.394458,2.380664,2.682897,,2.435014,2.412394,,,2.559564,,2.839492,2.607501,sp|P50247|SAHH_MOUSE,P50247,SAHH_MOUSE,Ahcy,Adenosylhomocysteinase,8.561078e-10,9.067472,2.538998,green,MSDKLPYKVADIGLAAWGRKALDIAENEMPGLMRMREMYSASKPLK...,412,20,QAQYLG,6,418,LGKLNVKLTKLTEKQAQYLG,PINGPFKPDHYRY,P50247,26,M,418,96.03,-31.883,-31.393,-30.160,-32.451,-1.725,-2.820,-2.363,-3.286,-2.198,-3.157,-3.955,-4.067,BEND,BEND,1,0,0,0,0,0,0,0,0,2,2,2,3,4,5,6,15,24,39,5,0.0,0.0,0.095238,0.095238,2.000000,2.095238,2.095238,3.238095,4.428571,5.380952,5.952381,13.380952,27.047619,48.190476,3.047619,0,2.752273,-2.345182,157.693617,-134.369058
4,FADVIPMNLPHR,FADVIPMNLPHR,FADVIPM[649.3660]NLPHR,FADVIPM[655.3735]NLPHR,,,2.580647,,2.446614,2.450718,2.078252,,,,,,sp|P33267|CP2F2_MOUSE,P33267,CP2F2_MOUSE,Cyp2f2,Cytochrome P450 2F2,2.032232e-04,3.692027,2.389057,green,MDGVSTAILLLLLAVISLSLTFSSRGKGQLPPGPKPLPILGNLLQL...,358,12,FADVIP,6,364,SMPYTDAVIHEVQRFADVIP,NLPHRVTRDTPFRGFLIPKG,P33267,20,M,364,96.29,1.166,0.040,0.595,-0.973,1.621,2.637,4.050,2.668,-9.734,-9.982,-10.213,-8.915,TURN_TY1_P,TURN,0,0,0,1,0,0,0,0,0,2,2,3,3,6,7,9,27,104,207,6,0.0,0.0,0.000000,0.000000,2.000000,2.285714,2.857143,4.238095,6.095238,7.380952,8.904762,27.523810,85.285714,171.619048,7.809524,0,0.106409,-1.695822,6.096810,-97.163466
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,AHMVTLDYTVQVPGTGR,AHMVTLDYTVQVPGTGR,AHM[649.3660]VTLDYTVQVPGTGR,AHM[655.3735]VTLDYTVQVPGTGR,-1.822067,-1.687501,-1.706146,-1.647231,-1.793966,-1.407199,-3.203481,-2.025144,-1.873531,-1.746197,-1.613004,-1.760150,sp|Q9QXF8|GNMT_MOUSE,Q9QXF8,GNMT_MOUSE,Gnmt,Glycine N-methyltransferase,1.880219e-08,7.725792,-1.857135,yellow,MVDSVYRTRSLGVAAEGLPDQYADGEAARVWQLYIGDTRSRTAEYK...,213,17,AH,2,215,KSDLTKDITTSVLTVNNKAH,VTLDYTVQVPGTGRDGSPGF,Q9QXF8,104,M,215,95.79,17.127,17.414,18.275,18.120,-2.387,-2.142,-0.893,-3.274,-3.437,-1.957,-1.736,-1.371,STRN,STRN,0,0,1,0,0,0,0,0,0,2,2,3,4,6,9,10,25,60,104,2,0.0,0.0,0.000000,0.000000,2.000000,2.142857,2.523810,3.380952,4.428571,6.095238,7.666667,16.476190,42.238095,79.904762,2.380952,0,2.473794,-2.763199,141.737969,-158.319617
199,KEQESEVDMK,KEQESEVDMK,KEQESEVDM[649.3660]K,KEQESEVDM[655.3735]K,-1.759694,,-1.821773,-1.960428,-1.781592,-1.988603,,-1.791248,-1.912515,-1.928952,-1.891596,-1.881011,sp|Q8K3J1|NDUS8_MOUSE,Q8K3J1,NDUS8_MOUSE,Ndufs8,NADH dehydrogenase [ubiquinone] iron-sulfur pr...,7.034064e-14,13.152794,-1.871741,yellow,MYRLSSSMLPRALAQAMRTGHLNGQSLHSSAVAATYKYVNKKEQES...,41,10,KEQESEVD,8,49,SAVAATYKYVNKKEQESEVD,KSATDNAARILMWTELIRGL,Q8K3J1,72,M,49,82.35,-36.502,-36.519,-37.547,-36.824,10.719,11.823,11.524,13.124,50.492,51.559,52.662,50.954,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,1,1,2,2,2,2,2,7,11,19,0,0.0,0.0,0.000000,0.000000,1.333333,1.333333,2.000000,2.904762,3.238095,4.000000,4.238095,7.190476,12.523810,19.428571,0.857143,1,-0.581368,-0.945924,-33.309960,-54.197445
200,RGVMLAVDAVIAELK,RGVMLAVDAVIAELK,RGVM[649.3660]LAVDAVIAELK,RGVM[655.3735]LAVDAVIAELK,-1.812349,-1.980371,-2.307386,-2.492858,-1.405837,-2.094407,-1.994187,-1.873064,-2.015018,-1.664986,-1.829820,-1.935236,sp|P63038|CH60_MOUSE,P63038,CH60_MOUSE,Hspd1,"60 kDa heat shock protein, mitochondrial",7.289026e-11,10.137330,-1.950460,yellow,MLRLPTVLRQMRPVSRALAPHLTRAYAKDVKFGADARALMLQGVDL...,141,15,RGV,3,144,KEGFEKISKGANPVEIRRGV,LAVDAVIAELKKQSKPVTTP,P63038,38,M,144,97.14,19.270,18.162,17.695,18.579,-5.132,-5.161,-6.600,-4.520,-13.015,-14.075,-14.333,-15.329,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,5,6,8,8,26,71,128,2,0.0,0.0,0.000000,0.000000,2.047619,2.142857,2.857143,5.285714,6.238095,8.476190,8.952381,26.095238,62.571429,115.380952,7.571429,0,-0.701559,-1.121504,-40.196397,-64.257462
201,MQLLEIITTDK,MQLLEIITTDK,M[649.3660]QLLEIITTDK,M[655.3735]QLLEIITTDK,-2.286954,-2.700322,-1.941018,-1.860306,-2.069488,,-2.131793,-2.980788,-2.198685,,-2.326194,-2.205174,sp|Q8BMS1|ECHA_MOUSE,Q8BMS1,ECHA_MOUSE,Hadha,"Trifunctional enzyme subunit alpha, mitochondrial",5.582467e-09,8.253174,-2.270072,yellow,MVASRAIGSLSRFSAFRILRSRGCICRSFTTSSALLTRTHINYGVK...,505,11,,0,505,AVSKRPEKVIGMHYFSPVDK,QLLEIITTDKTSKDTTASAV,Q8BMS1,65,M,505,95.41,2.614,3.983,5.051,3.903,-3.420,-4.006,-3.584,-5.460,-5.193,-4.857,-5.870,-4.770,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,2,2,5,6,7,15,55,158,8,0.0,0.0,0.000000,0.095238,2.000000,2.095238,2.571429,3.428571,4.904762,6.761905,8.190476,25.952381,80.380952,173.428571,7.190476,0,2.377333,-1.234595,136.211150,-70.737087


In [74]:
RvsS_with_PSI_and_PHI.to_csv('RvsS_with_PSI_and_PHI.csv', index=False)

# Next we append the the PSI and PHI angles to our MsrAKD datasets

In [91]:
MsrAKD_with_alphafold = pd.read_csv('../MsrKD/MsrAKD_with_alphafold.csv').drop(columns=['Unnamed: 0'])

In [92]:
MsrAKD_with_alphafold

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrA_KD_1 Log2 Ratio HL,MsrA_KD_10 Log2 Ratio HL,MsrA_KD_11 Log2 Ratio HL,MsrA_KD_12 Log2 Ratio HL,MsrA_KD_2 Log2 Ratio HL,MsrA_KD_3 Log2 Ratio HL,MsrA_KD_4 Log2 Ratio HL,MsrA_KD_5 Log2 Ratio HL,MsrA_KD_6 Log2 Ratio HL,MsrA_KD_7 Log2 Ratio HL,MsrA_KD_8 Log2 Ratio HL,MsrA_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,DHFEEAM[649.3660]R,DHFEEAM[655.3735]R,,,,,,,-5.758896,,-5.815074,,-5.929618,-6.270312,sp|P55072|TERA_HUMAN,P55072,TERA_HUMAN,VCP,Transitional endoplasmic reticulum ATPase,0.000016,4.801791,-5.943475,740,M740,TERA_M740,blue,MASGADSKGDDLSTAILKQKNRPNRLIVDEAINEDNSVVSLSQPKM...,DHFEEAMR,733,8,DHFEEA,6,739,MEVEEDDPVPEIRRDHFEEA,RFARRSVSDNDIRKYEMFAQ,P55072,175.0,M,739.0,92.10,30.347,29.227,28.676,29.658,16.244,16.898,18.159,17.253,-33.989,-33.168,-33.851,-31.817,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,7.0,7.0,19.0,46.0,77.0,6.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.238095,3.380952,4.190476,5.047619,5.761905,13.714286,38.714286,67.380952,3.190476,0.0
1,LRHSEREM[649.3660]R,LRHSEREM[655.3735]R,-5.516297,,,,,-6.230529,,,,,,-5.846635,sp|Q9NTJ3|SMC4_HUMAN,Q9NTJ3,SMC4_HUMAN,SMC4,Structural maintenance of chromosomes protein 4,0.001236,2.907956,-5.864487,814,M814,SMC4_M814,blue,MPRKGTQPSTARRREEGPPPPSPDGASSDAEPEPPSGRTESPATAA...,LRHSEREMR,806,9,LRHSERE,7,813,QEQKVQLEERVVKLRHSERE,RNTLEKFTASIQRLIEQEEY,Q9NTJ3,357.0,M,813.0,90.68,-18.555,-19.815,-20.944,-19.544,21.971,21.111,21.879,19.856,-7.705,-7.575,-6.871,-6.876,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,6.0,8.0,8.0,19.0,39.0,56.0,9.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.047619,4.238095,6.142857,7.904762,8.000000,14.666667,35.619048,52.333333,3.952381,0.0
2,VIAHTQM[649.3660]R,VIAHTQM[655.3735]R,,,,,,-5.784255,,,,-5.238105,,,sp|P39023|RL3_HUMAN,P39023,RL3_HUMAN,RPL3,Large ribosomal subunit protein uL3,0.031518,1.501438,-5.511180,168,M168,RL3_M168,blue,MSHRKFSAPRHGSLGFLPRKRSSRHRGKVKSFPKDDPSKPVHLTAF...,VIAHTQMR,161,8,VIAHTQ,6,167,KDFSSMKKYCQVIRVIAHTQ,RLLPLRQKKAHLMEIQVNGG,P39023,130.0,M,167.0,96.88,-17.863,-16.506,-15.864,-16.603,-5.811,-5.715,-7.097,-5.241,-0.443,0.241,0.188,1.627,HELX_RH_3T_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,5.0,6.0,19.0,62.0,124.0,8.0,0.0,0.0,0.0,0.0,2.000000,2.238095,2.380952,3.428571,4.571429,5.952381,7.666667,22.476190,65.476190,125.333333,5.857143,0.0
3,TTGFGM[649.3660]IYDSLDYAK,TTGFGM[655.3735]IYDSLDYAK,,,-5.731299,,-5.257074,,,,,,,,sp|P62847|RS24_HUMAN,P62847,RS24_HUMAN,RPS24,Small ribosomal subunit protein eS24,0.027458,1.561338,-5.494187,74,M74,RS24_M74,blue,MNDTVTIRTRKFMTNRLLQRKQMVIDVLHPGKATVPKTEIREKLAK...,TTGFGMIYDSLDYAK,68,15,TTGFG,5,73,VIFVFGFRTHFGGGKTTGFG,IYDSLDYAKKNEPKHRLARH,P62847,198.0,M,73.0,96.97,-4.509,-3.440,-2.281,-4.063,10.103,9.181,8.978,7.905,5.208,5.789,4.803,6.106,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,6.0,8.0,10.0,22.0,67.0,83.0,2.0,0.0,0.0,0.0,0.0,2.000000,2.142857,2.476190,3.476190,4.714286,6.000000,7.333333,16.857143,42.238095,63.095238,2.619048,0.0
4,QM[649.3660]QVLHPAAR,QM[655.3735]QVLHPAAR,,,,-5.202635,,,,,,,,-5.484248,sp|P50991|TCPD_HUMAN,P50991,TCPD_HUMAN,CCT4,T-complex protein 1 subunit delta,0.016772,1.775418,-5.343441,81,M81,TCPD_M81,blue,MPENVAPRSGATAGAAGGRGKGAYQDRDKPAQIRFSNISAAKAVAD...,QMQVLHPAAR,79,10,Q,1,80,IQDGKGDVTITNDGATILKQ,QVLHPAARMLVELSKAQDIE,P50991,161.0,M,80.0,87.61,-13.321,-13.160,-13.997,-11.760,10.965,9.434,8.723,9.017,-10.127,-10.178,-9.101,-10.069,BEND,BEND,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,3.0,4.0,10.0,43.0,77.0,3.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.095238,3.571429,4.333333,5.428571,5.904762,16.952381,55.571429,105.619048,5.380952,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
651,TWM[649.3660]WWHNFR,TWM[655.3735]WWHNFR,,,-0.384153,,,,,-0.379949,0.660619,,-0.343055,,sp|O14744|ANM5_HUMAN,O14744,ANM5_HUMAN,PRMT5,Protein arginine N-methyltransferase 5,0.694002,0.158639,-0.111634,187,M187,ANM5_M187,gray,MAAMAVGGAGGSRVSSGRDLNCVPEIADTLGAVAKQGFDFLCMPVF...,TWMWWHNFR,184,9,TW,2,186,IIENAPTTHTEEYSGEEKTW,WWHNFRTLCDYSKRIAVALE,O14744,8.0,M,186.0,94.73,-7.056,-5.524,-4.932,-4.929,-18.274,-18.277,-19.603,-17.159,12.457,12.363,12.863,13.098,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,6.0,6.0,8.0,8.0,18.0,76.0,140.0,2.0,0.0,0.0,0.0,0.0,1.666667,1.666667,2.238095,3.761905,4.571429,5.714286,6.190476,16.047619,50.666667,105.571429,4.619048,0.0
652,M[649.3660]EELHNQEVQK,M[655.3735]EELHNQEVQK,-2.289166,-2.214471,,-2.123250,3.901065,4.142890,-1.931592,3.755303,-2.070951,-2.075727,,3.876525,sp|Q15233|NONO_HUMAN,Q15233,NONO_HUMAN,NONO,Non-POU domain-containing octamer-binding protein,0.770188,0.113403,0.297063,326,M326,NONO_M326,gray,MQSNKTFNLEKQNHTPRKHHQHHHQQQHHQQQQQQPPPPPIPANGQ...,MEELHNQEVQK,325,11,,0,325,EHQVMLMRQDLMRRQEELRR,EELHNQEVQKRKQLELRQEE,Q15233,264.0,M,325.0,91.62,-38.460,-37.724,-36.207,-38.218,-12.406,-13.018,-12.828,-12.483,45.845,44.646,44.784,43.371,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,8.0,8.0,12.0,18.0,26.0,1.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.428571,5.047619,5.857143,7.619048,7.619048,11.904762,18.952381,26.047619,2.238095,1.0
653,LAM[649.3660]QLEEQASR,LAM[655.3735]QLEEQASR,,0.426347,,,,,,-0.483441,,,0.191181,,sp|Q99661|KIF2C_HUMAN,Q99661,KIF2C_HUMAN,KIF2C,Kinesin-like protein KIF2C,0.884856,0.053127,0.044696,708,M708,KIF2C_M708,gray,MAMDSSLQARLFPGLAIKIQRSNGLIHSANVRTVNLEKSCVSVEWA...,LAMQLEEQASR,705,11,LA,2,707,AQQAKHFSALRDVIKALRLA,QLEEQASRQISSKKRPQ,Q99661,334.0,M,707.0,88.83,-24.981,-25.086,-25.539,-25.988,1.897,1.274,-0.189,2.016,-16.063,-14.663,-14.792,-13.773,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,8.0,8.0,15.0,38.0,61.0,2.0,0.0,0.0,0.0,0.0,1.952381,1.952381,2.095238,4.761905,5.333333,7.095238,7.333333,13.714286,31.571429,51.142857,3.047619,0.0
654,QNFHM[649.3660]EQLK,QNFHM[655.3735]EQLK,,1.544593,,,-0.553917,,0.962793,-2.636370,,1.440900,,-1.252720,sp|Q92922|SMRC1_HUMAN,Q92922,SMRC1_HUMAN,SMARCC1,SWI/SNF complex subunit SMARCC1,0.909191,0.041345,-0.082454,944,M944,SMRC1_M944,gray,MAAAAGGGGPGTAVGATGSGIAAAAAGLAVYRRKDGGPATKFWESP...,QNFHMEQLK,939,9,QNFH,4,943,EKEALEQQRQQLLTERQNFH,EQLKYAELRARQQMEQQQHG,Q92922,322.0,M,943.0,88.51,-85.839,-85.271,-84.262,-86.313,48.138,48.568,47.535,48.764,-55.863,-54.509,-53.981,-53.499,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,6.0,7.0,8.0,13.0,20.0,26.0,4.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.619048,4.238095,5.666667,6.666667,7.285714,11.333333,18.714286,24.904762,2.428571,1.0


In [93]:
MsrAKD_with_alphafold[MsrAKD_with_alphafold['Protein ID'].isin(unique_mismatches)]

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrA_KD_1 Log2 Ratio HL,MsrA_KD_10 Log2 Ratio HL,MsrA_KD_11 Log2 Ratio HL,MsrA_KD_12 Log2 Ratio HL,MsrA_KD_2 Log2 Ratio HL,MsrA_KD_3 Log2 Ratio HL,MsrA_KD_4 Log2 Ratio HL,MsrA_KD_5 Log2 Ratio HL,MsrA_KD_6 Log2 Ratio HL,MsrA_KD_7 Log2 Ratio HL,MsrA_KD_8 Log2 Ratio HL,MsrA_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
229,RRM[649.3660]QYNR,RRM[655.3735]QYNR,-3.506345,-3.704064,,-4.418432,-3.873315,,,,-3.361222,-3.725144,-4.123304,,sp|P62861|RS30_HUMAN,P62861,RS30_HUMAN,FAU,Ubiquitin-like FUBI-ribosomal protein eS30 fus...,1.394322e-07,6.855637,-3.815975,110,M110,RS30_M110,green,MQLFVRAQELHTFEVTGQETVAQIKAHVASLEGIAPEDQVVLLAGA...,RRMQYNR,107,7,RR,2,109,TPKVAKQEKKKKKTGRAKRR,QYNRRFVNVVPTFGKKKGPN,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
365,FNADEFEDM[649.3660]VAEKR,FNADEFEDM[655.3735]VAEKR,,,,-2.716301,,,,,-3.053042,,-2.913356,,sp|P27635|RL10_HUMAN,P27635,RL10_HUMAN,RPL10,Large ribosomal subunit protein uL16,0.001137061,2.944216,-2.894233,184,M184,RL10_M184,green,MGRRPARCYRYCKNKPYPKSRFCRGVPDAKIRIFDLGRKKAKVDEF...,FNADEFEDMVAEKR,175,14,FNADEFED,8,183,KIHISKKWGFTKFNADEFED,VAEKRLIPDGCGVKYIPSRG,P27635,107.0,M,183.0,93.74,-24.168,-24.495,-23.644,-24.28,-0.909,0.487,0.727,1.564,14.269,13.711,12.465,14.681,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,9.0,10.0,15.0,36.0,57.0,4.0,0.0,0.0,0.0,0.0,2.0,2.047619,2.380952,3.190476,4.285714,5.52381,6.619048,13.714286,36.571429,61.52381,2.904762,0.0
497,IM[649.3660]VANIEEVLQR,IM[655.3735]VANIEEVLQR,-2.040931,-1.316275,,,-1.904369,-1.83446,-1.749982,-1.672267,-1.100205,,-1.3732,-1.480174,sp|O75396|SC22B_HUMAN,O75396,SC22B_HUMAN,SEC22B,Vesicle-trafficking protein SEC22b,2.846295e-07,6.54572,-1.607985,149,M149,SC22B_M149,green,MVLLTMIARVADGLPLAASMQEDEQSGRDLQQYQSQAKQLFRKLNE...,IMVANIEEVLQR,147,12,I,1,148,SRARRNLGSINTELQDVQRI,VANIEEVLQRGEALSALDSK,O75396,31.0,M,148.0,89.25,-6.568,-7.906,-7.783,-8.932,-14.719,-14.634,-13.785,-14.095,-2.811,-3.548,-4.821,-2.656,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,4.0,5.0,11.0,33.0,73.0,2.0,0.0,0.0,0.0,0.0,1.333333,1.333333,2.047619,2.571429,2.809524,3.428571,3.619048,8.142857,22.666667,46.52381,1.190476,0.0
634,KEDLELIM[649.3660]TEMEISR,KEDLELIM[655.3735]TEMEISR,,,,,,-0.557871,,,,-0.144106,,,sp|Q9NX55|HYPK_HUMAN,Q9NX55,HYPK_HUMAN,HYPK,Huntingtin-interacting protein K,0.3390699,0.469711,-0.350988,91,M91,HYPK_M91,gray,MATEGDVELELETETSGPERPPEKPRKHDSGAADLERVTDYAEEKE...,KEDLELIMTEMEISR,83,15,KEDLELI,7,90,QEREKELAKVTIKKEDLELI,TEMEISRAAAERSLREHMGN,Q9NX55,363.0,K,90.0,91.77,12.002,13.217,13.843,12.835,5.543,4.63,4.437,3.35,-9.096,-9.016,-10.397,-8.41,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,9.0,25.0,42.0,2.0,0.0,0.0,0.0,0.0,1.333333,1.333333,2.0,2.952381,3.666667,4.285714,4.47619,9.761905,22.142857,30.428571,2.238095,1.0


In [108]:
MsrAKD_with_alphafold_wo_mismatches = MsrAKD_with_alphafold.drop(MsrAKD_with_alphafold[MsrAKD_with_alphafold['Protein ID'].isin(unique_mismatches)].index).reset_index(drop = True)

In [109]:
MsrAKD_with_alphafold_wo_mismatches

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrA_KD_1 Log2 Ratio HL,MsrA_KD_10 Log2 Ratio HL,MsrA_KD_11 Log2 Ratio HL,MsrA_KD_12 Log2 Ratio HL,MsrA_KD_2 Log2 Ratio HL,MsrA_KD_3 Log2 Ratio HL,MsrA_KD_4 Log2 Ratio HL,MsrA_KD_5 Log2 Ratio HL,MsrA_KD_6 Log2 Ratio HL,MsrA_KD_7 Log2 Ratio HL,MsrA_KD_8 Log2 Ratio HL,MsrA_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,DHFEEAM[649.3660]R,DHFEEAM[655.3735]R,,,,,,,-5.758896,,-5.815074,,-5.929618,-6.270312,sp|P55072|TERA_HUMAN,P55072,TERA_HUMAN,VCP,Transitional endoplasmic reticulum ATPase,0.000016,4.801791,-5.943475,740,M740,TERA_M740,blue,MASGADSKGDDLSTAILKQKNRPNRLIVDEAINEDNSVVSLSQPKM...,DHFEEAMR,733,8,DHFEEA,6,739,MEVEEDDPVPEIRRDHFEEA,RFARRSVSDNDIRKYEMFAQ,P55072,175.0,M,739.0,92.10,30.347,29.227,28.676,29.658,16.244,16.898,18.159,17.253,-33.989,-33.168,-33.851,-31.817,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,7.0,7.0,19.0,46.0,77.0,6.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.238095,3.380952,4.190476,5.047619,5.761905,13.714286,38.714286,67.380952,3.190476,0.0
1,LRHSEREM[649.3660]R,LRHSEREM[655.3735]R,-5.516297,,,,,-6.230529,,,,,,-5.846635,sp|Q9NTJ3|SMC4_HUMAN,Q9NTJ3,SMC4_HUMAN,SMC4,Structural maintenance of chromosomes protein 4,0.001236,2.907956,-5.864487,814,M814,SMC4_M814,blue,MPRKGTQPSTARRREEGPPPPSPDGASSDAEPEPPSGRTESPATAA...,LRHSEREMR,806,9,LRHSERE,7,813,QEQKVQLEERVVKLRHSERE,RNTLEKFTASIQRLIEQEEY,Q9NTJ3,357.0,M,813.0,90.68,-18.555,-19.815,-20.944,-19.544,21.971,21.111,21.879,19.856,-7.705,-7.575,-6.871,-6.876,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,6.0,8.0,8.0,19.0,39.0,56.0,9.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.047619,4.238095,6.142857,7.904762,8.000000,14.666667,35.619048,52.333333,3.952381,0.0
2,VIAHTQM[649.3660]R,VIAHTQM[655.3735]R,,,,,,-5.784255,,,,-5.238105,,,sp|P39023|RL3_HUMAN,P39023,RL3_HUMAN,RPL3,Large ribosomal subunit protein uL3,0.031518,1.501438,-5.511180,168,M168,RL3_M168,blue,MSHRKFSAPRHGSLGFLPRKRSSRHRGKVKSFPKDDPSKPVHLTAF...,VIAHTQMR,161,8,VIAHTQ,6,167,KDFSSMKKYCQVIRVIAHTQ,RLLPLRQKKAHLMEIQVNGG,P39023,130.0,M,167.0,96.88,-17.863,-16.506,-15.864,-16.603,-5.811,-5.715,-7.097,-5.241,-0.443,0.241,0.188,1.627,HELX_RH_3T_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,5.0,6.0,19.0,62.0,124.0,8.0,0.0,0.0,0.0,0.0,2.000000,2.238095,2.380952,3.428571,4.571429,5.952381,7.666667,22.476190,65.476190,125.333333,5.857143,0.0
3,TTGFGM[649.3660]IYDSLDYAK,TTGFGM[655.3735]IYDSLDYAK,,,-5.731299,,-5.257074,,,,,,,,sp|P62847|RS24_HUMAN,P62847,RS24_HUMAN,RPS24,Small ribosomal subunit protein eS24,0.027458,1.561338,-5.494187,74,M74,RS24_M74,blue,MNDTVTIRTRKFMTNRLLQRKQMVIDVLHPGKATVPKTEIREKLAK...,TTGFGMIYDSLDYAK,68,15,TTGFG,5,73,VIFVFGFRTHFGGGKTTGFG,IYDSLDYAKKNEPKHRLARH,P62847,198.0,M,73.0,96.97,-4.509,-3.440,-2.281,-4.063,10.103,9.181,8.978,7.905,5.208,5.789,4.803,6.106,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,6.0,8.0,10.0,22.0,67.0,83.0,2.0,0.0,0.0,0.0,0.0,2.000000,2.142857,2.476190,3.476190,4.714286,6.000000,7.333333,16.857143,42.238095,63.095238,2.619048,0.0
4,QM[649.3660]QVLHPAAR,QM[655.3735]QVLHPAAR,,,,-5.202635,,,,,,,,-5.484248,sp|P50991|TCPD_HUMAN,P50991,TCPD_HUMAN,CCT4,T-complex protein 1 subunit delta,0.016772,1.775418,-5.343441,81,M81,TCPD_M81,blue,MPENVAPRSGATAGAAGGRGKGAYQDRDKPAQIRFSNISAAKAVAD...,QMQVLHPAAR,79,10,Q,1,80,IQDGKGDVTITNDGATILKQ,QVLHPAARMLVELSKAQDIE,P50991,161.0,M,80.0,87.61,-13.321,-13.160,-13.997,-11.760,10.965,9.434,8.723,9.017,-10.127,-10.178,-9.101,-10.069,BEND,BEND,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,3.0,4.0,10.0,43.0,77.0,3.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.095238,3.571429,4.333333,5.428571,5.904762,16.952381,55.571429,105.619048,5.380952,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,TWM[649.3660]WWHNFR,TWM[655.3735]WWHNFR,,,-0.384153,,,,,-0.379949,0.660619,,-0.343055,,sp|O14744|ANM5_HUMAN,O14744,ANM5_HUMAN,PRMT5,Protein arginine N-methyltransferase 5,0.694002,0.158639,-0.111634,187,M187,ANM5_M187,gray,MAAMAVGGAGGSRVSSGRDLNCVPEIADTLGAVAKQGFDFLCMPVF...,TWMWWHNFR,184,9,TW,2,186,IIENAPTTHTEEYSGEEKTW,WWHNFRTLCDYSKRIAVALE,O14744,8.0,M,186.0,94.73,-7.056,-5.524,-4.932,-4.929,-18.274,-18.277,-19.603,-17.159,12.457,12.363,12.863,13.098,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,6.0,6.0,8.0,8.0,18.0,76.0,140.0,2.0,0.0,0.0,0.0,0.0,1.666667,1.666667,2.238095,3.761905,4.571429,5.714286,6.190476,16.047619,50.666667,105.571429,4.619048,0.0
648,M[649.3660]EELHNQEVQK,M[655.3735]EELHNQEVQK,-2.289166,-2.214471,,-2.123250,3.901065,4.142890,-1.931592,3.755303,-2.070951,-2.075727,,3.876525,sp|Q15233|NONO_HUMAN,Q15233,NONO_HUMAN,NONO,Non-POU domain-containing octamer-binding protein,0.770188,0.113403,0.297063,326,M326,NONO_M326,gray,MQSNKTFNLEKQNHTPRKHHQHHHQQQHHQQQQQQPPPPPIPANGQ...,MEELHNQEVQK,325,11,,0,325,EHQVMLMRQDLMRRQEELRR,EELHNQEVQKRKQLELRQEE,Q15233,264.0,M,325.0,91.62,-38.460,-37.724,-36.207,-38.218,-12.406,-13.018,-12.828,-12.483,45.845,44.646,44.784,43.371,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,8.0,8.0,12.0,18.0,26.0,1.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.428571,5.047619,5.857143,7.619048,7.619048,11.904762,18.952381,26.047619,2.238095,1.0
649,LAM[649.3660]QLEEQASR,LAM[655.3735]QLEEQASR,,0.426347,,,,,,-0.483441,,,0.191181,,sp|Q99661|KIF2C_HUMAN,Q99661,KIF2C_HUMAN,KIF2C,Kinesin-like protein KIF2C,0.884856,0.053127,0.044696,708,M708,KIF2C_M708,gray,MAMDSSLQARLFPGLAIKIQRSNGLIHSANVRTVNLEKSCVSVEWA...,LAMQLEEQASR,705,11,LA,2,707,AQQAKHFSALRDVIKALRLA,QLEEQASRQISSKKRPQ,Q99661,334.0,M,707.0,88.83,-24.981,-25.086,-25.539,-25.988,1.897,1.274,-0.189,2.016,-16.063,-14.663,-14.792,-13.773,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,8.0,8.0,15.0,38.0,61.0,2.0,0.0,0.0,0.0,0.0,1.952381,1.952381,2.095238,4.761905,5.333333,7.095238,7.333333,13.714286,31.571429,51.142857,3.047619,0.0
650,QNFHM[649.3660]EQLK,QNFHM[655.3735]EQLK,,1.544593,,,-0.553917,,0.962793,-2.636370,,1.440900,,-1.252720,sp|Q92922|SMRC1_HUMAN,Q92922,SMRC1_HUMAN,SMARCC1,SWI/SNF complex subunit SMARCC1,0.909191,0.041345,-0.082454,944,M944,SMRC1_M944,gray,MAAAAGGGGPGTAVGATGSGIAAAAAGLAVYRRKDGGPATKFWESP...,QNFHMEQLK,939,9,QNFH,4,943,EKEALEQQRQQLLTERQNFH,EQLKYAELRARQQMEQQQHG,Q92922,322.0,M,943.0,88.51,-85.839,-85.271,-84.262,-86.313,48.138,48.568,47.535,48.764,-55.863,-54.509,-53.981,-53.499,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,6.0,7.0,8.0,13.0,20.0,26.0,4.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.619048,4.238095,5.666667,6.666667,7.285714,11.333333,18.714286,24.904762,2.428571,1.0


In [110]:
MsrAKD_with_alphafold_wo_mismatches[MsrAKD_with_alphafold_wo_mismatches['Protein ID'].isin(unique_mismatches)]

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrA_KD_1 Log2 Ratio HL,MsrA_KD_10 Log2 Ratio HL,MsrA_KD_11 Log2 Ratio HL,MsrA_KD_12 Log2 Ratio HL,MsrA_KD_2 Log2 Ratio HL,MsrA_KD_3 Log2 Ratio HL,MsrA_KD_4 Log2 Ratio HL,MsrA_KD_5 Log2 Ratio HL,MsrA_KD_6 Log2 Ratio HL,MsrA_KD_7 Log2 Ratio HL,MsrA_KD_8 Log2 Ratio HL,MsrA_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR


In [111]:
MsrAKD_psi_and_psi_map = extract_PSI_and_PHI(MsrAKD_with_alphafold_wo_mismatches, concat_dihedrals)

       Protein ID Residue Name  Residue Position       PHI       PSI  \
166655     P55072          MET               740 -1.146523 -0.555659   

        PSI degrees  PHI degrees  
166655   -31.836928   -65.690956  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
408757     Q9NTJ3          MET               814 -1.057627 -0.870103   

        PSI degrees  PHI degrees  
408757   -49.853215   -60.597577  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
128567     P39023          MET               168 -1.322379 -0.491176   

        PSI degrees  PHI degrees  
128567   -28.142283   -75.766714  


  df = df._append(new_row, ignore_index = True)


       Protein ID Residue Name  Residue Position       PHI      PSI  \
706406     P62847          MET                74 -2.178688  2.27145   

        PSI degrees  PHI degrees  
706406   130.144505  -124.829609  
       Protein ID Residue Name  Residue Position      PHI       PSI  \
481261     P50991          MET                81 -1.22962  2.297773   

        PSI degrees  PHI degrees  
481261   131.652676   -70.452048  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
499457     P78371          MET               160 -1.093004 -0.753688   

        PSI degrees  PHI degrees  
499457   -43.183138     -62.6245  
       Protein ID Residue Name  Residue Position      PHI       PSI  \
691942     P62304          MET                14 -1.51569  2.372517   

        PSI degrees  PHI degrees  
691942   135.935232   -86.842626  
      Protein ID Residue Name  Residue Position       PHI       PSI  \
88430     P51991          MET               158 -1.532683  2.136565   

   

In [112]:
sum(MsrAKD_psi_and_psi_map['PHI Degrees'].isna())

0

In [113]:
MsrAKD_psi_and_psi_map

Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,-0.555659,-1.146523,-31.836928,-65.690956
1,-0.870103,-1.057627,-49.853215,-60.597577
2,-0.491176,-1.322379,-28.142283,-75.766714
3,2.271450,-2.178688,130.144505,-124.829609
4,2.297773,-1.229620,131.652676,-70.452048
...,...,...,...,...
647,-0.668824,-1.106320,-38.320821,-63.387457
648,-0.724554,-1.152084,-41.513904,-66.009532
649,-0.693338,-1.070541,-39.725343,-61.337507
650,-0.650127,-1.307328,-37.249556,-74.904401


In [114]:
MsrAKD_with_PSI_and_PHI = pd.concat([MsrAKD_with_alphafold_wo_mismatches, MsrAKD_psi_and_psi_map], axis=1)

In [115]:
MsrAKD_with_PSI_and_PHI

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrA_KD_1 Log2 Ratio HL,MsrA_KD_10 Log2 Ratio HL,MsrA_KD_11 Log2 Ratio HL,MsrA_KD_12 Log2 Ratio HL,MsrA_KD_2 Log2 Ratio HL,MsrA_KD_3 Log2 Ratio HL,MsrA_KD_4 Log2 Ratio HL,MsrA_KD_5 Log2 Ratio HL,MsrA_KD_6 Log2 Ratio HL,MsrA_KD_7 Log2 Ratio HL,MsrA_KD_8 Log2 Ratio HL,MsrA_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,DHFEEAM[649.3660]R,DHFEEAM[655.3735]R,,,,,,,-5.758896,,-5.815074,,-5.929618,-6.270312,sp|P55072|TERA_HUMAN,P55072,TERA_HUMAN,VCP,Transitional endoplasmic reticulum ATPase,0.000016,4.801791,-5.943475,740,M740,TERA_M740,blue,MASGADSKGDDLSTAILKQKNRPNRLIVDEAINEDNSVVSLSQPKM...,DHFEEAMR,733,8,DHFEEA,6,739,MEVEEDDPVPEIRRDHFEEA,RFARRSVSDNDIRKYEMFAQ,P55072,175.0,M,739.0,92.10,30.347,29.227,28.676,29.658,16.244,16.898,18.159,17.253,-33.989,-33.168,-33.851,-31.817,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,7.0,7.0,19.0,46.0,77.0,6.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.238095,3.380952,4.190476,5.047619,5.761905,13.714286,38.714286,67.380952,3.190476,0.0,-0.555659,-1.146523,-31.836928,-65.690956
1,LRHSEREM[649.3660]R,LRHSEREM[655.3735]R,-5.516297,,,,,-6.230529,,,,,,-5.846635,sp|Q9NTJ3|SMC4_HUMAN,Q9NTJ3,SMC4_HUMAN,SMC4,Structural maintenance of chromosomes protein 4,0.001236,2.907956,-5.864487,814,M814,SMC4_M814,blue,MPRKGTQPSTARRREEGPPPPSPDGASSDAEPEPPSGRTESPATAA...,LRHSEREMR,806,9,LRHSERE,7,813,QEQKVQLEERVVKLRHSERE,RNTLEKFTASIQRLIEQEEY,Q9NTJ3,357.0,M,813.0,90.68,-18.555,-19.815,-20.944,-19.544,21.971,21.111,21.879,19.856,-7.705,-7.575,-6.871,-6.876,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,6.0,8.0,8.0,19.0,39.0,56.0,9.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.047619,4.238095,6.142857,7.904762,8.000000,14.666667,35.619048,52.333333,3.952381,0.0,-0.870103,-1.057627,-49.853215,-60.597577
2,VIAHTQM[649.3660]R,VIAHTQM[655.3735]R,,,,,,-5.784255,,,,-5.238105,,,sp|P39023|RL3_HUMAN,P39023,RL3_HUMAN,RPL3,Large ribosomal subunit protein uL3,0.031518,1.501438,-5.511180,168,M168,RL3_M168,blue,MSHRKFSAPRHGSLGFLPRKRSSRHRGKVKSFPKDDPSKPVHLTAF...,VIAHTQMR,161,8,VIAHTQ,6,167,KDFSSMKKYCQVIRVIAHTQ,RLLPLRQKKAHLMEIQVNGG,P39023,130.0,M,167.0,96.88,-17.863,-16.506,-15.864,-16.603,-5.811,-5.715,-7.097,-5.241,-0.443,0.241,0.188,1.627,HELX_RH_3T_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,5.0,6.0,19.0,62.0,124.0,8.0,0.0,0.0,0.0,0.0,2.000000,2.238095,2.380952,3.428571,4.571429,5.952381,7.666667,22.476190,65.476190,125.333333,5.857143,0.0,-0.491176,-1.322379,-28.142283,-75.766714
3,TTGFGM[649.3660]IYDSLDYAK,TTGFGM[655.3735]IYDSLDYAK,,,-5.731299,,-5.257074,,,,,,,,sp|P62847|RS24_HUMAN,P62847,RS24_HUMAN,RPS24,Small ribosomal subunit protein eS24,0.027458,1.561338,-5.494187,74,M74,RS24_M74,blue,MNDTVTIRTRKFMTNRLLQRKQMVIDVLHPGKATVPKTEIREKLAK...,TTGFGMIYDSLDYAK,68,15,TTGFG,5,73,VIFVFGFRTHFGGGKTTGFG,IYDSLDYAKKNEPKHRLARH,P62847,198.0,M,73.0,96.97,-4.509,-3.440,-2.281,-4.063,10.103,9.181,8.978,7.905,5.208,5.789,4.803,6.106,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,6.0,8.0,10.0,22.0,67.0,83.0,2.0,0.0,0.0,0.0,0.0,2.000000,2.142857,2.476190,3.476190,4.714286,6.000000,7.333333,16.857143,42.238095,63.095238,2.619048,0.0,2.271450,-2.178688,130.144505,-124.829609
4,QM[649.3660]QVLHPAAR,QM[655.3735]QVLHPAAR,,,,-5.202635,,,,,,,,-5.484248,sp|P50991|TCPD_HUMAN,P50991,TCPD_HUMAN,CCT4,T-complex protein 1 subunit delta,0.016772,1.775418,-5.343441,81,M81,TCPD_M81,blue,MPENVAPRSGATAGAAGGRGKGAYQDRDKPAQIRFSNISAAKAVAD...,QMQVLHPAAR,79,10,Q,1,80,IQDGKGDVTITNDGATILKQ,QVLHPAARMLVELSKAQDIE,P50991,161.0,M,80.0,87.61,-13.321,-13.160,-13.997,-11.760,10.965,9.434,8.723,9.017,-10.127,-10.178,-9.101,-10.069,BEND,BEND,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,3.0,4.0,10.0,43.0,77.0,3.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.095238,3.571429,4.333333,5.428571,5.904762,16.952381,55.571429,105.619048,5.380952,0.0,2.297773,-1.229620,131.652676,-70.452048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,TWM[649.3660]WWHNFR,TWM[655.3735]WWHNFR,,,-0.384153,,,,,-0.379949,0.660619,,-0.343055,,sp|O14744|ANM5_HUMAN,O14744,ANM5_HUMAN,PRMT5,Protein arginine N-methyltransferase 5,0.694002,0.158639,-0.111634,187,M187,ANM5_M187,gray,MAAMAVGGAGGSRVSSGRDLNCVPEIADTLGAVAKQGFDFLCMPVF...,TWMWWHNFR,184,9,TW,2,186,IIENAPTTHTEEYSGEEKTW,WWHNFRTLCDYSKRIAVALE,O14744,8.0,M,186.0,94.73,-7.056,-5.524,-4.932,-4.929,-18.274,-18.277,-19.603,-17.159,12.457,12.363,12.863,13.098,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,6.0,6.0,8.0,8.0,18.0,76.0,140.0,2.0,0.0,0.0,0.0,0.0,1.666667,1.666667,2.238095,3.761905,4.571429,5.714286,6.190476,16.047619,50.666667,105.571429,4.619048,0.0,-0.668824,-1.106320,-38.320821,-63.387457
648,M[649.3660]EELHNQEVQK,M[655.3735]EELHNQEVQK,-2.289166,-2.214471,,-2.123250,3.901065,4.142890,-1.931592,3.755303,-2.070951,-2.075727,,3.876525,sp|Q15233|NONO_HUMAN,Q15233,NONO_HUMAN,NONO,Non-POU domain-containing octamer-binding protein,0.770188,0.113403,0.297063,326,M326,NONO_M326,gray,MQSNKTFNLEKQNHTPRKHHQHHHQQQHHQQQQQQPPPPPIPANGQ...,MEELHNQEVQK,325,11,,0,325,EHQVMLMRQDLMRRQEELRR,EELHNQEVQKRKQLELRQEE,Q15233,264.0,M,325.0,91.62,-38.460,-37.724,-36.207,-38.218,-12.406,-13.018,-12.828,-12.483,45.845,44.646,44.784,43.371,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,8.0,8.0,12.0,18.0,26.0,1.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.428571,5.047619,5.857143,7.619048,7.619048,11.904762,18.952381,26.047619,2.238095,1.0,-0.724554,-1.152084,-41.513904,-66.009532
649,LAM[649.3660]QLEEQASR,LAM[655.3735]QLEEQASR,,0.426347,,,,,,-0.483441,,,0.191181,,sp|Q99661|KIF2C_HUMAN,Q99661,KIF2C_HUMAN,KIF2C,Kinesin-like protein KIF2C,0.884856,0.053127,0.044696,708,M708,KIF2C_M708,gray,MAMDSSLQARLFPGLAIKIQRSNGLIHSANVRTVNLEKSCVSVEWA...,LAMQLEEQASR,705,11,LA,2,707,AQQAKHFSALRDVIKALRLA,QLEEQASRQISSKKRPQ,Q99661,334.0,M,707.0,88.83,-24.981,-25.086,-25.539,-25.988,1.897,1.274,-0.189,2.016,-16.063,-14.663,-14.792,-13.773,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,8.0,8.0,15.0,38.0,61.0,2.0,0.0,0.0,0.0,0.0,1.952381,1.952381,2.095238,4.761905,5.333333,7.095238,7.333333,13.714286,31.571429,51.142857,3.047619,0.0,-0.693338,-1.070541,-39.725343,-61.337507
650,QNFHM[649.3660]EQLK,QNFHM[655.3735]EQLK,,1.544593,,,-0.553917,,0.962793,-2.636370,,1.440900,,-1.252720,sp|Q92922|SMRC1_HUMAN,Q92922,SMRC1_HUMAN,SMARCC1,SWI/SNF complex subunit SMARCC1,0.909191,0.041345,-0.082454,944,M944,SMRC1_M944,gray,MAAAAGGGGPGTAVGATGSGIAAAAAGLAVYRRKDGGPATKFWESP...,QNFHMEQLK,939,9,QNFH,4,943,EKEALEQQRQQLLTERQNFH,EQLKYAELRARQQMEQQQHG,Q92922,322.0,M,943.0,88.51,-85.839,-85.271,-84.262,-86.313,48.138,48.568,47.535,48.764,-55.863,-54.509,-53.981,-53.499,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,6.0,7.0,8.0,13.0,20.0,26.0,4.0,0.0,0.0,0.0,0.0,2.000000,2.000000,2.619048,4.238095,5.666667,6.666667,7.285714,11.333333,18.714286,24.904762,2.428571,1.0,-0.650127,-1.307328,-37.249556,-74.904401


In [116]:
MsrAKD_with_PSI_and_PHI.to_csv('MsrAKD_with_PSI_and_PHI.csv')

# Next we append the the PSI and PHI angles to our MsrB2KD dataset

In [119]:
MsrBKD_with_alphafold = pd.read_csv('../MsrKD/MsrB2KD_with_alphafold.csv').drop(columns=['Unnamed: 0'])

In [120]:
MsrBKD_with_alphafold

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrB2_KD_1 Log2 Ratio HL,MsrB2_KD_10 Log2 Ratio HL,MsrB2_KD_11 Log2 Ratio HL,MsrB2_KD_12 Log2 Ratio HL,MsrB2_KD_2 Log2 Ratio HL,MsrB2_KD_3 Log2 Ratio HL,MsrB2_KD_4 Log2 Ratio HL,MsrB2_KD_5 Log2 Ratio HL,MsrB2_KD_6 Log2 Ratio HL,MsrB2_KD_7 Log2 Ratio HL,MsrB2_KD_8 Log2 Ratio HL,MsrB2_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,FAGLHFFNPVPVM[649.3660]K,FAGLHFFNPVPVM[655.3735]K,-4.205662,-3.548803,-4.189766,-4.160031,-4.436141,-3.632403,-3.887475,-4.186961,-4.153877,-3.975911,-4.619462,-4.402592,sp|Q16836|HCDH_HUMAN,Q16836,HCDH_HUMAN,HADH,"Hydroxyacyl-coenzyme A dehydrogenase, mitochon...",7.317853e-14,13.135616,-4.116590,178,M178,HCDH_M178,blue,MAFVTRQFMRSVSSSSTASASAKKIIVKHVTVIGGGLMGAGIAQVA...,FAGLHFFNPVPVMK,165,14,FAGLHFFNPVPV,12,177,NATTRQDRFAGLHFFNPVPV,KLVEVIKTPMTSQKTFESLV,Q16836,314.0,M,177.0,96.23,5.346,6.696,6.549,7.369,11.783,11.146,9.997,10.694,3.255,2.932,1.926,4.145,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,6.0,7.0,8.0,17.0,53.0,141.0,6.0,0.0,0.0,0.000000,0.095238,2.000000,2.142857,2.571429,3.571429,5.333333,6.571429,8.333333,26.047619,71.952381,142.619048,6.809524,0.0
1,IGM[649.3660]SVNAIR,IGM[655.3735]SVNAIR,,,,-4.399577,,-3.417119,-3.768051,,,,,-3.827056,sp|P23193|TCEA1_HUMAN,P23193,TCEA1_HUMAN,TCEA1,Transcription elongation factor A protein 1,3.213832e-04,3.492977,-3.852951,48,M48,TCEA1_M48,blue,MEDEVVRFAKKMDKMVQKKNAAGALDLLKELKNIPMTLELLQSTRI...,IGMSVNAIR,45,9,IG,2,47,LKELKNIPMTLELLQSTRIG,SVNAIRKQSTDEEVTSLAKS,P23193,118.0,M,47.0,85.20,-13.510,-13.575,-12.453,-13.473,16.092,15.494,14.467,16.531,14.769,13.367,13.214,12.342,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,5.0,6.0,7.0,10.0,47.0,76.0,1.0,0.0,0.0,0.000000,0.000000,1.904762,1.904762,2.047619,3.238095,3.809524,4.857143,5.714286,12.904762,38.190476,60.285714,3.523810,0.0
2,KMEM[649.3660]EMEQVFEMK,KMEM[655.3735]EMEQVFEMK,-2.274200,-2.855665,-2.947620,-2.942285,-3.161963,-2.854557,-2.779239,-2.276797,-2.444157,-2.012105,-2.494225,-2.825982,sp|Q16181|SEPT7_HUMAN,Q16181,SEPT7_HUMAN,SEPTIN7,Septin-7,2.521873e-11,10.598277,-2.655733,355,M355,SEPT7_M355,blue,MSVSARSAAAEERSVNSSTMVAQQKNLEGYVGFANLPNQVYRKSVK...,KMEMEMEQVFEMK,351,13,KME,3,354,PLAQMEEERREHVAKMKKME,EMEQVFEMKVKEKVQKLKDS,Q16181,308.0,M,354.0,85.48,32.745,31.824,30.377,32.192,18.975,17.770,18.274,17.012,-43.629,-43.383,-43.276,-42.178,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,7.0,7.0,12.0,18.0,25.0,1.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.285714,4.809524,5.571429,7.190476,7.380952,11.857143,18.476190,25.095238,2.047619,1.0
3,LRLEVNLQAM[649.3660]K,LRLEVNLQAM[655.3735]K,,,,,,,-2.033939,,,-1.993405,-2.328148,,sp|P35579|MYH9_HUMAN,P35579,MYH9_HUMAN,MYH9,Myosin-9,2.469697e-03,2.607356,-2.118498,1565,M1565,MYH9_M1565,blue,MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASL...,LRLEVNLQAMK,1555,11,LRLEVNLQA,9,1564,EDELQATEDAKLRLEVNLQA,KAQFERDLQGRDEQSEEKKK,P35579,136.0,M,1564.0,90.10,-40.426,-41.789,-41.686,-42.334,-22.898,-22.643,-21.476,-23.840,36.861,37.517,38.512,38.171,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,6.0,8.0,8.0,12.0,20.0,30.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.476190,5.047619,6.380952,7.904762,7.904762,12.095238,20.142857,29.714286,2.523810,1.0
4,AASDIAM[649.3660]TELPPTHPIR,AASDIAM[655.3735]TELPPTHPIR,-2.630824,,-2.145969,-1.169837,,-2.919210,,,,-1.363350,,,sp|P62258|1433E_HUMAN,P62258,1433E_HUMAN,YWHAE,14-3-3 protein epsilon,3.952954e-03,2.403078,-2.045838,160,M160,1433E_M160,blue,MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLS...,AASDIAMTELPPTHPIR,153,17,AASDIA,6,159,DRKEAAENSLVAYKAASDIA,TELPPTHPIRLGLALNFSVF,P62258,210.0,M,159.0,97.22,17.412,16.157,16.538,15.465,10.734,10.389,10.193,9.199,1.475,2.283,3.757,1.781,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,4.0,7.0,7.0,15.0,48.0,94.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.619048,4.523810,5.857143,7.428571,8.142857,23.523810,60.000000,109.238095,7.190476,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
737,DQGLSIM[649.3660]VSGK,DQGLSIM[655.3735]VSGK,,2.936602,,,,,,,4.366853,0.658841,,,sp|Q00341|VIGLN_HUMAN,Q00341,VIGLN_HUMAN,HDLBP,Vigilin,1.332075e-01,0.875471,2.654099,128,M128,VIGLN_M128,gray,MSSVAVLTQESFAEHRSGLVPQQIKVATLNSEEESDPPTYKDAFPP...,DQGLSIMVSGK,121,11,DQGLSI,6,127,MQRTGAHLELSLAKDQGLSI,VSGKLDAVMKARKDIVARLQ,Q00341,240.0,M,127.0,86.22,-72.101,-73.128,-73.684,-74.215,15.469,14.420,13.631,15.066,16.503,16.927,15.737,17.661,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,3.0,3.0,6.0,18.0,44.0,63.0,0.0,0.0,0.0,0.000000,0.000000,1.809524,1.809524,2.095238,2.619048,3.190476,4.000000,4.904762,13.142857,29.952381,49.190476,1.952381,0.0
738,EAM[649.3660]NHPGHLK,EAM[655.3735]NHPGHLK,,,,,,,,,2.708239,3.195753,,,sp|P00374|DYR_HUMAN,P00374,DYR_HUMAN,DHFR,Dihydrofolate reductase,5.244905e-02,1.280262,2.951996,126,M126,DYR_M126,gray,MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEG...,EAMNHPGHLK,123,10,EA,2,125,LANKVDMVWIVGGSSVYKEA,NHPGHLKLFVTRIMQDFESD,P00374,58.0,M,125.0,97.14,-15.547,-14.112,-13.186,-14.011,7.590,7.240,7.261,5.943,-7.446,-7.036,-8.259,-6.349,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,5.0,6.0,6.0,28.0,63.0,118.0,9.0,0.0,0.0,0.047619,0.047619,2.000000,2.000000,2.333333,3.714286,5.095238,6.809524,7.857143,22.428571,67.000000,123.000000,5.809524,0.0
739,ALEEAM[649.3660]EQK,ALEEAM[655.3735]EQK,2.683003,,3.269254,,,,,,,,,,sp|P35579|MYH9_HUMAN,P35579,MYH9_HUMAN,MYH9,Myosin-9,6.250049e-02,1.204117,2.976128,1489,M1489,MYH9_M1489,gray,MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASL...,ALEEAMEQK,1483,9,ALEEA,5,1488,AEAREKETKALSLARALEEA,EQKAELERLNKQFRTEMEDL,P35579,136.0,M,1488.0,90.36,-36.962,-36.200,-36.893,-34.802,3.001,4.301,5.533,4.235,77.633,77.927,77.320,77.493,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,6.0,6.0,8.0,8.0,12.0,20.0,26.0,3.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.714286,5.000000,6.142857,7.523810,7.523810,11.904762,19.571429,26.333333,2.333333,1.0
740,VTM[649.3660]LFLGLHNVR,VTM[655.3735]LFLGLHNVR,,,,,,,2.743130,,,,,3.350140,sp|P14868|SYDC_HUMAN,P14868,SYDC_HUMAN,DARS1,"Aspartate--tRNA ligase, cytoplasmic",6.321136e-02,1.199205,3.046635,478,M478,SYDC_M478,gray,MPSASASRKSQEKPREIMDAAEDYAKERYGISSMIQSQEKPDRVLV...,VTMLFLGLHNVR,475,12,VT,2,477,SFRFGAPPHAGGGIGLERVT,LFLGLHNVRQTSMFPRDPKR,P14868,99.0,M,477.0,98.53,8.996,7.689,7.922,7.027,0.321,-0.283,-1.634,0.630,4.774,4.265,3.563,3.331,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,6.0,11.0,11.0,11.0,27.0,94.0,180.0,11.0,0.0,0.0,0.000000,0.000000,2.047619,2.142857,2.619048,4.142857,6.047619,7.666667,8.428571,28.761905,87.285714,172.428571,8.761905,0.0


In [121]:
MsrBKD_with_alphafold[MsrBKD_with_alphafold['Protein ID'].isin(unique_mismatches)]

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrB2_KD_1 Log2 Ratio HL,MsrB2_KD_10 Log2 Ratio HL,MsrB2_KD_11 Log2 Ratio HL,MsrB2_KD_12 Log2 Ratio HL,MsrB2_KD_2 Log2 Ratio HL,MsrB2_KD_3 Log2 Ratio HL,MsrB2_KD_4 Log2 Ratio HL,MsrB2_KD_5 Log2 Ratio HL,MsrB2_KD_6 Log2 Ratio HL,MsrB2_KD_7 Log2 Ratio HL,MsrB2_KD_8 Log2 Ratio HL,MsrB2_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
266,FNADEFEDM[649.3660]VAEKR,FNADEFEDM[655.3735]VAEKR,,2.620562,,2.630807,,3.185583,4.025693,3.236847,3.320524,,3.316047,,sp|P27635|RL10_HUMAN,P27635,RL10_HUMAN,RPL10,Large ribosomal subunit protein uL16,2.11347e-06,5.675004,3.190866,184,M184,RL10_M184,green,MGRRPARCYRYCKNKPYPKSRFCRGVPDAKIRIFDLGRKKAKVDEF...,FNADEFEDMVAEKR,175,14,FNADEFED,8,183,KIHISKKWGFTKFNADEFED,VAEKRLIPDGCGVKYIPSRG,P27635,126.0,M,183.0,93.74,-24.168,-24.495,-23.644,-24.28,-0.909,0.487,0.727,1.564,14.269,13.711,12.465,14.681,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,9.0,10.0,15.0,36.0,57.0,4.0,0.0,0.0,0.0,0.0,2.0,2.047619,2.380952,3.190476,4.285714,5.52381,6.619048,13.714286,36.571429,61.52381,2.904762,0.0
295,RRM[649.3660]QYNR,RRM[655.3735]QYNR,1.998028,2.366995,2.286614,2.303103,2.031332,1.902578,,2.012649,2.632543,,2.246685,2.169486,sp|P62861|RS30_HUMAN,P62861,RS30_HUMAN,FAU,Ubiquitin-like FUBI-ribosomal protein eS30 fus...,1.476245e-10,9.830842,2.195001,110,M110,RS30_M110,green,MQLFVRAQELHTFEVTGQETVAQIKAHVASLEGIAPEDQVVLLAGA...,RRMQYNR,107,7,RR,2,109,TPKVAKQEKKKKKTGRAKRR,QYNRRFVNVVPTFGKKKGPN,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
310,IM[649.3660]VANIEEVLQR,IM[655.3735]VANIEEVLQR,1.820905,2.000746,1.601487,1.987363,1.789481,2.344528,2.025588,,,2.04816,2.090983,1.510026,sp|O75396|SC22B_HUMAN,O75396,SC22B_HUMAN,SEC22B,Vesicle-trafficking protein SEC22b,1.396103e-09,8.855082,1.921927,149,M149,SC22B_M149,green,MVLLTMIARVADGLPLAASMQEDEQSGRDLQQYQSQAKQLFRKLNE...,IMVANIEEVLQR,147,12,I,1,148,SRARRNLGSINTELQDVQRI,VANIEEVLQRGEALSALDSK,O75396,38.0,M,148.0,89.25,-6.568,-7.906,-7.783,-8.932,-14.719,-14.634,-13.785,-14.095,-2.811,-3.548,-4.821,-2.656,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,4.0,5.0,11.0,33.0,73.0,2.0,0.0,0.0,0.0,0.0,1.333333,1.333333,2.047619,2.571429,2.809524,3.428571,3.619048,8.142857,22.666667,46.52381,1.190476,0.0
508,LSMVM[649.3660]YLSK,LSMVM[655.3735]YLSK,,0.943869,,,,,,,,,0.834543,,sp|O94851|MICA2_HUMAN,O94851,MICA2_HUMAN,MICAL2,[F-actin]-monooxygenase MICAL2,0.03908627,1.407976,0.889206,612,M612,MICA2_M612,red,MGENEDEKQAQAGQVFENFVQASTCKGTLQAFNILTRHLDLDPLDH...,LSMVMYLSK,607,9,LSMV,4,611,PVTTGKEMASAQEPDKLSMV,YLSKFYELFRGTPLRPVDSW,O94851,46.0,M,611.0,89.2,-2.752,-1.237,-0.735,-0.46,43.933,43.765,44.783,43.904,-4.532,-4.332,-3.296,-5.571,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,5.0,6.0,7.0,8.0,19.0,53.0,91.0,3.0,0.0,0.0,0.0,0.0,2.0,2.0,2.190476,3.47619,4.952381,5.857143,6.190476,15.380952,47.095238,80.571429,4.285714,0.0
627,TNATNNM[649.3660]NLSR,TNATNNM[655.3735]NLSR,,,-1.202873,,-1.711192,,,,,,,,sp|Q9ULT8|HECD1_HUMAN,Q9ULT8,HECD1_HUMAN,HECTD1,E3 ubiquitin-protein ligase HECTD1,0.1099435,0.95883,-1.457033,1564,M1564,HECD1_M1564,gray,MADVDPDTLLEWLQMGQGDERDMQLIALEQLCMLLLMSDNVDRCFE...,TNATNNMNLSR,1557,11,TNATNN,6,1563,SLESFVRRVANIARTNATNN,NLSRSSSDNNTNTLGRNVMS,Q9ULT8,434.0,M,1563.0,26.35,17.416,17.853,17.947,16.845,-0.245,1.184,1.989,1.817,-15.594,-15.927,-14.603,-16.819,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,11.0,0.0,0.0,0.0,0.0,0.0,0.952381,1.047619,1.904762,2.0,2.0,2.0,2.0,4.0,8.142857,10.952381,0.0,1.0
737,DQGLSIM[649.3660]VSGK,DQGLSIM[655.3735]VSGK,,2.936602,,,,,,,4.366853,0.658841,,,sp|Q00341|VIGLN_HUMAN,Q00341,VIGLN_HUMAN,HDLBP,Vigilin,0.1332075,0.875471,2.654099,128,M128,VIGLN_M128,gray,MSSVAVLTQESFAEHRSGLVPQQIKVATLNSEEESDPPTYKDAFPP...,DQGLSIMVSGK,121,11,DQGLSI,6,127,MQRTGAHLELSLAKDQGLSI,VSGKLDAVMKARKDIVARLQ,Q00341,240.0,M,127.0,86.22,-72.101,-73.128,-73.684,-74.215,15.469,14.42,13.631,15.066,16.503,16.927,15.737,17.661,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,3.0,3.0,6.0,18.0,44.0,63.0,0.0,0.0,0.0,0.0,0.0,1.809524,1.809524,2.095238,2.619048,3.190476,4.0,4.904762,13.142857,29.952381,49.190476,1.952381,0.0


In [122]:
MsrBKD_with_alphafold_wo_mismatches = MsrBKD_with_alphafold.drop(MsrBKD_with_alphafold[MsrBKD_with_alphafold['Protein ID'].isin(unique_mismatches)].index).reset_index(drop = True)

In [123]:
MsrBKD_with_alphafold_wo_mismatches

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrB2_KD_1 Log2 Ratio HL,MsrB2_KD_10 Log2 Ratio HL,MsrB2_KD_11 Log2 Ratio HL,MsrB2_KD_12 Log2 Ratio HL,MsrB2_KD_2 Log2 Ratio HL,MsrB2_KD_3 Log2 Ratio HL,MsrB2_KD_4 Log2 Ratio HL,MsrB2_KD_5 Log2 Ratio HL,MsrB2_KD_6 Log2 Ratio HL,MsrB2_KD_7 Log2 Ratio HL,MsrB2_KD_8 Log2 Ratio HL,MsrB2_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,FAGLHFFNPVPVM[649.3660]K,FAGLHFFNPVPVM[655.3735]K,-4.205662,-3.548803,-4.189766,-4.160031,-4.436141,-3.632403,-3.887475,-4.186961,-4.153877,-3.975911,-4.619462,-4.402592,sp|Q16836|HCDH_HUMAN,Q16836,HCDH_HUMAN,HADH,"Hydroxyacyl-coenzyme A dehydrogenase, mitochon...",7.317853e-14,13.135616,-4.116590,178,M178,HCDH_M178,blue,MAFVTRQFMRSVSSSSTASASAKKIIVKHVTVIGGGLMGAGIAQVA...,FAGLHFFNPVPVMK,165,14,FAGLHFFNPVPV,12,177,NATTRQDRFAGLHFFNPVPV,KLVEVIKTPMTSQKTFESLV,Q16836,314.0,M,177.0,96.23,5.346,6.696,6.549,7.369,11.783,11.146,9.997,10.694,3.255,2.932,1.926,4.145,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,6.0,7.0,8.0,17.0,53.0,141.0,6.0,0.0,0.0,0.000000,0.095238,2.000000,2.142857,2.571429,3.571429,5.333333,6.571429,8.333333,26.047619,71.952381,142.619048,6.809524,0.0
1,IGM[649.3660]SVNAIR,IGM[655.3735]SVNAIR,,,,-4.399577,,-3.417119,-3.768051,,,,,-3.827056,sp|P23193|TCEA1_HUMAN,P23193,TCEA1_HUMAN,TCEA1,Transcription elongation factor A protein 1,3.213832e-04,3.492977,-3.852951,48,M48,TCEA1_M48,blue,MEDEVVRFAKKMDKMVQKKNAAGALDLLKELKNIPMTLELLQSTRI...,IGMSVNAIR,45,9,IG,2,47,LKELKNIPMTLELLQSTRIG,SVNAIRKQSTDEEVTSLAKS,P23193,118.0,M,47.0,85.20,-13.510,-13.575,-12.453,-13.473,16.092,15.494,14.467,16.531,14.769,13.367,13.214,12.342,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,5.0,6.0,7.0,10.0,47.0,76.0,1.0,0.0,0.0,0.000000,0.000000,1.904762,1.904762,2.047619,3.238095,3.809524,4.857143,5.714286,12.904762,38.190476,60.285714,3.523810,0.0
2,KMEM[649.3660]EMEQVFEMK,KMEM[655.3735]EMEQVFEMK,-2.274200,-2.855665,-2.947620,-2.942285,-3.161963,-2.854557,-2.779239,-2.276797,-2.444157,-2.012105,-2.494225,-2.825982,sp|Q16181|SEPT7_HUMAN,Q16181,SEPT7_HUMAN,SEPTIN7,Septin-7,2.521873e-11,10.598277,-2.655733,355,M355,SEPT7_M355,blue,MSVSARSAAAEERSVNSSTMVAQQKNLEGYVGFANLPNQVYRKSVK...,KMEMEMEQVFEMK,351,13,KME,3,354,PLAQMEEERREHVAKMKKME,EMEQVFEMKVKEKVQKLKDS,Q16181,308.0,M,354.0,85.48,32.745,31.824,30.377,32.192,18.975,17.770,18.274,17.012,-43.629,-43.383,-43.276,-42.178,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,7.0,7.0,12.0,18.0,25.0,1.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.285714,4.809524,5.571429,7.190476,7.380952,11.857143,18.476190,25.095238,2.047619,1.0
3,LRLEVNLQAM[649.3660]K,LRLEVNLQAM[655.3735]K,,,,,,,-2.033939,,,-1.993405,-2.328148,,sp|P35579|MYH9_HUMAN,P35579,MYH9_HUMAN,MYH9,Myosin-9,2.469697e-03,2.607356,-2.118498,1565,M1565,MYH9_M1565,blue,MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASL...,LRLEVNLQAMK,1555,11,LRLEVNLQA,9,1564,EDELQATEDAKLRLEVNLQA,KAQFERDLQGRDEQSEEKKK,P35579,136.0,M,1564.0,90.10,-40.426,-41.789,-41.686,-42.334,-22.898,-22.643,-21.476,-23.840,36.861,37.517,38.512,38.171,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,6.0,8.0,8.0,12.0,20.0,30.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.476190,5.047619,6.380952,7.904762,7.904762,12.095238,20.142857,29.714286,2.523810,1.0
4,AASDIAM[649.3660]TELPPTHPIR,AASDIAM[655.3735]TELPPTHPIR,-2.630824,,-2.145969,-1.169837,,-2.919210,,,,-1.363350,,,sp|P62258|1433E_HUMAN,P62258,1433E_HUMAN,YWHAE,14-3-3 protein epsilon,3.952954e-03,2.403078,-2.045838,160,M160,1433E_M160,blue,MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLS...,AASDIAMTELPPTHPIR,153,17,AASDIA,6,159,DRKEAAENSLVAYKAASDIA,TELPPTHPIRLGLALNFSVF,P62258,210.0,M,159.0,97.22,17.412,16.157,16.538,15.465,10.734,10.389,10.193,9.199,1.475,2.283,3.757,1.781,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,4.0,7.0,7.0,15.0,48.0,94.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.619048,4.523810,5.857143,7.428571,8.142857,23.523810,60.000000,109.238095,7.190476,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731,NM[649.3660]SIIDAFK,NM[655.3735]SIIDAFK,,,,3.080768,,,1.972472,,,,,,sp|P49959|MRE11_HUMAN,P49959,MRE11_HUMAN,MRE11,Double-strand break repair protein MRE11,1.374496e-01,0.861856,2.526620,618,M618,MRE11_M618,gray,MSTADALDDENTFKILVATDIHLGFMEKDAVRGNDTFVTLDEILRL...,NMSIIDAFK,616,9,N,1,617,GLETSTRSRNSKTAVSASRN,SIIDAFKSTRQQPSRNVTTK,P49959,173.0,M,617.0,39.20,-30.124,-30.749,-32.206,-29.952,1.516,2.634,2.891,3.850,9.244,10.076,9.659,9.903,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,8.0,13.0,0.0,0.0,0.0,0.000000,0.000000,1.285714,1.285714,2.000000,2.000000,2.000000,2.142857,2.380952,5.000000,8.047619,11.619048,0.285714,1.0
732,EAM[649.3660]NHPGHLK,EAM[655.3735]NHPGHLK,,,,,,,,,2.708239,3.195753,,,sp|P00374|DYR_HUMAN,P00374,DYR_HUMAN,DHFR,Dihydrofolate reductase,5.244905e-02,1.280262,2.951996,126,M126,DYR_M126,gray,MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEG...,EAMNHPGHLK,123,10,EA,2,125,LANKVDMVWIVGGSSVYKEA,NHPGHLKLFVTRIMQDFESD,P00374,58.0,M,125.0,97.14,-15.547,-14.112,-13.186,-14.011,7.590,7.240,7.261,5.943,-7.446,-7.036,-8.259,-6.349,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,5.0,6.0,6.0,28.0,63.0,118.0,9.0,0.0,0.0,0.047619,0.047619,2.000000,2.000000,2.333333,3.714286,5.095238,6.809524,7.857143,22.428571,67.000000,123.000000,5.809524,0.0
733,ALEEAM[649.3660]EQK,ALEEAM[655.3735]EQK,2.683003,,3.269254,,,,,,,,,,sp|P35579|MYH9_HUMAN,P35579,MYH9_HUMAN,MYH9,Myosin-9,6.250049e-02,1.204117,2.976128,1489,M1489,MYH9_M1489,gray,MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASL...,ALEEAMEQK,1483,9,ALEEA,5,1488,AEAREKETKALSLARALEEA,EQKAELERLNKQFRTEMEDL,P35579,136.0,M,1488.0,90.36,-36.962,-36.200,-36.893,-34.802,3.001,4.301,5.533,4.235,77.633,77.927,77.320,77.493,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,6.0,6.0,8.0,8.0,12.0,20.0,26.0,3.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.714286,5.000000,6.142857,7.523810,7.523810,11.904762,19.571429,26.333333,2.333333,1.0
734,VTM[649.3660]LFLGLHNVR,VTM[655.3735]LFLGLHNVR,,,,,,,2.743130,,,,,3.350140,sp|P14868|SYDC_HUMAN,P14868,SYDC_HUMAN,DARS1,"Aspartate--tRNA ligase, cytoplasmic",6.321136e-02,1.199205,3.046635,478,M478,SYDC_M478,gray,MPSASASRKSQEKPREIMDAAEDYAKERYGISSMIQSQEKPDRVLV...,VTMLFLGLHNVR,475,12,VT,2,477,SFRFGAPPHAGGGIGLERVT,LFLGLHNVRQTSMFPRDPKR,P14868,99.0,M,477.0,98.53,8.996,7.689,7.922,7.027,0.321,-0.283,-1.634,0.630,4.774,4.265,3.563,3.331,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,6.0,11.0,11.0,11.0,27.0,94.0,180.0,11.0,0.0,0.0,0.000000,0.000000,2.047619,2.142857,2.619048,4.142857,6.047619,7.666667,8.428571,28.761905,87.285714,172.428571,8.761905,0.0


In [124]:
MsrBKD_psi_and_psi_map = extract_PSI_and_PHI(MsrBKD_with_alphafold_wo_mismatches, concat_dihedrals)

       Protein ID Residue Name  Residue Position       PHI       PSI  \
161422     Q16836          MET               178 -1.189476  2.171959   

        PSI degrees  PHI degrees  
161422   124.444077   -68.151944  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
398321     P23193          MET                48 -1.241461 -0.818135   

        PSI degrees  PHI degrees  
398321   -46.875675    -71.13047  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
855841     Q16181          MET               355 -1.061428 -0.730723   

        PSI degrees  PHI degrees  
855841   -41.867347   -60.815364  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
185439     P35579          MET              1565 -1.089169 -0.665294   

        PSI degrees  PHI degrees  
185439    -38.11856   -62.404803  


  df = df._append(new_row, ignore_index = True)


       Protein ID Residue Name  Residue Position       PHI       PSI  \
686004     P62258          MET               160 -1.130868 -0.516803   

        PSI degrees  PHI degrees  
686004   -29.610647   -64.793967  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
297777     P46109          MET               172 -1.361575  2.357828   

        PSI degrees  PHI degrees  
297777   135.093596   -78.012513  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
166655     P55072          MET               740 -1.146523 -0.555659   

        PSI degrees  PHI degrees  
166655   -31.836928   -65.690956  
       Protein ID Residue Name  Residue Position       PHI       PSI  \
466917     Q9Y265          MET               113 -1.094776 -0.710135   

        PSI degrees  PHI degrees  
466917    -40.68774   -62.726016  
       Protein ID Residue Name  Residue Position       PHI      PSI  \
857795     P25205          MET               562 -1.455569  2.51504  

In [125]:
sum(MsrBKD_psi_and_psi_map['PHI Degrees'].isna())

0

In [126]:
MsrBKD_psi_and_psi_map

Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,2.171959,-1.189476,124.444077,-68.151944
1,-0.818135,-1.241461,-46.875675,-71.130470
2,-0.730723,-1.061428,-41.867347,-60.815364
3,-0.665294,-1.089169,-38.118560,-62.404803
4,-0.516803,-1.130868,-29.610647,-64.793967
...,...,...,...,...
731,2.407488,-1.480153,137.938905,-84.806547
732,-0.487434,-1.169793,-27.927937,-67.024224
733,-0.653646,-1.338789,-37.451150,-76.706966
734,-0.905823,-1.087117,-51.899856,-62.287222


In [127]:
MsrBKD_with_PSI_and_PHI = pd.concat([MsrBKD_with_alphafold_wo_mismatches, MsrBKD_psi_and_psi_map], axis=1)

In [128]:
MsrBKD_with_PSI_and_PHI

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrB2_KD_1 Log2 Ratio HL,MsrB2_KD_10 Log2 Ratio HL,MsrB2_KD_11 Log2 Ratio HL,MsrB2_KD_12 Log2 Ratio HL,MsrB2_KD_2 Log2 Ratio HL,MsrB2_KD_3 Log2 Ratio HL,MsrB2_KD_4 Log2 Ratio HL,MsrB2_KD_5 Log2 Ratio HL,MsrB2_KD_6 Log2 Ratio HL,MsrB2_KD_7 Log2 Ratio HL,MsrB2_KD_8 Log2 Ratio HL,MsrB2_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,FAGLHFFNPVPVM[649.3660]K,FAGLHFFNPVPVM[655.3735]K,-4.205662,-3.548803,-4.189766,-4.160031,-4.436141,-3.632403,-3.887475,-4.186961,-4.153877,-3.975911,-4.619462,-4.402592,sp|Q16836|HCDH_HUMAN,Q16836,HCDH_HUMAN,HADH,"Hydroxyacyl-coenzyme A dehydrogenase, mitochon...",7.317853e-14,13.135616,-4.116590,178,M178,HCDH_M178,blue,MAFVTRQFMRSVSSSSTASASAKKIIVKHVTVIGGGLMGAGIAQVA...,FAGLHFFNPVPVMK,165,14,FAGLHFFNPVPV,12,177,NATTRQDRFAGLHFFNPVPV,KLVEVIKTPMTSQKTFESLV,Q16836,314.0,M,177.0,96.23,5.346,6.696,6.549,7.369,11.783,11.146,9.997,10.694,3.255,2.932,1.926,4.145,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,6.0,7.0,8.0,17.0,53.0,141.0,6.0,0.0,0.0,0.000000,0.095238,2.000000,2.142857,2.571429,3.571429,5.333333,6.571429,8.333333,26.047619,71.952381,142.619048,6.809524,0.0,2.171959,-1.189476,124.444077,-68.151944
1,IGM[649.3660]SVNAIR,IGM[655.3735]SVNAIR,,,,-4.399577,,-3.417119,-3.768051,,,,,-3.827056,sp|P23193|TCEA1_HUMAN,P23193,TCEA1_HUMAN,TCEA1,Transcription elongation factor A protein 1,3.213832e-04,3.492977,-3.852951,48,M48,TCEA1_M48,blue,MEDEVVRFAKKMDKMVQKKNAAGALDLLKELKNIPMTLELLQSTRI...,IGMSVNAIR,45,9,IG,2,47,LKELKNIPMTLELLQSTRIG,SVNAIRKQSTDEEVTSLAKS,P23193,118.0,M,47.0,85.20,-13.510,-13.575,-12.453,-13.473,16.092,15.494,14.467,16.531,14.769,13.367,13.214,12.342,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,5.0,6.0,7.0,10.0,47.0,76.0,1.0,0.0,0.0,0.000000,0.000000,1.904762,1.904762,2.047619,3.238095,3.809524,4.857143,5.714286,12.904762,38.190476,60.285714,3.523810,0.0,-0.818135,-1.241461,-46.875675,-71.130470
2,KMEM[649.3660]EMEQVFEMK,KMEM[655.3735]EMEQVFEMK,-2.274200,-2.855665,-2.947620,-2.942285,-3.161963,-2.854557,-2.779239,-2.276797,-2.444157,-2.012105,-2.494225,-2.825982,sp|Q16181|SEPT7_HUMAN,Q16181,SEPT7_HUMAN,SEPTIN7,Septin-7,2.521873e-11,10.598277,-2.655733,355,M355,SEPT7_M355,blue,MSVSARSAAAEERSVNSSTMVAQQKNLEGYVGFANLPNQVYRKSVK...,KMEMEMEQVFEMK,351,13,KME,3,354,PLAQMEEERREHVAKMKKME,EMEQVFEMKVKEKVQKLKDS,Q16181,308.0,M,354.0,85.48,32.745,31.824,30.377,32.192,18.975,17.770,18.274,17.012,-43.629,-43.383,-43.276,-42.178,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,7.0,7.0,12.0,18.0,25.0,1.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.285714,4.809524,5.571429,7.190476,7.380952,11.857143,18.476190,25.095238,2.047619,1.0,-0.730723,-1.061428,-41.867347,-60.815364
3,LRLEVNLQAM[649.3660]K,LRLEVNLQAM[655.3735]K,,,,,,,-2.033939,,,-1.993405,-2.328148,,sp|P35579|MYH9_HUMAN,P35579,MYH9_HUMAN,MYH9,Myosin-9,2.469697e-03,2.607356,-2.118498,1565,M1565,MYH9_M1565,blue,MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASL...,LRLEVNLQAMK,1555,11,LRLEVNLQA,9,1564,EDELQATEDAKLRLEVNLQA,KAQFERDLQGRDEQSEEKKK,P35579,136.0,M,1564.0,90.10,-40.426,-41.789,-41.686,-42.334,-22.898,-22.643,-21.476,-23.840,36.861,37.517,38.512,38.171,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,6.0,8.0,8.0,12.0,20.0,30.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.476190,5.047619,6.380952,7.904762,7.904762,12.095238,20.142857,29.714286,2.523810,1.0,-0.665294,-1.089169,-38.118560,-62.404803
4,AASDIAM[649.3660]TELPPTHPIR,AASDIAM[655.3735]TELPPTHPIR,-2.630824,,-2.145969,-1.169837,,-2.919210,,,,-1.363350,,,sp|P62258|1433E_HUMAN,P62258,1433E_HUMAN,YWHAE,14-3-3 protein epsilon,3.952954e-03,2.403078,-2.045838,160,M160,1433E_M160,blue,MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLS...,AASDIAMTELPPTHPIR,153,17,AASDIA,6,159,DRKEAAENSLVAYKAASDIA,TELPPTHPIRLGLALNFSVF,P62258,210.0,M,159.0,97.22,17.412,16.157,16.538,15.465,10.734,10.389,10.193,9.199,1.475,2.283,3.757,1.781,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,4.0,7.0,7.0,15.0,48.0,94.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.619048,4.523810,5.857143,7.428571,8.142857,23.523810,60.000000,109.238095,7.190476,0.0,-0.516803,-1.130868,-29.610647,-64.793967
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731,NM[649.3660]SIIDAFK,NM[655.3735]SIIDAFK,,,,3.080768,,,1.972472,,,,,,sp|P49959|MRE11_HUMAN,P49959,MRE11_HUMAN,MRE11,Double-strand break repair protein MRE11,1.374496e-01,0.861856,2.526620,618,M618,MRE11_M618,gray,MSTADALDDENTFKILVATDIHLGFMEKDAVRGNDTFVTLDEILRL...,NMSIIDAFK,616,9,N,1,617,GLETSTRSRNSKTAVSASRN,SIIDAFKSTRQQPSRNVTTK,P49959,173.0,M,617.0,39.20,-30.124,-30.749,-32.206,-29.952,1.516,2.634,2.891,3.850,9.244,10.076,9.659,9.903,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,8.0,13.0,0.0,0.0,0.0,0.000000,0.000000,1.285714,1.285714,2.000000,2.000000,2.000000,2.142857,2.380952,5.000000,8.047619,11.619048,0.285714,1.0,2.407488,-1.480153,137.938905,-84.806547
732,EAM[649.3660]NHPGHLK,EAM[655.3735]NHPGHLK,,,,,,,,,2.708239,3.195753,,,sp|P00374|DYR_HUMAN,P00374,DYR_HUMAN,DHFR,Dihydrofolate reductase,5.244905e-02,1.280262,2.951996,126,M126,DYR_M126,gray,MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEG...,EAMNHPGHLK,123,10,EA,2,125,LANKVDMVWIVGGSSVYKEA,NHPGHLKLFVTRIMQDFESD,P00374,58.0,M,125.0,97.14,-15.547,-14.112,-13.186,-14.011,7.590,7.240,7.261,5.943,-7.446,-7.036,-8.259,-6.349,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,5.0,6.0,6.0,28.0,63.0,118.0,9.0,0.0,0.0,0.047619,0.047619,2.000000,2.000000,2.333333,3.714286,5.095238,6.809524,7.857143,22.428571,67.000000,123.000000,5.809524,0.0,-0.487434,-1.169793,-27.927937,-67.024224
733,ALEEAM[649.3660]EQK,ALEEAM[655.3735]EQK,2.683003,,3.269254,,,,,,,,,,sp|P35579|MYH9_HUMAN,P35579,MYH9_HUMAN,MYH9,Myosin-9,6.250049e-02,1.204117,2.976128,1489,M1489,MYH9_M1489,gray,MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASL...,ALEEAMEQK,1483,9,ALEEA,5,1488,AEAREKETKALSLARALEEA,EQKAELERLNKQFRTEMEDL,P35579,136.0,M,1488.0,90.36,-36.962,-36.200,-36.893,-34.802,3.001,4.301,5.533,4.235,77.633,77.927,77.320,77.493,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,6.0,6.0,8.0,8.0,12.0,20.0,26.0,3.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.714286,5.000000,6.142857,7.523810,7.523810,11.904762,19.571429,26.333333,2.333333,1.0,-0.653646,-1.338789,-37.451150,-76.706966
734,VTM[649.3660]LFLGLHNVR,VTM[655.3735]LFLGLHNVR,,,,,,,2.743130,,,,,3.350140,sp|P14868|SYDC_HUMAN,P14868,SYDC_HUMAN,DARS1,"Aspartate--tRNA ligase, cytoplasmic",6.321136e-02,1.199205,3.046635,478,M478,SYDC_M478,gray,MPSASASRKSQEKPREIMDAAEDYAKERYGISSMIQSQEKPDRVLV...,VTMLFLGLHNVR,475,12,VT,2,477,SFRFGAPPHAGGGIGLERVT,LFLGLHNVRQTSMFPRDPKR,P14868,99.0,M,477.0,98.53,8.996,7.689,7.922,7.027,0.321,-0.283,-1.634,0.630,4.774,4.265,3.563,3.331,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,6.0,11.0,11.0,11.0,27.0,94.0,180.0,11.0,0.0,0.0,0.000000,0.000000,2.047619,2.142857,2.619048,4.142857,6.047619,7.666667,8.428571,28.761905,87.285714,172.428571,8.761905,0.0,-0.905823,-1.087117,-51.899856,-62.287222


In [129]:
MsrBKD_with_PSI_and_PHI.to_csv('MsrBKD_with_PSI_and_PHI.csv', index=False)

In [130]:
MsrBKD_with_alphafold[MsrBKD_with_alphafold['color'] == 'green']

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrB2_KD_1 Log2 Ratio HL,MsrB2_KD_10 Log2 Ratio HL,MsrB2_KD_11 Log2 Ratio HL,MsrB2_KD_12 Log2 Ratio HL,MsrB2_KD_2 Log2 Ratio HL,MsrB2_KD_3 Log2 Ratio HL,MsrB2_KD_4 Log2 Ratio HL,MsrB2_KD_5 Log2 Ratio HL,MsrB2_KD_6 Log2 Ratio HL,MsrB2_KD_7 Log2 Ratio HL,MsrB2_KD_8 Log2 Ratio HL,MsrB2_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
40,VEAM[649.3660]LNDR,VEAM[655.3735]LNDR,,,,0.700121,,,,0.958972,,0.978685,0.959687,,sp|P05067|A4_HUMAN,P05067,A4_HUMAN,APP,Amyloid-beta precursor protein,8.770640e-04,3.056969,0.899366,464,M464,A4_M464,green,MLPGLALLLLAAWTARALEVPTDGNAGLLAEPQIAMFCGRLNMHMN...,VEAMLNDR,460,8,VEA,3,463,QEAANERQQLVETHMARVEA,LNDRRRLALENYITALQAVP,P05067,62.0,M,463.0,86.95,-22.078,-22.171,-22.213,-21.055,6.510,7.455,6.603,8.412,-6.406,-7.618,-8.895,-7.694,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,8.0,8.0,13.0,42.0,75.0,3.0,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.142857,4.761905,6.142857,8.238095,8.333333,16.428571,44.809524,82.047619,4.523810,0.0
41,HQGVM[15.9949]VGM[649.3660]GQK,HQGVM[15.9949]VGM[655.3735]GQK,1.577324,1.505739,1.482676,1.531861,1.495393,1.450624,1.340681,1.555433,1.748100,1.915088,1.423399,1.478074,sp|P60709|ACTB_HUMAN,P60709,ACTB_HUMAN,ACTB,"Actin, cytoplasmic 1",1.270410e-12,11.896056,1.542033,47,M47,ACTB_M47,green,MDDDIAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVG...,HQGVMVGMGQK,39,11,HQGVMVG,7,46,PRAVFPSIVGRPRHQGVMVG,GQKDSYVGDEAQSKRGILTL,P60709,197.0,M,46.0,61.75,12.267,11.409,9.991,11.355,7.885,7.325,6.967,8.175,30.308,31.460,30.979,32.674,BEND,BEND,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,6.0,15.0,1.0,0.0,0.0,0.0,0.000000,1.571429,1.571429,2.333333,2.714286,3.095238,3.714286,4.571429,10.619048,27.190476,49.714286,1.857143,0.0
42,YPIEHGIITNWDDM[649.3660]EK,YPIEHGIITNWDDM[655.3735]EK,1.561588,,1.378782,,1.422322,,1.373396,,,,,1.371021,sp|P68032|ACTC_HUMAN,P68032,ACTC_HUMAN,ACTC1,"Actin, alpha cardiac muscle 1",2.533007e-06,5.596364,1.421422,84,M84,ACTC_M84,green,MCDDEETTALVCDNGSGLVKAGFAGDDAPRAVFPSIVGRPRHQGVM...,YPIEHGIITNWDDMEK,70,16,YPIEHGIITNWDD,13,83,RGILTLKYPIEHGIITNWDD,EKIWHHTFYNELRVAPEEHP,P68032,227.0,M,83.0,98.30,6.616,5.123,4.820,4.264,15.109,14.877,14.995,15.782,7.068,6.785,5.284,7.549,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,5.0,7.0,9.0,9.0,27.0,81.0,152.0,10.0,0.0,0.0,0.0,0.000000,2.000000,2.095238,2.523810,4.380952,5.476190,7.142857,8.047619,22.047619,66.714286,128.238095,6.142857,0.0
43,M[649.3660]LDNLGYR,M[655.3735]LDNLGYR,0.148525,2.170646,2.166039,,0.327642,,1.251783,1.324078,,1.797857,1.393138,0.391504,sp|P35611|ADDA_HUMAN,P35611,ADDA_HUMAN,ADD1,Alpha-adducin,1.504693e-03,2.822552,1.219024,383,M383,ADDA_M383,green,MNGDSRAAVVTSPPPTTAPHKERYFDRVDENNPEYLRERNMAPDLR...,MLDNLGYR,382,8,,0,382,GTGSPPKWQIGEQEFEALMR,LDNLGYRTGYPYRYPALREK,P35611,137.0,M,382.0,91.87,0.064,0.140,-1.022,1.421,-13.451,-14.014,-13.523,-13.683,-6.785,-5.357,-4.478,-4.718,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,5.0,6.0,7.0,8.0,16.0,49.0,85.0,4.0,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.190476,3.952381,5.142857,6.428571,7.190476,16.714286,48.190476,92.666667,5.000000,0.0
44,LAAEVYKDM[649.3660]PETSFTR,LAAEVYKDM[655.3735]PETSFTR,1.459202,,1.293908,,0.119468,1.083899,0.407497,,,,1.103427,0.869660,sp|P55196|AFAD_HUMAN,P55196,AFAD_HUMAN,AFDN,Afadin,2.536124e-03,2.595830,0.905294,206,M206,AFAD_M206,green,MSAGGRDEERRKLADIIHHWNANRLDLFEISQPTEDLEFHGVMRFY...,LAAEVYKDMPETSFTR,197,16,LAAEVYKD,8,205,RPFQGEDVENSRLAAEVYKD,PETSFTRTISNPEVVMKRRR,P55196,192.0,M,205.0,52.18,-12.180,-12.049,-13.334,-11.737,-22.065,-21.411,-20.678,-22.407,-27.588,-28.974,-29.380,-29.997,BEND,BEND,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,7.0,18.0,40.0,2.0,0.0,0.0,0.0,0.000000,1.571429,1.571429,2.095238,2.428571,2.809524,3.190476,3.809524,7.142857,16.761905,32.523810,1.047619,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
411,HQM[649.3660]LHHQR,HQM[655.3735]LHHQR,1.468272,1.390230,1.397217,1.042584,1.414421,1.522004,1.276547,1.524151,1.307795,1.763019,1.216185,1.257959,sp|P49750|YLPM1_HUMAN,P49750,YLPM1_HUMAN,YLPM1,YLP motif-containing protein 1,2.961233e-11,10.528527,1.381699,127,M127,YLPM1_M127,green,MYPNWGRYGGSSHYPPPPVPPPPPVALPEASPGPGYSSSTTPAAPS...,HQMLHHQR,124,8,HQ,2,126,MPPPPGPALSYQKQQQYKHQ,LHHQRDGPPGLVPMELESPP,P49750,169.0,M,126.0,26.42,-15.347,-15.831,-17.321,-15.432,34.015,33.489,33.832,32.086,67.469,66.110,65.904,65.862,BEND,BEND,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,7.0,10.0,0.0,0.0,0.0,0.0,0.619048,1.523810,1.666667,2.000000,2.000000,2.000000,2.000000,2.047619,4.047619,7.285714,10.238095,0.333333,1.0
412,TYPEERM[649.3660]PLPAPSLSHQPPPAPR,TYPEERM[655.3735]PLPAPSLSHQPPPAPR,,,,,1.717833,1.543232,,,,,,,sp|P49750|YLPM1_HUMAN,P49750,YLPM1_HUMAN,YLPM1,YLP motif-containing protein 1,3.405266e-02,1.467849,1.630533,1793,M1793,YLPM1_M1793,green,MYPNWGRYGGSSHYPPPPVPPPPPVALPEASPGPGYSSSTTPAAPS...,TYPEERMPLPAPSLSHQPPPAPR,1786,23,TYPEER,6,1792,PVYEGPSMFGGERRTYPEER,PLPAPSLSHQPPPAPRVEKK,P49750,169.0,M,1792.0,32.29,25.969,26.824,26.135,27.220,67.666,68.526,69.850,67.717,-41.724,-42.691,-43.093,-43.863,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,4.0,6.0,10.0,0.0,0.0,0.0,0.0,0.380952,1.238095,1.428571,2.000000,2.000000,2.000000,2.000000,2.000000,3.857143,6.619048,9.904762,0.095238,1.0
413,HNM[649.3660]DIGTWDNKGPVAK,HNM[655.3735]DIGTWDNKGPVAK,1.057377,0.944735,0.383428,0.873900,1.002462,0.748663,0.976972,0.819735,0.983006,0.940692,0.744930,0.769443,sp|Q9Y5A9|YTHD2_HUMAN,Q9Y5A9,YTHD2_HUMAN,YTHDF2,YTH domain-containing family protein 2,4.871369e-09,8.312349,0.853779,273,M273,YTHD2_M273,green,MSASSLLEQRPKGQGNKVQNGSVHQKDGLNDDDFEPYLSPQARPNN...,HNMDIGTWDNKGPVAK,270,16,HN,2,272,LKTKNGIAGSSLPPPPIKHN,DIGTWDNKGPVAKAPSQALV,Q9Y5A9,457.0,M,272.0,59.27,34.441,35.916,36.140,36.420,3.917,3.644,2.221,4.675,-30.707,-30.458,-29.927,-29.551,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,10.0,0.0,0.0,0.0,0.0,0.000000,1.380952,1.380952,2.000000,2.000000,2.000000,2.000000,2.000000,3.904762,6.571429,9.809524,0.047619,1.0
414,LRPGFM[649.3660]EDR,LRPGFM[655.3735]EDR,1.586558,1.350037,1.637100,1.510475,1.436025,1.547713,1.399287,1.127181,1.938248,1.129483,1.519621,1.159451,sp|Q5BKZ1|ZN326_HUMAN,Q5BKZ1,ZN326_HUMAN,ZNF326,DBIRD complex subunit ZNF326,2.872655e-10,9.541716,1.445098,146,M146,ZN326_M146,green,MDFEDDYTHSACRNTYQGFNGMDRDYGPGSYGGMDRDYGHGSYGGQ...,LRPGFMEDR,140,9,LRPGF,5,145,NQGGSSWEAPYSRSKLRPGF,EDRGRENYSSYSSFSSPHMK,Q5BKZ1,320.0,M,145.0,43.07,17.644,16.877,17.735,15.681,60.629,60.983,61.787,61.811,22.246,20.940,19.937,21.242,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,4.0,6.0,8.0,0.0,0.0,0.0,0.0,0.428571,1.000000,1.428571,1.904762,1.904762,2.000000,2.000000,2.000000,3.714286,6.142857,9.190476,0.000000,1.0


In [131]:
MsrAKD_with_alphafold[MsrAKD_with_alphafold['Color'] == 'red']

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,MsrA_KD_1 Log2 Ratio HL,MsrA_KD_10 Log2 Ratio HL,MsrA_KD_11 Log2 Ratio HL,MsrA_KD_12 Log2 Ratio HL,MsrA_KD_2 Log2 Ratio HL,MsrA_KD_3 Log2 Ratio HL,MsrA_KD_4 Log2 Ratio HL,MsrA_KD_5 Log2 Ratio HL,MsrA_KD_6 Log2 Ratio HL,MsrA_KD_7 Log2 Ratio HL,MsrA_KD_8 Log2 Ratio HL,MsrA_KD_9 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,neglogp,Log2HL avg,Site Number,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
541,NLRVM[649.3660]LESER,NLRVM[655.3735]LESER,3.338341,3.551582,3.030476,3.864905,3.352572,,3.111153,,,3.443699,3.801472,3.476981,sp|Q5JRA6|TGO1_HUMAN,Q5JRA6,TGO1_HUMAN,MIA3,Transport and Golgi organization protein 1 hom...,3.044121e-10,9.516538,3.441243,1289,M1289,TGO1_M1289,red,MAAAPGLLVWLLVLRLPWRVPGQLDPSTGRRFSEHKLCADDECSML...,NLRVMLESER,1284,10,NLRV,4,1288,IKTLEKNQEILDDTAKNLRV,LESEREQNVKNQDLISENKK,Q5JRA6,284.0,M,1288.0,92.68,26.66,28.052,28.29,29.118,14.838,15.321,16.737,14.394,-35.676,-35.251,-35.79,-35.655,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,6.0,6.0,8.0,8.0,12.0,20.0,28.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,2.666667,5.333333,5.952381,7.952381,7.952381,11.904762,19.809524,27.238095,2.238095,1.0
542,VLHHMGGMAGLQSM[649.3660]MR,VLHHMGGMAGLQSM[655.3735]MR,2.237388,,,,,,2.576305,,3.223912,4.279829,3.674864,,sp|P61011|SRP54_HUMAN,P61011,SRP54_HUMAN,SRP54,Signal recognition particle subunit SRP54,0.00096417,3.015846,3.198459,483,M483,SRP54_M483,red,MVLADLGRKITSALRSLSNATIINEEVLNAMLKEVCTALLEADVNI...,VLHHMGGMAGLQSMMR,469,16,VLHHMGGMAGLQS,13,482,AKMMDPRVLHHMGGMAGLQS,MRQFQQGAAGNMKGMMGFNN,P61011,182.0,M,482.0,67.69,7.216,5.754,5.057,5.64,36.094,36.502,36.876,37.626,23.438,23.672,22.359,24.602,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,6.0,6.0,6.0,6.0,11.0,19.0,30.0,2.0,0.0,0.0,0.0,0.0,1.761905,1.761905,2.0,2.904762,3.0,3.761905,4.428571,9.47619,16.047619,25.333333,1.761905,1.0
543,RM[649.3660]ATEVAADALGEEWK,RM[655.3735]ATEVAADALGEEWK,,2.989205,2.881681,3.270686,3.472202,2.316379,3.585909,2.685923,3.203315,3.028107,3.725768,2.927216,sp|P62753|RS6_HUMAN,P62753,RS6_HUMAN,RPS6,Small ribosomal subunit protein eS6,2.374944e-10,9.624347,3.098763,32,M32,RS6_M32,red,MKLNISFPATGCQKLIEVDDERKLRTFYEKRMATEVAADALGEEWK...,RMATEVAADALGEEWK,30,16,R,1,31,CQKLIEVDDERKLRTFYEKR,ATEVAADALGEEWKGYVVRI,P62753,194.0,M,31.0,96.28,-16.687,-16.442,-15.135,-17.538,-3.619,-3.972,-4.738,-4.768,11.513,12.994,13.201,13.537,TURN_TY1_P,TURN,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,5.0,6.0,9.0,25.0,60.0,96.0,9.0,0.0,0.0,0.0,0.0,2.0,2.047619,2.190476,3.142857,4.666667,5.761905,6.904762,17.142857,49.666667,82.904762,3.761905,0.0
544,GTITIQDTGIGM[649.3660]TQEELVSNLGTIAR,GTITIQDTGIGM[655.3735]TQEELVSNLGTIAR,,,,2.692972,2.685418,,,,,,2.930274,,sp|Q12931|TRAP1_HUMAN,Q12931,TRAP1_HUMAN,TRAP1,"Heat shock protein 75 kDa, mitochondrial",0.000841451,3.074971,2.769555,163,M163,TRAP1_M163,red,MARELRALLLWGRRLRPLLRAPALAAVPGGKPILCPRRTTAQLGPR...,GTITIQDTGIGMTQEELVSNLGTIAR,151,26,GTITIQDTGIG,11,162,IHLQTNAEKGTITIQDTGIG,TQEELVSNLGTIARSGSKAF,Q12931,234.0,M,162.0,97.3,24.778,23.804,23.089,24.503,6.1,5.801,7.087,5.187,-38.561,-37.419,-36.969,-36.288,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,4.0,7.0,7.0,9.0,29.0,76.0,129.0,9.0,0.0,0.0,0.0,0.0,2.047619,2.333333,2.904762,4.619048,6.47619,7.285714,8.904762,24.809524,68.714286,128.761905,5.380952,0.0
545,IHQIEYAM[649.3660]EAVK,IHQIEYAM[655.3735]EAVK,2.422233,2.776447,3.266472,2.469264,2.613234,2.875086,2.628537,3.005414,2.770888,2.729841,2.495031,2.701913,sp|P25786|PSA1_HUMAN,P25786,PSA1_HUMAN,PSMA1,Proteasome subunit alpha type-1,3.343455e-13,12.475805,2.72953,26,M26,PSA1_M26,red,MFRNQYDNDVTVWSPQGRIHQIEYAMEAVKQGSATVGLKSKTHAVL...,IHQIEYAMEAVK,18,12,IHQIEYA,7,25,YDNDVTVWSPQGRIHQIEYA,EAVKQGSATVGLKSKTHAVL,P25786,104.0,M,25.0,97.37,5.503,4.872,4.148,5.856,-4.913,-4.412,-5.564,-3.771,-7.759,-9.062,-9.775,-9.949,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,6.0,7.0,8.0,10.0,24.0,61.0,123.0,3.0,0.0,0.0,0.0,0.0,2.0,2.238095,2.761905,4.857143,6.238095,7.52381,9.095238,24.238095,59.190476,107.904762,6.238095,0.0
546,DKANM[649.3660]QHR,DKANM[655.3735]QHR,,3.036833,2.959287,1.385111,,3.4801,,0.693433,0.645962,,3.152764,,sp|P31943|HNRH1_HUMAN,P31943,HNRH1_HUMAN,HNRNPH1,Heterogeneous nuclear ribonucleoprotein H,0.003348241,2.475183,2.193356,352,M352,HNRH1_M352,red,MMLGTEGGEGFVVKVRGLPWSCSADEVQRFFSDCKIQNGAQGIRFI...,DKANMQHR,347,8,DKAN,4,351,DVEFATHEDAVAAMSKDKAN,QHRYVELFLNSTAGASGGAY,P31943,113.0,M,351.0,69.22,30.357,30.422,29.139,31.573,-11.6,-10.079,-9.471,-9.77,8.599,8.402,7.82,7.555,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,10.0,23.0,46.0,3.0,0.0,0.0,0.0,0.0,1.857143,1.857143,2.095238,2.52381,2.714286,3.190476,3.952381,10.952381,30.47619,55.52381,1.714286,0.0
547,SMM[649.3660]SSYER,SMM[655.3735]SSYER,,,,2.176399,,,,1.706019,,2.589644,2.089357,1.812057,sp|P18583|SON_HUMAN,P18583,SON_HUMAN,SON,Protein SON,0.0001802623,3.744095,2.074695,1010,M1010,SON_M1010,red,MATNIEQIFRSFVVSKFREIQQELSSGRNEGQLNGETNTPIEGNQA...,SMMSSYER,1007,8,SM,2,1009,PLMLASRRSMMMSYAAERSM,SSYERSMMSYERSMMSPMAE,P18583,94.0,M,1009.0,80.78,-10.676,-10.721,-12.162,-10.148,25.596,26.106,26.106,27.453,-25.211,-23.763,-23.226,-23.657,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,3.0,4.0,5.0,6.0,12.0,18.0,24.0,3.0,0.0,0.0,0.0,0.0,2.0,2.0,2.238095,3.285714,4.333333,5.904762,6.238095,11.238095,17.571429,23.904762,1.619048,1.0
548,WDEM[649.3660]NILATYHPADK,WDEM[655.3735]NILATYHPADK,-0.064299,1.196788,1.375941,1.248033,1.230014,1.199729,1.235817,1.364177,0.708099,1.46448,1.524418,0.987979,sp|P41236|IPP2_HUMAN,P41236,IPP2_HUMAN,PPP1R2,Protein phosphatase inhibitor 2,2.083129e-06,5.681284,1.122598,50,M50,IPP2_M50,red,MAASTASHRPIKGILKNKTSTTSSMVASAEQPRGNVDEELSKKSQK...,WDEMNILATYHPADK,46,15,WDE,3,49,EQPRGNVDEELSKKSQKWDE,NILATYHPADKDYGLMKIDE,P41236,135.0,M,49.0,92.13,-7.096,-6.996,-8.189,-6.927,0.746,-0.415,-0.417,-1.706,-19.784,-20.78,-21.747,-20.09,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,6.0,6.0,9.0,12.0,15.0,0.0,0.0,0.0,0.0,0.0,1.380952,1.380952,1.952381,2.761905,3.095238,3.47619,3.857143,6.333333,10.333333,16.285714,0.619048,1.0
549,IHPM[649.3660]AYQLQLQAASNFK,IHPM[655.3735]AYQLQLQAASNFK,1.1407,1.28281,1.202389,1.213669,1.321224,1.50885,-2.080445,1.357823,1.333832,1.334026,1.414129,1.251989,sp|P67870|CSK2B_HUMAN,P67870,CSK2B_HUMAN,CSNK2B,Casein kinase II subunit beta,0.004107773,2.386394,1.023416,195,M195,CSK2B_M195,red,MSSSEEVSWISWFCGLRGNEFFCEVDEDYIQDKFNLTGLNEQVPHY...,IHPMAYQLQLQAASNFK,191,17,IHP,3,194,RPKRPANQFVPRLYGFKIHP,AYQLQLQAASNFKSPVKTIR,P67870,208.0,M,194.0,96.42,-9.91,-10.391,-10.899,-11.416,-2.36,-1.013,-0.123,-1.119,37.899,37.332,38.477,36.279,TURN_TY1_P,TURN,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,3.0,5.0,8.0,18.0,25.0,0.0,0.0,0.0,0.0,0.0,2.0,2.095238,2.142857,3.52381,4.333333,5.761905,6.333333,9.47619,15.761905,23.571429,1.190476,1.0
550,ELTPLQAM[649.3660]MLR,ELTPLQAM[655.3735]MLR,,,,,0.989488,,0.947038,0.900598,,0.967833,,,sp|Q9Y2W2|WBP11_HUMAN,Q9Y2W2,WBP11_HUMAN,WBP11,WW domain-binding protein 11,1.747847e-05,4.757497,0.951239,333,M333,WBP11_M333,red,MGRRSTSSTKSGKFMNPTDQARKEARKRELKKNKKQRMMVRAAVLK...,ELTPLQAMMLR,325,11,ELTPLQA,7,332,MPGKSRKKKKNMKELTPLQA,MLRMAGQEIPEEGREVEEFS,Q9Y2W2,392.0,M,332.0,87.63,9.05,10.383,11.563,10.667,12.144,12.908,11.967,13.668,-38.345,-38.432,-38.72,-37.212,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,5.0,6.0,8.0,8.0,12.0,16.0,20.0,1.0,0.0,0.0,0.0,0.0,1.619048,1.619048,2.333333,3.047619,3.428571,4.190476,4.380952,7.619048,12.333333,17.52381,1.047619,1.0
