# Imports

In [1]:
import Bio.PDB
import pandas as pd
import numpy as np
import os
import re

# Ramachandran Analysis - Helper/Prelim Section

In [2]:
# Set correct pathing

curr_dir_path_str = "./"
curr_dir_path = os.path.abspath(curr_dir_path_str)

datasets_path_str = "../Crowdedness-Analyses"
datasets_path = os.path.abspath(datasets_path_str)

global_data_path_str = "../global_data"
global_data_path = os.path.abspath(global_data_path_str)

print("Current Directory: " + curr_dir_path)
print("Datasets Directory: " + datasets_path)
print("Global Data Directory: " + global_data_path)

Current Directory: /Users/ritwiksrinivas/Desktop/Projects/ChURRO_ABPP/Ramachandran-Analyses
Datasets Directory: /Users/ritwiksrinivas/Desktop/Projects/ChURRO_ABPP/Crowdedness-Analyses
Global Data Directory: /Users/ritwiksrinivas/Desktop/Projects/ChURRO_ABPP/global_data


In [3]:
# Helper function to extract phi/psi angles from a .cif file 
def get_phi_and_psi(Protein_ID, CIF_file_path, verbose=False):
    """Extracts the psi and phi angle for each residue in protein's 
    alphofold structure.

    Args:
      Protein_ID: uniprot id of protein.
      CIF_file_path: file path to proteins mmCIF file.

    Returns:
      Dataframe that stores the dihedral angles of each residue in the 
      specified protein.

    """

    df = pd.DataFrame()
    for model in Bio.PDB.MMCIFParser().get_structure(Protein_ID, CIF_file_path):
        for chain in model:
            polypeptides = Bio.PDB.PPBuilder().build_peptides(chain)
            res_index_list = []
            res_name_list = []
            phi_list = []
            psi_list = []
            for poly_index, poly in enumerate(polypeptides):
                if verbose:
                  print("Model %s Chain %s" % (str(model.id), str(chain.id)))
                  print ("(part %i of %i)" % (poly_index+1, len(polypeptides)))
                  print ("length %i" % (len(poly)))
                  print ("from %s%i" % (poly[0].resname, poly[0].id[1]))
                  print ("to %s%i" % (poly[-1].resname, poly[-1].id[1]))
                phi_psi = poly.get_phi_psi_list()
                res_index_sublist = []
                res_name_sublist = []
                phi_sublist = []
                psi_sublist = []
                for res_index, residue in enumerate(poly) :
                    res_name = "%s%i" % (residue.resname, residue.id[1])
                    # print(res_name, phi_psi[res_index])
                    res_index_sublist.append(residue.id[1])
                    res_name_sublist.append(residue.resname)
                    phi_sublist.append(phi_psi[res_index][0])
                    psi_sublist.append(phi_psi[res_index][1])
                res_index_list.extend(res_index_sublist)
                res_name_list.extend(res_name_sublist)
                phi_list.extend(phi_sublist)
                psi_list.extend(psi_sublist)
            df['Protein ID'] = [Protein_ID] * len(res_index_list)
            df['Residue Name'] = res_name_list
            df['Residue Position'] = res_index_list
            df['PHI'] = phi_list
            df['PSI'] = psi_list
    return df
    


In [4]:
directory = '../alphafold_data/cif'
file_paths = []

for root, directories, files in os.walk(directory):
    for file in files:
        file_path = os.path.join(root, file)
        file_paths.append(file_path)

print(file_paths)

['../alphafold_data/cif/P62829.cif', '../alphafold_data/cif/Q96PK6.cif', '../alphafold_data/cif/O60814.cif', '../alphafold_data/cif/P07900.cif', '../alphafold_data/cif/P47897.cif', '../alphafold_data/cif/Q14203.cif', '../alphafold_data/cif/P15121.cif', '../alphafold_data/cif/Q969T9.cif', '../alphafold_data/cif/P38919.cif', '../alphafold_data/cif/O60784.cif', '../alphafold_data/cif/O43175.cif', '../alphafold_data/cif/Q9UN86.cif', '../alphafold_data/cif/Q9NRY2.cif', '../alphafold_data/cif/O14950.cif', '../alphafold_data/cif/P35637.cif', '../alphafold_data/cif/Q9Y3U8.cif', '../alphafold_data/cif/P39019.cif', '../alphafold_data/cif/P62753.cif', '../alphafold_data/cif/Q9HD42.cif', '../alphafold_data/cif/Q9BVA1.cif', '../alphafold_data/cif/Q99536.cif', '../alphafold_data/cif/P12277.cif', '../alphafold_data/cif/O14776.cif', '../alphafold_data/cif/Q9UI36.cif', '../alphafold_data/cif/Q8N5G2.cif', '../alphafold_data/cif/P07108.cif', '../alphafold_data/cif/Q14160.cif', '../alphafold_data/cif/Q6YN

In [5]:
pattern = r'/([^/]+)\.cif$'
protein_ids =[]

for file_path in file_paths:
    match = re.search(pattern, file_path)
    desired_substring = match.group(1)
    protein_ids.append(desired_substring)

print(protein_ids)

['P62829', 'Q96PK6', 'O60814', 'P07900', 'P47897', 'Q14203', 'P15121', 'Q969T9', 'P38919', 'O60784', 'O43175', 'Q9UN86', 'Q9NRY2', 'O14950', 'P35637', 'Q9Y3U8', 'P39019', 'P62753', 'Q9HD42', 'Q9BVA1', 'Q99536', 'P12277', 'O14776', 'Q9UI36', 'Q8N5G2', 'P07108', 'Q14160', 'Q6YN16', 'P17858', 'P09496', 'P56192', 'Q8WXI9', 'Q9NYP7', 'Q15054', 'O00267', 'Q9UL46', 'Q13586', 'Q15691', 'Q9P2E9', 'P50454', 'P28074', 'Q5JSZ5', 'Q9BYJ9', 'Q9UJZ1', 'P55060', 'Q9H2G2', 'Q14980', 'P46778', 'P82909', 'Q99497', 'P63261', 'Q05682', 'Q9Y5A9', 'P35580', 'Q9Y230', 'Q9BUF5', 'Q86Y82', 'P27635', 'O15091', 'O14562', 'P34932', 'Q15056', 'O15126', 'Q92878', 'P23527', 'Q9NY27', 'Q9NYF8', 'Q08211', 'O43837', 'Q86XP3', 'P17480', 'Q96A33', 'Q9NRX4', 'Q96AE4', 'P62750', 'Q9UI09', 'P48634', 'Q16774', 'Q9NUP9', 'P08238', 'Q7Z5L9', 'Q92879', 'P27797', 'Q16576', 'Q16204', 'Q8IXM2', 'Q969Z0', 'Q9P2J5', 'O60341', 'P25786', 'Q04760', 'Q9NVA2', 'P35998', 'Q9UQ80', 'Q9GZZ9', 'P37802', 'P62195', 'P18615', 'Q9P013', 'O00116',

In [6]:
list_of_dfs = []

for cif_file, prot_id in zip(file_paths, protein_ids):
    list_of_dfs.append(get_phi_and_psi(prot_id, cif_file))

In [7]:
concat_dihedrals = pd.concat(list_of_dfs)
concat_dihedrals['PSI degrees'] = np.rad2deg(concat_dihedrals['PSI'])
concat_dihedrals['PHI degrees'] = np.rad2deg(concat_dihedrals['PHI'])
concat_dihedrals

Unnamed: 0,Protein ID,Residue Name,Residue Position,PHI,PSI,PSI degrees,PHI degrees
0,P62829,MET,1,,2.164332,124.007114,
1,P62829,SER,2,-1.036083,2.194941,125.760845,-59.363210
2,P62829,LYS,3,-1.054214,1.768404,101.322100,-60.402040
3,P62829,ARG,4,-0.931914,1.814638,103.971075,-53.394727
4,P62829,GLY,5,-1.024285,1.739223,99.650134,-58.687201
...,...,...,...,...,...,...,...
591,P41743,ALA,592,-1.460363,-0.203497,-11.659505,-83.672639
592,P41743,GLU,593,-1.886433,-0.425161,-24.359948,-108.084665
593,P41743,GLU,594,-1.403320,-0.176936,-10.137675,-80.404323
594,P41743,CYS,595,-1.599052,0.315555,18.079959,-91.618920


In [8]:
#concat_dihedrals.to_csv(os.path.join(global_data_path, "dihedral_angles.csv"), index=False)

Sanity Check: Here, we verify that the AA sequences extracted from the AlphaFold database match those found in the UniProt database

In [9]:
concat_dihedrals = pd.read_csv(os.path.join(global_data_path, "dihedral_angles.csv"))
concat_dihedrals

Unnamed: 0,Protein ID,Residue Name,Residue Position,PHI,PSI,PSI degrees,PHI degrees
0,P62829,MET,1,,2.164332,124.007114,
1,P62829,SER,2,-1.036083,2.194941,125.760845,-59.363210
2,P62829,LYS,3,-1.054214,1.768404,101.322100,-60.402040
3,P62829,ARG,4,-0.931914,1.814638,103.971075,-53.394727
4,P62829,GLY,5,-1.024285,1.739223,99.650134,-58.687201
...,...,...,...,...,...,...,...
513150,P41743,ALA,592,-1.460363,-0.203497,-11.659505,-83.672639
513151,P41743,GLU,593,-1.886433,-0.425161,-24.359948,-108.084665
513152,P41743,GLU,594,-1.403320,-0.176936,-10.137675,-80.404323
513153,P41743,CYS,595,-1.599052,0.315555,18.079959,-91.618920


In [10]:
len(concat_dihedrals['Protein ID'].unique())

856

In [11]:
uniprot_protein_sequences = pd.read_csv(os.path.join(global_data_path, "complete_sequence_cache.csv")).drop('Unnamed: 0', axis=1)
uniprot_protein_sequences

Unnamed: 0,Protein ID,Complete Sequence
0,P55072,MASGADSKGDDLSTAILKQKNRPNRLIVDEAINEDNSVVSLSQPKM...
1,Q9NTJ3,MPRKGTQPSTARRREEGPPPPSPDGASSDAEPEPPSGRTESPATAA...
2,P39023,MSHRKFSAPRHGSLGFLPRKRSSRHRGKVKSFPKDDPSKPVHLTAF...
3,P62847,MNDTVTIRTRKFMTNRLLQRKQMVIDVLHPGKATVPKTEIREKLAK...
4,P50991,MPENVAPRSGATAGAAGGRGKGAYQDRDKPAQIRFSNISAAKAVAD...
...,...,...
859,O43633,MDLLFGRRKTPEELLRQNQRALNRAMRELDRERQKLETQEKKIIAD...
860,Q9NR28,MAALKSWLSRSVTSFFRYRQCLCVPVVANFKKRCFSELIRPWHKTV...
861,P30084,MAALRVLLSCVRGPLRPPVRCPAWRPFASGANFEYIIAEKRGKNNT...
862,P46783,MLMPKKNRIAIYELLFKEGVMVAKKDVHMPKHPELADKNVPNLHVM...


In [12]:
amino_acid_map = {
    "ALA": "A",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "CYS": "C",
    "GLU": "E",
    "GLN": "Q",
    "GLY": "G",
    "HIS": "H",
    "ILE": "I",
    "LEU": "L",
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYR": "Y",
    "VAL": "V",
    "SEC": "U",
    "PYL": "O"
}

In [13]:
# Function to verify sequences
def verify_sequences(df_residues, df_sequences, verbose=False):
    mismatches = pd.DataFrame(
        columns=['Protein ID', 'Residue Position', 'AlphaFold Residue', 'UniProt Residue'])

    missing_sequences = 0
    for protein_id in df_residues['Protein ID'].unique():
        # Get the complete sequence for the current Protein ID
        if protein_id in df_sequences['Protein ID'].values:
            complete_seq = df_sequences[df_sequences['Protein ID'] == protein_id]['Complete Sequence'].values[0]
        else:
            missing_sequences += 1
            print(f'{protein_id} is not one of the completed sequences we queried from UniProt', missing_sequences)
            continue

        # Filter residues for the current Protein ID
        residues = df_residues[df_residues['Protein ID'] == protein_id]

        for _, row in residues.iterrows():
            residue_name = row['Residue Name']
            residue_position = row['Residue Position']
            
            # Get the expected residue from the complete sequence
            if (residue_position - 1) <  len(complete_seq):
                expected_residue = complete_seq[residue_position - 1]  # position - 1 for zero-based indexing
                actual_residue = amino_acid_map[residue_name]
            else:
                expected_residue = 'X'
                actual_residue = amino_acid_map[residue_name]
            
            if expected_residue != actual_residue:
                if verbose:
                    print(protein_id)
                mismatches = mismatches._append({'Protein ID': protein_id,
                        'Residue Position': residue_position,
                        'AlphaFold Residue': actual_residue,
                        'UniProt Residue': expected_residue
                        }, ignore_index = True)
                #mismatches.loc[len(mismatches.index)] = [protein_id, residue_position, actual_residue, expected_residue]
                #mismatches.append((protein_id, residue_position, actual_residue, expected_residue))

    return mismatches

In [14]:
mismatches = verify_sequences(concat_dihedrals, uniprot_protein_sequences)
mismatches

Unnamed: 0,Protein ID,Residue Position,AlphaFold Residue,UniProt Residue
0,Q14160,674,V,E
1,P27635,202,N,S
2,Q8NFD5,3,H,A
3,Q8NFD5,4,N,R
4,Q8NFD5,6,G,A
...,...,...,...,...
3361,Q9NX58,265,H,R
3362,O75396,71,D,Y
3363,O75396,82,T,K
3364,O75396,130,C,R


In [15]:
unique_mismatches = mismatches['Protein ID'].unique()
print(unique_mismatches)
print(len(unique_mismatches))

['Q14160' 'P27635' 'Q8NFD5' 'Q9NX55' 'Q8NFV4' 'Q92616' 'P49411' 'Q10567'
 'P26599' 'Q00341' 'A0A2R8Y4L2' 'O14929' 'Q9NX58' 'O75396']
14


# Ramachandran Analysis - ChURRO_1 Dataset

In [16]:
path = os.path.join(datasets_path, "ChURRO_1_with_alphafold.csv")
CHURRO_1_df = pd.read_csv(path).drop(columns = ['Unnamed: 0'])

pd.set_option('display.max_columns', None)
display(CHURRO_1_df)
pd.reset_option('display.max_columns')

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,1_4 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,2_4 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,avg ratio,neglogpval,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,NSLESYAFNM[577.3085]K,NSLESYAFNM[583.3160]K,,,3.408509,,4.603527,4.107654,,4.407757,sp|P11142|HSP7C_HUMAN,P11142,HSP7C_HUMAN,HSPA8,Heat shock cognate 71 kDa protein,5.529095e-04,4.131862,3.257346,M549,HSP7C_M549,red,MSKGPAVGIDLGTTYSCVGVFQHGKVEIIANDQGNRTTPSYVAFTD...,NSLESYAFNMK,539,11,NSLESYAFN,9,548,DEKQRDKVSSKNSLESYAFN,KATVEDEKLQGKINDEDKQK,P11142,99.0,M,548.0,89.69,37.293,36.147,36.471,34.907,18.565,18.150,16.869,17.958,13.623,14.548,15.317,13.811,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,6.0,6.0,8.0,8.0,20.0,54.0,74.0,9.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.095238,4.380952,5.285714,6.714286,6.952381,15.666667,40.666667,61.238095,4.380952,0.0
1,LQHVEDGVLSM[577.3085]QVASAR,LQHVEDGVLSM[583.3160]QVASAR,3.190389,3.374976,3.092695,2.789313,3.451202,4.105873,3.316610,4.075215,sp|Q07065|CKAP4_HUMAN,Q07065,CKAP4_HUMAN,CKAP4,Cytoskeleton-associated protein 4,1.337810e-07,3.424534,6.873606,M423,CKAP4_M423,red,MPSAKQRGSKGGHGAASPSEKGAHPSGGADDVAKKPPPAPQQPPPP...,LQHVEDGVLSMQVASAR,412,17,LQHVEDGVLS,10,422,QQKSQGLDSRLQHVEDGVLS,QVASARQTESLESLLSKSQE,Q07065,306.0,M,422.0,71.71,-75.225,-76.396,-77.149,-77.368,-23.796,-23.479,-22.216,-24.573,41.658,40.716,41.163,40.667,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,8.0,8.0,12.0,27.0,46.0,3.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.428571,4.476190,5.666667,7.476190,7.523810,11.952381,23.952381,43.190476,2.333333,0.0
2,TLSHPQQM[577.3085]ALLDQTK,TLSHPQQM[583.3160]ALLDQTK,2.766721,2.927959,3.144626,,,,,,sp|Q9Y490|TLN1_HUMAN,Q9Y490,TLN1_HUMAN,TLN1,Talin-1,1.377828e-03,2.946435,2.860805,M1759,TLN1_M1759,red,MVALSLKISIGNVVKTMQFEPSTMVYDACRIIRERIPEAPAGPPSD...,TLSHPQQMALLDQTK,1751,15,TLSHPQQ,7,1758,EPLTLAAVGAASKTLSHPQQ,ALLDQTKTLAESALQLLYTA,Q9Y490,550.0,M,1758.0,89.36,-10.855,-10.915,-10.721,-9.924,22.782,24.283,24.555,25.018,-10.557,-10.846,-12.345,-10.051,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,7.0,8.0,8.0,16.0,49.0,92.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.380952,3.714286,5.238095,6.619048,7.047619,18.047619,57.952381,95.047619,5.380952,0.0
3,GIIWGEDTLM[577.3085]EYLENPK,GIIWGEDTLM[583.3160]EYLENPK,2.820631,2.664639,,3.143147,,,,,sp|P99999|CYC_HUMAN,P99999,CYC_HUMAN,CYCS,Cytochrome c,2.391139e-03,2.876139,2.621395,M66,CYC_M66,red,MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPG...,GIIWGEDTLMEYLENPK,56,17,GIIWGEDTL,9,65,GYSYTAANKNKGIIWGEDTL,EYLENPKKYIPGTKMIFVGI,P99999,288.0,M,65.0,98.52,-3.727,-4.683,-6.135,-4.359,-5.119,-3.953,-4.466,-3.242,2.159,1.908,1.881,0.670,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,6.0,8.0,9.0,26.0,61.0,94.0,1.0,0.0,0.0,0.000000,0.000000,2.000000,2.047619,2.380952,3.857143,6.000000,7.285714,8.190476,20.333333,51.047619,87.857143,4.619048,0.0
4,VDM[577.3085]VWIVGGSSVYK,VDM[583.3160]VWIVGGSSVYK,,,2.888442,,,2.775411,,,sp|P00374|DYR_HUMAN,P00374,DYR_HUMAN,DHFR,Dihydrofolate reductase,1.270312e-02,2.831927,1.896090,M112,DYR_M112,red,MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEG...,VDMVWIVGGSSVYK,109,14,VD,2,111,RSLDDALKLTEQPELANKVD,VWIVGGSSVYKEAMNHPGHL,P00374,66.0,M,111.0,96.12,-9.211,-8.992,-7.786,-8.817,5.026,6.276,7.064,5.967,7.772,8.629,8.105,10.054,STRN,STRN,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,3.0,3.0,5.0,7.0,27.0,70.0,123.0,8.0,0.0,0.0,0.047619,0.047619,2.000000,2.000000,2.476190,3.333333,4.857143,6.666667,8.095238,21.428571,59.619048,108.380952,5.523810,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1022,LRDTEEM[577.3085]LSK,LRDTEEM[583.3160]LSK,0.138583,0.354235,-0.005030,0.084883,-0.302264,-0.280001,-0.009539,,sp|Q9H444|CHM4B_HUMAN,Q9H444,CHM4B_HUMAN,CHMP4B,Charged multivesicular body protein 4b,9.760989e-01,-0.002733,0.010506,M35,CHM4B_M35,grey,MSVFGKLFGAGGGKAGKGGPTPQEAIQRLRDTEEMLSKKQEFLEKK...,LRDTEEMLSK,28,10,LRDTEE,6,34,AGKGGPTPQEAIQRLRDTEE,LSKKQEFLEKKIEQELTAAK,Q9H444,481.0,M,34.0,97.87,11.882,11.620,12.417,10.198,9.090,9.850,9.267,9.861,-2.216,-0.910,0.265,-0.558,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,5.0,6.0,8.0,8.0,16.0,35.0,55.0,4.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.428571,5.095238,6.047619,8.380952,8.476190,18.142857,36.428571,52.190476,4.619048,0.0
1023,FGQAATM[577.3085]EGIGAIGGTPPAFNR,FGQAATM[583.3160]EGIGAIGGTPPAFNR,0.094201,0.207053,0.114255,0.243672,0.047130,-0.064950,-0.586272,-0.036869,sp|Q15233|NONO_HUMAN,Q15233,NONO_HUMAN,NONO,Non-POU domain-containing octamer-binding protein,9.809736e-01,0.002278,0.008343,M441,NONO_M441,grey,MQSNKTFNLEKQNHTPRKHHQHHHQQQHHQQQQQQPPPPPIPANGQ...,FGQAATMEGIGAIGGTPPAFNR,434,22,FGQAAT,6,440,PDGTLGLTPPTTERFGQAAT,EGIGAIGGTPPAFNRAAPGA,Q15233,361.0,M,440.0,46.92,-38.877,-40.364,-40.886,-40.628,-17.279,-17.076,-15.706,-17.258,-25.729,-25.385,-25.856,-23.949,unstructured,unstructured,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,4.0,6.0,10.0,0.0,0.0,0.0,0.000000,0.190476,1.095238,1.285714,2.000000,2.000000,2.000000,2.000000,2.000000,4.000000,6.285714,10.000000,0.095238,1.0
1024,IHVSYQETQQM[15.9949]QM[577.3085]K,IHVSYQETQQM[15.9949]QM[583.3160]K,0.835447,0.089325,,0.549177,,-0.204260,-1.313063,,sp|Q86UP2|KTN1_HUMAN,Q86UP2,KTN1_HUMAN,KTN1,Kinectin,9.825290e-01,-0.008675,0.007655,M406,KTN1_M406,grey,MEFYESAYFIVLIPSIVITVIFLFFWLFMKETLYDEVLAKQKREQK...,IHVSYQETQQMQMK,393,14,IHVSYQETQQMQ,12,405,EHNVFQNKIHVSYQETQQMQ,KFQQVREQMEAEIAHLKQEN,Q86UP2,402.0,M,405.0,85.68,4.533,3.747,4.416,2.340,27.322,28.247,28.263,27.838,9.502,10.447,11.832,10.583,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,8.0,8.0,12.0,20.0,27.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.571429,5.285714,6.095238,7.952381,8.000000,12.047619,19.904762,27.476190,2.142857,1.0
1025,YHSLAPM[577.3085]YYR,YHSLAPM[583.3160]YYR,,,-0.729042,,,,0.704989,,sp|P51148|RAB5C_HUMAN,P51148,RAB5C_HUMAN,RAB5C,Ras-related protein Rab-5C,9.893231e-01,-0.012026,0.004662,M89,RAB5C_M89,grey,MAGRGGAARPNGPAAGNKICQFKLVLLGESAVGKSSLVLRFVKGQF...,YHSLAPMYYR,82,10,YHSLAP,6,88,TVKFEIWDTAGQERYHSLAP,YYRGAQAAIVVYDITNTDTF,P51148,221.0,M,88.0,94.05,-0.736,0.581,0.992,1.668,11.471,11.845,13.287,10.953,10.816,11.513,11.189,11.114,HELX_RH_AL_P,HELX,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,4.0,5.0,5.0,6.0,9.0,46.0,78.0,2.0,0.0,0.0,0.000000,0.000000,2.000000,2.000000,2.523810,3.428571,4.619048,5.952381,6.904762,18.904762,55.714286,94.666667,4.857143,0.0


In [17]:
# 9 of the proteins in the CHURRO_1 dataset have mismatched sequences, so we remove them
clean_CHURRO_1 = CHURRO_1_df[CHURRO_1_df['Protein ID'].isin(unique_mismatches) == False].reset_index(drop=True)
clean_CHURRO_1

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,1_4 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,2_4 Log2 Ratio HL,...,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,NSLESYAFNM[577.3085]K,NSLESYAFNM[583.3160]K,,,3.408509,,4.603527,4.107654,,4.407757,...,2.095238,4.380952,5.285714,6.714286,6.952381,15.666667,40.666667,61.238095,4.380952,0.0
1,LQHVEDGVLSM[577.3085]QVASAR,LQHVEDGVLSM[583.3160]QVASAR,3.190389,3.374976,3.092695,2.789313,3.451202,4.105873,3.316610,4.075215,...,2.428571,4.476190,5.666667,7.476190,7.523810,11.952381,23.952381,43.190476,2.333333,0.0
2,TLSHPQQM[577.3085]ALLDQTK,TLSHPQQM[583.3160]ALLDQTK,2.766721,2.927959,3.144626,,,,,,...,2.380952,3.714286,5.238095,6.619048,7.047619,18.047619,57.952381,95.047619,5.380952,0.0
3,GIIWGEDTLM[577.3085]EYLENPK,GIIWGEDTLM[583.3160]EYLENPK,2.820631,2.664639,,3.143147,,,,,...,2.380952,3.857143,6.000000,7.285714,8.190476,20.333333,51.047619,87.857143,4.619048,0.0
4,VDM[577.3085]VWIVGGSSVYK,VDM[583.3160]VWIVGGSSVYK,,,2.888442,,,2.775411,,,...,2.476190,3.333333,4.857143,6.666667,8.095238,21.428571,59.619048,108.380952,5.523810,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1013,LRDTEEM[577.3085]LSK,LRDTEEM[583.3160]LSK,0.138583,0.354235,-0.005030,0.084883,-0.302264,-0.280001,-0.009539,,...,2.428571,5.095238,6.047619,8.380952,8.476190,18.142857,36.428571,52.190476,4.619048,0.0
1014,FGQAATM[577.3085]EGIGAIGGTPPAFNR,FGQAATM[583.3160]EGIGAIGGTPPAFNR,0.094201,0.207053,0.114255,0.243672,0.047130,-0.064950,-0.586272,-0.036869,...,2.000000,2.000000,2.000000,2.000000,2.000000,4.000000,6.285714,10.000000,0.095238,1.0
1015,IHVSYQETQQM[15.9949]QM[577.3085]K,IHVSYQETQQM[15.9949]QM[583.3160]K,0.835447,0.089325,,0.549177,,-0.204260,-1.313063,,...,2.571429,5.285714,6.095238,7.952381,8.000000,12.047619,19.904762,27.476190,2.142857,1.0
1016,YHSLAPM[577.3085]YYR,YHSLAPM[583.3160]YYR,,,-0.729042,,,,0.704989,,...,2.523810,3.428571,4.619048,5.952381,6.904762,18.904762,55.714286,94.666667,4.857143,0.0


In [18]:
def extract_PSI_and_PHI(residue_df, psi_and_phi_df, verbose=False):
    df = pd.DataFrame(columns=['PSI Radians', 'PHI Radians', 'PSI Degrees', 'PHI Degrees'])
    for _, row in residue_df.iterrows():
        protein_id = row['Protein ID']
        methionine_posn = row['Methionine Location'] + 1
        row_of_interest = psi_and_phi_df.loc[(psi_and_phi_df['Protein ID'] == protein_id) & (psi_and_phi_df['Residue Position'] == methionine_posn)]
        if verbose:
            print(row_of_interest)
        new_row = {'PSI Radians': float('nan'), 'PHI Radians':float('nan'), 'PSI Degrees': float('nan'), 'PHI Degrees': float('nan')}
        if row_of_interest.shape[0] == 1 and row_of_interest['Residue Name'].iloc[0] == 'MET':
            new_row = {'PSI Radians': row_of_interest['PSI'].iloc[0], 'PHI Radians':row_of_interest['PHI'].iloc[0], 'PSI Degrees': row_of_interest['PSI degrees'].iloc[0], 'PHI Degrees': row_of_interest['PHI degrees'].iloc[0]}
        df = df._append(new_row, ignore_index = True)
    return df

In [19]:
CHURRO_1_psi_phi_map = extract_PSI_and_PHI(clean_CHURRO_1, concat_dihedrals)
CHURRO_1_psi_phi_map

  df = df._append(new_row, ignore_index = True)


Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,-0.742821,-1.181290,-42.560482,-67.682903
1,-0.781354,-1.069864,-44.768303,-61.298705
2,-0.943332,-1.174102,-54.048956,-67.271110
3,-0.697202,-1.018011,-39.946739,-58.327732
4,2.600141,-1.846402,148.977115,-105.791046
...,...,...,...,...
1013,-0.761189,-1.120597,-43.612912,-64.205451
1014,2.177622,-2.618134,124.768543,-150.008032
1015,-0.762902,-1.032733,-43.711045,-59.171241
1016,-0.675357,-1.130696,-38.695083,-64.784116


In [20]:
sum(CHURRO_1_psi_phi_map['PHI Degrees'].isna())

0

In [21]:
CHURRO_1_with_PSI_and_PHI = pd.concat([clean_CHURRO_1, CHURRO_1_psi_phi_map], axis=1)
CHURRO_1_with_PSI_and_PHI

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,1_4 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,2_4 Log2 Ratio HL,...,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,NSLESYAFNM[577.3085]K,NSLESYAFNM[583.3160]K,,,3.408509,,4.603527,4.107654,,4.407757,...,6.952381,15.666667,40.666667,61.238095,4.380952,0.0,-0.742821,-1.181290,-42.560482,-67.682903
1,LQHVEDGVLSM[577.3085]QVASAR,LQHVEDGVLSM[583.3160]QVASAR,3.190389,3.374976,3.092695,2.789313,3.451202,4.105873,3.316610,4.075215,...,7.523810,11.952381,23.952381,43.190476,2.333333,0.0,-0.781354,-1.069864,-44.768303,-61.298705
2,TLSHPQQM[577.3085]ALLDQTK,TLSHPQQM[583.3160]ALLDQTK,2.766721,2.927959,3.144626,,,,,,...,7.047619,18.047619,57.952381,95.047619,5.380952,0.0,-0.943332,-1.174102,-54.048956,-67.271110
3,GIIWGEDTLM[577.3085]EYLENPK,GIIWGEDTLM[583.3160]EYLENPK,2.820631,2.664639,,3.143147,,,,,...,8.190476,20.333333,51.047619,87.857143,4.619048,0.0,-0.697202,-1.018011,-39.946739,-58.327732
4,VDM[577.3085]VWIVGGSSVYK,VDM[583.3160]VWIVGGSSVYK,,,2.888442,,,2.775411,,,...,8.095238,21.428571,59.619048,108.380952,5.523810,0.0,2.600141,-1.846402,148.977115,-105.791046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1013,LRDTEEM[577.3085]LSK,LRDTEEM[583.3160]LSK,0.138583,0.354235,-0.005030,0.084883,-0.302264,-0.280001,-0.009539,,...,8.476190,18.142857,36.428571,52.190476,4.619048,0.0,-0.761189,-1.120597,-43.612912,-64.205451
1014,FGQAATM[577.3085]EGIGAIGGTPPAFNR,FGQAATM[583.3160]EGIGAIGGTPPAFNR,0.094201,0.207053,0.114255,0.243672,0.047130,-0.064950,-0.586272,-0.036869,...,2.000000,4.000000,6.285714,10.000000,0.095238,1.0,2.177622,-2.618134,124.768543,-150.008032
1015,IHVSYQETQQM[15.9949]QM[577.3085]K,IHVSYQETQQM[15.9949]QM[583.3160]K,0.835447,0.089325,,0.549177,,-0.204260,-1.313063,,...,8.000000,12.047619,19.904762,27.476190,2.142857,1.0,-0.762902,-1.032733,-43.711045,-59.171241
1016,YHSLAPM[577.3085]YYR,YHSLAPM[583.3160]YYR,,,-0.729042,,,,0.704989,,...,6.904762,18.904762,55.714286,94.666667,4.857143,0.0,-0.675357,-1.130696,-38.695083,-64.784116


In [22]:
#CHURRO_1_with_PSI_and_PHI.to_csv('ChURRO_1_with_alphafold_and_PSI_and_PHI_angles.csv', index=False)

# Ramachandran Analysis - ChURRO_2 Dataset

In [23]:
path = os.path.join(datasets_path, "ChURRO_2_with_alphafold.csv")
CHURRO_2_df = pd.read_csv(path).drop(columns = ['Unnamed: 0'])

pd.set_option('display.max_columns', None)
display(CHURRO_2_df)
pd.reset_option('display.max_columns')

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,3_3 Log2 Ratio HL,Protein,Protein ID,Entry Name,Protein Description,pvalue,avg ratio,neglogpvalue,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,LQHVEDGVLSM[649.3660]QVASAR,LQHVEDGVLSM[655.3735]QVASAR,,,,7.233420,6.853712,6.550813,,5.946325,5.847717,sp|Q07065|CKAP4_HUMAN,Q07065,CKAP4_HUMAN,Cytoskeleton-associated protein 4,1.634911e-05,6.486397,4.786506,M423,CKAP4_M423,red,MPSAKQRGSKGGHGAASPSEKGAHPSGGADDVAKKPPPAPQQPPPP...,LQHVEDGVLSMQVASAR,412,17,LQHVEDGVLS,10,422,QQKSQGLDSRLQHVEDGVLS,QVASARQTESLESLLSKSQE,Q07065,214,M,422,71.71,-75.225,-76.396,-77.149,-77.368,-23.796,-23.479,-22.216,-24.573,41.658,40.716,41.163,40.667,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,4,6,8,8,12,27,46,3,0.0,0.0,0.0,0.0,2.000000,2.000000,2.428571,4.476190,5.666667,7.476190,7.523810,11.952381,23.952381,43.190476,2.333333,0
1,IHQIEYAM[649.3660]EAVK,IHQIEYAM[655.3735]EAVK,7.437563,,,5.649850,,7.006431,5.829334,,5.759984,sp|P25786|PSA1_HUMAN,P25786,PSA1_HUMAN,Proteasome subunit alpha type-1,6.741449e-05,6.336632,4.171247,M26,PSA1_M26,red,MFRNQYDNDVTVWSPQGRIHQIEYAMEAVKQGSATVGLKSKTHAVL...,IHQIEYAMEAVK,18,12,IHQIEYA,7,25,YDNDVTVWSPQGRIHQIEYA,EAVKQGSATVGLKSKTHAVL,P25786,93,M,25,97.37,5.503,4.872,4.148,5.856,-4.913,-4.412,-5.564,-3.771,-7.759,-9.062,-9.775,-9.949,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,6,7,8,10,24,61,123,3,0.0,0.0,0.0,0.0,2.000000,2.238095,2.761905,4.857143,6.238095,7.523810,9.095238,24.238095,59.190476,107.904762,6.238095,0
2,YHTSQSGDEM[649.3660]TSLSEYVSR,YHTSQSGDEM[655.3735]TSLSEYVSR,,3.404261,3.587883,4.018111,3.963818,4.068696,3.961067,3.689166,3.758318,sp|P08238|HS90B_HUMAN,P08238,HS90B_HUMAN,Heat shock protein HSP 90-beta,6.241545e-10,3.806415,9.204708,M466,HS90B_M466,red,MPEEVHHGEEEVETFAFQAEIAQLMSLIINTFYSNKEIFLRELISN...,YHTSQSGDEMTSLSEYVSR,456,19,YHTSQSGDE,9,465,TNRRRLSELLRYHTSQSGDE,TSLSEYVSRMKETQKSIYYI,P08238,49,M,465,94.26,-22.104,-21.074,-19.781,-20.843,-11.517,-11.834,-11.055,-13.275,5.204,6.289,6.033,6.334,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,4,4,5,5,6,17,56,109,4,0.0,0.0,0.0,0.0,2.000000,2.047619,2.428571,3.952381,4.476190,5.523810,6.761905,18.714286,54.571429,106.047619,5.333333,0
3,AIGISNFNHLQVEM[649.3660]ILNKPGLK,AIGISNFNHLQVEM[655.3735]ILNKPGLK,2.572725,2.859067,5.304034,2.905442,2.890081,2.809590,2.710625,2.643557,2.647510,sp|P15121|ALDR_HUMAN,P15121,ALDR_HUMAN,Aldo-keto reductase family 1 member B1,5.401038e-06,3.038070,5.267523,M169,ALDR_M169,red,MASRLLLNNGAKMPILGLGTWKSPPGQVTEAVKVAIDVGYRHIDCA...,AIGISNFNHLQVEMILNKPGLK,155,22,AIGISNFNHLQVE,13,168,VDEGLVKAIGISNFNHLQVE,ILNKPGLKYKPAVNQIECHP,P15121,70,M,168,98.71,20.086,20.577,20.813,19.636,-2.492,-3.338,-2.495,-4.409,6.225,5.043,3.782,4.730,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,5,6,8,8,17,65,104,6,0.0,0.0,0.0,0.0,2.000000,2.047619,2.523810,3.761905,4.666667,6.380952,7.238095,22.333333,70.333333,127.857143,6.619048,0
4,TSTVDLPIENQLLWQIDREMLNLYIENEGKM[649.3660]IMQDK,TSTVDLPIENQLLWQIDREMLNLYIENEGKM[655.3735]IMQDK,,,,,2.259787,,,2.211374,2.647917,sp|P34932|HSP74_HUMAN,P34932,HSP74_HUMAN,Heat shock 70 kDa protein 4,3.372263e-03,2.373026,2.472079,M604,HSP74_M604,red,MSVVGIDLGFQSCYVAVARAGGIETIANEYSDRCTPACISFGPKNR...,TSTVDLPIENQLLWQIDREMLNLYIENEGKMIMQDK,573,36,TSTVDLPIENQLLWQIDREMLNLYIENEGK,30,603,QLLWQIDREMLNLYIENEGK,IMQDKLEKERNDAKNAVEEY,P34932,110,M,603,97.99,-20.599,-20.345,-18.895,-21.281,-8.766,-9.890,-9.871,-9.830,-4.016,-5.031,-5.523,-6.156,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,5,6,8,8,22,64,129,7,0.0,0.0,0.0,0.0,2.000000,2.095238,2.571429,5.380952,6.142857,8.238095,8.523810,19.190476,50.190476,111.476190,5.380952,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,TDFFIGGEEGM[649.3660]AEK,TDFFIGGEEGM[655.3735]AEK,0.008080,-0.318884,0.097640,-0.137602,0.183818,-0.011744,0.116157,0.026499,0.007357,sp|Q9Y266|NUDC_HUMAN,Q9Y266,NUDC_HUMAN,Nuclear migration protein nudC,9.505145e-01,-0.003187,0.022041,M50,NUDC_M50,grey,MGGEQEEERFDGMLLAMAQQHEGGVQELVNTFFSFLRRKTDFFIGG...,TDFFIGGEEGMAEK,39,14,TDFFIGGEEG,10,49,NTFFSFLRRKTDFFIGGEEG,AEKLITQTFSHHNQLAQKTR,Q9Y266,353,M,49,91.20,-15.089,-15.004,-15.535,-15.713,11.110,11.291,10.032,12.490,39.320,40.840,41.542,41.330,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,3,5,6,6,12,23,33,3,0.0,0.0,0.0,0.0,2.000000,2.000000,2.000000,3.285714,4.571429,5.571429,6.000000,11.476190,23.333333,37.857143,2.476190,0
610,EDIERM[649.3660]VQEAEK,EDIERM[655.3735]VQEAEK,-0.909764,,,,,,,,0.822103,sp|P11142|HSP7C_HUMAN,P11142,HSP7C_HUMAN,Heat shock cognate 71 kDa protein,9.678040e-01,-0.043831,0.014213,M518,HSP7C_M518,grey,MSKGPAVGIDLGTTYSCVGVFQHGKVEIIANDQGNRTTPSYVAFTD...,EDIERMVQEAEK,512,12,EDIER,5,517,ENKITITNDKGRLSKEDIER,VQEAEKYKAEDEKQRDKVSS,P11142,58,M,517,85.91,1.504,1.185,0.139,0.703,-10.336,-11.627,-11.327,-12.695,12.828,12.076,11.004,12.959,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,4,6,7,8,8,20,43,110,11,0.0,0.0,0.0,0.0,1.952381,1.952381,2.666667,5.000000,5.285714,6.761905,6.809524,12.857143,36.238095,81.238095,2.857143,0
611,SM[649.3660]PWNVDTLSK,SM[655.3735]PWNVDTLSK,0.276155,-0.230254,0.361472,0.135673,-0.067806,-0.089783,0.210097,-0.544967,-0.083871,sp|Q16543|CDC37_HUMAN,Q16543,CDC37_HUMAN,Hsp90 co-chaperone Cdc37,9.696497e-01,-0.003698,0.013385,M112,CDC37_M112,grey,MVDYSVWDHIEVSDDEDETHPNIDTASLFRWRHQARVERMEQFQKE...,SMPWNVDTLSK,110,11,S,1,111,RKEERSWEQKLEEMRKKEKS,PWNVDTLSKDGFSKSMVNTK,Q16543,257,M,111,82.15,-44.628,-44.223,-42.705,-44.732,-3.314,-2.910,-2.751,-3.908,37.475,38.896,39.034,39.826,BEND,BEND,1,0,0,0,0,0,0,0,0,2,2,2,2,2,2,4,7,23,47,1,0.0,0.0,0.0,0.0,1.809524,1.809524,2.333333,3.476190,3.523810,4.380952,4.857143,9.238095,22.095238,40.380952,1.619048,0
612,LSLQEQDAAIVKNM[649.3660]K,LSLQEQDAAIVKNM[655.3735]K,,,,,,0.222761,,,-0.239224,sp|Q9Y6D9|MD1L1_HUMAN,Q9Y6D9,MD1L1_HUMAN,Mitotic spindle assembly checkpoint protein MAD1,9.773242e-01,-0.008231,0.009961,M245,MD1L1_M245,grey,MEDLGENTMVLSTLRSLNNFISQRVEGGSGLDISTSAPGSLQMQYQ...,LSLQEQDAAIVKNMK,231,15,LSLQEQDAAIVKN,13,244,IKDLEQKLSLQEQDAAIVKN,KSELVRLPRLERELKQLREE,Q9Y6D9,364,M,244,75.60,87.343,87.513,87.320,86.601,-0.769,-0.405,-1.603,0.663,-76.769,-75.283,-74.342,-74.862,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,1,1,2,2,2,2,2,8,14,22,2,0.0,0.0,0.0,0.0,0.619048,0.619048,1.809524,1.809524,2.142857,2.666667,3.000000,8.047619,14.857143,20.857143,1.142857,1


In [24]:
# 4 of the proteins in the CHURRO_2 dataset have mismatched sequences, so we remove them
clean_CHURRO_2 = CHURRO_2_df[CHURRO_2_df['Protein ID'].isin(unique_mismatches) == False].reset_index(drop=True)
clean_CHURRO_2

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,...,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,LQHVEDGVLSM[649.3660]QVASAR,LQHVEDGVLSM[655.3735]QVASAR,,,,7.233420,6.853712,6.550813,,5.946325,...,2.428571,4.476190,5.666667,7.476190,7.523810,11.952381,23.952381,43.190476,2.333333,0
1,IHQIEYAM[649.3660]EAVK,IHQIEYAM[655.3735]EAVK,7.437563,,,5.649850,,7.006431,5.829334,,...,2.761905,4.857143,6.238095,7.523810,9.095238,24.238095,59.190476,107.904762,6.238095,0
2,YHTSQSGDEM[649.3660]TSLSEYVSR,YHTSQSGDEM[655.3735]TSLSEYVSR,,3.404261,3.587883,4.018111,3.963818,4.068696,3.961067,3.689166,...,2.428571,3.952381,4.476190,5.523810,6.761905,18.714286,54.571429,106.047619,5.333333,0
3,AIGISNFNHLQVEM[649.3660]ILNKPGLK,AIGISNFNHLQVEM[655.3735]ILNKPGLK,2.572725,2.859067,5.304034,2.905442,2.890081,2.809590,2.710625,2.643557,...,2.523810,3.761905,4.666667,6.380952,7.238095,22.333333,70.333333,127.857143,6.619048,0
4,TSTVDLPIENQLLWQIDREMLNLYIENEGKM[649.3660]IMQDK,TSTVDLPIENQLLWQIDREMLNLYIENEGKM[655.3735]IMQDK,,,,,2.259787,,,2.211374,...,2.571429,5.380952,6.142857,8.238095,8.523810,19.190476,50.190476,111.476190,5.380952,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,TDFFIGGEEGM[649.3660]AEK,TDFFIGGEEGM[655.3735]AEK,0.008080,-0.318884,0.097640,-0.137602,0.183818,-0.011744,0.116157,0.026499,...,2.000000,3.285714,4.571429,5.571429,6.000000,11.476190,23.333333,37.857143,2.476190,0
606,EDIERM[649.3660]VQEAEK,EDIERM[655.3735]VQEAEK,-0.909764,,,,,,,,...,2.666667,5.000000,5.285714,6.761905,6.809524,12.857143,36.238095,81.238095,2.857143,0
607,SM[649.3660]PWNVDTLSK,SM[655.3735]PWNVDTLSK,0.276155,-0.230254,0.361472,0.135673,-0.067806,-0.089783,0.210097,-0.544967,...,2.333333,3.476190,3.523810,4.380952,4.857143,9.238095,22.095238,40.380952,1.619048,0
608,LSLQEQDAAIVKNM[649.3660]K,LSLQEQDAAIVKNM[655.3735]K,,,,,,0.222761,,,...,1.809524,1.809524,2.142857,2.666667,3.000000,8.047619,14.857143,20.857143,1.142857,1


In [25]:
CHURRO_psi_and_psi_map = extract_PSI_and_PHI(clean_CHURRO_2, concat_dihedrals)
CHURRO_psi_and_psi_map

  df = df._append(new_row, ignore_index = True)


Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,-0.781354,-1.069864,-44.768303,-61.298705
1,-0.587313,-1.102210,-33.650528,-63.151991
2,2.594126,-1.566903,148.632480,-89.776905
3,-0.789571,-1.016269,-45.239094,-58.227902
4,-0.710617,-1.157256,-40.715336,-66.305895
...,...,...,...,...
605,-0.692984,-1.066275,-39.705080,-61.093071
606,-0.622047,-1.144587,-35.640643,-65.580007
607,2.254328,-1.198355,129.163484,-68.660701
608,-0.646178,-1.162867,-37.023279,-66.627367


In [26]:
sum(CHURRO_psi_and_psi_map['PHI Degrees'].isna())

0

In [27]:
CHURRO_2_with_PSI_and_PHI = pd.concat([clean_CHURRO_2, CHURRO_psi_and_psi_map], axis=1)
CHURRO_2_with_PSI_and_PHI

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,...,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,LQHVEDGVLSM[649.3660]QVASAR,LQHVEDGVLSM[655.3735]QVASAR,,,,7.233420,6.853712,6.550813,,5.946325,...,7.523810,11.952381,23.952381,43.190476,2.333333,0,-0.781354,-1.069864,-44.768303,-61.298705
1,IHQIEYAM[649.3660]EAVK,IHQIEYAM[655.3735]EAVK,7.437563,,,5.649850,,7.006431,5.829334,,...,9.095238,24.238095,59.190476,107.904762,6.238095,0,-0.587313,-1.102210,-33.650528,-63.151991
2,YHTSQSGDEM[649.3660]TSLSEYVSR,YHTSQSGDEM[655.3735]TSLSEYVSR,,3.404261,3.587883,4.018111,3.963818,4.068696,3.961067,3.689166,...,6.761905,18.714286,54.571429,106.047619,5.333333,0,2.594126,-1.566903,148.632480,-89.776905
3,AIGISNFNHLQVEM[649.3660]ILNKPGLK,AIGISNFNHLQVEM[655.3735]ILNKPGLK,2.572725,2.859067,5.304034,2.905442,2.890081,2.809590,2.710625,2.643557,...,7.238095,22.333333,70.333333,127.857143,6.619048,0,-0.789571,-1.016269,-45.239094,-58.227902
4,TSTVDLPIENQLLWQIDREMLNLYIENEGKM[649.3660]IMQDK,TSTVDLPIENQLLWQIDREMLNLYIENEGKM[655.3735]IMQDK,,,,,2.259787,,,2.211374,...,8.523810,19.190476,50.190476,111.476190,5.380952,0,-0.710617,-1.157256,-40.715336,-66.305895
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,TDFFIGGEEGM[649.3660]AEK,TDFFIGGEEGM[655.3735]AEK,0.008080,-0.318884,0.097640,-0.137602,0.183818,-0.011744,0.116157,0.026499,...,6.000000,11.476190,23.333333,37.857143,2.476190,0,-0.692984,-1.066275,-39.705080,-61.093071
606,EDIERM[649.3660]VQEAEK,EDIERM[655.3735]VQEAEK,-0.909764,,,,,,,,...,6.809524,12.857143,36.238095,81.238095,2.857143,0,-0.622047,-1.144587,-35.640643,-65.580007
607,SM[649.3660]PWNVDTLSK,SM[655.3735]PWNVDTLSK,0.276155,-0.230254,0.361472,0.135673,-0.067806,-0.089783,0.210097,-0.544967,...,4.857143,9.238095,22.095238,40.380952,1.619048,0,2.254328,-1.198355,129.163484,-68.660701
608,LSLQEQDAAIVKNM[649.3660]K,LSLQEQDAAIVKNM[655.3735]K,,,,,,0.222761,,,...,3.000000,8.047619,14.857143,20.857143,1.142857,1,-0.646178,-1.162867,-37.023279,-66.627367


In [28]:
#CHURRO_2_with_PSI_and_PHI.to_csv('ChURRO_2_with_alphafold_and_PSI_and_PHI_angles.csv')

# Ramachandran Analysis - ChURRO_3 Dataset

In [29]:
path = os.path.join(datasets_path, "ChURRO_3_with_alphafold.csv")
CHURRO_3_df = pd.read_csv(path).drop(columns = ['Unnamed: 0'])

pd.set_option('display.max_columns', None)
display(CHURRO_3_df)
pd.reset_option('display.max_columns')

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,3_3 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pval,average,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,SGEHDFGAAFDGDGDRNM[661.3660]ILGK,SGEHDFGAAFDGDGDRNM[667.3735]ILGK,,,4.117889,,,,,4.727480,,sp|P36871|PGM1_HUMAN,P36871,PGM1_HUMAN,PGM1,Phosphoglucomutase-1,0.043804,4.422685,M295,PGM1_M295,red,MVKIVTVKTQAYQDQKPGTSGLRKRVKVFQSSANYAENFIQSIIST...,SGEHDFGAAFDGDGDRNMILGK,277,22,SGEHDFGAAFDGDGDRN,17,294,TMKSGEHDFGAAFDGDGDRN,ILGKHGFFVNPSDSVAVIAA,P36871,217,M,294,98.84,-1.027,-0.722,-1.323,-1.238,4.103,2.917,1.622,3.187,-11.604,-10.699,-11.246,-9.357,STRN,STRN,0,0,1,0,0,0,0,0,0,2,2,2,5,6,7,9,39,116,230,8,0.0,0.0,0.0,0.0,2.047619,2.142857,3.238095,4.666667,6.809524,8.047619,10.047619,35.095238,102.000000,202.285714,9.095238,0
1,M[661.3660]LESYLHAK,M[667.3735]LESYLHAK,,,,,,,,1.845600,2.022412,sp|Q86X55|CARM1_HUMAN,Q86X55,CARM1_HUMAN,CARM1,Histone-arginine methyltransferase CARM1,0.029081,1.934006,M268,CARM1_M268,red,MAAAAAAVGPGAGGAGSAVPGGAGPCATVSVFPGARLLTIGDANGE...,MLESYLHAK,267,9,,0,267,PEQVDIIISEPMGYMLFNER,LESYLHAKKYLKPSGNMFPT,Q86X55,476,M,267,97.44,-0.831,-1.686,-2.607,-0.865,6.147,6.666,5.590,7.244,6.088,4.920,4.328,3.848,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,5,6,7,7,25,96,193,7,0.0,0.0,0.0,0.0,2.000000,2.095238,2.666667,4.571429,5.523810,6.809524,7.904762,25.904762,86.714286,172.285714,8.285714,0
2,AHSIQIM[661.3660]K,AHSIQIM[667.3735]K,,1.517974,1.695327,,,,,,,sp|Q02543|RL18A_HUMAN,Q02543,RL18A_HUMAN,RPL18A,Large ribosomal subunit protein eL20,0.035102,1.606651,M127,RL18A_M127,red,MKASGTLREYKVVGRCLPTPKCHTPPLYRMRIFAPNHVVAKSRFWY...,AHSIQIMK,120,8,AHSIQI,6,126,TQCYRDMGARHRARAHSIQI,KVEEIAASKCRRPAVKQFHD,Q02543,363,M,126,96.15,-1.126,-0.535,0.414,0.181,7.314,6.317,7.053,5.236,8.863,9.858,10.811,9.175,STRN,STRN,0,0,1,0,0,0,0,0,0,2,2,2,3,4,5,5,19,61,118,6,0.0,0.0,0.0,0.0,2.000000,2.095238,2.285714,3.095238,4.285714,5.333333,6.666667,17.952381,49.857143,82.714286,4.000000,0
3,GFQQILAGEYDHLPEQAFYM[661.3660]VGPIEEAVAK,GFQQILAGEYDHLPEQAFYM[667.3735]VGPIEEAVAK,,1.587855,1.706229,,,,,1.419860,,sp|P06576|ATPB_HUMAN,P06576,ATPB_HUMAN,ATP5F1B,"ATP synthase subunit beta, mitochondrial",0.002784,1.571315,M509,ATPB_M509,red,MLGFVGRVAAAPASGALRRLTPSASLPPAQLLLRAAPTAVHPVRDY...,GFQQILAGEYDHLPEQAFYMVGPIEEAVAK,489,30,GFQQILAGEYDHLPEQAFY,19,508,KGFQQILAGEYDHLPEQAFY,VGPIEEAVAKADKLAEEHSS,P06576,87,M,508,91.57,-17.173,-17.762,-17.615,-19.156,8.802,9.340,10.868,8.922,19.938,18.620,18.481,18.387,BEND,BEND,1,0,0,0,0,0,0,0,0,2,2,3,4,4,5,6,23,68,126,3,0.0,0.0,0.0,0.0,2.000000,2.000000,2.238095,4.095238,5.190476,6.000000,6.904762,19.380952,55.857143,99.571429,5.047619,0
4,NLKPIKPM[661.3660]QFLGDEETVRK,NLKPIKPM[667.3735]QFLGDEETVRK,1.806439,1.527960,1.332013,,,,1.576976,1.474839,1.359018,sp|P18669|PGAM1_HUMAN,P18669,PGAM1_HUMAN,PGAM1,Phosphoglycerate mutase 1,0.000004,1.512874,M230,PGAM1_M230,red,MAAYKLVLIRHGESAWNLENRFSGWYDADLSPAGHEEAKRGGQALR...,NLKPIKPMQFLGDEETVRK,222,19,NLKPIKP,7,229,LPTGIPIVYELDKNLKPIKP,QFLGDEETVRKAMEAVAAQG,P18669,152,M,229,93.80,-7.874,-6.929,-6.079,-6.067,-8.180,-7.138,-6.413,-7.763,9.483,10.102,9.040,11.107,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,2,3,4,5,6,13,47,112,2,0.0,0.0,0.0,0.0,2.000000,2.000000,2.285714,3.476190,4.428571,5.952381,6.619048,15.095238,42.142857,81.523810,2.904762,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1170,LVSDGQALPEM[661.3660]EIHLQTNAEK,LVSDGQALPEM[667.3735]EIHLQTNAEK,,0.549687,0.592261,,,,-0.680663,-0.187379,-0.281744,sp|Q12931|TRAP1_HUMAN,Q12931,TRAP1_HUMAN,TRAP1,"Heat shock protein 75 kDa, mitochondrial",0.995261,-0.001567,M141,TRAP1_M141,grey,MARELRALLLWGRRLRPLLRAPALAAVPGGKPILCPRRTTAQLGPR...,LVSDGQALPEMEIHLQTNAEK,130,21,LVSDGQALPE,10,140,SDALEKLRHKLVSDGQALPE,EIHLQTNAEKGTITIQDTGI,Q12931,386,M,140,95.33,26.111,26.372,25.139,27.520,-3.099,-3.904,-4.733,-4.789,-23.593,-24.857,-25.229,-24.657,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,2,2,3,5,7,21,55,108,8,0.0,0.0,0.0,0.0,2.000000,2.000000,2.619048,3.190476,4.428571,5.714286,6.714286,15.904762,41.190476,79.857143,2.809524,0
1171,SSIHNFM[661.3660]THPEFR,SSIHNFM[667.3735]THPEFR,,-0.278141,,,,,,0.282283,,sp|P20020|AT2B1_HUMAN,P20020,AT2B1_HUMAN,ATP2B1,Plasma membrane calcium-transporting ATPase 1,0.995294,0.002071,M1145,AT2B1_M1145,grey,MGDMANNSVAYSGVKNSLKEANHDGDFGITLAELRALMELRSTDAL...,SSIHNFMTHPEFR,1138,13,SSIHNF,6,1144,RSSLYEGLEKPESRSSIHNF,THPEFRIEDSEPHIPLIDDT,P20020,157,M,1144,28.74,-14.281,-14.253,-12.869,-15.346,-11.525,-11.231,-11.582,-11.981,-10.681,-9.174,-8.572,-8.527,unstructured,unstructured,0,0,0,0,1,0,0,0,0,1,1,2,2,2,2,2,4,8,11,0,0.0,0.0,0.0,0.0,1.238095,1.333333,2.000000,2.000000,2.000000,2.000000,2.047619,4.095238,7.285714,10.857143,0.000000,1
1172,AAFTVSLDPGPLEQFPHSM[661.3660]EPQLR,AAFTVSLDPGPLEQFPHSM[667.3735]EPQLR,,0.408145,,,,,,-0.412637,,sp|Q9UKD2|MRT4_HUMAN,Q9UKD2,MRT4_HUMAN,MRTO4,mRNA turnover protein 4 homolog,0.996516,-0.002246,M149,MRT4_M149,grey,MPKSKRDKKVSLTKTAKKGLELKQNLIEELRKCVDTYKYLFIFSVA...,AAFTVSLDPGPLEQFPHSMEPQLR,130,24,AAFTVSLDPGPLEQFPHS,18,148,NKAAFTVSLDPGPLEQFPHS,EPQLRQLGLPTALKRGVVTL,Q9UKD2,621,M,148,95.86,17.478,17.175,17.488,17.878,1.980,0.493,0.047,-0.357,-23.544,-23.384,-21.952,-24.347,HELX_RH_3T_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,3,6,7,7,11,37,57,2,0.0,0.0,0.0,0.0,2.000000,2.047619,2.285714,3.523810,4.761905,5.809524,6.476190,14.714286,38.761905,57.476190,3.047619,0
1173,AIGVLTSGGDAQGM[661.3660]NAAVR,AIGVLTSGGDAQGM[667.3735]NAAVR,,0.003958,-0.003922,,,,,,,sp|P17858|PFKAL_HUMAN,P17858,PFKAL_HUMAN,PFKL,"ATP-dependent 6-phosphofructokinase, liver type",0.997051,0.000018,M30,PFKAL_M30,grey,MAAVDLEKLRASGAGKAIGVLTSGGDAQGMNAAVRAVTRMGIYVGA...,AIGVLTSGGDAQGMNAAVR,16,19,AIGVLTSGGDAQG,13,29,RASGAGKAIGVLTSGGDAQG,NAAVRAVTRMGIYVGAKVFL,P17858,148,M,29,98.19,13.577,13.422,14.803,12.705,0.055,1.274,1.862,0.944,-8.847,-7.922,-7.603,-6.678,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,4,6,9,12,45,102,213,13,0.0,0.0,0.0,0.0,2.000000,2.047619,2.571429,4.476190,6.428571,7.904762,9.666667,35.047619,96.285714,192.095238,10.952381,0


In [30]:
# 10 of the proteins in the CHURRO_3 dataset have mismatched sequences, so we remove them
clean_CHURRO_3 = CHURRO_3_df[CHURRO_3_df['Protein ID'].isin(unique_mismatches) == False].reset_index(drop = True)
clean_CHURRO_3

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,...,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,SGEHDFGAAFDGDGDRNM[661.3660]ILGK,SGEHDFGAAFDGDGDRNM[667.3735]ILGK,,,4.117889,,,,,4.727480,...,3.238095,4.666667,6.809524,8.047619,10.047619,35.095238,102.000000,202.285714,9.095238,0
1,M[661.3660]LESYLHAK,M[667.3735]LESYLHAK,,,,,,,,1.845600,...,2.666667,4.571429,5.523810,6.809524,7.904762,25.904762,86.714286,172.285714,8.285714,0
2,AHSIQIM[661.3660]K,AHSIQIM[667.3735]K,,1.517974,1.695327,,,,,,...,2.285714,3.095238,4.285714,5.333333,6.666667,17.952381,49.857143,82.714286,4.000000,0
3,GFQQILAGEYDHLPEQAFYM[661.3660]VGPIEEAVAK,GFQQILAGEYDHLPEQAFYM[667.3735]VGPIEEAVAK,,1.587855,1.706229,,,,,1.419860,...,2.238095,4.095238,5.190476,6.000000,6.904762,19.380952,55.857143,99.571429,5.047619,0
4,NLKPIKPM[661.3660]QFLGDEETVRK,NLKPIKPM[667.3735]QFLGDEETVRK,1.806439,1.527960,1.332013,,,,1.576976,1.474839,...,2.285714,3.476190,4.428571,5.952381,6.619048,15.095238,42.142857,81.523810,2.904762,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1160,LVSDGQALPEM[661.3660]EIHLQTNAEK,LVSDGQALPEM[667.3735]EIHLQTNAEK,,0.549687,0.592261,,,,-0.680663,-0.187379,...,2.619048,3.190476,4.428571,5.714286,6.714286,15.904762,41.190476,79.857143,2.809524,0
1161,SSIHNFM[661.3660]THPEFR,SSIHNFM[667.3735]THPEFR,,-0.278141,,,,,,0.282283,...,2.000000,2.000000,2.000000,2.000000,2.047619,4.095238,7.285714,10.857143,0.000000,1
1162,AAFTVSLDPGPLEQFPHSM[661.3660]EPQLR,AAFTVSLDPGPLEQFPHSM[667.3735]EPQLR,,0.408145,,,,,,-0.412637,...,2.285714,3.523810,4.761905,5.809524,6.476190,14.714286,38.761905,57.476190,3.047619,0
1163,AIGVLTSGGDAQGM[661.3660]NAAVR,AIGVLTSGGDAQGM[667.3735]NAAVR,,0.003958,-0.003922,,,,,,...,2.571429,4.476190,6.428571,7.904762,9.666667,35.047619,96.285714,192.095238,10.952381,0


In [31]:
CHURRO_3_psi_and_psi_map = extract_PSI_and_PHI(clean_CHURRO_3, concat_dihedrals)
CHURRO_3_psi_and_psi_map

  df = df._append(new_row, ignore_index = True)


Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,2.442435,-1.758809,139.941233,-100.772305
1,-0.454934,-1.334826,-26.065813,-76.479890
2,-0.818994,-1.599069,-46.924893,-91.619911
3,1.027626,0.907058,58.878618,51.970601
4,2.457769,-0.946585,140.819803,-54.235342
...,...,...,...,...
1160,1.828180,-1.895589,104.746982,-108.609264
1161,1.526607,-3.103464,87.468132,-177.815396
1162,-0.218514,-1.480004,-12.519931,-84.797997
1163,-0.723987,-0.939829,-41.481381,-53.848240


In [32]:
sum(CHURRO_3_psi_and_psi_map['PHI Degrees'].isna())

0

In [33]:
CHURRO_3_with_PSI_and_PHI = pd.concat([clean_CHURRO_3, CHURRO_3_psi_and_psi_map], axis=1)
CHURRO_3_with_PSI_and_PHI

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,...,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,SGEHDFGAAFDGDGDRNM[661.3660]ILGK,SGEHDFGAAFDGDGDRNM[667.3735]ILGK,,,4.117889,,,,,4.727480,...,10.047619,35.095238,102.000000,202.285714,9.095238,0,2.442435,-1.758809,139.941233,-100.772305
1,M[661.3660]LESYLHAK,M[667.3735]LESYLHAK,,,,,,,,1.845600,...,7.904762,25.904762,86.714286,172.285714,8.285714,0,-0.454934,-1.334826,-26.065813,-76.479890
2,AHSIQIM[661.3660]K,AHSIQIM[667.3735]K,,1.517974,1.695327,,,,,,...,6.666667,17.952381,49.857143,82.714286,4.000000,0,-0.818994,-1.599069,-46.924893,-91.619911
3,GFQQILAGEYDHLPEQAFYM[661.3660]VGPIEEAVAK,GFQQILAGEYDHLPEQAFYM[667.3735]VGPIEEAVAK,,1.587855,1.706229,,,,,1.419860,...,6.904762,19.380952,55.857143,99.571429,5.047619,0,1.027626,0.907058,58.878618,51.970601
4,NLKPIKPM[661.3660]QFLGDEETVRK,NLKPIKPM[667.3735]QFLGDEETVRK,1.806439,1.527960,1.332013,,,,1.576976,1.474839,...,6.619048,15.095238,42.142857,81.523810,2.904762,0,2.457769,-0.946585,140.819803,-54.235342
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1160,LVSDGQALPEM[661.3660]EIHLQTNAEK,LVSDGQALPEM[667.3735]EIHLQTNAEK,,0.549687,0.592261,,,,-0.680663,-0.187379,...,6.714286,15.904762,41.190476,79.857143,2.809524,0,1.828180,-1.895589,104.746982,-108.609264
1161,SSIHNFM[661.3660]THPEFR,SSIHNFM[667.3735]THPEFR,,-0.278141,,,,,,0.282283,...,2.047619,4.095238,7.285714,10.857143,0.000000,1,1.526607,-3.103464,87.468132,-177.815396
1162,AAFTVSLDPGPLEQFPHSM[661.3660]EPQLR,AAFTVSLDPGPLEQFPHSM[667.3735]EPQLR,,0.408145,,,,,,-0.412637,...,6.476190,14.714286,38.761905,57.476190,3.047619,0,-0.218514,-1.480004,-12.519931,-84.797997
1163,AIGVLTSGGDAQGM[661.3660]NAAVR,AIGVLTSGGDAQGM[667.3735]NAAVR,,0.003958,-0.003922,,,,,,...,9.666667,35.047619,96.285714,192.095238,10.952381,0,-0.723987,-0.939829,-41.481381,-53.848240


In [34]:
#CHURRO_3_with_PSI_and_PHI.to_csv('ChURRO_3_with_alphafold_and_PSI_and_PHI_angles.csv', index=False)

# Ramachandran Analysis - ChURRO_4 Dataset

In [35]:
path = os.path.join(datasets_path, "ChURRO_4_with_alphafold.csv")
CHURRO_4_df = pd.read_csv(path).drop(columns = ['Unnamed: 0'])

pd.set_option('display.max_columns', None)
display(CHURRO_4_df)
pd.reset_option('display.max_columns')

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,1_4 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,2_4 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,avg ratio,neglogpval,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,LQHVEDGVLSM[716.3718]QVASAR,LQHVEDGVLSM[722.3793]QVASAR,,4.656273,,,4.133012,4.551815,,4.242820,sp|Q07065|CKAP4_HUMAN,Q07065,CKAP4_HUMAN,CKAP4,Cytoskeleton-associated protein 4,4.940850e-05,4.395980,4.306198,M423,CKAP4_M423,red,MPSAKQRGSKGGHGAASPSEKGAHPSGGADDVAKKPPPAPQQPPPP...,LQHVEDGVLSMQVASAR,412,17,LQHVEDGVLS,10,422,QQKSQGLDSRLQHVEDGVLS,QVASARQTESLESLLSKSQE,Q07065,230,M,422,71.71,-75.225,-76.396,-77.149,-77.368,-23.796,-23.479,-22.216,-24.573,41.658,40.716,41.163,40.667,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,4,6,8,8,12,27,46,3,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.428571,4.476190,5.666667,7.476190,7.523810,11.952381,23.952381,43.190476,2.333333,0
1,NLKPIKPM[716.3718]QFLGDEETVRK,NLKPIKPM[722.3793]QFLGDEETVRK,3.769576,4.084958,3.440390,3.120784,3.223466,3.237076,3.317560,3.738827,sp|P18669|PGAM1_HUMAN,P18669,PGAM1_HUMAN,PGAM1,Phosphoglycerate mutase 1,1.403240e-08,3.491580,7.852868,M230,PGAM1_M230,red,MAAYKLVLIRHGESAWNLENRFSGWYDADLSPAGHEEAKRGGQALR...,NLKPIKPMQFLGDEETVRK,222,19,NLKPIKP,7,229,LPTGIPIVYELDKNLKPIKP,QFLGDEETVRKAMEAVAAQG,P18669,92,M,229,93.80,-7.874,-6.929,-6.079,-6.067,-8.180,-7.138,-6.413,-7.763,9.483,10.102,9.040,11.107,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,2,3,4,5,6,13,47,112,2,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.285714,3.476190,4.428571,5.952381,6.619048,15.095238,42.142857,81.523810,2.904762,0
2,EEELKDIQNM[716.3718]NFLLK,EEELKDIQNM[722.3793]NFLLK,3.013929,3.156209,2.977990,3.132468,2.965553,3.016646,2.986863,3.034743,sp|Q86UP2|KTN1_HUMAN,Q86UP2,KTN1_HUMAN,KTN1,Kinectin,0.000000e+00,3.035550,12.153380,M643,KTN1_M643,red,MEFYESAYFIVLIPSIVITVIFLFFWLFMKETLYDEVLAKQKREQK...,EEELKDIQNMNFLLK,633,15,EEELKDIQN,9,642,LASERDRLTSKEEELKDIQN,NFLLKAEVQKLQALANEQAA,Q86UP2,297,M,642,83.74,3.531,4.540,5.906,4.069,7.059,6.902,6.443,5.983,2.448,3.590,3.058,4.628,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,5,7,8,8,12,20,27,2,0.0,0.0,0.0,0.000000,2.000000,2.000000,3.142857,5.285714,6.333333,7.714286,7.714286,12.000000,19.809524,26.952381,2.047619,1
3,NLLHVTDTGVGM[716.3718]TREELVK,NLLHVTDTGVGM[722.3793]TREELVK,2.985369,3.111739,,,2.774224,,,,sp|P14625|ENPL_HUMAN,P14625,ENPL_HUMAN,HSP90B1,Endoplasmin,1.106589e-03,2.957111,2.956014,M154,ENPL_M154,red,MRALWVLGLCCVLLTFGSVRADDEVDVDGTVEEDLGKSREGSRTDD...,NLLHVTDTGVGMTREELVK,142,19,NLLHVTDTGVG,11,153,VKIKCDKEKNLLHVTDTGVG,TREELVKNLGTIAKSGTSEF,P14625,83,M,153,96.50,25.719,24.545,24.625,24.534,11.862,11.609,12.561,10.213,-37.253,-36.310,-35.107,-35.863,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,3,4,6,7,9,27,80,131,8,0.0,0.0,0.0,0.000000,2.095238,2.285714,2.666667,4.000000,6.000000,7.238095,8.809524,23.428571,67.428571,128.190476,5.476190,0
4,M[716.3718]HTTFEHDIQALGTQVR,M[722.3793]HTTFEHDIQALGTQVR,,1.828016,,,2.890468,2.740659,3.560620,1.773127,sp|Q01082|SPTB2_HUMAN,Q01082,SPTB2_HUMAN,SPTBN1,"Spectrin beta chain, non-erythrocytic 1",1.650558e-03,2.558578,2.782369,M1845,SPTB2_M1845,red,MTTTVATDYDNIEIQQQYSDVNNRWDVDDWDNENSSARLFERSRIK...,MHTTFEHDIQALGTQVR,1844,17,,0,1844,HKKLPEELGRDQNTVETLQR,HTTFEHDIQALGTQVRQLQE,Q01082,220,M,1844,85.93,72.329,73.529,74.723,73.882,-13.480,-14.425,-13.633,-15.106,-49.841,-49.683,-49.137,-50.942,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,5,6,8,8,10,37,57,1,0.0,0.0,0.0,0.000000,1.952381,1.952381,2.238095,4.190476,4.761905,5.761905,6.238095,12.476190,34.809524,55.095238,2.476190,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
688,SGEGEVSGLM[716.3718]R,SGEGEVSGLM[722.3793]R,-0.297660,-0.280111,-0.458942,,-0.265283,0.388757,0.583342,0.346406,sp|Q13263|TIF1B_HUMAN,Q13263,TIF1B_HUMAN,TRIM28,Transcription intermediary factor 1-beta,9.886334e-01,0.002359,0.004965,M482,TIF1B_M482,grey,MAASAAAASAAAASAASGSPGPGEGSAGGEKRSTAPSAAASASASA...,SGEGEVSGLMR,472,11,SGEGEVSGL,9,481,EPHVSGVKRSRSGEGEVSGL,RKVPRVSLERLDLDLTADSQ,Q13263,240,M,481,34.62,-35.518,-35.522,-36.163,-36.205,5.698,4.831,3.465,5.523,44.880,46.149,45.852,47.251,unstructured,unstructured,0,0,0,0,1,0,0,0,0,1,1,2,2,2,2,2,4,6,10,0,0.0,0.0,0.0,0.047619,1.380952,1.428571,2.000000,2.000000,2.000000,2.000000,2.047619,4.047619,7.142857,10.380952,0.000000,1
689,FLRLM[716.3718]GAGK,FLRLM[722.3793]GAGK,,,,0.125250,,,,-0.121592,sp|O00193|SMAP_HUMAN,O00193,SMAP_HUMAN,SMAP,Small acidic protein,9.905643e-01,0.001829,0.004117,M45,SMAP_M45,grey,MSAARESHPHGVKRSASPDDDLGSSNWEAADLGNEERKQKFLRLMG...,FLRLMGAGK,40,9,FLRL,4,44,SNWEAADLGNEERKQKFLRL,GAGKKEHTGRLVIGDHKSTS,O00193,7,M,44,85.42,18.593,18.646,20.069,17.688,4.899,3.435,2.869,2.571,-13.774,-13.315,-13.476,-14.015,TURN_TY1_P,TURN,0,0,0,1,0,0,0,0,0,2,2,2,3,4,5,5,10,22,29,3,0.0,0.0,0.0,0.000000,1.238095,1.238095,2.142857,3.285714,3.857143,4.666667,4.761905,8.619048,16.476190,22.238095,1.666667,1
690,SEIEYYAM[716.3718]LAK,SEIEYYAM[722.3793]LAK,-0.315793,,,,0.323492,,,,sp|P62888|RL30_HUMAN,P62888,RL30_HUMAN,RPL30,Large ribosomal subunit protein eL30,9.923337e-01,0.003849,0.003342,M65,RL30_M65,grey,MVAAKKTKKSLESINSRLQLVMKSGKYVLGYKQTLKMIRQGKAKLV...,SEIEYYAMLAK,57,11,SEIEYYA,7,64,LVILANNCPALRKSEIEYYA,LAKTGVHHYSGNNIELGTAC,P62888,199,M,64,94.45,-12.994,-11.826,-12.078,-10.564,-3.793,-4.202,-5.568,-4.248,-0.635,-1.542,-2.203,-0.798,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,6,7,8,8,13,37,64,3,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.333333,3.809524,5.285714,6.619048,7.238095,15.857143,45.571429,73.904762,4.238095,0
691,EYWM[716.3718]DPEGEMKPGRK,EYWM[722.3793]DPEGEMKPGRK,,0.781025,,,,-0.132193,,-0.640560,sp|P53999|TCP4_HUMAN,P53999,TCP4_HUMAN,SUB1,Activated RNA polymerase II transcriptional co...,9.953121e-01,0.002757,0.002041,M90,TCP4_M90,grey,MPKSKELVSSSSSGSDSDSEVDKKLKRKKQVAPEKPVKKQKTGETS...,EYWMDPEGEMKPGRK,86,15,EYW,3,89,RYVSVRDFKGKVLIDIREYW,DPEGEMKPGRKGISLNPEQW,P53999,169,M,89,96.07,3.836,2.761,2.640,3.104,5.488,6.273,7.685,6.311,-20.202,-19.470,-20.059,-18.046,STRN,STRN,0,0,1,0,0,0,0,0,0,2,2,3,4,4,6,8,12,22,34,1,0.0,0.0,0.0,0.000000,2.000000,2.190476,2.476190,3.571429,4.380952,6.142857,7.476190,14.619048,28.476190,42.333333,1.761905,0


In [36]:
# 6 of the proteins in the CHURRO_4 dataset have mismatched sequences, so we remove them
clean_CHURRO_4 = CHURRO_4_df[CHURRO_4_df['Protein ID'].isin(unique_mismatches) == False].reset_index(drop = True)
clean_CHURRO_4

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,1_4 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,2_4 Log2 Ratio HL,...,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,LQHVEDGVLSM[716.3718]QVASAR,LQHVEDGVLSM[722.3793]QVASAR,,4.656273,,,4.133012,4.551815,,4.242820,...,2.428571,4.476190,5.666667,7.476190,7.523810,11.952381,23.952381,43.190476,2.333333,0
1,NLKPIKPM[716.3718]QFLGDEETVRK,NLKPIKPM[722.3793]QFLGDEETVRK,3.769576,4.084958,3.440390,3.120784,3.223466,3.237076,3.317560,3.738827,...,2.285714,3.476190,4.428571,5.952381,6.619048,15.095238,42.142857,81.523810,2.904762,0
2,EEELKDIQNM[716.3718]NFLLK,EEELKDIQNM[722.3793]NFLLK,3.013929,3.156209,2.977990,3.132468,2.965553,3.016646,2.986863,3.034743,...,3.142857,5.285714,6.333333,7.714286,7.714286,12.000000,19.809524,26.952381,2.047619,1
3,NLLHVTDTGVGM[716.3718]TREELVK,NLLHVTDTGVGM[722.3793]TREELVK,2.985369,3.111739,,,2.774224,,,,...,2.666667,4.000000,6.000000,7.238095,8.809524,23.428571,67.428571,128.190476,5.476190,0
4,M[716.3718]HTTFEHDIQALGTQVR,M[722.3793]HTTFEHDIQALGTQVR,,1.828016,,,2.890468,2.740659,3.560620,1.773127,...,2.238095,4.190476,4.761905,5.761905,6.238095,12.476190,34.809524,55.095238,2.476190,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
682,SGEGEVSGLM[716.3718]R,SGEGEVSGLM[722.3793]R,-0.297660,-0.280111,-0.458942,,-0.265283,0.388757,0.583342,0.346406,...,2.000000,2.000000,2.000000,2.000000,2.047619,4.047619,7.142857,10.380952,0.000000,1
683,FLRLM[716.3718]GAGK,FLRLM[722.3793]GAGK,,,,0.125250,,,,-0.121592,...,2.142857,3.285714,3.857143,4.666667,4.761905,8.619048,16.476190,22.238095,1.666667,1
684,SEIEYYAM[716.3718]LAK,SEIEYYAM[722.3793]LAK,-0.315793,,,,0.323492,,,,...,2.333333,3.809524,5.285714,6.619048,7.238095,15.857143,45.571429,73.904762,4.238095,0
685,EYWM[716.3718]DPEGEMKPGRK,EYWM[722.3793]DPEGEMKPGRK,,0.781025,,,,-0.132193,,-0.640560,...,2.476190,3.571429,4.380952,6.142857,7.476190,14.619048,28.476190,42.333333,1.761905,0


In [37]:
CHURRO_4_psi_and_psi_map = extract_PSI_and_PHI(clean_CHURRO_4, concat_dihedrals)
CHURRO_4_psi_and_psi_map

  df = df._append(new_row, ignore_index = True)


Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,-0.781354,-1.069864,-44.768303,-61.298705
1,2.457769,-0.946585,140.819803,-54.235342
2,-0.668732,-1.148097,-38.315541,-65.781106
3,2.397712,-2.288282,137.378791,-131.108894
4,-0.593221,-1.111664,-33.989066,-63.693630
...,...,...,...,...
682,2.148378,-1.089016,123.092976,-62.396023
683,0.036523,-1.320989,2.092631,-75.687091
684,-0.839279,-1.031756,-48.087119,-59.115286
685,2.227214,-1.584730,127.609944,-90.798340


In [38]:
sum(CHURRO_4_psi_and_psi_map['PHI Degrees'].isna())

0

In [39]:
CHURRO_4_with_PSI_and_PHI = pd.concat([clean_CHURRO_4, CHURRO_4_psi_and_psi_map], axis=1)
CHURRO_4_with_PSI_and_PHI

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,1_4 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,2_4 Log2 Ratio HL,...,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,LQHVEDGVLSM[716.3718]QVASAR,LQHVEDGVLSM[722.3793]QVASAR,,4.656273,,,4.133012,4.551815,,4.242820,...,7.523810,11.952381,23.952381,43.190476,2.333333,0,-0.781354,-1.069864,-44.768303,-61.298705
1,NLKPIKPM[716.3718]QFLGDEETVRK,NLKPIKPM[722.3793]QFLGDEETVRK,3.769576,4.084958,3.440390,3.120784,3.223466,3.237076,3.317560,3.738827,...,6.619048,15.095238,42.142857,81.523810,2.904762,0,2.457769,-0.946585,140.819803,-54.235342
2,EEELKDIQNM[716.3718]NFLLK,EEELKDIQNM[722.3793]NFLLK,3.013929,3.156209,2.977990,3.132468,2.965553,3.016646,2.986863,3.034743,...,7.714286,12.000000,19.809524,26.952381,2.047619,1,-0.668732,-1.148097,-38.315541,-65.781106
3,NLLHVTDTGVGM[716.3718]TREELVK,NLLHVTDTGVGM[722.3793]TREELVK,2.985369,3.111739,,,2.774224,,,,...,8.809524,23.428571,67.428571,128.190476,5.476190,0,2.397712,-2.288282,137.378791,-131.108894
4,M[716.3718]HTTFEHDIQALGTQVR,M[722.3793]HTTFEHDIQALGTQVR,,1.828016,,,2.890468,2.740659,3.560620,1.773127,...,6.238095,12.476190,34.809524,55.095238,2.476190,0,-0.593221,-1.111664,-33.989066,-63.693630
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
682,SGEGEVSGLM[716.3718]R,SGEGEVSGLM[722.3793]R,-0.297660,-0.280111,-0.458942,,-0.265283,0.388757,0.583342,0.346406,...,2.047619,4.047619,7.142857,10.380952,0.000000,1,2.148378,-1.089016,123.092976,-62.396023
683,FLRLM[716.3718]GAGK,FLRLM[722.3793]GAGK,,,,0.125250,,,,-0.121592,...,4.761905,8.619048,16.476190,22.238095,1.666667,1,0.036523,-1.320989,2.092631,-75.687091
684,SEIEYYAM[716.3718]LAK,SEIEYYAM[722.3793]LAK,-0.315793,,,,0.323492,,,,...,7.238095,15.857143,45.571429,73.904762,4.238095,0,-0.839279,-1.031756,-48.087119,-59.115286
685,EYWM[716.3718]DPEGEMKPGRK,EYWM[722.3793]DPEGEMKPGRK,,0.781025,,,,-0.132193,,-0.640560,...,7.476190,14.619048,28.476190,42.333333,1.761905,0,2.227214,-1.584730,127.609944,-90.798340


In [40]:
#CHURRO_4_with_PSI_and_PHI.to_csv('ChURRO_4_with_alphafold_and_PSI_and_PHI_angles.csv', index=False)

# Ramachandran Analysis - ChURRO_5 Dataset

In [41]:
path = os.path.join(datasets_path, "ChURRO_5_with_alphafold.csv")
CHURRO_5_df = pd.read_csv(path).drop(columns = ['Unnamed: 0'])

pd.set_option('display.max_columns', None)
display(CHURRO_5_df)
pd.reset_option('display.max_columns')

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,3_3 Log2 Ratio HL,Protein,Protein ID,Entry Name,Gene,Protein Description,pvalue,avg ratio,neglogpval,Site,Label,Color,Complete Sequence,Peptide Sequence,Sequence Location,Sequence Length,Left Prefix,Left Prefix Length,Methionine Location,Left 20,Right 20,protein_id,protein_number,AA,position,quality,x_coord_c,x_coord_ca,x_coord_cb,x_coord_n,y_coord_c,y_coord_ca,y_coord_cb,y_coord_n,z_coord_c,z_coord_ca,z_coord_cb,z_coord_n,secondary_structure,structure_group,BEND,HELX,STRN,TURN,unstructured,nAA_2_180_pae,nAA_3_180_pae,nAA_4_180_pae,nAA_4.5_180_pae,nAA_5_180_pae,nAA_5.5_180_pae,nAA_6_180_pae,nAA_6.5_180_pae,nAA_7_180_pae,nAA_7.5_180_pae,nAA_8_180_pae,nAA_12_180_pae,nAA_18_180_pae,nAA_24_180_pae,nAA_12_70_pae,nAA_2_180_pae_smooth10,nAA_3_180_pae_smooth10,nAA_4_180_pae_smooth10,nAA_4.5_180_pae_smooth10,nAA_5_180_pae_smooth10,nAA_5.5_180_pae_smooth10,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,GQYISPFHDIPIYADKDVFHM[695.3503]VVEVPR,GQYISPFHDIPIYADKDVFHM[701.3578]VVEVPR,,,,,3.997861,4.300817,,,,sp|Q15181|IPYR_HUMAN,Q15181,IPYR_HUMAN,PPA1,Inorganic pyrophosphatase,0.023230,4.149339,1.633942,M46,IPYR_M46,red,MSGFSTEERAAPFSLEYRVFLKNEKGQYISPFHDIPIYADKDVFHM...,GQYISPFHDIPIYADKDVFHMVVEVPR,25,27,GQYISPFHDIPIYADKDVFH,20,45,GQYISPFHDIPIYADKDVFH,VVEVPRWSNAKMEIATKDPL,Q15181,283,M,45,98.75,3.079,3.104,1.693,3.834,5.814,5.734,5.718,6.887,0.611,2.134,2.730,2.644,STRN,STRN,0,0,1,0,0,0,0,0,0,2,2,3,4,6,9,12,40,107,181,14,0.0,0.0,0.0,0.000000,2.000000,2.285714,2.809524,4.238095,5.952381,7.285714,9.142857,25.238095,74.333333,148.714286,5.857143,0
1,MMVDKDGDVTVTNDGATILSMM[695.3503]DVDHQIAK,MMVDKDGDVTVTNDGATILSM[701.3578]MDVDHQIAK,,,,,,,,3.390691,3.057908,sp|P48643|TCPE_HUMAN,P48643,TCPE_HUMAN,CCT5,T-complex protein 1 subunit epsilon,0.032824,3.224300,1.483810,M81,TCPE_M81,red,MASMGTLAFDEYGRPFLIIKDQDRKSRLMGLEALKSHIMAAKAVAN...,MMVDKDGDVTVTNDGATILSMMDVDHQIAK,59,30,MMVDKDGDVTVTNDGATILSM,21,80,MVDKDGDVTVTNDGATILSM,DVDHQIAKLMVELSKSQDDE,P48643,159,M,80,88.59,10.881,10.560,10.867,9.162,7.841,6.391,6.130,6.083,2.796,2.413,0.930,2.709,BEND,BEND,1,0,0,0,0,0,0,0,0,2,2,2,2,2,3,4,10,48,81,2,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.190476,3.619048,4.190476,5.380952,5.714286,16.571429,55.904762,108.142857,5.190476,0
2,EEELKDIQNM[695.3503]NFLLK,EEELKDIQNM[701.3578]NFLLK,,,3.025504,1.850107,2.063453,1.286034,5.001484,4.197276,4.719075,sp|Q86UP2|KTN1_HUMAN,Q86UP2,KTN1_HUMAN,KTN1,Kinectin,0.001366,3.163276,2.864502,M643,KTN1_M643,red,MEFYESAYFIVLIPSIVITVIFLFFWLFMKETLYDEVLAKQKREQK...,EEELKDIQNMNFLLK,633,15,EEELKDIQN,9,642,LASERDRLTSKEEELKDIQN,NFLLKAEVQKLQALANEQAA,Q86UP2,304,M,642,83.74,3.531,4.540,5.906,4.069,7.059,6.902,6.443,5.983,2.448,3.590,3.058,4.628,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,5,7,8,8,12,20,27,2,0.0,0.0,0.0,0.000000,2.000000,2.000000,3.142857,5.285714,6.333333,7.714286,7.714286,12.000000,19.809524,26.952381,2.047619,1
3,LGEM[695.3503]WSEQSAK,LGEM[701.3578]WSEQSAK,,,,1.829404,1.759502,1.939710,,,4.395717,sp|P26583|HMGB2_HUMAN,P26583,HMGB2_HUMAN,HMGB2,High mobility group protein B2,0.030303,2.481083,1.518515,M132,HMGB2_M132,red,MGKGDPNKPRGKMSSYAFFVQTCREEHKKKHPDSSVNFAEFSKKCS...,LGEMWSEQSAK,128,11,LGE,3,131,KIKSEHPGLSIGDTAKKLGE,WSEQSAKDKQPYEQKAAKLK,P26583,108,M,131,91.80,-11.727,-10.592,-10.191,-9.444,-19.313,-20.324,-20.395,-19.979,7.724,7.853,9.331,7.010,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,4,6,7,7,13,35,50,4,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.095238,3.761905,4.666667,6.000000,6.190476,11.904762,28.761905,42.190476,2.476190,0
4,AVENYLIQM[695.3503]AR,AVENYLIQM[701.3578]AR,,2.517243,2.418565,2.137920,,,,2.337744,,sp|O14737|PDCD5_HUMAN,O14737,PDCD5_HUMAN,PDCD5,Programmed cell death protein 5,0.000088,2.352868,4.055731,M77,PDCD5_M77,red,MADEELEALRRQRLAELQAKHGDPGDAAQQEAKHREAEMRNSILAQ...,AVENYLIQMAR,68,11,AVENYLIQ,8,76,SNLALVKPEKTKAVENYLIQ,ARYGQLSEKVSEQGLIEILK,O14737,10,M,76,81.72,6.459,5.821,4.480,5.620,-0.711,0.617,0.333,1.535,8.685,9.111,9.788,7.984,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,3,5,6,6,12,30,57,2,0.0,0.0,0.0,0.000000,1.809524,1.809524,2.095238,3.809524,4.238095,5.238095,5.285714,10.619048,29.285714,51.714286,2.238095,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
698,KGDIVDIKGM[695.3503]GTVQK,KGDIVDIKGM[701.3578]GTVQK,,0.612230,-0.372074,-0.465165,0.065857,0.175049,,,,sp|P46778|RL21_HUMAN,P46778,RL21_HUMAN,RPL21,Large ribosomal subunit protein eL21,0.987805,0.003179,0.005329,M45,RL21_M45,grey,MTNTKGKRRGTRYMFSRPFRKHGVVPLATYMRIYKKGDIVDIKGMG...,KGDIVDIKGMGTVQK,35,15,KGDIVDIKG,9,44,VPLATYMRIYKKGDIVDIKG,GTVQKGMPHKCYHGKTGRVY,P46778,152,M,44,93.38,6.602,5.655,4.189,5.831,-5.218,-4.880,-5.171,-3.481,-14.152,-13.005,-13.365,-12.600,unstructured,unstructured,0,0,0,0,1,0,0,0,0,2,2,2,2,2,4,5,16,45,78,1,0.0,0.0,0.0,0.000000,2.000000,2.047619,2.285714,2.904762,4.190476,5.523810,7.047619,18.714286,50.857143,79.476190,4.047619,0
699,AITGASLADIM[695.3503]AK,AITGASLADIM[701.3578]AK,,0.878992,,0.613909,-0.488093,-0.495052,-0.533030,,,sp|P83731|RL24_HUMAN,P83731,RL24_HUMAN,RPL24,Large ribosomal subunit protein eL24,0.988724,-0.004655,0.004925,M91,RL24_M91,grey,MKVELCSFSGYKIYPGHGRRYARTDGKVFQFLNAKCESAFLSKRNP...,AITGASLADIMAK,80,13,AITGASLADI,10,90,RTRRAVKFQRAITGASLADI,AKRNQKPEVRKAQREQAIRA,P83731,226,M,90,80.44,-24.507,-23.526,-22.116,-23.951,-2.812,-2.057,-2.099,-0.673,37.329,36.429,37.027,36.195,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,3,4,6,6,10,20,29,1,0.0,0.0,0.0,0.000000,1.904762,1.904762,2.000000,2.809524,3.380952,4.000000,4.428571,8.904762,17.000000,23.714286,1.095238,1
700,QM[695.3503]QSSFTSSEQELER,QM[701.3578]QSSFTSSEQELER,,,,-0.213786,,0.209838,,,,sp|Q86UP2|KTN1_HUMAN,Q86UP2,KTN1_HUMAN,KTN1,Kinectin,0.994066,-0.001974,0.002585,M1178,KTN1_M1178,grey,MEFYESAYFIVLIPSIVITVIFLFFWLFMKETLYDEVLAKQKREQK...,QMQSSFTSSEQELER,1176,15,Q,1,1177,EQEENKWKVKVDESHKTIKQ,QSSFTSSEQELERLRSENKD,Q86UP2,304,M,1177,86.46,10.959,11.746,11.492,13.189,0.149,1.424,2.476,1.179,-14.825,-14.485,-15.574,-14.331,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,3,6,7,8,8,12,20,28,2,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.714286,5.047619,6.190476,7.904762,7.904762,12.142857,20.190476,27.333333,1.904762,1
701,RDHFEEAM[695.3503]R,RDHFEEAM[701.3578]R,,,,-1.053555,0.704664,0.357943,,,,sp|P55072|TERA_HUMAN,P55072,TERA_HUMAN,VCP,Transitional endoplasmic reticulum ATPase,0.996032,0.003017,0.001727,M740,TERA_M740,grey,MASGADSKGDDLSTAILKQKNRPNRLIVDEAINEDNSVVSLSQPKM...,RDHFEEAMR,732,9,RDHFEEA,7,739,MEVEEDDPVPEIRRDHFEEA,RFARRSVSDNDIRKYEMFAQ,P55072,186,M,739,92.10,30.347,29.227,28.676,29.658,16.244,16.898,18.159,17.253,-33.989,-33.168,-33.851,-31.817,HELX_RH_AL_P,HELX,0,1,0,0,0,0,0,0,0,2,2,2,4,6,7,7,19,46,77,6,0.0,0.0,0.0,0.000000,2.000000,2.000000,2.238095,3.380952,4.190476,5.047619,5.761905,13.714286,38.714286,67.380952,3.190476,0


In [42]:
# 6 of the proteins in the CHURRO_5 dataset have mismatched sequences, so we remove them
clean_CHURRO_5 = CHURRO_5_df[CHURRO_5_df['Protein ID'].isin(unique_mismatches) == False].reset_index(drop=True)
clean_CHURRO_5

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,...,nAA_6_180_pae_smooth10,nAA_6.5_180_pae_smooth10,nAA_7_180_pae_smooth10,nAA_7.5_180_pae_smooth10,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR
0,GQYISPFHDIPIYADKDVFHM[695.3503]VVEVPR,GQYISPFHDIPIYADKDVFHM[701.3578]VVEVPR,,,,,3.997861,4.300817,,,...,2.809524,4.238095,5.952381,7.285714,9.142857,25.238095,74.333333,148.714286,5.857143,0
1,MMVDKDGDVTVTNDGATILSMM[695.3503]DVDHQIAK,MMVDKDGDVTVTNDGATILSM[701.3578]MDVDHQIAK,,,,,,,,3.390691,...,2.190476,3.619048,4.190476,5.380952,5.714286,16.571429,55.904762,108.142857,5.190476,0
2,EEELKDIQNM[695.3503]NFLLK,EEELKDIQNM[701.3578]NFLLK,,,3.025504,1.850107,2.063453,1.286034,5.001484,4.197276,...,3.142857,5.285714,6.333333,7.714286,7.714286,12.000000,19.809524,26.952381,2.047619,1
3,LGEM[695.3503]WSEQSAK,LGEM[701.3578]WSEQSAK,,,,1.829404,1.759502,1.939710,,,...,2.095238,3.761905,4.666667,6.000000,6.190476,11.904762,28.761905,42.190476,2.476190,0
4,AVENYLIQM[695.3503]AR,AVENYLIQM[701.3578]AR,,2.517243,2.418565,2.137920,,,,2.337744,...,2.095238,3.809524,4.238095,5.238095,5.285714,10.619048,29.285714,51.714286,2.238095,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692,KGDIVDIKGM[695.3503]GTVQK,KGDIVDIKGM[701.3578]GTVQK,,0.612230,-0.372074,-0.465165,0.065857,0.175049,,,...,2.285714,2.904762,4.190476,5.523810,7.047619,18.714286,50.857143,79.476190,4.047619,0
693,AITGASLADIM[695.3503]AK,AITGASLADIM[701.3578]AK,,0.878992,,0.613909,-0.488093,-0.495052,-0.533030,,...,2.000000,2.809524,3.380952,4.000000,4.428571,8.904762,17.000000,23.714286,1.095238,1
694,QM[695.3503]QSSFTSSEQELER,QM[701.3578]QSSFTSSEQELER,,,,-0.213786,,0.209838,,,...,2.714286,5.047619,6.190476,7.904762,7.904762,12.142857,20.190476,27.333333,1.904762,1
695,RDHFEEAM[695.3503]R,RDHFEEAM[701.3578]R,,,,-1.053555,0.704664,0.357943,,,...,2.238095,3.380952,4.190476,5.047619,5.761905,13.714286,38.714286,67.380952,3.190476,0


In [43]:
CHURRO_5_psi_and_psi_map = extract_PSI_and_PHI(clean_CHURRO_5, concat_dihedrals)
CHURRO_5_psi_and_psi_map

  df = df._append(new_row, ignore_index = True)


Unnamed: 0,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,2.355836,-2.144990,134.979488,-122.898859
1,2.108450,-1.305379,120.805315,-74.792719
2,-0.668732,-1.148097,-38.315541,-65.781106
3,-0.714445,-1.011894,-40.934669,-57.977265
4,-0.674473,-1.005026,-38.644440,-57.583767
...,...,...,...,...
692,2.430323,-2.171364,139.247273,-124.409990
693,-0.750828,-1.166838,-43.019297,-66.854912
694,-0.704603,-1.016828,-40.370798,-58.259925
695,-0.555659,-1.146523,-31.836928,-65.690956


In [44]:
sum(CHURRO_5_psi_and_psi_map['PHI Degrees'].isna())

0

In [45]:
CHURRO_5_with_PSI_and_PHI = pd.concat([clean_CHURRO_5, CHURRO_5_psi_and_psi_map], axis=1)
CHURRO_5_with_PSI_and_PHI

Unnamed: 0,Light Modified Peptide,Heavy Modified Peptide,1_1 Log2 Ratio HL,1_2 Log2 Ratio HL,1_3 Log2 Ratio HL,2_1 Log2 Ratio HL,2_2 Log2 Ratio HL,2_3 Log2 Ratio HL,3_1 Log2 Ratio HL,3_2 Log2 Ratio HL,...,nAA_8_180_pae_smooth10,nAA_12_180_pae_smooth10,nAA_18_180_pae_smooth10,nAA_24_180_pae_smooth10,nAA_12_70_pae_smooth10,IDR,PSI Radians,PHI Radians,PSI Degrees,PHI Degrees
0,GQYISPFHDIPIYADKDVFHM[695.3503]VVEVPR,GQYISPFHDIPIYADKDVFHM[701.3578]VVEVPR,,,,,3.997861,4.300817,,,...,9.142857,25.238095,74.333333,148.714286,5.857143,0,2.355836,-2.144990,134.979488,-122.898859
1,MMVDKDGDVTVTNDGATILSMM[695.3503]DVDHQIAK,MMVDKDGDVTVTNDGATILSM[701.3578]MDVDHQIAK,,,,,,,,3.390691,...,5.714286,16.571429,55.904762,108.142857,5.190476,0,2.108450,-1.305379,120.805315,-74.792719
2,EEELKDIQNM[695.3503]NFLLK,EEELKDIQNM[701.3578]NFLLK,,,3.025504,1.850107,2.063453,1.286034,5.001484,4.197276,...,7.714286,12.000000,19.809524,26.952381,2.047619,1,-0.668732,-1.148097,-38.315541,-65.781106
3,LGEM[695.3503]WSEQSAK,LGEM[701.3578]WSEQSAK,,,,1.829404,1.759502,1.939710,,,...,6.190476,11.904762,28.761905,42.190476,2.476190,0,-0.714445,-1.011894,-40.934669,-57.977265
4,AVENYLIQM[695.3503]AR,AVENYLIQM[701.3578]AR,,2.517243,2.418565,2.137920,,,,2.337744,...,5.285714,10.619048,29.285714,51.714286,2.238095,0,-0.674473,-1.005026,-38.644440,-57.583767
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692,KGDIVDIKGM[695.3503]GTVQK,KGDIVDIKGM[701.3578]GTVQK,,0.612230,-0.372074,-0.465165,0.065857,0.175049,,,...,7.047619,18.714286,50.857143,79.476190,4.047619,0,2.430323,-2.171364,139.247273,-124.409990
693,AITGASLADIM[695.3503]AK,AITGASLADIM[701.3578]AK,,0.878992,,0.613909,-0.488093,-0.495052,-0.533030,,...,4.428571,8.904762,17.000000,23.714286,1.095238,1,-0.750828,-1.166838,-43.019297,-66.854912
694,QM[695.3503]QSSFTSSEQELER,QM[701.3578]QSSFTSSEQELER,,,,-0.213786,,0.209838,,,...,7.904762,12.142857,20.190476,27.333333,1.904762,1,-0.704603,-1.016828,-40.370798,-58.259925
695,RDHFEEAM[695.3503]R,RDHFEEAM[701.3578]R,,,,-1.053555,0.704664,0.357943,,,...,5.761905,13.714286,38.714286,67.380952,3.190476,0,-0.555659,-1.146523,-31.836928,-65.690956


In [46]:
#CHURRO_5_with_PSI_and_PHI.to_csv('ChURRO_5_with_alphafold_and_PSI_and_PHI_angles.csv', index=False)

# End