# Reading in DMS2structure results and parsing them

To use in comparison of single vs double filament phi analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
import scipy

In [110]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
%%bash

pip freeze

archspec @ file:///croot/archspec_1697725767277/work
arcplot==0.1.4
asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work
biopython==1.83
boltons @ file:///work/ci_py311/boltons_1677685195580/work
Brotli @ file:///work/ci_py311/brotli-split_1676830125088/work
certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1707022139797/work/certifi
cffi @ file:///croot/cffi_1700254295673/work
charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1710320294760/work
conda-content-trust @ file:///croot/conda-content-trust_1693490622020/work
conda-package-handling @ file:///croot/conda-package-handling_1690999929514/work
conda_package_streaming @ file:///croot/conda-package-streaming_1690987966409/work
contourpy==1.2.0
cryptography @ file:///croot/cryptography_1702070282333/work
cycler==0.12.1
debugpy @ file:///croot/debugpy_1690905042057/work
decorator @ f

In [4]:
structures = ['2nao','5kk3','5oqv','7q4b','7q4m','8ezd','8eze']

In [62]:
# read in tables with distances
results_dir = '/lustre/scratch126/gengen/projects/amyloid_beta_epistasis/DMS2structure_code_and_results/double_filament_2_monomers_facing/'

distances_3d = {}

for structure in structures:
    print(structure)
    
    distances_3d[structure] = pd.read_csv(results_dir + structure + '/processed_data/PDB_contactmap_' + structure + '_2_monomers_facing_pseudomonomer_A.txt',
                                         sep=' ')
    
    distances_3d[structure] = distances_3d[structure].replace([np.inf, -np.inf], np.nan)
    distances_3d[structure] = distances_3d[structure].dropna(subset=["scHAmin"], how="all")

2nao
5kk3
5oqv
7q4b
7q4m
8ezd
8eze


In [71]:
# positions for which structural information is available
pos_available = {}
pos_available['2nao'] = [i for i in range(0,42)] # from pos 1 to 42, here enumerated from 0
pos_available['5kk3'] = [i for i in range(10,42)] # from pos 11 to 42, here enumerated from 0
pos_available['5oqv'] = [i for i in range(0,42)] # from pos 1 to 42, here enumerated from 0
pos_available['7q4b'] = [i for i in range(8,42)] # from pos 9 to 42, here enumerated from 0
pos_available['7q4m'] = [i for i in range(11,42)] # from pos 12 to 42, here enumerated from 0
pos_available['8ezd'] = [i for i in range(11,42)] # from pos 12 to 42, here enumerated from 0
pos_available['8eze'] = [i for i in range(0,42)] # from pos 1 to 42, here enumerated from 0


In [74]:
len(pos_available['2nao'])

42

In [82]:
len(pos_available['5kk3'])

32

In [81]:
AB_WT = 'DAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIA'


In [96]:
AB_WT[pos_available['2nao'][0]]

'D'

In [98]:
pos_available['2nao']

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41]

In [118]:
# in case of 2nao the first 42 positions are from one chain, and positions 43-84 - from another
# so need to mark that in the table

for structure in structures:
    print(structure)
    
    distances_3d[structure]['Pos1_chain_id'] = ['1' if 0 < Pos1 < len(pos_available[structure])+1 else '2' for Pos1 in distances_3d[structure]['Pos1']]
    distances_3d[structure]['Pos2_chain_id'] = ['1' if 0 < Pos2 < len(pos_available[structure])+1 else '2' for Pos2 in distances_3d[structure]['Pos2']]

    distances_3d[structure]['Pos1_Pos2_chain_id'] = [Pos1_chain_id + '_' + Pos2_chain_id for Pos1_chain_id, Pos2_chain_id in zip(distances_3d[structure]['Pos1_chain_id'],
                                                                                            distances_3d[structure]['Pos2_chain_id'])]
    distances_3d[structure]['Pos1_in_Abeta'] = ['' for i in range(len(distances_3d[structure]))]                                                                                        
    distances_3d[structure]['Pos2_in_Abeta'] = ['' for i in range(len(distances_3d[structure]))]        
    
    for idx in list(distances_3d[structure].index):
        
        pos1 = distances_3d[structure].loc[idx, 'Pos1']
        
        if 0 < pos1 < len(pos_available[structure])+1:
            curr_pos1_in_Abeta = AB_WT[pos_available[structure][0] + (pos1 - 1)] + '-' + str(pos_available[structure][0] + pos1)
        else:
            curr_pos1_in_Abeta = AB_WT[pos_available[structure][0] + (pos1 - len(pos_available[structure]) - 1)] + '-' + str(pos1 - len(pos_available[structure]))
        
        distances_3d[structure].loc[idx,'Pos1_in_Abeta'] = curr_pos1_in_Abeta
        
    
        pos2 = distances_3d[structure].loc[idx, 'Pos2']

        if 0 < pos2 < len(pos_available[structure])+1:
            curr_pos2_in_Abeta = AB_WT[pos_available[structure][0] + (pos2 - 1)] + '-' + str(pos_available[structure][0] + pos2)
        else:
            curr_pos2_in_Abeta = AB_WT[pos_available[structure][0] + (pos2 - len(pos_available[structure]) - 1)] + '-' + str(pos2 - len(pos_available[structure]))
        
        distances_3d[structure].loc[idx,'Pos2_in_Abeta'] = curr_pos2_in_Abeta
        
    

2nao
5kk3
5oqv
7q4b
7q4m
8ezd
8eze


In [122]:
distances_3d_interchain_only = {}

for structure in structures:
    print(structure)
    distances_3d_interchain_only[structure] = distances_3d[structure][(distances_3d[structure]['Pos1_Pos2_chain_id'] == '1_2') | (distances_3d[structure]['Pos1_Pos2_chain_id'] == '2_1')]
    

2nao
5kk3
5oqv
7q4b
7q4m
8ezd
8eze


In [138]:
# save to be used later

for structure in structures:
    print(structure)
    distances_3d[structure].to_csv('./files/20240507_3D_distances_2_filament_facing_structure_' + structure + '.csv')
    

2nao
5kk3
5oqv
7q4b
7q4m
8ezd
8eze


In [123]:
distances_3d_interchain_only['2nao'][distances_3d_interchain_only['2nao']['scHAmin'] < 5]

Unnamed: 0,Pos1,Pos2,WT_AA1,WT_AA2,chainids,HAmin,scHAmin,CB,HAmin_sd,scHAmin_sd,CB_sd,Pos1_chain_id,Pos2_chain_id,Pos1_Pos2_chain_id,Pos1_in_Abeta,Pos2_in_Abeta
1252,15,77,Q,M,A,3.831841,4.455919,7.071708,0,0,0,1,2,1_2,Q-15,M-35
2912,35,57,M,Q,A,3.686819,3.849388,7.05108,0,0,0,1,2,1_2,M-35,Q-15
2914,35,59,M,L,A,4.070131,4.448036,8.969041,0,0,0,1,2,1_2,M-35,L-17
2931,35,76,M,L,A,3.893813,4.222465,4.850061,0,0,0,1,2,1_2,M-35,L-34
4738,57,35,Q,M,A,3.686819,3.849388,7.05108,0,0,0,2,1,2_1,Q-15,M-35
4906,59,35,L,M,A,4.070131,4.448036,8.969041,0,0,0,2,1,2_1,L-17,M-35
6334,76,35,L,M,A,3.893813,4.222465,4.850061,0,0,0,2,1,2_1,L-34,M-35
6398,77,15,M,Q,A,3.831841,4.455919,7.071708,0,0,0,2,1,2_1,M-35,Q-15


In [137]:
contacting_positions = {}

cutoff = 6

for structure in structures:
    print(structure)
    
    contacting_positions[structure] = np.unique(list(distances_3d_interchain_only[structure][distances_3d_interchain_only[structure]['scHAmin'] < cutoff]['Pos1_in_Abeta']) + list(distances_3d_interchain_only[structure][distances_3d_interchain_only[structure]['scHAmin'] < cutoff]['Pos2_in_Abeta']))
    
    print(contacting_positions[structure])

2nao
['G-37' 'H-14' 'L-17' 'L-34' 'M-35' 'Q-15']
5kk3
['G-27' 'G-28' 'G-37' 'G-38' 'H-13' 'H-4' 'L-17' 'L-24' 'L-34' 'L-7'
 'M-25' 'M-35' 'Q-15' 'Q-5']
5oqv
['G-38' 'I-41']
7q4b
['F-19' 'G-29' 'L-17' 'L-34' 'V-12' 'V-28' 'V-31']
7q4m
['A-31' 'A-42' 'G-18' 'G-29' 'K-17' 'K-28']
8ezd
['A-19' 'G-18' 'G-22' 'G-33' 'I-21' 'I-41' 'K-17' 'M-35' 'V-40']
8eze
['A-42' 'F-20' 'G-25' 'G-37' 'G-38' 'K-16' 'L-34' 'N-27' 'V-39']


In [None]:
for structure in structures:
    print(structure)
    
    fig = plt.figure(figsize=(10, 5))
    
    sns.displot(distances_3d_interchain_only[structure]['scHAmin'])

In [131]:
min(distances_3d_interchain_only['5oqv']['scHAmin'])

5.73767709732056

In [79]:
distances_3d_interchain_only['5kk3'][distances_3d_interchain_only['5kk3']['scHAmin'] < 5]

Unnamed: 0,Pos1,Pos2,WT_AA1,WT_AA2,chainids,HAmin,scHAmin,CB,HAmin_sd,scHAmin_sd,CB_sd,Pos1_chain_id,Pos2_chain_id,Pos1_Pos2_chain_id
186,3,59,H,G,A,3.444898,3.62242,3.62242,0,0,0,1,2,1_2
187,3,60,H,G,A,3.318741,3.535287,3.649162,0,0,0,1,2,1_2
312,5,57,Q,M,A,3.210215,3.370968,5.899814,0,0,0,1,2,1_2
440,7,57,L,M,A,3.848898,3.848898,6.659911,0,0,0,1,2,1_2
1527,24,56,L,L,A,3.965596,4.481792,4.481792,0,0,0,1,2,1_2
1528,24,57,L,M,A,4.109112,4.75737,5.84859,0,0,0,1,2,1_2
1572,25,37,M,Q,A,3.085132,3.124737,6.223017,0,0,0,1,2,1_2
1574,25,39,M,L,A,3.536623,3.536623,6.595454,0,0,0,1,2,1_2
1591,25,56,M,L,A,3.464748,3.566577,6.259754,0,0,0,1,2,1_2
1699,27,36,G,H,A,3.665001,4.035601,4.951795,0,0,0,1,2,1_2


In [25]:
list(distances_3d['2nao'][distances_3d['2nao']['Pos1_Pos2_chain_id'] == '1_2']['scHAmin']) == list(distances_3d['2nao'][distances_3d['2nao']['Pos1_Pos2_chain_id'] == '2_1']['scHAmin'])


False

In [36]:
assymetry = [elem_1 - elem_2 for elem_1,elem_2 in zip(list(distances_3d['2nao'][distances_3d['2nao']['Pos1_Pos2_chain_id'] == '1_2']['scHAmin']),
                                                      list(distances_3d['2nao'][distances_3d['2nao']['Pos1_Pos2_chain_id'] == '2_1']['scHAmin']))]



In [37]:
assymetry

[0.0,
 0.09903717041019888,
 0.11226654052730112,
 0.04863739013670454,
 0.2541198730468892,
 0.31578063964850855,
 0.01300811767579546,
 0.8075561523438068,
 -2.498283386230405,
 0.36158752441410513,
 0.6166305541992045,
 0.20296096801759944,
 0.47535705566399855,
 0.9201049804688068,
 0.742935180664098,
 0.463218688964794,
 0.6560096740722017,
 0.9112129211425994,
 0.6266746520997017,
 1.1136054992675994,
 0.587795257568402,
 0.641937255859304,
 0.9788360595702983,
 0.7639923095702983,
 0.8598251342774006,
 0.9313774108887003,
 1.2255058288574006,
 1.2406997680664062,
 1.0583419799803977,
 1.0571479797363992,
 0.9851417541503977,
 0.7463684082030966,
 0.810302734375,
 0.43045425415039773,
 0.7823410034179972,
 0.9280853271485014,
 0.8598041534424041,
 1.2585716247558985,
 1.2120132446288991,
 1.5728206634521982,
 1.2827033996582031,
 1.6588783264159943,
 -0.09903717041019888,
 0.0,
 0.0030670166015056566,
 -0.06803894042970171,
 0.13629150390619316,
 0.1976318359375,
 -0.0966796875,


In [35]:
list(distances_3d['2nao'][distances_3d['2nao']['Pos1_Pos2_chain_id'] == '1_2']['scHAmin'])

[81.2316436767578,
 75.6451110839844,
 77.7320327758789,
 70.821533203125,
 74.4341049194336,
 68.8273162841797,
 71.1855926513672,
 66.6531219482422,
 63.9444732666016,
 65.8695526123047,
 61.3290710449219,
 57.4386138916016,
 58.6244621276855,
 54.8859558105469,
 49.3883399963379,
 53.0798645019531,
 46.8949165344238,
 51.3376770019531,
 44.4181518554688,
 47.9936676025391,
 55.0401992797852,
 57.6979598999023,
 56.3795967102051,
 50.9639625549316,
 51.2155799865723,
 47.0322036743164,
 46.343318939209,
 40.8988952636719,
 40.2244606018066,
 44.3490028381348,
 39.8803901672363,
 44.4013214111328,
 41.3072166442871,
 41.2991142272949,
 36.2300186157227,
 36.7850151062012,
 32.0402908325195,
 31.3792724609375,
 34.6569557189941,
 31.9656143188477,
 37.2074394226074,
 36.6210060119629,
 75.5460739135742,
 69.9257125854492,
 72.000617980957,
 65.1113204956055,
 68.6747589111328,
 63.0364189147949,
 65.3960494995117,
 60.8678665161133,
 58.1039085388184,
 59.9657440185547,
 55.41743850708

In [31]:
distances_3d['2nao'][distances_3d['2nao']['Pos1_Pos2_chain_id'] == '2_1'][:10]

Unnamed: 0,Pos1,Pos2,WT_AA1,WT_AA2,chainids,HAmin,scHAmin,CB,HAmin_sd,scHAmin_sd,CB_sd,Pos1_chain_id,Pos2_chain_id,Pos1_Pos2_chain_id
3528,43,1,D,D,A,78.468632,81.231644,81.231644,0,0,0,2,1,2_1
3529,43,2,D,A,A,74.162849,75.546074,75.546074,0,0,0,2,1,2_1
3530,43,3,D,E,A,74.856167,77.619766,77.619766,0,0,0,2,1,2_1
3531,43,4,D,F,A,68.981861,70.772896,72.01281,0,0,0,2,1,2_1
3532,43,5,D,R,A,71.416855,74.179985,74.179985,0,0,0,2,1,2_1
3533,43,6,D,H,A,66.74036,68.511536,68.699341,0,0,0,2,1,2_1
3534,43,7,D,D,A,68.540619,71.172585,71.172585,0,0,0,2,1,2_1
3535,43,8,D,S,A,64.520361,65.845566,65.845566,0,0,0,2,1,2_1
3536,43,9,D,G,A,64.851645,66.442757,66.442757,0,0,0,2,1,2_1
3537,43,10,D,Y,A,63.050005,65.507965,65.507965,0,0,0,2,1,2_1


In [22]:
distances_3d['2nao'][distances_3d['2nao']['Pos1_Pos2_chain_id'] == '2_1']

Unnamed: 0,Pos1,Pos2,WT_AA1,WT_AA2,chainids,HAmin,scHAmin,CB,HAmin_sd,scHAmin_sd,CB_sd,Pos1_chain_id,Pos2_chain_id,Pos1_Pos2_chain_id
3528,43,1,D,D,A,78.468632,81.231644,81.231644,0,0,0,2,1,2_1
3529,43,2,D,A,A,74.162849,75.546074,75.546074,0,0,0,2,1,2_1
3530,43,3,D,E,A,74.856167,77.619766,77.619766,0,0,0,2,1,2_1
3531,43,4,D,F,A,68.981861,70.772896,72.012810,0,0,0,2,1,2_1
3532,43,5,D,R,A,71.416855,74.179985,74.179985,0,0,0,2,1,2_1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7009,84,38,A,G,A,36.443230,38.223530,38.223530,0,0,0,2,1,2_1
7010,84,39,A,V,A,38.409302,39.721363,39.721363,0,0,0,2,1,2_1
7011,84,40,A,V,A,42.830864,45.137089,45.137089,0,0,0,2,1,2_1
7012,84,41,A,I,A,43.254332,45.652489,45.652489,0,0,0,2,1,2_1


In [12]:
max(distances_3d['2nao']['scHAmin'])

81.2316436767578