Mutational scanning of the open and closed templates

### Init and declare

In [1]:
import pyrosetta_help as ph
from pathlib import Path
import pyrosetta
import pyrosetta_help as ph
from types import ModuleType
from collections import Counter
prc: ModuleType = pyrosetta.rosetta.core
prp: ModuleType = pyrosetta.rosetta.protocols
prcc: ModuleType = pyrosetta.rosetta.core.conformation
pr_scoring: ModuleType = pyrosetta.rosetta.core.scoring
pr_options: ModuleType = pyrosetta.rosetta.basic.options


# Set up logging through pyrosetta_help, then assemble the PyRosetta option
# string and hand it to maybe_init.
logger = ph.configure_logger()
init_flags = ph.make_option_string(
    no_optH=False,
    ex1=None,
    ex2=None,
    # mute='all',
    ignore_unrecognized_res=True,
    load_PDB_components=False,
    ignore_waters=True,
)
pyrosetta.distributed.maybe_init(extra_options=init_flags)


def scan(pose, name, chain='A', chain_id=1, interfaces=(), offset=1):
    """
    Score every position of one chain against all twenty amino acids.

    A ``ph.MutantScorer`` is built for ``pose`` with a ``ref2015`` score
    function, and one mutation label is generated per (position, target residue)
    pair, including the identity ("silent") substitution.

    :param pose: pose to scan.
    :param name: passed to ``ph.MutantScorer`` as ``modelname``.
    :param chain: chain letter forwarded to ``score_mutations`` as ``chains``.
    :param chain_id: pose chain number whose sequence is enumerated.
    :param interfaces: forwarded unchanged to ``score_mutations``.
    :param offset: added (plus one) to the 0-based sequence position to form the
        residue number in each mutation label.
    :return: ``pandas.DataFrame`` of the scores, after ``ph.extend_scores`` has
        been called on it.
    """
    one_letter = "ARNDCEQGHILKMFPSTWYV"
    three_letter = ("Ala Arg Asn Asp Cys Glu Gln Gly His Ile "
                    "Leu Lys Met Phe Pro Ser Thr Trp Tyr Val").split()
    aas = dict(zip(one_letter, three_letter))

    model = ph.MutantScorer(pose, modelname=name)
    model.scorefxn = pyrosetta.create_score_function('ref2015')
    model.strict_about_starting_residue = True
    model.make_output_folder()

    # Labels look like "<From3><number><To3>", e.g. "Ala7Gly".
    mutations = []
    for index, wild_type in enumerate(pose.chain_sequence(chain_id)):
        residue_number = index + offset + 1
        for target in aas.values():
            mutations.append(f'{aas[wild_type]}{residue_number}{target}')

    data = model.score_mutations(
        mutations,
        chains=chain,
        interfaces=interfaces,
        preminimize=False,
        distance=8,
        cycles=5,
    )

    import pandas as pd
    scores = pd.DataFrame(data)
    ph.extend_scores(scores)
    return scores





### Run

Each of the three templates is scanned in triplicate.
EDIT: template2.pdb gives a mean error of 5 kcal/mol, so it is being ignored.

In [None]:
# One entry per template: (PDB file, model-name prefix given to scan(), CSV prefix).
# The CSV prefix is what the analysis cell uses to read the results back.
# NOTE(review): the markdown note says template2.pdb is being ignored, yet it is
# still scanned here and written out as the 'open' series; confirm this is intended.
templates = (
    ('template2.pdb', 'open', 'open'),
    ('x0152_template.pdb', 'closed', 'closed'),
    ('x0310_template.pdb', 'x0310', 'openalt'),
)

# Three replicates; within each replicate the templates run in the order above.
for i in range(1, 1+3):
    for filename, model_prefix, csv_prefix in templates:
        pose = pyrosetta.pose_from_file(filename)
        scan_scores = scan(pose, f'{model_prefix}-{i}', chain='A', offset=+6)
        scan_scores.to_csv(f'{csv_prefix}{i}_mutational_scan.csv')

In [139]:
import pandas as pd
import numpy as np
import operator
import plotly.express as px


def get_complex_ddG(mode, i):
    """Read one replicate's scan CSV and return its ``complex_ddG`` column indexed by mutation."""
    table = pd.read_csv(f'{mode}{i}_mutational_scan.csv')
    return table.set_index('mutation')['complex_ddG']


modes = ('open', 'closed', 'openalt')
replicates = range(1, 1+3)

# One column per (mode, replicate), replicate-major so the column order is
# open/closed/openalt for replicate 1, then 2, then 3.
replicate_columns = {}
for i in replicates:
    for mode in modes:
        replicate_columns[f'{mode}_replicate{i}'] = get_complex_ddG(mode, i)
df = pd.DataFrame(replicate_columns)

# Collapse the replicates of each mode to their row-wise minimum.
for mode in modes:
    df[mode] = df[[f'{mode}_replicate{i}' for i in replicates]].min(axis=1)

df['openref'] = df['open']
df['open'] = df[['openref', 'openalt']].max(axis=1)  # worst for now
# NOTE(review): the difference below uses 'openalt', not the combined 'open' column; confirm intended.
df['∆∆∆G'] = df.openalt - df.closed

# Index labels look like 'E88P': first char = from, last char = to, middle = residue number.
mutation_labels = df.index.to_series()
df['residue_index'] = mutation_labels.str[1:-1].astype(int)
df['to_residue'] = mutation_labels.str[-1]
df['from_residue'] = mutation_labels.str[0]

## This is not great: the values are off by 0.7 on average
silent = df.loc[df.to_residue == df.from_residue]
print('Noise? MAE of silent mutations', silent['∆∆∆G'].abs().mean() )
# The silent (X -> X) ∆∆∆G at each residue is treated as that position's noise;
# its absolute value is added to every mutation at the same position.
noise = silent.set_index('residue_index')['∆∆∆G'].to_dict()
df['denoised_dddG'] = df['∆∆∆G'] + df.residue_index.map(noise).abs()
df.sort_values('denoised_dddG').to_csv('scan_summary.csv')

fig = px.scatter(
    df,
    x=df.residue_index,
    y=df.to_residue,
    color='∆∆∆G',
    size=[1]*len(df),
    size_max=10,
    range_color=[-10,1],
    symbol_sequence=['square'],
    template='plotly_white',
    width=2000,
    title='Difference in ∆∆G between open and closed (negative = better open)',
)
fig.update_traces(marker={'line_width': 0})
fig.write_image('scores_scan.png')

### Shortlist

In [144]:
# Keep mutations that score <= 0 on both open templates, > 0 on the closed one,
# and whose denoised difference is at most -2.
open_not_destabilised = (df.openref <= 0) & (df.openalt <= 0)
closed_destabilised = df.closed > 0
strong_shift = df['denoised_dddG'] <= -2.
shortlist = df.loc[open_not_destabilised & closed_destabilised & strong_shift].sort_values('denoised_dddG')
shortlist

Unnamed: 0_level_0,open_replicate1,closed_replicate1,openalt_replicate1,open_replicate2,closed_replicate2,openalt_replicate2,open_replicate3,closed_replicate3,openalt_replicate3,open,closed,openalt,openref,∆∆∆G,residue_index,to_residue,from_residue,denoised_dddG
mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
E88P,-0.534509,5.497908,-2.198705,-0.534509,5.497908,-2.198705,-0.534509,5.497908,-2.198705,-0.534509,5.497908,-2.198705,-0.534509,-7.696613,88,P,E,-7.103755
Y89S,-0.025478,5.314023,-1.447253,-0.027796,5.314023,-1.447253,-0.027796,5.314023,-1.447253,-0.027796,5.314023,-1.447253,-0.027796,-6.761276,89,S,Y,-6.523027
Y89N,-0.951704,3.874841,-2.62439,-0.951704,3.874841,-2.619897,-0.951704,3.874841,-2.62439,-0.951704,3.874841,-2.62439,-0.951704,-6.499231,89,N,Y,-6.260982
Y89D,-1.802368,5.008269,-1.386629,-1.802368,5.008269,-1.386629,-1.802368,5.008269,-1.386629,-1.386629,5.008269,-1.386629,-1.802368,-6.394898,89,D,Y,-6.156649
Y89Q,-1.095164,4.950291,-1.285121,-1.095164,4.950291,-1.285121,-1.095164,4.950291,-1.285121,-1.095164,4.950291,-1.285121,-1.095164,-6.235412,89,Q,Y,-5.997163
R93F,-13.427746,6.209146,-2.645659,-13.427746,6.209146,-2.645659,-13.427746,6.209146,-2.645659,-2.645659,6.209146,-2.645659,-13.427746,-8.854805,93,F,R,-5.957241
Y89A,-0.336258,5.317582,-0.078554,-0.336258,5.317582,-0.078554,-0.336258,5.317582,-0.078554,-0.078554,5.317582,-0.078554,-0.336258,-5.396136,89,A,Y,-5.157887
S87D,-1.544001,1.757349,-3.64551,-1.544001,1.757349,-3.64551,-1.544001,1.757349,-3.64551,-1.544001,1.757349,-3.64551,-1.544001,-5.40286,87,D,S,-4.807067
Y89K,-2.357202,3.524468,-1.444971,-2.357202,3.524468,-1.444971,-2.357202,3.524468,-1.444971,-1.444971,3.524468,-1.444971,-2.357202,-4.969439,89,K,Y,-4.73119
A92H,-0.529267,4.151047,-0.254869,-0.529267,4.151047,-0.254869,-0.529267,4.151047,-0.254869,-0.254869,4.151047,-0.254869,-0.529267,-4.405917,92,H,A,-4.091611


In [162]:
# Per residue position: the original residue letter and the concatenation of all
# shortlisted target letters (summing strings concatenates them).
by_residue = shortlist.groupby('residue_index')
from_letters = by_residue['from_residue'].sum().str[0]
to_letters = by_residue['to_residue'].sum()
common = pd.DataFrame({'from_residue': from_letters,
                       'to_residue': to_letters})
common['length'] = common.to_residue.str.len()
# Positions with the most shortlisted substitutions come first.
common = common.sort_values('length', ascending=False)
residue_numbers = common.index.to_series().astype(str)
", ".join((common.from_residue + residue_numbers + common.to_residue).to_list())

'Y89SNDQAKHF, S87DRCE, A92HYFN, T45DI, Y90RV, C110FY, G127SM, H21Q, C56I, E88P, R93F'

## Minimise w/o restraints and check if it closes up

In [None]:
from pathlib import Path
import pyrosetta
import pyrosetta_help as ph
from types import ModuleType
from typing import List, Dict
from IPython.display import display, HTML

from collections import Counter
prc: ModuleType = pyrosetta.rosetta.core
prp: ModuleType = pyrosetta.rosetta.protocols
prn: ModuleType = pyrosetta.rosetta.numeric
prcc: ModuleType = pyrosetta.rosetta.core.conformation
pr_scoring: ModuleType = pyrosetta.rosetta.core.scoring

# Set up logging through pyrosetta_help, then assemble the PyRosetta option
# string and hand it to maybe_init.
logger = ph.configure_logger()
init_flags = ph.make_option_string(
    no_optH=False,
    ex1=None,
    ex2=None,
    # mute='all',
    ignore_unrecognized_res=True,
    load_PDB_components=False,
    ignore_waters=True,
)
pyrosetta.distributed.maybe_init(extra_options=init_flags)

# Two further boolean options are set directly once init has been attempted.
rosetta_options = pyrosetta.rosetta.basic.options
rosetta_options.set_boolean_option('run:ignore_zero_occupancy', False)
rosetta_options.set_boolean_option('in:auto_setup_metals', True)

def relax(original: pyrosetta.Pose, constraint_weight: float=5, cycles: int=15, to_initial=True) -> pyrosetta.Pose:
    """
    FastRelax a copy of ``original`` and return the copy; ``original`` is cloned first.

    :param original: pose to relax.
    :param constraint_weight: weight given to the coordinate, angle and atom-pair
        constraint terms of the score function.
    :param cycles: passed to the ``FastRelax`` constructor alongside the score function.
    :param to_initial: value written to the ``relax:constrain_relax_to_start_coords``
        and ``relax:coord_constrain_sidechains`` options and passed to
        ``FastRelax.constrain_relax_to_start_coords``.
    :return: the relaxed clone.

    Note: the two ``relax:`` options are set globally, so the setting persists
    after the call.
    """
    working: pyrosetta.Pose = original.clone()

    scorefxn: pr_scoring.ScoreFunction = pyrosetta.get_fa_scorefxn()
    for term in (pr_scoring.ScoreType.coordinate_constraint,
                 pr_scoring.ScoreType.angle_constraint,
                 pr_scoring.ScoreType.atom_pair_constraint):
        scorefxn.set_weight(term, constraint_weight)

    options = pyrosetta.rosetta.basic.options
    options.set_boolean_option('relax:constrain_relax_to_start_coords', to_initial)
    options.set_boolean_option('relax:coord_constrain_sidechains', to_initial)

    FastRelax = pyrosetta.rosetta.protocols.relax.FastRelax
    FastRelax.register_options()
    mover = FastRelax(scorefxn, cycles)
    mover.constrain_relax_to_start_coords(to_initial)
    mover.apply(working)
    return working

scores: List[Dict[str, float]] = []

# Score function used only to report the start/relaxed energies; it does not
# change between iterations, so it is built once.
scorefxn: pr_scoring.ScoreFunction = pyrosetta.get_fa_scorefxn()

# Harmonic pull between Tyr89 OH and Ser125 O.
# Intended shape (per the original note): zero at 3 Å and 2 kcal/mol at 7 Å at 1x weight.
# Rosetta's HarmonicFunc evaluates ((x - x0) / sd) ** 2, so sd = (7 - 3) / sqrt(2).
# The previous expression `((7-3)/2)**1/2` parsed as (((7-3)/2)**1)/2 == 1.0,
# i.e. 16 kcal/mol at 7 Å, eight times stiffer than documented.
pull_x0 = 3
pull_sd: float = (7 - pull_x0) / 2 ** 0.5

mutation: str
for mutation in shortlist.index:
    # Mutations of residue 89 are skipped: the pull below needs the Tyr89 'OH' atom.
    if int(mutation[1:-1]) == 89:
        continue
    pose: pyrosetta.Pose = pyrosetta.pose_from_pdb(f'variants/x0310-1.{mutation}.pdb')
    tyr89_resi: int = pose.pdb_info().pdb2pose(res=89, chain='A') # OH
    ser125_resi: int = pose.pdb_info().pdb2pose(res=125, chain='A') # O
    # add a slight pull to close it
    con = pr_scoring.constraints.AtomPairConstraint(
        pyrosetta.AtomID(atomno_in=pose.residue(tyr89_resi).atom_index('OH'), rsd_in=tyr89_resi),
        pyrosetta.AtomID(atomno_in=pose.residue(ser125_resi).atom_index('O'), rsd_in=ser125_resi),
        pr_scoring.func.HarmonicFunc(pull_x0, pull_sd),
    )
    pose.add_constraint(con)
    # NOTE(review): relax() runs with its default to_initial=True (constrained to
    # start coordinates) although the section heading says "w/o restraints"; confirm.
    relaxed: pyrosetta.Pose = relax(pose, 2, 5)
    relaxed.dump_pdb(f'variants/open_relaxed.{mutation}.pdb')
    distance: float = prn.xyzVector_double_t.distance( relaxed.residues[tyr89_resi].xyz('OH'), relaxed.residues[ser125_resi].xyz('O') )
    scores.append({'mutation': mutation, 'start': scorefxn(pose), 'relaxed': scorefxn(relaxed), 'distance': distance})

In [None]:
# Tabulate the relax results, largest Tyr89 OH to Ser125 O distance first, rounded to 1 d.p.
relax_summary = pd.DataFrame(scores)
relax_summary.sort_values('distance', ascending=False).round(1)