In [3]:
import pyrosetta
from pyrosetta.rosetta.core.scoring import get_score_function
from pyrosetta.rosetta.protocols.analysis import InterfaceAnalyzerMover
from pyrosetta.toolbox import mutants
import numpy as np
import pandas as pd

from tqdm import tqdm


pyrosetta.init("-mute core basic protocols")
ref_protein = "ref_repack.pdb"
mut_protein = "mut_repack.pdb"

pickle_6YLA = '/home/zengyun1/AbRFC/data/test/CMAB0/6YLA_scores.pickle'



data = pd.read_pickle(pickle_6YLA)
feature = data.loc[0]


┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.Release.python38.ubuntu 2024.42+release.3366cf78a3df04339d1982e94531b77b098ddb99 2024-10-11T08:24:04] retrieved from: http://www.pyrosetta.org


In [None]:

def get_new_feature(pose,feature,pose_residue_index):
    sfxn = get_score_function(True)
    sfxn(pose)
    interface_analyzer = InterfaceAnalyzerMover()
    interface_analyzer.set_interface(interface_chain)
    interface_analyzer.set_calc_dSASA(True)  
    interface_analyzer.set_compute_interface_energy(True)  
    interface_analyzer.set_compute_packstat(True) 
    interface_analyzer.apply(pose)
    
    
    neighbors_1 = [] 
    neighbors_2 = [] 
    inter_energy = pose.energies().residue_pair_energies_array()
    resid_energy = pose.energies().residue_total_energies_array()

    pose_residue_index = pose_residue_index - 1

    for resid in range(len(inter_energy[pose_residue_index,:])):

        inter_energy_i = np.array(list(inter_energy[pose_residue_index, :][resid]))[1:]
        inter_energy_i = np.abs(inter_energy_i)

        if np.max(inter_energy_i) > 0.15 : neighbors_2.append(resid)
        
        elif np.max(inter_energy_i) > 0.05 : neighbors_1.append(resid)

    dE2 = pose.energies().total_energy()

    idelta_unsat_hbonds = interface_analyzer.get_all_data().delta_unsat_hbonds
    iinterface_hbonds = interface_analyzer.get_all_data().interface_hbonds
    ipackstat = interface_analyzer.get_all_data().packstat
    isc_value = interface_analyzer.get_all_data().sc_value

    icomplexed_sasa = interface_analyzer.get_all_per_residue_data().complexed_sasa
    icomplexed_sasa = np.array(icomplexed_sasa)
    icomplexed_sasa = np.nan_to_num(icomplexed_sasa, nan=0.0)
    icomplexed_sasa_0 = icomplexed_sasa[pose_residue_index].sum()
    icomplexed_sasa_1 = icomplexed_sasa[neighbors_1].sum()
    icomplexed_sasa_2 = icomplexed_sasa[neighbors_2].sum()

    idG = interface_analyzer.get_all_per_residue_data().dG
    idG = np.array(idG)
    idG = np.nan_to_num(idG, nan=0.0)
    idG_0 = idG[pose_residue_index].sum()
    idG_1 = idG[neighbors_1].sum()
    idG_2 = idG[neighbors_2].sum()

    idSASA = interface_analyzer.get_all_per_residue_data().dSASA
    idSASA = np.array(idSASA)
    idSASA = np.nan_to_num(idSASA, nan=0.0)
    idSASA_0 =idSASA[pose_residue_index].sum()
    idSASA_1 =idSASA[neighbors_1].sum()
    idSASA_2 =idSASA[neighbors_2].sum()

    idSASA_fraction = interface_analyzer.get_all_per_residue_data().dSASA_fraction
    idSASA_fraction = np.array(idSASA_fraction)
    idSASA_fraction = np.nan_to_num(idSASA_fraction, nan=0.0)
    idSASA_fraction_0 = idSASA_fraction[pose_residue_index].sum()
    idSASA_fraction_1 = idSASA_fraction[neighbors_1].sum()
    idSASA_fraction_2 = idSASA_fraction[neighbors_2].sum()

    idSASA_sc = interface_analyzer.get_all_per_residue_data().dSASA_sc
    idSASA_sc = np.array(idSASA_sc)
    idSASA_sc = np.nan_to_num(idSASA_sc, nan=0.0)
    idSASA_sc_0 = idSASA_sc[pose_residue_index].sum()
    idSASA_sc_1 = idSASA_sc[neighbors_1].sum()
    idSASA_sc_2 = idSASA_sc[neighbors_2].sum()

    idhSASA = interface_analyzer.get_all_per_residue_data().dhSASA
    idhSASA = np.array(idhSASA)
    idhSASA = np.nan_to_num(idhSASA, nan=0.0)
    idhSASA_0 = idhSASA[pose_residue_index].sum()
    idhSASA_1 = idhSASA[neighbors_1].sum()
    idhSASA_2 = idhSASA[neighbors_2].sum()

    idhSASA_rel_by_charge = interface_analyzer.get_all_per_residue_data().dhSASA_rel_by_charge
    idhSASA_rel_by_charge = np.array(idhSASA_rel_by_charge)
    idhSASA_rel_by_charge = np.nan_to_num(idhSASA_rel_by_charge, nan=0.0)
    idhSASA_rel_by_charge_0 = idhSASA_rel_by_charge[pose_residue_index].sum()
    idhSASA_rel_by_charge_1 = idhSASA_rel_by_charge[neighbors_1].sum()
    idhSASA_rel_by_charge_2 = idhSASA_rel_by_charge[neighbors_2].sum()

    idhSASA_sc = interface_analyzer.get_all_per_residue_data().dhSASA_sc
    idhSASA_sc = np.array(idhSASA_sc)
    idhSASA_sc = np.nan_to_num(idhSASA_sc, nan=0.0)
    idhSASA_sc_0 = idhSASA_sc[pose_residue_index].sum()
    idhSASA_sc_1 = idhSASA_sc[neighbors_1].sum()
    idhSASA_sc_2 = idhSASA_sc[neighbors_2].sum()

    iinterface_residues = interface_analyzer.get_all_per_residue_data().interface_residues
    iinterface_residues = np.array(iinterface_residues)
    iinterface_residues = np.nan_to_num(iinterface_residues, nan=0.0)
    iinterface_residues_0 = iinterface_residues[pose_residue_index].sum()
    iinterface_residues_1 = iinterface_residues[neighbors_1].sum()
    iinterface_residues_2 = iinterface_residues[neighbors_2].sum()

    total_score = resid_energy['total_score']
    total_score_0 = total_score[pose_residue_index].sum()
    total_score_1 = total_score[neighbors_1].sum()

    fa_atr = resid_energy['fa_atr']
    fa_atr_0 = fa_atr[pose_residue_index].sum()
    fa_atr_1 = fa_atr[neighbors_1].sum()

    fa_rep = resid_energy['fa_rep']
    fa_rep_0 = fa_rep[pose_residue_index].sum()
    fa_rep_1 = fa_rep[neighbors_1].sum()

    fa_sol = resid_energy['fa_sol']
    fa_sol_0 = fa_sol[pose_residue_index].sum()
    fa_sol_1 = fa_sol[neighbors_1].sum()

    fa_intra_rep = resid_energy['fa_intra_rep']
    fa_intra_rep_0 = fa_intra_rep[pose_residue_index].sum()
    fa_intra_rep_1 = fa_intra_rep[neighbors_1].sum()

    fa_intra_sol_xover4 = resid_energy['fa_intra_sol_xover4']
    fa_intra_sol_xover4_0 = fa_intra_sol_xover4[pose_residue_index].sum()
    fa_intra_sol_xover4_1 = fa_intra_sol_xover4[neighbors_1].sum()

    lk_ball_wtd = resid_energy['lk_ball_wtd']
    lk_ball_wtd_0 = lk_ball_wtd[pose_residue_index].sum()
    lk_ball_wtd_1 = lk_ball_wtd[neighbors_1].sum()

    fa_elec = resid_energy['fa_elec']
    fa_elec_0 = fa_elec[pose_residue_index].sum()
    fa_elec_1 = fa_elec[neighbors_1].sum()

    pro_close = resid_energy['pro_close']
    pro_close_0 = pro_close[pose_residue_index].sum()
    pro_close_1 = pro_close[neighbors_1].sum()

    hbond_sr_bb = resid_energy['hbond_sr_bb']
    hbond_sr_bb_0 = hbond_sr_bb[pose_residue_index].sum()
    hbond_sr_bb_1 = hbond_sr_bb[neighbors_1].sum()

    hbond_lr_bb = resid_energy['hbond_lr_bb']
    hbond_lr_bb_0 = hbond_lr_bb[pose_residue_index].sum()
    hbond_lr_bb_1 = hbond_lr_bb[neighbors_1].sum()

    hbond_bb_sc = resid_energy['hbond_bb_sc']
    hbond_bb_sc_0 = hbond_bb_sc[pose_residue_index].sum()
    hbond_bb_sc_1 = hbond_bb_sc[neighbors_1].sum()

    hbond_sc = resid_energy['hbond_sc']
    hbond_sc_0 = hbond_sc[pose_residue_index].sum()
    hbond_sc_1 = hbond_sc[neighbors_1].sum()
    new_feature = pd.Series({
    'dE2' : dE2,
    'idelta_unsat_hbonds' : idelta_unsat_hbonds,
    'iinterface_hbonds' : iinterface_hbonds,
    'ipackstat' : ipackstat,
    'isc_value' : isc_value,
    'icomplexed_sasa_0' : icomplexed_sasa_0,
    'icomplexed_sasa_1' : icomplexed_sasa_1,
    'icomplexed_sasa_2' : icomplexed_sasa_2,
    'idG_0' : idG_0,
    'idG_1' : idG_1,
    'idG_2' : idG_2,
    'idSASA_0' : idSASA_0,
    'idSASA_1' : idSASA_1,
    'idSASA_2' : idSASA_2,
    'idSASA_fraction_0' : idSASA_fraction_0,
    'idSASA_fraction_1' : idSASA_fraction_1,
    'idSASA_fraction_2' : idSASA_fraction_2,
    'idSASA_sc_0' : idSASA_sc_0,
    'idSASA_sc_1' : idSASA_sc_1,
    'idSASA_sc_2' : idSASA_sc_2,
    'idhSASA_0' : idhSASA_0,
    'idhSASA_1' : idhSASA_1,
    'idhSASA_2' : idhSASA_2,
    'idhSASA_rel_by_charge_0' : idhSASA_rel_by_charge_0,
    'idhSASA_rel_by_charge_1' : idhSASA_rel_by_charge_1,
    'idhSASA_rel_by_charge_2' : idhSASA_rel_by_charge_2,
    'idhSASA_sc_0' : idhSASA_sc_0,
    'idhSASA_sc_1' : idhSASA_sc_1,
    'idhSASA_sc_2' : idhSASA_sc_2,
    'iinterface_residues_0' : iinterface_residues_0,
    'iinterface_residues_1' : iinterface_residues_1,
    'iinterface_residues_2' : iinterface_residues_2,
    'total_score_0' : total_score_0,
    'total_score_1' : total_score_1,
    'fa_atr_0' : fa_atr_0,
    'fa_atr_1' : fa_atr_1,
    'fa_rep_0' : fa_rep_0,
    'fa_rep_1' : fa_rep_1,
    'fa_sol_0' : fa_sol_0,
    'fa_sol_1' : fa_sol_1,
    'fa_intra_rep_0' : fa_intra_rep_0,
    'fa_intra_rep_1' : fa_intra_rep_1,
    'fa_intra_sol_xover4_0' : fa_intra_sol_xover4_0,
    'fa_intra_sol_xover4_1' : fa_intra_sol_xover4_1,
    'lk_ball_wtd_0' : lk_ball_wtd_0,
    'lk_ball_wtd_1' : lk_ball_wtd_1,
    'fa_elec_0' : fa_elec_0,
    'fa_elec_1' : fa_elec_1,
    'pro_close_0' : pro_close_0,
    'pro_close_1' : pro_close_1,
    'hbond_sr_bb_0' : hbond_sr_bb_0,
    'hbond_sr_bb_1' : hbond_sr_bb_1,
    'hbond_lr_bb_0' : hbond_lr_bb_0,
    'hbond_lr_bb_1' : hbond_lr_bb_1,
    'hbond_bb_sc_0' : hbond_bb_sc_0,
    'hbond_bb_sc_1' : hbond_bb_sc_1,
    'hbond_sc_0' : hbond_sc_0,
    'hbond_sc_1' : hbond_sc_1})

    return new_feature

In [3]:
new_feature1_columns = ['dE2',
 'idelta_unsat_hbonds',
 'iinterface_hbonds',
 'ipackstat',
 'isc_value',
 'icomplexed_sasa_0',
 'icomplexed_sasa_1',
 'icomplexed_sasa_2',
 'idG_0',
 'idG_1',
 'idG_2',
 'idSASA_0',
 'idSASA_1',
 'idSASA_2',
 'idSASA_fraction_0',
 'idSASA_fraction_1',
 'idSASA_fraction_2',
 'idSASA_sc_0',
 'idSASA_sc_1',
 'idSASA_sc_2',
 'idhSASA_0',
 'idhSASA_1',
 'idhSASA_2',
 'idhSASA_rel_by_charge_0',
 'idhSASA_rel_by_charge_1',
 'idhSASA_rel_by_charge_2',
 'idhSASA_sc_0',
 'idhSASA_sc_1',
 'idhSASA_sc_2',
 'iinterface_residues_0',
 'iinterface_residues_1',
 'iinterface_residues_2',
 'total_score_0',
 'total_score_1',
 'fa_atr_0',
 'fa_atr_1',
 'fa_rep_0',
 'fa_rep_1',
 'fa_sol_0',
 'fa_sol_1',
 'fa_intra_rep_0',
 'fa_intra_rep_1',
 'fa_intra_sol_xover4_0',
 'fa_intra_sol_xover4_1',
 'lk_ball_wtd_0',
 'lk_ball_wtd_1',
 'fa_elec_0',
 'fa_elec_1',
 'pro_close_0',
 'pro_close_1',
 'hbond_sr_bb_0',
 'hbond_sr_bb_1',
 'hbond_lr_bb_0',
 'hbond_lr_bb_1',
 'hbond_bb_sc_0',
 'hbond_bb_sc_1',
 'hbond_sc_0',
 'hbond_sc_1']

In [4]:
from SIN import compute_sin_scores
import json 

aif = json.load(open('aif_matrix.json','r'))


In [68]:
"mut/"+feature['label']+".pdb"

'mut/H100A_PI.pdb'

In [None]:
total_feature=[]
from rosetta.protocols.relax import FastRelax

for index, feature in tqdm(data.iterrows(), total=len(data)):


    try:
        chain_id = feature['chain']
        origin_aa = feature['refAA']
        target_aa = feature['mutAA']
        pack_radius = 5.0
        
        interface_chain = feature['sides'][1]+"_"+feature['sides'][0]
    
        pose = pyrosetta.pose_from_pdb('ref.pdb')

        if feature['res'][-1].isalpha():
            numeric_part = feature['res'][:-1]
            letter_part = feature['res'][-1]
            pose_residue_index = pose.pdb_info().pdb2pose(chain_id, res=int(numeric_part), icode=letter_part)
        else:
            pose_residue_index = pose.pdb_info().pdb2pose(chain_id, res=int(feature['res']))

        if  index == 0:##关于ref的操作只有第一次涉及
            ref_pose = pyrosetta.pose_from_pdb('ref.pdb')

            mutants.mutate_residue(ref_pose, pose_residue_index,origin_aa, pack_radius=pack_radius) #不突变
            # ref_pose = pack_pose(ref_pose,pose_residue_index)
            ref_pose.dump_pdb("ref/"+feature['label']+".pdb")

            pose = pyrosetta.pose_from_pdb('ref.pdb')

            interface_analyzer = InterfaceAnalyzerMover()
            interface_analyzer.set_interface(interface_chain)
            interface_analyzer.set_calc_dSASA(True)  
            interface_analyzer.set_compute_interface_energy(True)  
            interface_analyzer.set_compute_packstat(True) 
            interface_analyzer.apply(pose)

            SIN_matrix = compute_sin_scores(pose)
            ref_AIF_mat = SIN_matrix.copy()
            num_residues = pose.total_residue()
            for i in range(num_residues):
                for j in range(num_residues):
                    if SIN_matrix[i][j] > 0:
                        res1 = pose.residue(i + 1)
                        res2 = pose.residue(j + 1)
                        ref_AIF_mat[i][j] = aif[res1.name1()+res2.name1()]
            SIN_scores = SIN_matrix.sum(axis=1)
            max_score = np.max(SIN_scores)
            min_score = np.min(SIN_scores)
            ref_SIN_scores_normalized = np.round((SIN_scores - min_score) / (max_score - min_score), 6)

        mut_pose = pyrosetta.pose_from_pdb('ref.pdb')
        mutants.mutate_residue(mut_pose, pose_residue_index, target_aa,pack_radius=pack_radius)
        # scorefxn = get_score_function()
        # fr = FastRelax()
        # fr.set_scorefxn(scorefxn)
        # fr.max_iter(100)
        # fr.apply(mut_pose)
        # mut_pose = pack_pose(mut_pose,pose_residue_index)

        mut_pose.dump_pdb("mut/"+feature['label']+".pdb")
        ref_pose.dump_pdb("ref/"+feature['label']+".pdb")

        
        new_ref_feature = get_new_feature(ref_pose,feature,pose_residue_index)
        new_mut_feature = get_new_feature(mut_pose,feature,pose_residue_index)


        new_feature1 = new_ref_feature - new_mut_feature
    except Exception as e:
        print(f"Error processing index {index}: {e}")
        new_feature1 = pd.Series({key: "NaN" for key in new_feature1_columns})


    interface_resid = [i-1 for i in interface_analyzer.get_interface_set()]
    ref_sin_if = sum([ref_SIN_scores_normalized[i] for i in interface_resid])
    ref_sin_res	= ref_SIN_scores_normalized[pose_residue_index-1]
    ref_sin_norm = max_score - min_score
    ref_AIF_score = ref_AIF_mat[pose_residue_index-1][:].sum()

    if 1:
            interface_analyzer = InterfaceAnalyzerMover()
            interface_analyzer.set_interface(interface_chain)
            interface_analyzer.set_calc_dSASA(True)  
            interface_analyzer.set_compute_interface_energy(True)  
            interface_analyzer.set_compute_packstat(True) 
            interface_analyzer.apply(mut_pose)
            mut_SIN_matrix = compute_sin_scores(mut_pose)
            mut_AIF_mat = mut_SIN_matrix.copy()
            num_residues = mut_pose.total_residue()
            for i in range(num_residues):
                for j in range(num_residues):
                    if mut_SIN_matrix[i][j] > 0:
                        res1 = mut_pose.residue(i + 1)
                        res2 = mut_pose.residue(j + 1)
                        mut_AIF_mat[i][j] = aif[res1.name1()+res2.name1()]
            SIN_scores = mut_SIN_matrix.sum(axis=1)
            max_score = np.max(SIN_scores)
            min_score = np.min(SIN_scores)
            mut_SIN_scores_normalized = np.round((SIN_scores - min_score) / (max_score - min_score), 6)

            interface_resid = [i-1 for i in interface_analyzer.get_interface_set()]
            mut_sin_if = sum([mut_SIN_scores_normalized[i] for i in interface_resid])
            mut_sin_res	= mut_SIN_scores_normalized[pose_residue_index-1]
            mut_sin_norm = max_score - min_score
            mut_AIF_score = mut_AIF_mat[pose_residue_index-1][:].sum()

    new_feature2 = pd.Series({
    'pdb_ref' : feature['pdb_ref'],
    'chain' : feature['chain'],
    'res' : feature['res'],
    'refAA' : "ref/"+feature['label']+".pdb",
    'mutAA' : "mut/"+feature['label']+".pdb",
    'sides' : feature['sides'],
    'pdb_ref_repack' : feature['pdb_ref_repack'],
    'pdb_mut_repack' : feature['pdb_mut_repack'],
    'aif_score' : ref_AIF_score-mut_AIF_score,
    'sin_if' : ref_sin_if-mut_sin_if,
    'sin_res' : ref_sin_res-mut_sin_res,
    'sin_norm' : ref_sin_norm-mut_sin_norm,
    'yclf' : feature['yclf'],
    'yreg' : feature['yreg'],
    'label' : feature['label']})


    new_feature = pd.concat([new_feature1, new_feature2])
    total_feature.append(new_feature)
    
    
total_feature = pd.concat(total_feature, axis=1).transpose()

# total_feature.to_pickle("final_feature_df.pkl")


  from rosetta.protocols.relax import FastRelax
  0%|          | 0/954 [00:00<?, ?it/s]

1


  0%|          | 1/954 [04:06<65:11:49, 246.28s/it]

1


  0%|          | 2/954 [08:07<64:21:53, 243.40s/it]

1


  0%|          | 3/954 [12:12<64:30:36, 244.20s/it]

1


  0%|          | 4/954 [16:11<63:49:18, 241.85s/it]

1


  1%|          | 5/954 [20:12<63:42:44, 241.69s/it]

1


  1%|          | 6/954 [24:07<63:01:55, 239.36s/it]

1


  1%|          | 7/954 [28:08<63:09:13, 240.08s/it]

1


  1%|          | 8/954 [32:12<63:25:05, 241.34s/it]

1


  1%|          | 8/954 [36:03<71:03:05, 270.39s/it]


KeyboardInterrupt: 

In [8]:
total_feature = pd.concat(total_feature, axis=1).transpose()


TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"

In [9]:
total_feature

Unnamed: 0,dE2,idelta_unsat_hbonds,iinterface_hbonds,ipackstat,isc_value,icomplexed_sasa_0,icomplexed_sasa_1,icomplexed_sasa_2,idG_0,idG_1,...,sides,pdb_ref_repack,pdb_mut_repack,aif_score,sin_if,sin_res,sin_norm,yclf,yreg,label
0,-0.019177,3.0,-3.0,0.0,-0.056817,0.0,12.614014,14.847594,-0.632595,-2.757844,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,2.321531,-1.647974,0.058827,0.26,0.368213,-0.89993,H100_TA
1,-10.921228,-2.0,-2.0,0.0,-0.03645,0.0,11.236174,-3.29453,-2.519375,0.32144,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,6.132346,0.83665,0.008082,-0.278,0.590464,0.773489,H100_TD
2,-7.808997,-2.0,-2.0,0.0,-0.016765,-0.512343,-24.967618,-35.953658,-1.348003,1.600735,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,4.483393,0.530847,-0.070746,-0.098,0.400249,-1.116424,H100_TE
3,-14.865319,3.0,2.0,0.0,0.015391,-1.663299,2.847175,-14.386113,1.26162,-1.911061,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,0.13085,-0.00648,-0.26092,0.132,0.445142,-2.025474,H100_TF
4,1.945544,0.0,-4.0,0.0,-0.075391,0.0,7.185521,10.865768,-0.64113,0.856327,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,4.371584,-2.206199,0.182719,0.264,0.244351,-1.436645,H100_TG
5,-15.179634,2.0,0.0,0.0,0.038168,-9.01575,11.076984,-80.162779,-1.061458,-2.621668,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,6.911575,0.989854,-0.152473,-0.297,0.639111,0.370489,H100_TH
6,-6.377295,0.0,-3.0,0.0,-0.013094,-0.512343,1.284578,-0.639163,0.312333,0.442341,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,0.401584,0.128204,-0.121334,-0.065,0.307679,-2.496906,H100_TI
7,-19.657979,3.0,-1.0,0.0,-0.046483,-8.751438,-57.953154,-84.371784,-1.30516,-0.22846,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,5.573298,0.872015,-0.092843,-0.36,0.464797,-0.171263,H100_TK


In [36]:
my_data = pd.read_pickle("final_feature_df.pkl")

In [None]:
from rosetta.protocols.relax import FastRelax
scorefxn = get_score_function()
fr = FastRelax()
fr.set_scorefxn(scorefxn)
fr.max_iter(100)
fr.apply(pose)

In [19]:
type(paper_H100_TA)

pandas.core.frame.DataFrame

In [61]:
total_feature

Unnamed: 0,dE2,idelta_unsat_hbonds,iinterface_hbonds,ipackstat,isc_value,icomplexed_sasa_0,icomplexed_sasa_1,icomplexed_sasa_2,idG_0,idG_1,...,sides,pdb_ref_repack,pdb_mut_repack,aif_score,sin_if,sin_res,sin_norm,yclf,yreg,label
0,22.648534,1.0,0.0,0.0,0.038574,1.024687,13.678254,22.093316,-0.633721,-1.839444,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,3.616762,1.201928,0.098469,-0.181,0.368213,-0.89993,H100_TA
1,31.771095,0.0,0.0,0.0,0.015682,1.024687,4.926397,7.133029,-3.050799,-0.293713,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,7.020873,0.96137,0.02132,0.0,0.590464,0.773489,H100_TD
2,-174.783973,-2.0,1.0,0.0,0.033993,0.512343,-35.964161,-27.359642,-12.463527,2.044215,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,5.122196,0.769158,-0.084485,0.0,0.400249,-1.116424,H100_TE
3,-859.884783,0.0,0.0,0.0,0.035452,1.024687,13.678254,2.398796,-424.428995,-1.775592,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,1.12119,-0.512252,-0.484191,0.0,0.445142,-2.025474,H100_TF
4,42.902198,3.0,0.0,0.0,0.059821,0.512343,8.396314,17.639445,-0.7864,3.976301,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,9.558439,1.270555,0.180762,0.0,0.244351,-1.436645,H100_TG
5,-260.402308,-6.0,1.0,0.0,0.039109,-0.512343,13.678254,-12.846849,-99.534075,-1.839444,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,6.956793,0.66262,-0.180884,0.0,0.639111,0.370489,H100_TH
6,-41.509903,-2.0,0.0,0.0,-0.002689,0.512343,0.0,2.877443,0.200889,-0.067028,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,1.138399,0.725807,-0.098592,0.0,0.307679,-2.496906,H100_TI
7,-1333.348896,-8.0,1.0,0.0,0.086922,1.024687,5.767095,-28.208337,-581.619241,1.575998,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,4.47213,1.255418,-0.094183,0.0,0.464797,-0.171263,H100_TK
8,-281.380779,-1.0,1.0,0.0,0.008741,1.024687,13.678254,8.490166,-51.9342,-1.839444,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,-8.288771,0.720371,-0.099033,0.0,0.376051,-2.397409,H100_TL
9,-110.267302,-3.0,1.0,0.0,0.051705,1.024687,-26.723217,-41.869673,-0.382266,4.418649,...,"[LH, A]",/efs/Antibody/compute/projects/corona/c27/para...,/efs/Antibody/compute/projects/corona/c27/para...,1.265298,0.816623,-0.084632,0.0,0.418716,-1.165471,H100_TM


In [34]:
a= pd.concat([total_feature,paper_H100_TA], axis=0)

In [57]:
def pack_pose(pose,target_residue_index):
    target_selector = pyrosetta.rosetta.core.select.residue_selector.ResidueIndexSelector()
    target_selector.set_index(str(target_residue_index))

    neighborhood_selector = pyrosetta.rosetta.core.select.residue_selector.NeighborhoodResidueSelector()
    neighborhood_selector.set_focus_selector(target_selector)  
    neighborhood_selector.set_distance(5.0)  # 半径为 5 埃
    neighborhood_selector.set_include_focus_in_subset(True) 

    tf = pyrosetta.rosetta.core.pack.task.TaskFactory()
    tf.push_back(pyrosetta.rosetta.core.pack.task.operation.InitializeFromCommandline())
    tf.push_back(pyrosetta.rosetta.core.pack.task.operation.RestrictToRepacking())

    restrict_residues = pyrosetta.rosetta.core.pack.task.operation.OperateOnResidueSubset(
        pyrosetta.rosetta.core.pack.task.operation.RestrictToRepackingRLT(), neighborhood_selector
    )
    tf.push_back(restrict_residues)

    packer = pyrosetta.rosetta.protocols.minimization_packing.PackRotamersMover()
    packer.task_factory(tf)

    packer.apply(pose)


    return pose

In [54]:
pose
    

<pyrosetta.rosetta.core.pose.Pose at 0x7ddf380787f0>

In [24]:
new_ref_feature 

dE2                       -1346.621158
idelta_unsat_hbonds          17.000000
iinterface_hbonds            13.000000
ipackstat                     0.000000
isc_value                     0.614205
icomplexed_sasa_0             0.000000
icomplexed_sasa_1            12.614014
icomplexed_sasa_2            22.465157
idG_0                        -1.054008
idG_1                        -2.757844
idG_2                        -7.868393
idSASA_0                     18.194065
idSASA_1                     96.515606
idSASA_2                    200.468107
idSASA_fraction_0             1.000000
idSASA_fraction_1             1.768992
idSASA_fraction_2             6.196657
idSASA_sc_0                  18.194065
idSASA_sc_1                  90.367486
idSASA_sc_2                 138.644620
idhSASA_0                    17.169378
idhSASA_1                    89.592986
idhSASA_2                    91.133833
idhSASA_rel_by_charge_0      16.298903
idhSASA_rel_by_charge_1      87.625947
idhSASA_rel_by_charge_2  

In [25]:
new_mut_feature

dE2                       -1344.307306
idelta_unsat_hbonds          17.000000
iinterface_hbonds            13.000000
ipackstat                     0.000000
isc_value                     0.590413
icomplexed_sasa_0             0.512343
icomplexed_sasa_1             5.226291
icomplexed_sasa_2            20.415783
idG_0                        -0.420835
idG_1                        -1.174477
idG_2                        -7.506756
idSASA_0                     11.016557
idSASA_1                     51.337115
idSASA_2                    181.742229
idSASA_fraction_0             0.955560
idSASA_fraction_1             0.907603
idSASA_fraction_2             5.370497
idSASA_sc_0                  11.016557
idSASA_sc_1                  51.337115
idSASA_sc_2                 133.521186
idhSASA_0                    11.016557
idhSASA_1                    51.337115
idhSASA_2                    81.745792
idhSASA_rel_by_charge_0       9.805955
idhSASA_rel_by_charge_1      46.213214
idhSASA_rel_by_charge_2  

In [None]:
my_data