## Uses older PyRosetta bindings



TMalign is an algorithm that finds the best-fit transformation for overlaying two proteins on one another. It has been implemented in Rosetta, and here we use it to align two proteins that have different sequences.

Note: This does **not** move the ligands if there are any present!!!

In [None]:
from rosetta import *
# Call the bindings' init() once, before the cells below start using Rosetta.
rosetta.init()

In [None]:
# Load the two structures that will be superimposed. pose1 and pose2 are
# module-level names reused by every later cell.
pose1 = pose_from_file('pose1.pdb')
pose2 = pose_from_file('pose2.pdb')

In [None]:
# Print the CA coordinates of residue 1 in each pose as a baseline, so that
# later cells can show whether either pose has actually moved.
print pose1.residue(1).xyz('CA')
print pose2.residue(1).xyz('CA')

Trying to use the TMalign algorithm to do my superposition of fragments/proteins


In [None]:
from rosetta.protocols import hybridization

In [None]:
# Build a TMalign object and run it on the two poses. `tm` is kept because
# later cells query it for the TM-score and the atom map.
tm = hybridization.TMalign()
tm.apply(pose1, pose2)       ### note, the proteins have not moved

In [None]:
from rosetta.core.scoring import create_score_function
# Create the talaris2013 score function, attach a PyMOL observer to each pose,
# then score both poses.
# NOTE(review): the meaning of the second AddPyMolObserver argument (True) is
# not shown here -- confirm against the PyRosetta documentation.
sfxn = create_score_function('talaris2013')
AddPyMolObserver(pose1, True)
AddPyMolObserver(pose2, True)
sfxn(pose1)
sfxn(pose2)

In [None]:
# TMscore() takes a normalization length, i.e. a residue count (the Rosetta
# code quoted further down passes residue_list.size()). Use the real length of
# the longer pose; the previous n_residue()+1 was off by one.
longest = max(pose1.n_residue(), pose2.n_residue())
print(longest)              # normalization length: residue count of the longer protein
# `shortest` deliberately keeps the +1: a later cell uses it as the exclusive
# upper bound of xrange(1, shortest) to enumerate residues 1..n.
shortest = min(pose1.n_residue()+1, pose2.n_residue()+1)
print(tm.TMscore(longest))  # original run reported ~.957 (i.e. very similar structures)

In [None]:
# but the tmalign object doesn't actually move the pose....
# Print the same residue-1 CA coordinates again, for comparison with the
# baseline printed before TMalign.apply() was run.
print pose1.residue(1).xyz('CA')
print pose2.residue(1).xyz('CA')

## Looking into the Rosetta code base

From /src/protocols/hybridization/HybridizeProtocol.cc, lines 1439 to 1466
demonstrates how to run tmalign and then do the partial threading to actually move the pose

```c++
// get tmalign sequence mapping
		TMalign tm_align;
		std::string seq_pose, seq_ref, aligned;
		int reval = tm_align.apply(pose, ref_pose, residue_list, ref_residue_list);
		if ( reval != 0 ) continue;  // TO DO: remove this domain

		tm_align.alignment2AtomMap(pose, ref_pose, residue_list, ref_residue_list, n_mapped_residues, atom_map);
		tm_align.alignment2strings(seq_pose, seq_ref, aligned);

		using namespace ObjexxFCL::format;
		Size norm_length = residue_list.size() < ref_residue_list.size() ? residue_list.size():ref_residue_list.size();
		TR << "Align domain with TMscore of " << F(8,3,tm_align.TMscore(norm_length)) << std::endl;
		TR << seq_pose << std::endl;
		TR << aligned << std::endl;
		TR << seq_ref << std::endl;

		if ( n_mapped_residues < 6 ) continue;  // TO DO: remove this domain

		// add in ligand residues
		for ( core::Size i=last_protein_residue+1; i<=pose.total_residue(); ++i ) {
			core::Size res_controlling_i = ligres_map[i];
			for ( core::Size iloop=1; iloop<=domains[i_domain].num_loop(); ++iloop ) {
				if ( res_controlling_i < domains[i_domain][iloop].start() || res_controlling_i > domains[i_domain][iloop].stop() ) continue;
				residue_list.push_back(i);
			}
		}

		partial_align(pose, ref_pose, atom_map, residue_list, true, aln_cutoffs, min_coverage);
```

So I need to run this method first:

tm.alignment2AtomMap(pose, ref_pose, residue_list, ref_residue_list, n_mapped_residues, atom_map);

which needs an atom map, set up in the Rosetta source as shown below:

1413    core::id::AtomID_Map< core::id::AtomID > atom_map;

1414    core::pose::initialize_atomid_map( atom_map, pose, core::id::BOGUS_ATOM_ID )

In [None]:
from rosetta.core.id import AtomID_Map_T_core_id_AtomID_T
# Python counterpart of the C++ declaration quoted above:
#   core::id::AtomID_Map< core::id::AtomID > atom_map;
atommap = AtomID_Map_T_core_id_AtomID_T()

In [None]:
#rosetta.core.pose.initialize_atomid_map_AtomID?
from rosetta.core.pose import initialize_atomid_map
# Initialize the atom map for pose1. The C++ call quoted above also passes
# core::id::BOGUS_ATOM_ID as a third argument; here it is omitted.
# NOTE(review): confirm the two-argument form fills the map with the same default.
initialize_atomid_map( atommap, pose1 )

In [None]:
# Build two identical residue-index lists holding 1 .. shortest-1
# (`shortest` was computed in an earlier cell as min(n_residue()+1)).
# NOTE(review): neither list is passed to any call in the cells shown here;
# alignment2AtomMap is invoked without residue lists.
pose1list = utility.vector1_Size()
pose2list = utility.vector1_Size()
for i in xrange(1,shortest):
    pose1list.append(i)
    pose2list.append(i)
print type(pose1list)
#print pose2list

In [None]:
# Fill `atommap` from the alignment held by `tm`. This uses a three-argument
# form, unlike the six-argument C++ call quoted above (no residue lists and no
# n_mapped_residues).
tm.alignment2AtomMap(pose1,pose2, atommap )

In [None]:
# Empty vector of reals that will hold the alignment cutoffs passed to partial_align.
aln_cutoff = rosetta.utility.vector1_Real()

In [None]:
# Populate the cutoff vector with four decreasing values.
# NOTE(review): units and exact meaning of these cutoffs are defined by
# partial_align -- confirm against the Rosetta source.
for i in [2,1.5,1.0,.5]:
    aln_cutoff.append(i)

In [None]:
# Value passed as the min_coverage argument of partial_align below.
min_coverage = .2

In [None]:
# Apply the TMalign-derived atom map; per the notes above, this is the step
# that actually moves coordinates. Unlike the C++ call quoted earlier, no
# residue_list argument is passed here.
rosetta.protocols.hybridization.partial_align(pose1,pose2, atommap, True, aln_cutoff, min_coverage)

In [None]:
# check for movement
# Compare against the residue-1 CA coordinates printed earlier, then rescore pose1.
print pose1.residue(1).xyz('CA')
print pose2.residue(1).xyz('CA')
sfxn(pose1)

Now let's put that into a function so that we can use it!!!

In [None]:
def tmalign( pose, ref_pose ):
    """Superimpose ``pose`` onto ``ref_pose`` using Rosetta's TMalign.

    ``TMalign.apply`` only computes the alignment and leaves the coordinates
    untouched, so the alignment is converted into an atom map and handed to
    ``partial_align``, which performs the actual superposition.

    Args:
        pose: Pose to be aligned (passed first to every Rosetta call here).
        ref_pose: Pose used as the alignment reference.

    Returns:
        None. The TM-score and the residue-1 CA coordinates of both poses
        (before and after the alignment) are printed.

    Note:
        Ligands, if present, are not moved.
    """
    print('Running tmalign on poses')
    print('Starting xyz coords for res1 CA pose and reference pose')
    print(pose.residue(1).xyz('CA'))
    print(ref_pose.residue(1).xyz('CA'))

    # Construct the TMalign object and compute the alignment.
    aligner = rosetta.protocols.hybridization.TMalign()
    aligner.apply(pose, ref_pose)

    # TMscore() is normalized by a residue count; use the real length of the
    # longer pose (the earlier n_residue()+1 was off by one).
    norm_length = max(pose.n_residue(), ref_pose.n_residue())
    print('TMScore = %s ' % aligner.TMscore(norm_length))

    # Pull the atom mapping from TMalign. TMalign makes its own alignment, so
    # that mapping is what drives the "partial" threading below.
    atommap = rosetta.core.id.AtomID_Map_T_core_id_AtomID_T()
    rosetta.core.pose.initialize_atomid_map( atommap, pose )
    aligner.alignment2AtomMap( pose, ref_pose, atommap )

    # Parameters for partial_align.
    aln_cutoff = rosetta.utility.vector1_Real()
    for cutoff in [2,1.5,1.0,.5]:
        aln_cutoff.append(cutoff)
    min_coverage = .2

    # Operate on the arguments, not on the notebook globals pose1/pose2.
    rosetta.protocols.hybridization.partial_align(pose, ref_pose, atommap, True, aln_cutoff, min_coverage)

    print('Hopefully these coordinates have changed, use the PyMolMover / Observer to watch in realtime')
    print(pose.residue(1).xyz('CA'))
    print(ref_pose.residue(1).xyz('CA'))