In [None]:
# Pipeline parameters for this notebook.
# NOTE(review): these look like placeholders that the pipeline runner overwrites
# at execution time - confirm. As written the later cells cannot run: `upstream`
# is indexed like a dict and `product` is None.

# Upstream task name; later cells read upstream['01-align']['list'].
upstream = ['01-align']
# Output locations; later cells read product['data'] and product['chains'].
product = None
# Reference structure handed to pdy.parsePDB when building the reference centers.
reference = None

# 02 - Rechain

## Summary

This notebook uses the reference structure defined in the pipeline to rename every chain after the closest reference chain.

In [None]:
import prody as pdy
import numpy as np
import json

Reading output from the previous step.

In [None]:
# Show what the previous step handed over.
print(upstream)

# The upstream 'list' product holds one structure path per line; strip the
# surrounding whitespace (including the trailing newline) from each entry.
with open(upstream['01-align']['list']) as list_file:
    files = [line.strip() for line in list_file]

# Preview of the first few entries.
files[:10]

We use the reference to establish the new chain names. The reference structure should have a chain for every possible
chain that might show up.

In [None]:
# Load the reference and walk its chains once, keeping them in a list so the
# identifiers and the centers are guaranteed to be in the same order.
ref = pdy.parsePDB(reference)
reference_chains = list(ref.getHierView().iterChains())

# ref_chains[i] is the chain identifier whose center is row i of ref_matrix.
ref_chains = [ref_chain.getChid() for ref_chain in reference_chains]
ref_matrix = np.stack([pdy.calcCenter(ref_chain) for ref_chain in reference_chains])
ref_matrix

Rechain function. It assigns each chain the name of the reference chain whose center is closest (minimum-distance criterion). In principle, this should work
because the structures have been aligned in the previous step.

In [None]:
def rechain(target, ref_matrix, chain_ids=None, max_distance=10.0, label=None):
    """Rename every chain of ``target`` after the nearest reference chain.

    For each chain the center (``pdy.calcCenter``) is compared against every
    row of ``ref_matrix``; the chain receives the identifier stored at the
    index of the smallest distance. ``target`` is modified in place.

    Parameters
    ----------
    target
        Object exposing ``iterChains()`` whose chains support ``getChid`` and
        ``setChid`` (the notebook passes the result of ``pdy.parsePDB``).
    ref_matrix
        Array with one reference chain center per row.
    chain_ids
        Sequence of reference chain identifiers, in the same order as the rows
        of ``ref_matrix``. Defaults to the module-level ``ref_chains`` so that
        existing two-argument calls keep working.
    max_distance
        A chain is only accepted if at least one reference center lies closer
        than this value (same units as the coordinates). Defaults to 10.0.
    label
        Name used for the structure in the error message. Defaults to
        ``target.getTitle()``.

    Returns
    -------
    list
        The new chain identifiers, in iteration order of ``target``'s chains.

    Raises
    ------
    RuntimeError
        If some chain has no reference center within ``max_distance``.
    """
    if chain_ids is None:
        # Backward-compatible fallback to the notebook-level list.
        chain_ids = ref_chains

    out_chains = []
    for chain in target.iterChains():
        distances = np.linalg.norm(pdy.calcCenter(chain) - ref_matrix, axis=1)

        if not np.any(distances < max_distance):
            # Resolve the label lazily and from the target itself, so the
            # message never depends on a loop variable living in the caller.
            name = target.getTitle() if label is None else label
            raise RuntimeError(
                f"unable to find a matching chain for {name} - {chain.getChid()}"
            )

        nearest = distances.argmin()
        chain.setChid(chain_ids[nearest])
        out_chains.append(chain_ids[nearest])
    return out_chains


Rechaining

In [None]:
# Maps each structure name (input path without the '.aligned.pdb' suffix) to the
# list of chain identifiers assigned to it.
chains_out = dict()

# The context manager guarantees the list of written files is flushed and
# closed even if a structure fails to parse or write part-way through.
with open(product['data'], 'w') as output:
    for f in files:
        target = pdy.parsePDB(f)

        try:
            chains = rechain(target, ref_matrix)
        except Exception as err:
            # Best-effort: skip structures that cannot be matched, but report
            # why, and let KeyboardInterrupt / SystemExit propagate.
            print(f"unable to match {f}: {err}")
            continue

        chains_out[f.replace('.aligned.pdb', '')] = chains

        # Write the renamed structure next to the aligned one and record it.
        rechained_path = f.replace('aligned.pdb', 'rechained.pdb')
        pdy.writePDB(rechained_path, target)
        print(rechained_path, file=output)

with open(product['chains'], 'w') as chains_file:
    json.dump(chains_out, chains_file)