# Coiled Coil Descriptors Analysis

In [None]:
import mdtraj as md
import numpy
import pandas as pd
import ampal
import nglview as nv
import matplotlib.pyplot as plt
from contextlib import redirect_stdout

In [None]:
import isambard
import isambard.specifications as specs
import isambard.modelling as modelling

from ampal.analyse_protein import reference_axis_from_chains, alpha_angles, crick_angles,polymer_to_reference_axis_distances, polypeptide_vector
from ampal.pseudo_atoms import Primitive
from ampal.geometry import is_acute

In [None]:
class PACCAnalysis:
    """Parametric analysis of a coiled coil.

    Currently only functions for parallel and antiparallel (ap) blunt-ended
    assemblies.

    Parameters
    ----------
    coiledcoil: Assembly
        Must contain only the coiled coil polypeptides, which need to be of
        equal length.

    Raises
    ------
    ValueError
        If the chains in ``coiledcoil`` do not all have the same length.
    """

    def __init__(self, coiledcoil):
        chain_lengths = {len(chain) for chain in coiledcoil}
        if len(chain_lengths) != 1:
            raise ValueError('The helices of the coiled coil must be of equal length.')
        (self.cc_len,) = chain_lengths
        self.cc = coiledcoil
        self.ra = reference_axis_from_chains(self.cc)
        # Same axis with the coordinate order reversed, used for chains that
        # run in the opposite direction to the first chain.
        self.ra_flipped = Primitive.from_coordinates(numpy.flipud(self.ra.coordinates))
        lead_vec = polypeptide_vector(self.cc[0])

        for chain in self.cc:
            # An acute angle to the first chain means "same direction".
            if is_acute(lead_vec, polypeptide_vector(chain)):
                axis = self.ra
            else:
                axis = self.ra_flipped
            # Return values are ignored; gather_layer_info reads the results
            # back from each residue's ``tags`` (presumably written by these
            # calls - confirm against the ampal documentation).
            polymer_to_reference_axis_distances(chain, axis)
            alpha_angles(chain, axis)
            crick_angles(chain, axis)

        self.radii_layers = []
        self.alpha_layers = []
        self.ca_layers = []
        self.gather_layer_info()

    def gather_layer_info(self):
        """Append the tagged radius, alpha and Crick angle of every layer.

        A layer is the list of values found at one residue index across all
        chains. The three ``*_layers`` lists are extended in place.
        """
        tag_targets = (
            ('distance_to_ref_axis', self.radii_layers),
            ('alpha_angle_ref_axis', self.alpha_layers),
            ('crick_angle_ref_axis', self.ca_layers),
        )
        for position in range(len(self.cc[0])):
            for tag_name, layers in tag_targets:
                layers.append([chain[position].tags[tag_name] for chain in self.cc])

    @staticmethod
    def calc_average_parameters(parameter_layers):
        """Average each layer, then average the non-zero layer means.

        A layer whose first entry is falsy (``None`` or 0) is reported as 0,
        and every layer mean that is falsy is left out of the overall mean.

        Returns
        -------
        mean_layers: [float]
            List of values averaged by index.
        overall_mean: float
            Mean of the averaged values.
        """
        mean_layers = []
        for layer in parameter_layers:
            if layer[0]:
                mean_layers.append(numpy.mean(layer))
            else:
                mean_layers.append(0)
        populated = [value for value in mean_layers if value]
        return mean_layers, numpy.mean(populated)

    def heptad_register(self):
        """Return the best-fitting register string and its fit quality.

        The final layer mean is dropped before fitting. The second element of
        the result is the (mean, std dev) pair of the best fit.
        """
        register_template = 'abcdefg' * (self.cc_len // 7 + 2)
        layer_means, _ = self.calc_average_parameters(self.ca_layers)
        best_fit = fit_heptad_register(layer_means[:-1])[0]
        start = best_fit[0]
        return register_template[start:start + self.cc_len], best_fit[1:]

    def generate_report(self):
        """Build a text report on the coiled coil parameters.

        Returns
        -------
        report: str
            A string detailing the register and parameters of the coiled coil.
        """
        def cell(value):
            # Signed, three decimals, right-aligned in a nine character column.
            return '{:+3.3f}'.format(value).rjust(9)

        # Register section
        report = ['Register Assignment\n-------------------']
        register, fit = self.heptad_register()
        report.append('{}\n{}\n'.format(register, '\n'.join(self.cc.sequences)))
        report.append('Fit Quality - Mean Angular Discrepancy = {:3.2f} (Std Dev = {:3.2f})\n'.format(*fit))

        # Per-residue parameter table
        report.append('Coiled Coil Parameters\n----------------------')
        (radius_means, radius_ave), (alpha_means, alpha_ave), (crick_means, crick_ave) = (
            self.calc_average_parameters(layers)
            for layers in (self.radii_layers, self.alpha_layers, self.ca_layers)
        )
        report.append('Res#'.rjust(5) + 'Radius'.rjust(9) + 'Alpha'.rjust(9) + 'CrAngle'.rjust(9))
        for index, radius_mean in enumerate(radius_means):
            report.append(
                '{:>5}'.format(index + 1)
                + cell(radius_mean)
                + cell(alpha_means[index])
                + cell(crick_means[index])
            )

        # Assembly-wide average
        report.append('-' * 32)
        report.append('  Ave' + cell(radius_ave) + cell(alpha_ave) + cell(crick_ave))

        # Standard deviation; the angle columns leave out the final layer.
        report.append(
            'Std D'
            + cell(numpy.std(radius_means))
            + cell(numpy.std(alpha_means[:-1]))
            + cell(numpy.std(crick_means[:-1]))
        )
        return '\n'.join(report)
    
def fit_heptad_register(crangles):
    """Attempts to fit a heptad repeat to a set of Crick angles.

    Each of the seven possible register offsets is scored by the mean
    angular discrepancy between the observed angles and the ideal heptad
    angles. Discrepancies are measured the short way round the circle, so
    350 degrees and 10 degrees are 20 degrees apart, not 340.

    Parameters
    ----------
    crangles: [float]
        A list of average Crick angles for the coiled coil, in degrees.
        Signed angles are accepted; they are mapped onto [0, 360).

    Returns
    -------
    fit_data: [(int, float, float)]
        (offset, mean discrepancy, std dev of discrepancy) for each heptad
        offset, sorted so the best fit (smallest mean) comes first.
    """
    observed = [angle % 360.0 for angle in crangles]
    sector = 360.0 / 7.0
    # Centres of the seven equal sectors of the circle.
    hept_p = [k * sector + sector / 2.0 for k in range(7)]
    # Ideal angle for positions a-g in sequence order.
    ideal_crangs = [hept_p[k] for k in (0, 2, 4, 6, 1, 3, 5)]
    # Two extra repeats so every offset still covers all observed angles.
    ideal_crang_list = ideal_crangs * (len(observed) // 7 + 2)
    fitting = []
    for offset in range(7):
        ang_diffs = []
        for obs, ideal in zip(observed, ideal_crang_list[offset:]):
            raw = abs(ideal - obs)
            # Wrap around: never report more than 180 degrees of separation.
            ang_diffs.append(min(raw, 360.0 - raw))
        fitting.append((offset, numpy.mean(ang_diffs), numpy.std(ang_diffs)))
    return sorted(fitting, key=lambda fit: fit[1])


__author__ = 'Christopher W. Wood'

Specifying the working and topology directories:

In [None]:
# Directory holding the trajectory and receiving all generated files.
# Paths below are built by plain string concatenation, so keep the trailing '/'.
work_dir = '/home/eva/Documents/gbsa-sims/sims-2/CC-Hex/3r3k_T/GLH/'

In [None]:
# Directory holding the topology PDB passed to md.load.
# Joined by string concatenation, so keep the trailing '/'.
top_dir = '/home/eva/Documents/structures/structures-gbn-ff99SB/CC-Hex/3r3k_T/GLH/'

Loading a trajectory with a stride of 100, to make analysis with ampal feasible

The topology used needs to be a new .pdb file that includes the number of chains (prepared with parmed and ambpdb)

In [None]:
# Load the DCD trajectory with the PDB topology, passing stride = 100 to md.load.
traj = md.load(work_dir+'3r3k_T_GLH_output.dcd', top = top_dir+'3r3k_T_GLH_new_top.pdb', stride = 100)

In [None]:
traj

In [None]:
traj.topology

In [None]:
# Write the loaded trajectory to a PDB file in work_dir; it is read back as text below.
traj.save_pdb(work_dir+'3r3k_T_GLH_traj.pdb')

Creating the dataframe used to reformat the PDB file so that it is readable by ampal

In [None]:
# Column labels given to the whitespace-separated fields of each PDB line
# (passed as names= when the file is read into a dataframe).
colnames = ['atom', 'atomnumber', 'atomtype', 'resname', 'chain', 'resnumber', 'x', 'y', 'z', 'occupancy', 'beta', 'element']

In [None]:
# Read the PDB as whitespace-separated text, keeping every field as a string.
# The separator is a raw string: '\s' in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
df = pd.read_csv(work_dir+'3r3k_T_GLH_traj.pdb', names=colnames, dtype=object, sep=r'\s+')

In [None]:
# Relabel every record whose residue name is ACE, NHE or GLH as HETATM.
# NOTE(review): presumably so ampal does not treat them as standard residues - confirm.
hetatm_resnames = ['ACE', 'NHE', 'GLH']
df.loc[df.resname.isin(hetatm_resnames), 'atom'] = 'HETATM'

In [None]:
# Drop the CONECT records; every other row is kept.
df = df[df['atom'] != 'CONECT']

In [None]:
# Fill missing fields with the string '0' so the int()/float() conversions
# applied when the file is rewritten below do not fail on NaN.
df = df.fillna('0')

In [None]:
# One list per dataframe column, in colnames order:
# attributes[k] holds every value of column colnames[k].
attributes = [list(df[colnames[k]]) for k in range(df.columns.size)]

Additional list for the charge - needed for the correct PDB format

In [None]:
# Empty charge field for every row of the dataframe.
charge = [''] * len(df['atom'])

In [None]:
# Rewrite every row as a fixed-width PDB record so ampal can parse the file.
# The context manager closes the file even if a numeric conversion fails.
pdb_record = '{:6s}{:5d} {:^4s} {:3s} {:1s}{:4d}    {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f}          {:>2s}{:2s}\n'

with open(work_dir+'traj_for_ampal.pdb', 'w') as f:
    # zip(*attributes, charge) walks the twelve column lists and the charge
    # list row by row, in colnames order.
    for fields in zip(*attributes, charge):
        (record, serial, atom_name, res_name, chain_id, res_seq,
         x, y, z, occupancy, b_factor, element, atom_charge) = fields
        f.write(pdb_record.format(
            record, int(serial), atom_name, res_name, chain_id, int(res_seq),
            float(x), float(y), float(z), float(occupancy), float(b_factor),
            element, atom_charge))

In [None]:
# Parse the rewritten PDB with ampal; the result is iterated frame by frame below.
myprotein=ampal.load_pdb(work_dir+'traj_for_ampal.pdb')

In [None]:
myprotein

In [None]:
def show_ball_and_stick(ampal):
    """Return an nglview view of the structure drawn as ball-and-stick.

    The view is created from the object's ``pdb`` text; the ball-and-stick
    representation is added and the cartoon representation removed.

    Note: inside this function the parameter name ``ampal`` shadows the
    imported ``ampal`` module.
    """
    widget = nv.show_text(ampal.pdb)
    widget.add_ball_and_stick()
    widget.remove_cartoon()
    return widget

In [None]:
#show_ball_and_stick(myprotein)

In [None]:
myprotein[0].sequences

In [None]:
# Write one PACC report per frame, each headed by 'MODEL <index>'.
# The file is opened once in 'w' mode: appending would duplicate every
# report when this cell is re-run, and the parser below would then read
# the duplicates as extra frames.
with open(work_dir+'pacc-report.txt', 'w') as out:
    for indx, frame in enumerate(myprotein):
        instance = PACCAnalysis(frame)
        print('MODEL '+str(indx)+'\n'+instance.generate_report()+'\n', file=out)

In [None]:
# First chain of the first frame; its length fixes the number of residues.
aa = myprotein[0][0]
n_residues = len(aa)
# Residue numbers as strings ('1'..'N'), used to recognise table rows.
resids = [str(number) for number in range(1, n_residues + 1)]

# One list of per-frame values for each residue.
radius = [[] for _ in range(n_residues)]
alpha = [[] for _ in range(n_residues)]
crangle = [[] for _ in range(n_residues)]

with open(work_dir+'pacc-report.txt') as f:
    for line in f:
        fields = line.split()
        # Skip blank lines and anything that does not start with a residue number.
        if not fields or fields[0] not in resids:
            continue
        row = int(fields[0]) - 1
        radius[row].append(float(fields[1]))
        alpha[row].append(float(fields[2]))
        crangle[row].append(float(fields[3]))

In [None]:
# Time axis for the per-frame plots: values from 0.0 up to (not including) 200.0
# in steps of 0.4, labelled ns in the plots.
# NOTE(review): len(t) must equal the number of frames parsed from
# pacc-report.txt, otherwise ax.plot(t, ...) fails - confirm against the trajectory.
t = numpy.arange(0.0, 200.0, 0.4)

## Radius analysis

In [None]:
# Radius of the first residue over time.
fig, ax = plt.subplots()
ax.plot(t, radius[0])
ax.set_xlabel('t (ns)')
ax.set_ylabel('radius (A)')
ax.set_title('Radius - residue 1')
ax.grid()
fig.savefig(work_dir+'analysis/radius-res1.png')
plt.show()

In [None]:
# Mean and standard deviation of the radius over all frames, per residue.
mean_radius = [numpy.mean(samples) for samples in radius]
std_radius = [numpy.std(samples) for samples in radius]

In [None]:
# Mean radius per residue with standard-deviation error bars.
# Residues are numbered from 1, matching the report and the 'residue 1' plot
# titles; the axis previously started at 0.
residue_numbers = range(1, len(aa) + 1)

fig, ax = plt.subplots()
ax.plot(residue_numbers, mean_radius, color = 'seagreen')
ax.set(xlabel='residue number', ylabel='radius (A)', title='CC-Hex*-T, all E protonated - replicate 1')
ax.set_ylim([5.0, 15.0])

ax.errorbar(residue_numbers, mean_radius, yerr = std_radius, fmt = '.k', elinewidth = 0.6, capsize = 1)

fig.savefig(work_dir+'analysis/radius-per-res.png')
plt.show()

## Alpha angle analysis

In [None]:
# Alpha angle of the first residue over time.
fig, ax = plt.subplots()
ax.plot(t, alpha[0])
ax.set_xlabel('t (ns)')
ax.set_ylabel('alpha angle (deg)')
ax.set_title('Alpha angle - residue 1')
ax.grid()
fig.savefig(work_dir+'analysis/alpha-res1.png')
plt.show()

In [None]:
# Mean and standard deviation of the alpha angle over all frames, per residue.
mean_alpha = [numpy.mean(samples) for samples in alpha]
std_alpha = [numpy.std(samples) for samples in alpha]

In [None]:
# Mean alpha angle per residue with standard-deviation error bars.
# Residues are numbered from 1, matching the report and the 'residue 1' plot
# titles; the axis previously started at 0.
residue_numbers = range(1, len(aa) + 1)

fig, ax = plt.subplots()
ax.plot(residue_numbers, mean_alpha, color = 'purple')
ax.set(xlabel='residue number', ylabel='alpha angle (deg)', title='CC-Hex*-T, all E protonated - replicate 1')
ax.set_ylim([0.0, 100.0])

ax.errorbar(residue_numbers, mean_alpha, yerr = std_alpha, fmt = '.k', elinewidth = 0.6, capsize = 1)

fig.savefig(work_dir+'analysis/alpha-per-res.png')
plt.show()

## Crick Angle Analysis

In [None]:
# Crick angle of the first residue over time.
fig, ax = plt.subplots()
ax.plot(t, crangle[0])
ax.set_xlabel('t (ns)')
ax.set_ylabel('Crick angle (deg)')
ax.set_title('Crick angle - residue 1')
ax.grid()
fig.savefig(work_dir+'analysis/crangle-res1.png')
plt.show()

In [None]:
# Mean and standard deviation of the Crick angle over all frames, per residue.
mean_crangle = [numpy.mean(samples) for samples in crangle]
std_crangle = [numpy.std(samples) for samples in crangle]

In [None]:
# Mean Crick angle per residue with standard-deviation error bars.
# Residues are numbered from 1, matching the report and the 'residue 1' plot
# titles; the axis previously started at 0.
residue_numbers = range(1, len(aa) + 1)

fig, ax = plt.subplots()
ax.plot(residue_numbers, mean_crangle, color = 'royalblue')
ax.set(xlabel='residue number', ylabel='Crick angle (deg)', title='CC-Hex*-T, all E protonated - replicate 1')
ax.set_ylim([-200.0, 200.0])

ax.errorbar(residue_numbers, mean_crangle, yerr = std_crangle, fmt = '.k', elinewidth = 0.6, capsize = 1)

fig.savefig(work_dir+'analysis/crangle-per-res.png')
plt.show()