# **POKY TALOS-N Notebook** <br>
by Woonghee Lee, Ph.D. (woonghee.lee@ucdenver.edu; POKY Team, Department of Chemistry, University of Colorado Denver)<br>
January 31, 2023<br>
*This notebook requires POKY BUILD 08/25/2022n or newer for the convenient data import.*<br>
POKY download: https://poky.clas.ucdenver.edu
<br><br>
TALOS-N: Prediction of Protein Backbone and Sidechain Torsion Angles from NMR Chemical Shifts<br><br>
Please cite: *Protein backbone and sidechain torsion angles predicted from NMR chemical shifts using artificial neural networks. <br> Yang Shen and Ad Bax, J. Biomol. NMR, 56, 227-241 (2013).*<br>
https://spin.niddk.nih.gov/bax/software/TALOS-N/

## **Configure the form below, then select "Run all" from the Runtime menu above.**

In [None]:
#@title **Configure TALOS-N Input**
#@markdown To import chemical shifts used in POKY,
#@markdown type **bj**.
#@markdown Then, paste (ctrl/cmd+v) in the box below. <br>
poky_input = "" #@param {type:"string"}

import json

# The form field defaults to an empty string; json.loads('') would fail with
# an opaque "Expecting value" error, so stop early with an actionable message.
if not poky_input.strip():
  raise ValueError(
      'poky_input is empty. Paste the text copied from POKY into the form '
      'field above, then run the notebook again.')

try:
  poky_shift_dict = json.loads(poky_input)
except json.JSONDecodeError as err:
  raise ValueError(
      'poky_input is not valid JSON (%s). Copy the data from POKY again and '
      'paste it unmodified.' % err) from err

# Names consumed by the later cells: the residue sequence and the mapping
# from group/atom label to chemical shift.
sequence = poky_shift_dict['sequence']
groupatomshift_dict = poky_shift_dict['shifts']


In [None]:
#@title **Installation of TALOS-N**
!apt -qq install tcsh > /dev/null
!wget -q https://poky.clas.ucdenver.edu/poky_releases/misc/talosn_install.zip
!unzip -qq talosn_install.zip
!chmod +x install.com
!./install.com > /dev/null

In [None]:
#@title **Preparing TALOS-N Inputs**
# Builds the TALOS-N shift table text from `sequence` and
# `groupatomshift_dict` (both set by the configuration cell) and writes it to
# <job_dir>/talos_in.tab.
import os

line_queue = []
line_queue.append('REMARK POKY generated TALOS input file\n\n')
talos_atoms = ['HA', 'HA2', 'HA3', 'C', 'CA', 'CB', 'CG', 'N', 'H']

# create DATA SEQUENCE
# we only support 1 as the first sequence number here.
iFirstIdx = 1
line_queue.append('DATA FIRST_RESID %d\n\n' % (iFirstIdx))
szSeq = 'DATA SEQUENCE '
for i in range(len(sequence)):
  szSeq = szSeq + sequence[i]
  # a blank after every 10 residues, a new DATA SEQUENCE line after every 50
  if (i+1) % 10 == 0:
    szSeq = szSeq + ' '
  if (i+1) % 50 == 0 and i != len(sequence)-1:
    szSeq = szSeq + '\nDATA SEQUENCE '

line_queue.append(szSeq)
line_queue.append('\n\nVARS   RESID RESNAME ATOMNAME SHIFT\n')
line_queue.append('FORMAT %4d   %1s     %4s      %8.3f\n\n')

# add chemical shifts
for i in range(len(sequence)):
  seq = sequence[i]
  resid = i + 1
  for atom in talos_atoms:
    gas = '%s%d%s' % (seq, resid, atom)
    try:
      cs = groupatomshift_dict[gas]
    except KeyError:
      # No shift for this atom: expected, simply leave it out.
      continue
    # The amide proton is written to the table under the name 'HN'.
    out_atom = 'HN' if atom in ('HN', 'H') else atom
    try:
      line_queue.append('%4d %1s %4s %8.3f\n' % (resid, seq, out_atom, float(cs)))
    except (TypeError, ValueError):
      # Keep the best-effort behaviour (skip the entry) but say so, instead of
      # silently dropping data the user believes was submitted.
      print('Warning: skipping %s, shift value %r is not a number' % (gas, cs))
content = ''.join(line_queue)

job_dir = '/content/talos_run'
talos_in = os.path.join(job_dir, 'talos_in.tab')
# exist_ok makes the cell safe to re-run; makedirs also creates missing parents.
os.makedirs(job_dir, exist_ok=True)
# The context manager closes the file even if the write fails.
with open(talos_in, 'w') as f:
  f.write(content)


In [None]:
#@title **Running TALOS-N**
!export TALOSN_DIR="/content"
!cp /content/tab/talos.obsCS.tab /content/tab/talos_obsCS.tab
!tcsh -c 'cd /content/talos_run; /content/talosn -in talos_in.tab'

In [None]:
#@title **Analyzing/Visualizing TALOS-N SS/S2 Outputs**
import matplotlib.pyplot as plt

def _clamp_unit(value):
  """Clamp value into [0.0, 1.0] so it is always a valid RGB component."""
  return min(1.0, max(0.0, value))


def talosn_analyze_plot(outname):
  """Plot a TALOS-N prediction table as a colored bar chart and save it as SVG.

  The table type is chosen from the file name: a name containing 'S2.tab' is
  plotted in 'S2' mode (last column of each row), a name containing 'SS.tab'
  in 'SS' mode (fifth field upward, sixth field mirrored downward).
  NOTE(review): the fifth/sixth fields are presumably the helix and strand
  propensities of predSS.tab -- confirm against the TALOS-N output format.

  Args:
    outname: path of the table to read; the figure is saved next to it as
      outname + '.svg'.

  Raises:
    ValueError: if the file name matches neither table type, or the file
      contains no parsable data rows.
  """
  if 'S2.tab' in outname:
    mode = 'S2'
  elif 'SS.tab' in outname:
    mode = 'SS'
  else:
    raise ValueError(
        "Cannot tell table type from %r: expected a name containing "
        "'S2.tab' or 'SS.tab'." % outname)

  with open(outname, 'r') as f:
    lines = f.readlines()

  # Data rows follow the line starting with 'VARS'; if there is none, scan the
  # whole file (non-data lines are rejected by the parsing below anyway).
  start = 0
  for idx, line in enumerate(lines):
    if line.startswith('VARS'):
      start = idx
      break

  x, y, y2 = [], [], []
  for line in lines[start:]:
    sp = line.split()
    if len(sp) < 5:
      continue
    # Parse every field before appending anything so a bad row can never
    # leave x, y and y2 with different lengths.
    try:
      resid = int(sp[0])
      if mode == 'S2':
        up, down = float(sp[-1]), None
      else:
        up, down = float(sp[4]), -1. * float(sp[5])
    except (ValueError, IndexError):
      # Header lines (VARS/FORMAT) and malformed rows are skipped.
      continue
    x.append(resid)
    y.append(up)
    if down is not None:
      y2.append(down)

  if not x:
    raise ValueError('No data rows could be read from %r.' % outname)

  # plotting
  xlabel = 'Residue Number'
  if mode == 'S2':
    title = 'Protein Flexibility by RCI-S2'
    ylabel = 'RCI-S2 Order Parameter'
  else:
    title = 'Protein Secondary Structure Prediction by TALOS-N'
    ylabel = 'Propensity'

  fig = plt.figure(mode)
  barlist = plt.bar(x, y)
  plt.title(title)
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)

  # Color each upward bar by its value.
  for bar, value in zip(barlist, y):
    if mode == 'S2':
      # red (low) -> green (high)
      g = _clamp_unit(value**2)
      r = _clamp_unit(1.0 - value**2)
      b = 0.0
    else:
      # white (low) -> green (high)
      r = _clamp_unit(1 - value)
      g = 1.0
      b = _clamp_unit(1 - value)
    bar.set_color((r, g, b))

  if mode == 'SS':
    # Second series drawn downward; values in y2 are already negated.
    barlist = plt.bar(x, y2)
    for bar, value in zip(barlist, y2):
      # white (near zero) -> blue (near -1)
      r = _clamp_unit(1 + value)
      g = _clamp_unit(1 + value)
      b = 1.0
      bar.set_color((r, g, b))

  plt.xlim(x[0], x[-1])
  if mode == 'S2':
    plt.ylim(0, 1.0)
  else:
    plt.ylim(-1.0, 1.0)

  # Save before showing: some backends release the figure once it is shown.
  fig.savefig(outname + '.svg')
  plt.show()

# predSS.tab
talosn_analyze_plot(os.path.join(job_dir, 'predSS.tab'))

# predS2.tab
talosn_analyze_plot(os.path.join(job_dir, 'predS2.tab'))

# generate tbl/aco
!/content/com/talos2dyana.com {job_dir}/pred.tab > {job_dir}/talos.aco
!/content/com/talos2xplor.com {job_dir}/pred.tab > {job_dir}/talos.tbl

In [None]:
#@title ### **Download TALOS-N Results**
from google.colab import files
!cd {job_dir}; cd ..; zip -ru talos_run.zip talos_run
compressed_file = '/content/talos_run.zip'
files.download(compressed_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>