Skip to content

Commit

Permalink
Seq: add ENTRNA - a framework to predict RNA foldability
Browse files Browse the repository at this point in the history
  • Loading branch information
mmagnus committed Aug 10, 2019
1 parent afa2a91 commit 662947d
Show file tree
Hide file tree
Showing 2 changed files with 263 additions and 2 deletions.
198 changes: 198 additions & 0 deletions notes/ENTRNA - a framework to predict RNA foldability.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ENTRNA - a framework to predict RNA foldability"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Su, C., Weir, J. D., Zhang, F., Yan, H., & Wu, T. (2019). \n",
"**ENTRNA: a framework to predict RNA foldability.** \n",
"BMC Bioinformatics, 20(1), 1–11. http://doi.org/10.1186/s12859-019-2948-5"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from rna_tools.Seq import RNASequence"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"free energy: -27.200000\n",
"foldability: 0.828365\n"
]
}
],
"source": [
"seq = 'acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg'\n",
"ss = '((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))'\n",
"\n",
"seq = RNASequence(seq)\n",
"seq.ss = ss\n",
"fe = seq.eval()\n",
"print('free energy: %f' % fe)\n",
"fa = seq.get_foldability()\n",
"print('foldability: %f' % fa)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"free energy: -11.800000\n",
"[17, 16, 15, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 81, 80, 79, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 0, 0, 0, 0, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 0, 0, 28, 27, 26, 25]\n",
"cd /Users/magnus/work/evoClustRNA/rna-foldability/ENTRNA/ && python -W ignore ENTRNA_predict.py --seq_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpIARsa2 --str_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpfGmLP4\n",
"\n",
"\n",
"\n",
"\n",
"===============================================================\n",
"\n",
"\n",
"RNA sequence:\n",
"acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg\n",
"RNA secondary structure:\n",
"[17, 16, 15, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 81, 80, 79, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 0, 0, 0, 0, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 0, 0, 28, 27, 26, 25]\n",
"This is pseudoknot-free RNA\n",
"Foldability: 0.8283652197473068\n",
"\n",
"\n",
"===============================================================\n",
"\n",
"\n",
"\n",
"foldability: 0.828365\n"
]
}
],
"source": [
"seq = 'acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg'\n",
"ss = '((((..[[[[[..)))).......((((....]]]]]....(((((((((((((((....)))))))))))))))..))))'\n",
"\n",
"seq = RNASequence(seq)\n",
"seq.ss = ss\n",
"fe = seq.eval()\n",
"print('free energy: %f' % fe)\n",
"fa = seq.get_foldability(verbose=True)\n",
"print('foldability: %f' % fa)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"free energy: -13.500000\n",
"foldability: 0.055185\n"
]
}
],
"source": [
"seq = RNASequence(\"GGCAGGGGCGCUUCGGCCCCCUAUGCC\")\n",
"seq.ss = \"((((((((.((....)).)))).))))\"\n",
"\n",
"fe = seq.eval()\n",
"print('free energy: %f' % fe)\n",
"fa = seq.get_foldability()\n",
"print('foldability: %f' % fa)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"free energy: 100000.500000\n"
]
}
],
"source": [
"seq = RNASequence(\"GGCAGGGGCGCUUCGGCCCCCUAUGCC\")\n",
"ss = \"..............()...........\"\n",
"fe = seq.eval(ss=ss)\n",
"print('free energy: %f' % fe)"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "30px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
67 changes: 65 additions & 2 deletions rna_tools/Seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
import tempfile
import sys


from rna_tools.rna_tools_config import CONTEXTFOLD_PATH, RNASTRUCTURE_PATH
from SecondaryStructure import parse_vienna_to_pairs
from rna_tools.rna_tools_config import CONTEXTFOLD_PATH, RNASTRUCTURE_PATH, ENTRNA_PATH


class MethodNotChosen(Exception):
Expand Down Expand Up @@ -139,6 +139,69 @@ def eval(self, ss='', no_dangling_end_energies=True, verbose=False):
return float(self.ss_log.strip().split(' ')[-1].replace('(','').replace(')', ''))


def get_foldability(self, ss='', verbose=False):
    """Get foldability of this sequence folded into a secondary structure (ENTRNA).

    Steps:

    - parse SS (dot-bracket) into a list of base-pair partners,
    - write the sequence and that list into two temporary files,
    - run ``ENTRNA_predict.py`` on them and read the ``Foldability: `` line
      from its output (calculate foldability).

    Configuration:

    - Set ENTRNA_PATH to the folder where ENTRNA_predict.py is.

    Cmd: python ENTRNA_predict.py --seq_file pseudoknotted_seq.txt --str_file pseudoknotted_str.txt

    Args:
        ss (str): secondary structure in dot-bracket notation; if non-empty
            it overwrites ``self.ss`` before the calculation.
        verbose (bool): if True, print the base-pair list, the equivalent
            shell command and the full ENTRNA output.

    Returns:
        float: the value found after ``Foldability: `` in the ENTRNA output,
        or ``-1`` if the output contains no such line.

    Raises:
        ValueError: if the parentheses in the secondary structure are unbalanced.
        subprocess.CalledProcessError: if ENTRNA exits with a non-zero status.

    Su, C., Weir, J. D., Zhang, F., Yan, H., & Wu, T. (2019). ENTRNA: a framework to predict RNA foldability. BMC Bioinformatics, 20(1), 1–11. http://doi.org/10.1186/s12859-019-2948-5
    """
    import os  # local import: only needed here (path expansion, temp file cleanup)

    if ss:
        self.ss = ss

    # parse SS into base-pairs
    def dp_to_bp(dp):
        """Return a list where item i is the 1-based partner of position i (0 = unpaired).

        NOTE(review): only '(' and ')' are interpreted; any other character,
        including pseudoknot brackets such as '[' and ']', is left unpaired.
        """
        # plain Python ints, so that str() of the list is always "[17, 16, ...]"
        partners = [0] * len(dp)
        open_positions = []
        for i, char in enumerate(dp):
            if char == "(":
                open_positions.append(i)
            elif char == ")":
                if not open_positions:
                    raise ValueError(
                        "Unbalanced secondary structure: unmatched ')' at position %i" % (i + 1))
                j = open_positions.pop()
                partners[i] = j + 1
                partners[j] = i + 1
        if open_positions:
            raise ValueError(
                "Unbalanced secondary structure: unmatched '(' at position %i" % (open_positions[-1] + 1))
        return partners

    def write_temp(content):
        """Write content to a new named temporary file and return its path."""
        handle = tempfile.NamedTemporaryFile(mode='w', delete=False)
        try:
            handle.write(content)
        finally:
            handle.close()
        return handle.name

    bp = dp_to_bp(self.ss)
    if verbose: print(bp)

    # the shell used to expand "~" and "$VAR" in ENTRNA_PATH; keep supporting that
    entrna_dir = os.path.expanduser(os.path.expandvars(ENTRNA_PATH))

    str_fn = None
    seq_fn = None
    try:
        str_fn = write_temp(str(bp))
        seq_fn = write_temp(self.seq)

        # shell-style command line, kept only for the verbose output
        cmd = "cd " + ENTRNA_PATH + " && python -W ignore ENTRNA_predict.py --seq_file " + seq_fn + " --str_file " + str_fn
        # -W to silence warnings See [1]
        # argument list + cwd instead of a shell string, so paths with spaces
        # or shell metacharacters are passed through untouched
        log = subprocess.check_output(
            ["python", "-W", "ignore", "ENTRNA_predict.py",
             "--seq_file", seq_fn, "--str_file", str_fn],
            cwd=entrna_dir).decode()
    finally:
        # the temporary files are created with delete=False, so remove them here
        for fn in (str_fn, seq_fn):
            if fn is not None and os.path.exists(fn):
                os.remove(fn)

    if verbose:
        print(cmd)
        print(log)
    for l in log.split('\n'):
        if l.startswith('Foldability: '):
            return float(l.replace('Foldability: ', ''))
    return -1
## [1]:
## /Users/magnus/work/evoClustRNA/rna-foldability/ENTRNA/util/pseudoknot_free.py:22: SettingWithCopyWarning:
## A value is trying to be set on a copy of a slice from a DataFrame.
## Try using .loc[row_indexer,col_indexer] = value instead

## See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
## df_v1['length'] = df_v1['seq'].apply(lambda x:len(x))
## /home/magnus/miniconda2/lib/python2.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
## FutureWarning)
## cd /Users/magnus/work/evoClustRNA/rna-foldability/ENTRNA/ && python ENTRNA_predict.py --seq_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUORegp --str_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp1ERCcD

def predict_ss(self, method="RNAfold", constraints='', shapefn='', verbose=0):
"""Predict secondary structure of the seq.
Expand Down

0 comments on commit 662947d

Please sign in to comment.