# Basic Python wrapper for IntaRNA
Author: Milad Miladi

License: GPLv3

For details about invoking IntaRNA, please refer to:

https://github.com/BackofenLab/IntARNA


In [11]:
import os
import shlex
from subprocess import Popen, PIPE

import pandas as pd
from Bio import SeqIO

# Fixed command-line options appended to every IntaRNA call.
# Other options that can be added to the string, for example:
#   --seedBP=5 --temperature 22 --qAccW 0 --qAccL 0
INTARNA_ARGS = (
    "-n 10 "
    "--outOverlap=B "
    "--outMode=C"
)

# Directory that contains the IntaRNA binary, e.g. './IntaRNA/src/bin/'.
# Keep it empty for a conda installation.
PATH_INTARNA = ''

# Executable name/path used by the wrapper.
INTARNA_BIN = os.path.join(PATH_INTARNA, 'IntaRNA')

def which(program):
    """Locate an executable, similar to the shell ``which`` command.

    Args:
        program: Either a bare command name, which is looked up in every
            directory of the ``PATH`` environment variable, or a path that
            contains a directory component, which is checked as given.

    Returns:
        The path of the first candidate that is a regular file with execute
        permission, or ``None`` if no candidate qualifies.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # An explicit location was given, so PATH is not consulted.
        return program if is_exe(program) else None

    # Fall back to the platform default search path when PATH is not set,
    # instead of failing with a KeyError.
    search_path = os.environ.get("PATH", os.defpath)
    for path in search_path.split(os.pathsep):
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file

    return None

# Fail as soon as this code is executed if the IntaRNA executable cannot be
# located, rather than later when the wrapper function is first called.
if which(INTARNA_BIN) is None:
    raise RuntimeError("Cannot find IntARNA executable using path: {}".format(INTARNA_BIN))
    

def run_intaRNA_shape(query_fa, target_fa, query_id=None, target_id=None, shape_file=None, out_suffix='',
                      spot_probs_pairs_str=None):
    """Run IntaRNA for one query and one target and return the reported interactions.

    The executable ``INTARNA_BIN`` is called with the fixed options from
    ``INTARNA_ARGS``. IntaRNA is asked to write its results to CSV files with
    relative names (i.e. inside the current working directory):

    * ``intarna-subopts_<target_id>-<query_id><out_suffix>.csv``
    * ``intarna-heatmap_<target_id>-<query_id><out_suffix>.csv`` (``pMinE`` output)
    * ``intarna-spotProbs_<target_id>-<query_id><out_suffix>.csv``
      (only requested when ``spot_probs_pairs_str`` is given)

    Only the first file is loaded and returned; the others stay on disk.

    Args:
        query_fa: Path of a FASTA file with exactly one query sequence.
        target_fa: Path of a FASTA file with exactly one target sequence.
        query_id: Label used in the output file names; defaults to ``'query'``.
        target_id: Label used in the output file names; defaults to ``'target'``.
        shape_file: Optional file passed to IntaRNA via ``--qShape``; when
            given, ``--qShapeMethod Z`` is added as well.
        out_suffix: Text appended to the stem of every output file name.
        spot_probs_pairs_str: Optional spot specification inserted into the
            ``--out=spotProb:<spec>:<file>`` argument.

    Returns:
        A ``pandas.DataFrame`` read from the semicolon-separated
        sub-optimal interactions CSV.

    Raises:
        RuntimeError: If either FASTA file does not hold exactly one record,
            or if IntaRNA exits with a non-zero status, writes anything to
            stderr, or prints ``ERROR`` on stdout.
    """
    if any(len(list(SeqIO.parse(fasta, "fasta"))) != 1 for fasta in (query_fa, target_fa)):
        raise RuntimeError('The wrapper accepts exactly one target and one query per fasta file')
    if query_id is None:
        query_id = 'query'
    if target_id is None:
        target_id = 'target'

    subopt_csv = 'intarna-subopts_{}-{}{}.csv'.format(target_id, query_id, out_suffix)
    heatmap_csv = 'intarna-heatmap_{}-{}{}.csv'.format(target_id, query_id, out_suffix)
    spotprob_csv = 'intarna-spotProbs_{}-{}{}.csv'.format(target_id, query_id, out_suffix)

    # Build an argument vector instead of a shell string: every value is
    # handed to IntaRNA verbatim, so paths or ids containing spaces or shell
    # metacharacters can neither break the call nor be interpreted by a shell.
    argv = [
        INTARNA_BIN,
        '--target={}'.format(target_fa),
        '--query={}'.format(query_fa),
        '--out=pMinE:{}'.format(heatmap_csv),
        '--out={}'.format(subopt_csv),
    ]
    if shape_file is not None:
        argv += ['--qShape', str(shape_file)]
    if spot_probs_pairs_str is not None:
        argv.append('--out=spotProb:{}:{}'.format(spot_probs_pairs_str, spotprob_csv))
    # INTARNA_ARGS is a shell-style option string; split it into tokens.
    argv += shlex.split(INTARNA_ARGS)
    if shape_file is not None:
        argv += ['--qShapeMethod', 'Z']

    # Show a copy-pasteable version of the command that is about to run.
    print(' '.join(shlex.quote(arg) for arg in argv))
    p = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    # Besides the stderr/"ERROR" heuristics, also honour the exit status.
    if p.returncode != 0 or err or b"ERROR" in out:
        raise RuntimeError("Error in calling intaRNA\n{}\n{}\n".format(
            out.decode(errors='replace'), err.decode(errors='replace')))

    print(out)
    return pd.read_csv(subopt_csv, sep=';')

In [14]:
# Create sample test FASTA files: one query (ncRNA1) and one target (mRNA1).

query_fa_str = """>ncRNA1
AGGAUGGGGGAAACCCCAUACUCCUCACACACCAAAUCGCCCGAUUUAUCGGGCUUUUUU
"""
target_fa_str = """>mRNA1
UUUAAAUUAAAAAAUCAUAGAAAAAGUAUCGUUUGAUACUUGUGAUUAUACUCAGUUAUA
CAGUAUCUUAAGGUGUUAUUAAUAGUGGUGAGGAGAAUUUAUGAAGCUUUUCAAAAGCUU
GCUUGUGGCACCUGCAACUCUUGGUCUUUUAGCACCAAUGACCGCUACUGCUAAU
"""

# Write the files with plain Python rather than an IPython shell escape
# (`! echo ... > file`), so the cell does not depend on IPython or on a
# POSIX shell, and the files contain exactly the strings above.
for _fa_path, _fa_text in (
    ('./intarna_query.fa', query_fa_str),
    ('./intarna_target.fa', target_fa_str),
):
    with open(_fa_path, 'w') as _fa_file:
        _fa_file.write(_fa_text)


In [15]:
# Example: predict interactions between the sample query and target written above.
run_intaRNA_shape(query_fa='./intarna_query.fa', target_fa='./intarna_target.fa')

IntaRNA --target=./intarna_target.fa --query=./intarna_query.fa --out=pMinE:intarna-heatmap_target-query.csv --out=intarna-subopts_target-query.csv  -n 10 --outOverlap=B --outMode=C 
b''


Unnamed: 0,id1,start1,end1,id2,start2,end2,subseqDP,hybridDP,E
0,mRNA1,85,95,ncRNA1,21,32,GUGGUGAGGAG&CUCCUCACACAC,(((((((((((&)))))))).))),-11.8783
1,mRNA1,85,95,ncRNA1,21,30,GUGGUGAGGAG&CUCCUCACAC,((.((((((((&)))))))))),-9.12246
2,mRNA1,86,95,ncRNA1,21,31,UGGUGAGGAG&CUCCUCACACA,((((((((((&)))))))).)),-8.81678
3,mRNA1,88,95,ncRNA1,21,28,GUGAGGAG&CUCCUCAC,((((((((&)))))))),-8.78297
4,mRNA1,89,95,ncRNA1,21,27,UGAGGAG&CUCCUCA,(((((((&))))))),-5.57445
