# Supplementary Table 5. Pri-miRNA structures
1. structure information (ct format)  
2. symbolized structure (M-match, L-loop, S-symmetric IL, A-asymmetric IL/bulge, F-flanking segment, X-internal stem loop)

In [1]:
import time

# Stamp the notebook with the reviser's name and the date of this run.
name = 'Seungchan Baek'
today = time.strftime('%Y-%m-%d')
# A parenthesised, single-argument print produces the same output on Python 2
# and Python 3, whereas the bare print statement only parses on Python 2.
print('Last revised by %s at %s.' % (name, today))

Last revised by Seungchan Baek at 2020-10-13.


In [2]:
# Project root. Later cells open 'supplementary/...' and 'resources/...' through
# relative paths, so the working directory is switched here before anything is read.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
home = '/casa/bsc/projects/1_DCS/2004_paper_prep/'
%cd $home

/casa/bsc/projects/1_DCS/2004_paper_prep


In [3]:
from __future__ import division
import pandas as pd
import sys; sys.path.append('/casa/bsc/notebooks/')
from util import *

In [4]:
# header=1 takes the column names from row 1 (0-based) of each file, and the
# first column becomes the index.
s1 = pd.read_csv('supplementary/201012_s1_pri-info.csv', header=1, index_col=0)
s2 = pd.read_csv('supplementary/201012_s2_pri-construct.csv', header=1, index_col=0)
# The index of s1 defines the set of pri-miRNAs iterated over in the cells below.
allpris = s1.index
# Parenthesised single-argument print: same output on Python 2, valid on Python 3.
print(len(allpris))

1881


### 1. structure information (ct format)

In [5]:
def get_ct_info(mir):
    """Read 'supplementary/structures/<mir>.ct' and return its records as tuples.

    The first line of the file is treated as a header and skipped. For every
    remaining non-blank line, whitespace-separated fields 6 and 5 (1-based) are
    converted to int and returned as ``(field6, field5)``; callers in this
    notebook use the pair as ``(position, pairing partner)``, with a partner
    of 0 meaning unpaired.

    Parameters
    ----------
    mir : str
        Name used to build the ct file path.

    Returns
    -------
    list of tuple of int
        One tuple per record, in file order.
    """
    ct = 'supplementary/structures/%s.ct' % mir
    # The context manager closes the handle even if reading fails.
    with open(ct, 'rt') as handle:
        lines = handle.read().split('\n')[1:]
    ctinfo = []
    for line in lines:
        fields = line.split()
        # Skip blank lines (e.g. the empty string after a trailing newline) rather
        # than blindly dropping the last element, which would lose the final
        # record of a file that does not end with a newline.
        if not fields:
            continue
        ctinfo.append(tuple(map(int, fields[4:6][::-1])))
    return ctinfo

In [6]:
# One row per pri-miRNA and one column per position 1..125; each row is filled
# from the mapping built out of that pri-miRNA's get_ct_info() tuples.
positions = range(1, 126)
tbl = pd.DataFrame(index=allpris, columns=positions)
for pri in allpris:
    pairing = dict(get_ct_info(pri))
    tbl.loc[pri] = pairing

### 2. symbolized structure

In [7]:
def get_sym_str(mir):
    """Return a 125-character symbol string describing the hairpin of `mir`.

    Symbols emitted by this function:
      F - position outside the selected stem
      M - paired position belonging to the selected stem
      L - position strictly between the two bases of the loop-closing pair
      S - unpaired position between two consecutive stem pairs, counted up to the
          number of unpaired positions on the opposite strand
      A - remaining unpaired positions between two consecutive stem pairs

    Procedure:
      1. Convert the two coordinates taken from get_pre_annot() into positions
         relative to the construct described by s2 ('Start', 'End', 'Strand').
         NOTE(review): presumably these are the 5' and 3' ends of the
         pre-miRNA — confirm against get_pre_annot.
      2. Collect hairpin-closing pairs, i.e. pairs (m, n) whose next paired
         position is n itself, lying strictly inside that window.
      3. Keep the pair whose m + n is closest to 120.
      4. Take the first pair enclosing that loop as the stem boundary and walk
         the stem, 5' strand first and then the mirrored 3' strand.

    Parameters
    ----------
    mir : str
        Pri-miRNA name; used as key for get_pre_annot, s2 and get_ct_info.

    Returns
    -------
    str
        Symbol string of length 125; 'F' * 125 when no hairpin-closing pair is
        found inside the window.
    """
    c5, c3 = get_pre_annot(mir)[1:3]
    ps, pe, strand = s2.loc[mir, ['Start','End','Strand']]
    # Translate to 1-based coordinates within the construct; on the minus
    # strand the construct runs from its 'End' backwards.
    if strand=='+':
        ps, pe = c5-ps+1, c3-ps+1
    else:
        ps, pe = pe-c3+1, pe-c5+1
    # Keep paired positions only (partner > 0).
    stinfo = [ p for p in get_ct_info(mir) if p[1]>0 ]
    # (m, n) closes a hairpin when the very next paired position is its partner.
    ls = [ (m,n) for i,(m,n) in enumerate(stinfo[:-1])
           if stinfo[i+1]==(n,m) and m>ps and n<pe ]
    if not ls:
        return 'F'*125
    # Tuple-unpacking lambda parameters are Python 2-only syntax, so index instead.
    loopst, loopen = min(ls, key=lambda pair: abs(120-pair[0]-pair[1]))
    # First pair (in position order) enclosing the loop. The inclusive bounds
    # let the loop-closing pair stand in as the stem boundary when nothing
    # encloses it, instead of raising IndexError on an empty list; whenever an
    # enclosing pair exists it precedes the closing pair, so the result is
    # unchanged in that case.
    stemst, stemen = [ (i,p) for i,p in stinfo if i<=loopst and p>=loopen ][0]
    pairs = [ (i,p) for i,p in stinfo if stemst<=i<=loopst and loopen<=p<=stemen ]
    # Append the mirrored pairs so the same walk covers the 3' strand.
    pairs = pairs + [ (p,i) for i,p in pairs ][::-1]
    symst = ''
    for (l5,l3), (u5,u3) in zip(pairs[:-1],pairs[1:]):
        if l5==loopst:
            symst += 'M'+'L'*(loopen-loopst-1)
        elif u5-l5==1:
            symst += 'M'
        else:
            # Unpaired stretch on this strand: the part matched by unpaired
            # positions on the opposite strand is 'S', the excess is 'A'.
            symmet = min(u5-l5-1, l3-u3-1)
            symst += 'M' + (u5-l5-1-symmet)*'A' + symmet*'S'
    # The walk stops one position short of stemen, hence the extra 'M'.
    symst = 'F'*(stemst-1) + symst + 'M' + 'F'*(125-stemen)
    return symst

In [8]:
# Add the symbol string of every pri-miRNA as an extra column of the table.
sym_col = 'symbolized structure'
for pri in allpris:
    symbols = get_sym_str(pri)
    tbl.loc[pri, sym_col] = symbols

In [9]:
# Intermediate dump of the table; the next cell copies this file into the final
# supplementary CSV.
structure_csv = 'resources/201012_s5_pri-structure.csv'
tbl.to_csv(structure_csv)

In [10]:
# Final supplementary file: a title line followed by four blank lines, then the
# intermediate CSV copied through unchanged.
description = 'Supplementary Table 5. Pri-miRNA secondary structure\n\n\n\n\n'
# `with` closes both handles even if the copy fails; previously the input handle
# was never closed and the output handle was closed only on success.
with open('supplementary/201012_s5_pri-structure.csv', 'wt') as out:
    out.write(description)
    with open('resources/201012_s5_pri-structure.csv', 'rt') as src:
        for l in src:
            out.write(l)