# Table S2. Constructs used in this study.
- Pri-miRNA construct sequences were retrieved from Table S2 of [Kim, Baek et al. (2021)](https://www.sciencedirect.com/science/article/pii/S1097276521005451)
---
1. Load data
2. Control RNA sequences
3. Build table

In [1]:
import time
today = time.strftime('%Y-%m-%d')
name = 'S. Chan Baek'
print 'Last revised by %s at %s.' % (name, today)

Last revised by S. Chan Baek at 2024-01-12.


In [2]:
HOME = '/casa/bsc/projects/2_Structure-of-pri/2007_paper_prep'
%cd $HOME

/casa/bsc/projects/2_Structure-of-pri/2007_paper_prep


In [3]:
from __future__ import division
from Bio import SeqIO
from collections import defaultdict
import pandas as pd
import numpy as np

In [20]:
s1 = pd.read_csv('publication/TableS1__Pri-miRNAs_selected_for_SHAPE-MaP.csv',index_col=0)
ALLMIRS = list(s1.index)
print len(ALLMIRS)
s1.head(1)

519


Unnamed: 0,Precursor,5p,3p,Guide
hsa-let-7a-1,UGAGGUAGUAGGUUGUAUAGUUUUAGGGUCACACCCACCACUGGGA...,UGAGGUAGUAGGUUGUAUAGUU,CUAUACAAUCUACUGUCUUUC,5p


## 1. Load data

In [6]:
kims2file = 'publication/Kim2021_TableS2.xlsx'
kims2 = pd.ExcelFile(kims2file).parse('s2. Pri-miRNA constructs',header=9,index_col=0)
constseqs = dict(kims2['Construct sequence (125 nt)'])
print constseqs['hsa-let-7a-1']
kims2.head(1)

CTGGATGTTCTCTTCACTGTGGGATGAGGTAGTAGGTTGTATAGTTTTAGGGTCACACCCACCACTGGGAGATAACTATACAATCTACTGTCTTTCCTAACGTGATAGAAAAGTCTGCATCCAGG


Unnamed: 0_level_0,Chr,Start,End,Strand,Construct sequence (125 nt),5' flanking segment,Pre-miRNA,3' flanking segment
Pri-miRNA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
hsa-let-7a-1,chr9,94175938,94176062,+,CTGGATGTTCTCTTCACTGTGGGATGAGGTAGTAGGTTGTATAGTT...,CUGGAUGUUCUCUUCACUGUGGGA,UGAGGUAGUAGGUUGUAUAGUUUUAGGGUCACACCCACCACUGGGA...,CUAACGUGAUAGAAAAGUCUGCAUCCAGG


## 2. Control RNA sequences

In [21]:
CONTROLS = ['IRES-domainII', 'U1-snRNA', 'Yeast-tRNAasp']
CTL_SEQS = { 
'U1-snRNA':'ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGC\
TGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG',
'Yeast-tRNAasp':'GCCGTGATAGTTTAATGGTCAGAATGGGCGCTTGTCGCGTGCCAGATCGGGGTTCAATTCCCCGTCGCGGCGCCA',
'IRES-domainII':'CCATGAATCACTCCCCTGTGAGGAACTACTGTCTTCACGCAGAAAGCGTCTAGCCATGGCGTTAGTATGAGTGTCGT\
GCAGCCTCCAGGACCCCC'}
CTL_STRS = {
'IRES-domainII':'0 0 0 0 0 0 0 0 0 0 0 0 0 0 89 88 87 86 0 85 84 83 82 0 0 0 0 0 81 80 79 78 \
76 0 0 73 72 71 0 69 68 0 0 0 63 62 61 60 59 58 0 0 0 0 0 0 0 50 49 48 47 46 45 0 0 0 0 41 40 0 \
38 37 36 0 0 33 0 32 31 30 29 23 22 21 20 18 17 16 15 0 0 0 0 0 0',
'U1-snRNA':'0 0 0 0 0 0 0 0 0 0 0 122 121 120 119 0 47 46 45 44 43 0 42 41 40 39 38 0 0 0 0 0 \
0 0 0 0 0 27 26 25 24 23 21 20 19 18 17 91 90 89 88 87 0 0 0 83 82 81 0 79 78 77 76 75 0 0 0 \
0 0 0 0 0 0 0 64 63 62 61 60 0 58 57 56 0 0 0 52 51 50 49 48 117 116 115 113 112 111 110 109 \
108 0 0 0 0 0 0 0 100 99 98 97 96 95 0 94 93 92 0 15 14 13 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
163 162 161 160 159 158 0 0 155 154 153 0 0 0 0 148 147 146 0 0 143 142 141 140 139 138 0',
'Yeast-tRNAasp':'71 70 69 68 67 66 65 0 0 25 24 23 22 0 0 0 0 0 0 0 0 13 12 11 10 0 43 42 41 \
40 39 0 0 0 0 0 0 0 31 30 29 28 27 0 0 0 0 64 63 62 61 60 0 0 0 0 0 0 0 52 51 50 49 48 7 6 5 \
4 3 2 1 0 0 0 0'}

In [22]:
def make_ct(ctl, out):
    seq = CTL_SEQS[ctl].replace('T','U')
    sts = map(int, CTL_STRS[ctl].split())
    with open(out, 'wt') as outf:
        outf.write('%5s %s\n'%(len(seq),ctl))
        for i, (nuc,bp) in enumerate(zip(seq,sts)):
            outf.write('%5s %s   %5s%5s%5s%5s\n'%(i+1,nuc,i,(i+2)%(len(seq)+1),bp,i+1))

In [15]:
pvclient = '/casa/bsc/bin/ShapeMapper_v1.2/pvclient.py'
epspdf = '/casa/bsc/.local/texlive/2017/bin/x86_64-linux/epspdf'

In [19]:
# Ct files for validating our SHAPE-MaP experiments
for ctl in CONTROLS:
    ctfile = 'publication/controls/%s_ref.ct'%ctl
    out = 'publication/controls/%s_ref'%ctl
    eps = '%s.eps'%out
    make_ct(ctl, ctfile)
    result = !$pvclient --ct $ctfile --out $out
    print ctl, result[-1]

IRES-domainII Done.
U1-snRNA Done.
Yeast-tRNAasp Done.


In [23]:
# Reference sequence files for aligning
with open('publication/Constructs.fa', 'wt') as out:
    for pri in ALLMIRS:
        out.write('>%s\n%s\n' % (pri,constseqs[pri]))
    for ctl in CONTROLS:
        out.write('>%s\n%s\n' % (ctl,CTL_SEQS[ctl]))

## 3. Build table

In [24]:
tbl = pd.DataFrame(index=ALLMIRS, columns=['Construct sequence'])
for mir in ALLMIRS:
    tbl.loc[mir, 'Construct sequence'] = constseqs[mir]

In [25]:
for ct in sorted(CONTROLS):
    tbl.loc[ct, 'Construct sequence'] = CTL_SEQS[ct]

In [26]:
tbl.tail(1)

Unnamed: 0,Construct sequence
Yeast-tRNAasp,GCCGTGATAGTTTAATGGTCAGAATGGGCGCTTGTCGCGTGCCAGA...


In [27]:
tbl.to_csv('publication/TableS2__Constructs_used_in_this_study.csv')