In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to local source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Case study: sRNA with dual action (repression + activation)

## Imports

In [2]:
import os
from subprocess import Popen, PIPE, run
import re
from datetime import datetime
import pandas as pd
import logging

from synbio_morpher.srv.io.manage.script_manager import script_preamble
from synbio_morpher.srv.parameter_prediction.IntaRNA.bin.copomus.IntaRNA import IntaRNA
from synbio_morpher.srv.parameter_prediction.simulator import process_raw_stdout
from synbio_morpher.utils.common.setup import prepare_config, expand_config
from synbio_morpher.utils.data.data_format_tools.common import load_json_as_dict
from synbio_morpher.utils.data.data_format_tools.manipulate_fasta import load_seq_from_FASTA
from synbio_morpher.utils.misc.type_handling import flatten_listlike

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np



## Load Data

In [3]:
# Load the merged interaction table (one row per Interactor 1 / Interactor 2
# pair, as shown in the outputs further down).
fn_merged_inter = os.path.join('..', 'data', 'sRNA', 'merged_inter.csv')
# Alternative input table, kept for reference:
# fn_merged_inter = os.path.join('..', 'data', 'sRNA', 'merged_EcoCyc_RNAInter_sRNATarBase.csv')
data = pd.read_csv(fn_merged_inter)

# Remove helper columns that are presumably leftover indices from earlier
# to_csv / reset_index round-trips (TODO confirm provenance).
# errors='ignore' drops whichever of them exist and skips the rest, so the
# cell works for any combination of these columns and can be re-run safely.
# The previous bare `except: pass` followed by an unconditional second drop
# raised KeyError whenever 'Unnamed: 0' was already gone or never present.
data = data.drop(columns=['Unnamed: 0', 'level_0'], errors='ignore')

# Keep only rows where both sequences are present.
data = data.dropna(subset=['Sequence 1', 'Sequence 2'])

## Dual action sRNAs

In [15]:
# Interactor 1 values that appear with at least one 'Induction' row and at
# least one 'Repression' row.
induced_by = set(data.loc[data['Regulation'] == 'Induction', 'Interactor 1'].unique())
repressed_by = set(data.loc[data['Regulation'] == 'Repression', 'Interactor 1'].unique())
dual_mask = data['Interactor 1'].isin(induced_by & repressed_by)

# Row counts per dual-action interactor. Every row for that interactor is
# counted, whatever its 'Regulation' value. value_counts() orders by
# descending frequency, and `overlap` reuses that ordering.
dual_counts = data.loc[dual_mask, 'Interactor 1'].value_counts()
overlap = list(dual_counts.index)
dual_counts

Interactor 1
ryhB       56
Spot_42    32
gcvB       17
dsrA       14
sgrS       13
mntS       10
rprA        8
rseX        3
arcZ        2
mcaS        2
Name: count, dtype: int64

In [30]:
# Ten most frequent 'Interactor 1' values over the whole table, for comparison
# with the dual-action subset.
data['Interactor 1'].value_counts().head(10)

Interactor 1
ryhB       56
omrA       53
Spot_42    32
omrB       32
rybB       29
oxyS       26
gcvB       17
fnrS       15
dsrA       14
sgrS       13
Name: count, dtype: int64

In [29]:
# Number of rows per 'Interactor 2' value, most frequent first.
data['Interactor 2'].value_counts()

Interactor 2
rpoS    20
ompA    12
ompC    11
ompF    10
hns      8
        ..
uup      1
clpB     1
yccS     1
ydbC     1
mreB     1
Name: count, Length: 266, dtype: int64

In [17]:
# Number of Interactor 1 values found with both 'Induction' and 'Repression' rows.
len(overlap)

10

In [16]:
# All rows for the last three entries of `overlap`. Because `overlap` is
# ordered by descending row count, these are the dual-action interactors
# with the fewest rows.
data.loc[data['Interactor 1'].isin(overlap[-3:])]

Unnamed: 0,Interactor 1,Interactor 2,ID 1,ID 2,Category 1,Category 2,Regulation,Binding position 1,Binding position 2,Sequence 1,Sequence 2,Source DB,Genome position 1,Genome position 2,Strand 1,Strand 2,Sequence before 1,Sequence before 2,Sequence after 1,Sequence after 2
35,rseX,ompA,"chromosome:NC_000913.3, Gene ID:5061507","chromosome:NC_000913.3, Gene ID:945571",trans-encoded antisense RNA,mRNA,Induction,['37..50'],['-22..-8'],TTTTTATTATTCTGTGTCATGATGCTTCCGTTATTAGCCTTTTATC...,ATGAAAAAGACAGCTATCGCGATTGCAGTGGCACTGGCTGGTTTCG...,sRNATarBase,2033649..2033739,1019013..1020053,forward,reverse,ttagtactattatgtgataaaagtcacatttttccacattggatga...,attttttgcgcctcgttatcatccaaaatacgccatgaatatctcc...,ttatgccttcattaatgtgcgcctgatcacaccagccgtttggcgc...,cgtcgcggtaaaacgctttctgaaacgattgttcagctgattgaag...
36,rseX,ompC,"chromosome:NC_000913.3, Gene ID:5061507","chromosome:NC_000913.3, Gene ID:946716",trans-encoded antisense RNA,mRNA,Repression,['30..55'],['-31..-1'],TTTTTATTATTCTGTGTCATGATGCTTCCGTTATTAGCCTTTTATC...,ATGAAAGTTAAAGTACTGTCCCTCCTGGTCCCAGCTCTGCTGGTAG...,sRNATarBase,2033649..2033739,2311646..2312749,forward,reverse,ttagtactattatgtgataaaagtcacatttttccacattggatga...,atgttattaaccctctgttatatgcctttatttgcttttttatgcc...,ttatgccttcattaatgtgcgcctgatcacaccagccgtttggcgc...,gtgggttttggcgggttgtggtttttgatcgcaaccaacaaagaag...
233,arcZ,rpoS,"chromosome:NC_000913.3, Gene ID:2847690","chromosome:NC_000913.3, Gene ID:947210",trans-encoded antisense RNA,mRNA,Induction,['66..91'],['-120..-99 (adapted relative to the translati...,GTGCGGCCTGAAAAACAGTGCTGTGCCCTTGTAACTCATCATAATA...,ATGAGTCAGAATACGCTGAAAGTTCATGATTTAAATGAAGATGCGG...,sRNATarBase,3350577..3350697,2866559..2867551,forward,reverse,CTCATGTTGACCGCTTGTTTAGCAGCTTCAAGGAAGCTGAAGGGCA...,ATCCTCGGGTCTTGCAGGCCACACAGGACACCCTGAACCGTCATGG...,CTCATGTTGACCGCTTGTTTAGCAGCTTCAAGGAAGCTGAAGGGCA...,ATCCTCGGGTCTTGCAGGCCACACAGGACACCCTGAACCGTCATGG...
248,rseX,rpoS,"chromosome:NC_000913.3, Gene ID:5061507","chromosome:NC_000913.3, Gene ID:947210",sRNA,mRNA,No Interaction,['NA'],['NA'],TTTTTATTATTCTGTGTCATGATGCTTCCGTTATTAGCCTTTTATC...,ATGAGTCAGAATACGCTGAAAGTTCATGATTTAAATGAAGATGCGG...,sRNATarBase,2033649..2033739,2866559..2867551,forward,reverse,ttagtactattatgtgataaaagtcacatttttccacattggatga...,ATCCTCGGGTCTTGCAGGCCACACAGGACACCCTGAACCGTCATGG...,ttatgccttcattaatgtgcgcctgatcacaccagccgtttggcgc...,ATCCTCGGGTCTTGCAGGCCACACAGGACACCCTGAACCGTCATGG...
292,mcaS,csgD,"chromosome:NC_000913.3, Gene ID:2847751","chromosome:NC_000913.3, Gene ID:949119",trans-encoded antisense RNA,mRNA,Repression,"['40..55', 'NA']","['-81..-96', 'NA']",TGAAATCTGTCACTGAAGAAAATTGGCAACTAAAGGTTAAAACCGT...,ATGTTTAATGAAGTCCATAGTATTCATGGTCATACATTATTGTTGA...,sRNATarBase,1405656..1405751,1102546..1103196,reverse,reverse,AGCAATTCAGCAATCCAATCATCCAGCAACAGAGCTTTCAAAGCCA...,atgatgaaaccccgctttttttattgatcgcacacctgacagctgc...,AGCAATTCAGCAATCCAATCATCCAGCAACAGAGCTTTCAAAGCCA...,ctgggcctttcattaatcgttaagttacccgtatagtcactttccc...
293,mcaS,flhD,"chromosome:NC_000913.3, Gene ID:2847751","chromosome:NC_000913.3, Gene ID:945442",trans-encoded antisense RNA,mRNA,Induction,['11..20; 69..77'],['-77..-86; -52..-61'],TGAAATCTGTCACTGAAGAAAATTGGCAACTAAAGGTTAAAACCGT...,ATGCATACCTCCGAGTTGCTGAAACACATTTATGACATCAACTTGT...,sRNATarBase,1405656..1405751,1977847..1978197,reverse,reverse,AGCAATTCAGCAATCCAATCATCCAGCAACAGAGCTTTCAAAGCCA...,attattcccacccagaataaccaactttatttttatgcggtttcac...,AGCAATTCAGCAATCCAATCATCCAGCAACAGAGCTTTCAAAGCCA...,aggtcataaaccagtcggttgagaatggcagcatgcctttcggcgg...
302,arcZ,flhD,"chromosome:NC_000913.3, Gene ID:2847690","chromosome:NC_000913.3, Gene ID:945442",trans-encoded antisense RNA,mRNA,Repression,['1..17'],['-64..-47; -24..-7'],GTGCGGCCTGAAAAACAGTGCTGTGCCCTTGTAACTCATCATAATA...,ATGCATACCTCCGAGTTGCTGAAACACATTTATGACATCAACTTGT...,sRNATarBase,3350577..3350697,1977847..1978197,forward,reverse,CTCATGTTGACCGCTTGTTTAGCAGCTTCAAGGAAGCTGAAGGGCA...,attattcccacccagaataaccaactttatttttatgcggtttcac...,CTCATGTTGACCGCTTGTTTAGCAGCTTCAAGGAAGCTGAAGGGCA...,aggtcataaaccagtcggttgagaatggcagcatgcctttcggcgg...
