In [1]:
import numpy as np
from Bio import SeqIO
import re

## RNA Splicing

In [35]:
records = SeqIO.parse('./data/test/rosalind_rna_splicing.txt','fasta')
sequences = []
for rec in records:
    sequences.append(rec.seq)

In [36]:
sequences

[Seq('ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGC...TAG'),
 Seq('ATCGGTCGAA'),
 Seq('ATCGGTCGAGCGTGT')]

In [4]:
codon_dict ={
    'UUU': 'F', 'CUU': 'L', 'AUU': 'I', 'GUU': 'V',
    'UUC': 'F', 'CUC': 'L', 'AUC': 'I', 'GUC': 'V',
    'UUA': 'L', 'CUA': 'L', 'AUA': 'I', 'GUA': 'V',
    'UUG': 'L', 'CUG': 'L', 'AUG': 'M', 'GUG': 'V',
    'UCU': 'S', 'CCU': 'P', 'ACU': 'T', 'GCU': 'A',
    'UCC': 'S', 'CCC': 'P', 'ACC': 'T', 'GCC': 'A',
    'UCA': 'S', 'CCA': 'P', 'ACA': 'T', 'GCA': 'A',
    'UCG': 'S', 'CCG': 'P', 'ACG': 'T', 'GCG': 'A',
    'UAU': 'Y', 'CAU': 'H', 'AAU': 'N', 'GAU': 'D',
    'UAC': 'Y', 'CAC': 'H', 'AAC': 'N', 'GAC': 'D',
    'UAA': 'Stop', 'CAA': 'Q', 'AAA': 'K', 'GAA': 'E',
    'UAG': 'Stop', 'CAG': 'Q', 'AAG': 'K', 'GAG': 'E',
    'UGU': 'C', 'CGU': 'R', 'AGU': 'S', 'GGU': 'G',
    'UGC': 'C', 'CGC': 'R', 'AGC': 'S', 'GGC': 'G',
    'UGA': 'Stop', 'CGA': 'R', 'AGA': 'R', 'GGA': 'G',
    'UGG': 'W', 'CGG': 'R', 'AGG': 'R', 'GGG': 'G'
}


In [50]:
dna = sequences[0]
subs = sequences[1:]

In [51]:
subs

[Seq('ATCGGTCGAA'), Seq('ATCGGTCGAGCGTGT')]

In [56]:
print(dna)
for i in subs:
    print(i)
    dna = dna.replace(i,'')
    print(dna)

ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
ATCGGTCGAA
ATGGTCTACATAGCTGACAAACAGCACGTAGCATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
ATCGGTCGAGCGTGT
ATGGTCTACATAGCTGACAAACAGCACGTAGCATCTCGAGAGGCATATGGTCACATGTTCAAAGTTTGCGCCTAG


In [57]:
dna.translate(to_stop=True)

Seq('MVYIADKQHVASREAYGHMFKVCA')

### apply to data 

In [60]:
records = SeqIO.parse('./data/rosalind_splc.txt','fasta')
sequences = []
for rec in records:
    sequences.append(rec.seq)
dna = sequences[0]
subs = sequences[1:]

for i in subs:
    dna = dna.replace(i,'')
str(dna.translate(to_stop=True))

'MSQHTIQWGTGRPPTQYSGVEPPHHMARRVSCAPTRTLGHIRRSSFFILHKLATSLGNVRRNIQVRVPGSRRLHFMRLIVNRLVLVYVRDFITCNNGKSMHAPPIPESQFTLQVELLVLRREELGARCRLELSILDHRHVSYYLDGTPAPALNNVTSWCGESARRHYDARCTRFFPRFYLY'

## Enumerating k-mers Lexicographically

In [27]:
from itertools import permutations,product

In [29]:
lis = sorted(['A','T','C','G' ])

In [35]:
for aa in product(lis,repeat =2):
    print(''.join(aa))

AA
AC
AG
AT
CA
CC
CG
CT
GA
GC
GG
GT
TA
TC
TG
TT


### apply to data

In [36]:
lis=[]
with open('./data/rosalind_lexf.txt') as f:
    lines = f.read()
    for l in lines.split("\n"):
        print(l)

A B C D E F G H
3



In [42]:
lis = sorted(['A','B','C','D','E','F','G','H'])
for aa in product(lis,repeat = 3):
    print(''.join(aa))

AAA
AAB
AAC
AAD
AAE
AAF
AAG
AAH
ABA
ABB
ABC
ABD
ABE
ABF
ABG
ABH
ACA
ACB
ACC
ACD
ACE
ACF
ACG
ACH
ADA
ADB
ADC
ADD
ADE
ADF
ADG
ADH
AEA
AEB
AEC
AED
AEE
AEF
AEG
AEH
AFA
AFB
AFC
AFD
AFE
AFF
AFG
AFH
AGA
AGB
AGC
AGD
AGE
AGF
AGG
AGH
AHA
AHB
AHC
AHD
AHE
AHF
AHG
AHH
BAA
BAB
BAC
BAD
BAE
BAF
BAG
BAH
BBA
BBB
BBC
BBD
BBE
BBF
BBG
BBH
BCA
BCB
BCC
BCD
BCE
BCF
BCG
BCH
BDA
BDB
BDC
BDD
BDE
BDF
BDG
BDH
BEA
BEB
BEC
BED
BEE
BEF
BEG
BEH
BFA
BFB
BFC
BFD
BFE
BFF
BFG
BFH
BGA
BGB
BGC
BGD
BGE
BGF
BGG
BGH
BHA
BHB
BHC
BHD
BHE
BHF
BHG
BHH
CAA
CAB
CAC
CAD
CAE
CAF
CAG
CAH
CBA
CBB
CBC
CBD
CBE
CBF
CBG
CBH
CCA
CCB
CCC
CCD
CCE
CCF
CCG
CCH
CDA
CDB
CDC
CDD
CDE
CDF
CDG
CDH
CEA
CEB
CEC
CED
CEE
CEF
CEG
CEH
CFA
CFB
CFC
CFD
CFE
CFF
CFG
CFH
CGA
CGB
CGC
CGD
CGE
CGF
CGG
CGH
CHA
CHB
CHC
CHD
CHE
CHF
CHG
CHH
DAA
DAB
DAC
DAD
DAE
DAF
DAG
DAH
DBA
DBB
DBC
DBD
DBE
DBF
DBG
DBH
DCA
DCB
DCC
DCD
DCE
DCF
DCG
DCH
DDA
DDB
DDC
DDD
DDE
DDF
DDG
DDH
DEA
DEB
DEC
DED
DEE
DEF
DEG
DEH
DFA
DFB
DFC
DFD
DFE
DFF
DFG
DFH
DGA
DGB
DGC
DGD
DGE
DGF
DGG
DGH
DHA
DHB


## Longest Increasing Subsequence

In [48]:
nums = '51432'

In [61]:
elements = []
for i in nums:
    elements.append(i)
# find decresing
## start with max elements
decre_candi = []
start = nums[np.argmax(elements)]
for n in range(len(nums)) :
    for i in permutations(nums,n):
        num = ''.join(i)
        if num[0] == start:
            decre_candi.append(''.join(num))
        else:
            continue

IndexError: string index out of range

In [53]:
max(elements)

'5'

'5'