## The Bioinformatics Armory

Source: rosalind.info

Accessed: Dec 2024


---

### Introduction (INI)

In [1]:
from Bio.Seq import Seq

In [3]:
# Load the sample DNA string, stripping only trailing newline characters.
with open("rosalind_armory_sample_dataset.txt", "r") as f:
    sample = f.read().rstrip('\n')

sequence = Seq(sample)

# Print the occurrence counts of A, C, G and T (in that order) on one
# space-separated line.
print(*(sequence.count(base) for base in 'ACGT'))

# Keep the notebook namespace clean for the next cell.
del sequence, sample

20 12 17 21


In [4]:
# Load the downloaded dataset, stripping only trailing newline characters.
with open("rosalind_ini.txt", "r") as f:
    sample = f.read().rstrip('\n')

sequence = Seq(sample)

# Count each nucleotide in the fixed order A, C, G, T and print the four
# numbers separated by single spaces.
print(' '.join([str(sequence.count(base)) for base in 'ACGT']))

# Keep the notebook namespace clean for the next cell.
del sequence, sample

195 200 211 215


---

### GenBank Introduction (GBK)

In [6]:
from Bio import Entrez

In [None]:
# Access example: run an Entrez search and display how many records match.

Entrez.email = "robb.young.2011@gmail.com"

handle = Entrez.esearch(
    db='nucleotide', # for 'GenBank' use `nucleotide`, for 'PubMed' use `pubmed`
    term='"Zea mays"[Organism] AND rbcl[Gene]' # items in a Query field
)

# Read the search reply, then close the handle even if reading fails so the
# connection is not left open for the rest of the kernel session.
try:
    record = Entrez.read(handle)
finally:
    handle.close()

# Last expression of the cell: shown as the cell output.
record['Count']

'39'

In [11]:
# Access example from Sample Dataset:
# count the nucleotide records for an organism published within a date range.

with open("rosalind_armory_sample_dataset.txt", "r") as f:
    sample = f.readlines()
    sample = [s.rstrip('\n') for s in sample]

# The first three lines are used, in order, as the organism and the two ends
# of the publication-date range.
organism, date_from, date_to = sample[:3]

Entrez.email = "robb.young.2011@gmail.com"
handle = Entrez.esearch(
    db='nucleotide',
    term=f'{organism}[Organism] AND {date_from}:{date_to}[Publication Date]'
)
# Close the handle even if reading the reply fails, so the connection is not
# left open for the rest of the kernel session.
try:
    record = Entrez.read(handle)
finally:
    handle.close()

print(int(record['Count']))
del record, handle, sample, organism, date_from, date_to

7


In [12]:
# Count the nucleotide records for an organism published within a date range,
# using the downloaded dataset.
with open("rosalind_gbk.txt", "r") as f:
    sample = f.readlines()
    sample = [s.rstrip('\n') for s in sample]

# The first three lines are used, in order, as the organism and the two ends
# of the publication-date range.
organism, date_from, date_to = sample[:3]

Entrez.email = "robb.young.2011@gmail.com"
handle = Entrez.esearch(
    db='nucleotide',
    term=f'{organism}[Organism] AND {date_from}:{date_to}[Publication Date]'
)
# Close the handle even if reading the reply fails, so the connection is not
# left open for the rest of the kernel session.
try:
    record = Entrez.read(handle)
finally:
    handle.close()

print(int(record['Count']))
del record, handle, sample, organism, date_from, date_to

67


---

### Data Formats (FRMT)

In [17]:
from Bio import Entrez, SeqIO

In [18]:
# Fetch a single entry by ID and print the raw FASTA text that comes back.
Entrez.email = "robb.young.2011@gmail.com"

handle = Entrez.efetch(
    db='nucleotide',
    id="FJ817486",
    rettype='fasta'
)

# Close the handle even if the read fails, so the connection is not left
# open for the rest of the kernel session.
try:
    record = handle.read()
finally:
    handle.close()

print(record)
del handle, record

>FJ817486.1 Malus hybrid cultivar flavanone 3-hydroxylase protein (F3H) mRNA, complete cds
CGCGTATTTCGTTTGAGCCAATACCAAGTAGACAGAACCAACAAATTCGACACCAAATATGGCTCCTGCT
ACTACGCTCACATCCATAGCGCATGAGAAAACCCTGCAACAAAAATTTGTCCGAGACGAAGACGAGCGTC
CAAAGGTTGCCTACAACGACTTCAGCAACGAAATTCCGATCATCTCGCTTGCCGGGATCGATGAGGTGGA
AGGCCGCCGGGGCGAGATTTGCAAGAAGATTGTAGCGGCTTGTGAAGACTGGGGTATTTTCCAGATTGTT
GACCATGGGGTTGATGCTGAGCTCATATCGGAAATGACCGGTCTCGCTAGAGAGTTCTTTGCTTTGCCAT
CGGAGGAGAAGCTCCGCTTCGACATGTCCGGTGGCAAAAAGGGTGGCTTCATCGTGTCCAGTCATTTACA
GGGAGAAGCTGTGCAAGATTGGCGTGAAATTGTGACCTACTTTTCATATCCGATTCGTCACCGGGACTAT
TCGAGGTGGCCAGACAAGCCTGAGGCCTGGAGGGAGGTGACAAAGAAGTACAGTGACGAGTTGATGGGGC
TGGCATGCAAGCTCTTGGGCGTTTTATCAGAAGCCATGGGGTTGGATACAGAGGCATTGACAAAGGCATG
TGTGGACATGGACCAAAAAGTCGTCGTGAATTTCTACCCAAAATGCCCTCAGCCCGACCTAACCCTTGGC
CTCAAGCGCCATACCGACCCGGGCACAATTACCCTTCTGCTTCAAGACCAAGTTGGGGGCCTCCAGGCTA
CTCGGGATGATGGGAAAACGTGGATCACCGTTCAACCAGTGGAAGGAGCTTTTGTGGTCAATCTTGGAGA
TCATGGTCATCTTCTGAGCAATGGGAGGTTCAAGAATGCTGATCACCAAGCAGTGGT

In [19]:
# Fetch several entries in one request (IDs given as a list) and print the
# raw FASTA text that comes back.
Entrez.email = "robb.young.2011@gmail.com"

handle = Entrez.efetch(
    db='nucleotide',
    id=["FJ817486","JX069768"],
    rettype='fasta'
)

# Close the handle even if the read fails, so the connection is not left
# open for the rest of the kernel session.
try:
    records = handle.read()
finally:
    handle.close()

print(records)
del records, handle

>FJ817486.1 Malus hybrid cultivar flavanone 3-hydroxylase protein (F3H) mRNA, complete cds
CGCGTATTTCGTTTGAGCCAATACCAAGTAGACAGAACCAACAAATTCGACACCAAATATGGCTCCTGCT
ACTACGCTCACATCCATAGCGCATGAGAAAACCCTGCAACAAAAATTTGTCCGAGACGAAGACGAGCGTC
CAAAGGTTGCCTACAACGACTTCAGCAACGAAATTCCGATCATCTCGCTTGCCGGGATCGATGAGGTGGA
AGGCCGCCGGGGCGAGATTTGCAAGAAGATTGTAGCGGCTTGTGAAGACTGGGGTATTTTCCAGATTGTT
GACCATGGGGTTGATGCTGAGCTCATATCGGAAATGACCGGTCTCGCTAGAGAGTTCTTTGCTTTGCCAT
CGGAGGAGAAGCTCCGCTTCGACATGTCCGGTGGCAAAAAGGGTGGCTTCATCGTGTCCAGTCATTTACA
GGGAGAAGCTGTGCAAGATTGGCGTGAAATTGTGACCTACTTTTCATATCCGATTCGTCACCGGGACTAT
TCGAGGTGGCCAGACAAGCCTGAGGCCTGGAGGGAGGTGACAAAGAAGTACAGTGACGAGTTGATGGGGC
TGGCATGCAAGCTCTTGGGCGTTTTATCAGAAGCCATGGGGTTGGATACAGAGGCATTGACAAAGGCATG
TGTGGACATGGACCAAAAAGTCGTCGTGAATTTCTACCCAAAATGCCCTCAGCCCGACCTAACCCTTGGC
CTCAAGCGCCATACCGACCCGGGCACAATTACCCTTCTGCTTCAAGACCAAGTTGGGGGCCTCCAGGCTA
CTCGGGATGATGGGAAAACGTGGATCACCGTTCAACCAGTGGAAGGAGCTTTTGTGGTCAATCTTGGAGA
TCATGGTCATCTTCTGAGCAATGGGAGGTTCAAGAATGCTGATCACCAAGCAGTGGT

In [None]:
# Fetch one entry and parse it with SeqIO to inspect its ID and sequence length.
Entrez.email = "robb.young.2011@gmail.com"

handle = Entrez.efetch(
    db='nucleotide',
    id="FJ817486",
    rettype='fasta'
)

# Materialise the parsed records before closing: the list is built while the
# handle is still open, and the handle is closed even if parsing fails.
try:
    record = list(SeqIO.parse(handle, "fasta"))
finally:
    handle.close()

print(record[0].id)
print(len(record[0].seq))

del handle, record

FJ817486.1
1370


In [None]:
# Access example from Sample Dataset:
# fetch every listed entry, find the one with the shortest sequence, and
# print that entry in FASTA format.

Entrez.email = "robb.young.2011@gmail.com"

with open("rosalind_armory_sample_dataset.txt", "r") as f:
    sample = f.read().rstrip('\n')
del f

# The dataset holds whitespace-separated IDs; split them so efetch receives
# one list element per ID instead of a single string with embedded spaces.
entry_ids = sample.split()

handle = Entrez.efetch(
    db='nucleotide',
    id=entry_ids,
    rettype='fasta'
)
# Close the handle even if parsing fails.
try:
    records = list(SeqIO.parse(handle, "fasta"))
finally:
    handle.close()

# Built-in min() returns the first of equally short records (same tie-break
# as a strict `<` scan), needs no sentinel upper bound on sequence length,
# and works on however many records actually came back. It raises ValueError
# if no record was returned at all.
shortest = min(records, key=lambda rec: len(rec.seq))
min_label = shortest.id
print('Minimum: ', min_label, str(len(shortest.seq)))
del records, shortest

# Re-fetch the shortest entry as raw text so the FASTA is printed exactly as
# the server formats it.
handle = Entrez.efetch(
    db='nucleotide',
    id=[min_label],
    rettype='fasta'
)
try:
    fasta_text = handle.read()
finally:
    handle.close()
print(fasta_text)

del handle, fasta_text, min_label, entry_ids, sample

Minimum:  JX469983.1 771
>JX469983.1 Zea mays subsp. mays clone UT3343 G2-like transcription factor mRNA, partial cds
ATGATGTATCATGCGAAGAATTTTTCTGTGCCCTTTGCTCCGCAGAGGGCACAGGATAATGAGCATGCAA
GTAATATTGGAGGTATTGGTGGACCCAACATAAGCAACCCTGCTAATCCTGTAGGAAGTGGGAAACAACG
GCTACGGTGGACATCGGATCTTCATAATCGCTTTGTGGATGCCATCGCCCAGCTTGGTGGACCAGACAGA
GCTACACCTAAAGGGGTTCTCACTGTGATGGGTGTACCAGGGATCACAATTTATCATGTGAAGAGCCATC
TGCAGAAGTATCGCCTTGCAAAGTATATACCCGACTCTCCTGCTGAAGGTTCCAAGGACGAAAAGAAAGA
TTCGAGTGATTCCCTCTCGAACACGGATTCGGCACCAGGATTGCAAATCAATGAGGCACTAAAGATGCAA
ATGGAGGTTCAGAAGCGACTACATGAGCAACTCGAGGTTCAAAGACAACTGCAACTAAGAATTGAAGCAC
AAGGAAGATACTTGCAGATGATCATTGAGGAGCAACAAAAGCTTGGTGGATCAATTAAGGCTTCTGAGGA
TCAGAAGCTTTCTGATTCACCTCCAAGCTTAGATGACTACCCAGAGAGCATGCAACCTTCTCCCAAGAAA
CCAAGGATAGACGCATTATCACCAGATTCAGAGCGCGATACAACACAACCTGAATTCGAATCCCATTTGA
TCGGTCCGTGGGATCACGGCATTGCATTCCCAGTGGAGGAGTTCAAAGCAGGCCCTGCTATGAGCAAGTC
A




In [36]:
# Fetch every entry listed in the downloaded dataset, find the one with the
# shortest sequence, and print that entry in FASTA format.
Entrez.email = "robb.young.2011@gmail.com"

with open("rosalind_frmt.txt", "r") as f:
    sample = f.read().rstrip('\n')
del f

# The dataset holds whitespace-separated IDs; split them so efetch receives
# one list element per ID instead of a single string with embedded spaces.
entry_ids = sample.split()

handle = Entrez.efetch(
    db='nucleotide',
    id=entry_ids,
    rettype='fasta'
)
# Close the handle even if parsing fails.
try:
    records = list(SeqIO.parse(handle, "fasta"))
finally:
    handle.close()

# Built-in min() returns the first of equally short records (same tie-break
# as a strict `<` scan), needs no sentinel upper bound on sequence length,
# and works on however many records actually came back. It raises ValueError
# if no record was returned at all.
min_label = min(records, key=lambda rec: len(rec.seq)).id
del records


# Re-fetch the shortest entry as raw text so the FASTA is printed exactly as
# the server formats it.
handle = Entrez.efetch(
    db='nucleotide',
    id=[min_label],
    rettype='fasta'
)
try:
    fasta_text = handle.read()
finally:
    handle.close()
print(fasta_text)
del fasta_text, handle, min_label, entry_ids, sample

>NM_001003102.2 Canis lupus familiaris ribosomal protein L27 (RPL27), mRNA
CGTTTCTTCCTTTCTGCTGTAGGCTCGAGTGGCTGGTGTCGAGATGGGCAAGTTCATGAAACCCGGGAAG
GTGGTGCTGGTCCTGGCCGGACGCTACTCCGGACGCAAAGCGGTCATCGTGAAGAACATTGATGATGGCA
CCTCAGACCGTCCCTACAGCCATGCTCTGGTGGCCGGAATAGACCGCTATCCCCGAAAAGTGACAGCTGC
CATGGGCAAGAAGAAAATCGCCAAGAGGTCAAAGATCAAGTCTTTTGTGAAAGTTTATAACTACAATCAC
CTCATGCCCACAAGGTACTCTGTGGATATCCCTTTGGACAAAACTGTCGTCAACAAGGATGTCTTCAGAG
ACCCTGCTCTTAAACGCAAGGCCCGACGAGAGGCCAAGGTCAAGTTCGAGGAGAGGTACAAGACTGGCAA
GAATAAGTGGTTCTTCCAGAAGCTGCGGTTTTAGATTTCTTTCAGTCATTAAAAATAAATTAAAAAAAAA
AAAAAGA




---

### Title (Abbrev)