In [5]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
from Bio import Entrez

In [1]:
# A short DNA sequence used to demonstrate the basic Seq operations.
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAATAATCGC")

# One line each: the sequence itself, its complement, its reverse
# complement, and how many times "AA" is counted in it.
print(
    my_seq,
    my_seq.complement(),
    my_seq.reverse_complement(),
    my_seq.count("AA"),
    sep="\n",
)

# Transcribe the DNA, then translate the transcript.
mRNA = my_seq.transcribe()
protein = mRNA.translate()

print("mRNA: {}".format(mRNA))
print("protein: {}".format(protein))


GATCGATGGGCCTATATAGGATCGAATAATCGC
CTAGCTACCCGGATATATCCTAGCTTATTAGCG
GCGATTATTCGATCCTATATAGGCCCATCGATC
2
mRNA: GAUCGAUGGGCCUAUAUAGGAUCGAAUAAUCGC
protein: DRWAYIGSNNR


In [14]:
# Read the FASTA file "NC_005816.fna" with SeqIO.read and bind the result
# to `record`, then print its summary (ID, name, description, features, seq).
record = SeqIO.read("NC_005816.fna", "fasta")
print(record)

ID: gi|45478711|ref|NC_005816.1|
Name: gi|45478711|ref|NC_005816.1|
Description: gi|45478711|ref|NC_005816.1| Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Number of features: 0
Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...CTG', SingleLetterAlphabet())


In [6]:
# Contact address sent along with Entrez requests.
# NOTE(review): "@" looks like a redacted placeholder — confirm and set a real
# address before re-running this cell.
Entrez.email = "@"

# Search the nucleotide database for the accession. The handle wraps an open
# response, so close it as soon as the result has been parsed, even when
# parsing fails.
handle = Entrez.esearch(db="nucleotide", term="NC_005816")
try:
    record = Entrez.read(handle)
finally:
    handle.close()

# Show every field of the search result, one "key value" pair per line.
for key, value in record.items():
    print(key, value)

Count 1
RetMax 1
RetStart 0
IdList ['45478711']
TranslationSet []
QueryTranslation 


In [29]:
# `record` must still be the esearch result here: its 'IdList' entry holds the
# ids to download.
IdList = record['IdList']

# Fetch the matching entries as plain-text GenBank and read the whole response
# into memory; close the handle whether or not the read succeeds.
handle = Entrez.efetch(db="nucleotide", id=IdList, rettype="gb", retmode="text")
try:
    text = handle.read()
finally:
    handle.close()

print(text)

LOCUS       NC_005816               9609 bp    DNA     circular CON 25-OCT-2020
DEFINITION  Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete
            sequence.
ACCESSION   NC_005816
VERSION     NC_005816.1
DBLINK      BioProject: PRJNA224116
            BioSample: SAMN02602970
            Assembly: GCF_000007885.1
KEYWORDS    RefSeq.
SOURCE      Yersinia pestis biovar Microtus str. 91001
  ORGANISM  Yersinia pestis biovar Microtus str. 91001
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
            Yersiniaceae; Yersinia.
REFERENCE   1  (bases 1 to 9609)
  AUTHORS   Zhou,D., Tong,Z., Song,Y., Han,Y., Pei,D., Pang,X., Zhai,J., Li,M.,
            Cui,B., Qi,Z., Jin,L., Dai,R., Du,Z., Wang,J., Guo,Z., Wang,J.,
            Huang,P. and Yang,R.
  TITLE     Genetics of metabolic variations between Yersinia pestis biovars
            and the proposal of a new biovar, microtus
  JOURNAL   J Bacteriol 186 (15), 5147-5152 (2004)
   PUBMED   152

In [30]:
# `record` must still be the esearch result here: its 'IdList' entry holds the
# ids to download.
IdList = record['IdList']

# Stream the GenBank response through SeqIO.parse straight into a local file.
# The parser is lazy, so the handle has to stay open until SeqIO.write has
# consumed it; it is closed afterwards even if parsing or writing fails.
handle = Entrez.efetch(db="nucleotide", id=IdList, rettype="gb", retmode="text")
try:
    records = SeqIO.parse(handle, 'gb')
    n_written = SeqIO.write(records, 'NC_005816.gb', 'gb')
finally:
    handle.close()

# Trailing expression: display the number of records written as the cell output.
n_written

1

In [31]:
# Parse the local file 'NC_005816.gb' as GenBank and print the summary of
# every record it contains. `records` is consumed by the loop, and `record`
# is left bound to the last record printed.
records = SeqIO.parse('NC_005816.gb', 'genbank')
for record in records:
    print(record)


ID: NC_005816.1
Name: NC_005816
Description: Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence
Database cross-references: BioProject:PRJNA224116, BioSample:SAMN02602970, Assembly:GCF_000007885.1
Number of features: 19
/molecule_type=DNA
/topology=circular
/data_file_division=CON
/date=25-OCT-2020
/accessions=['NC_005816']
/sequence_version=1
/keywords=['RefSeq']
/source=Yersinia pestis biovar Microtus str. 91001
/organism=Yersinia pestis biovar Microtus str. 91001
/taxonomy=['Bacteria', 'Proteobacteria', 'Gammaproteobacteria', 'Enterobacterales', 'Yersiniaceae', 'Yersinia']
/references=[Reference(title='Genetics of metabolic variations between Yersinia pestis biovars and the proposal of a new biovar, microtus', ...), Reference(title='Complete genome sequence of Yersinia pestis strain 91001, an isolate avirulent to humans', ...), Reference(title='Direct Submission', ...)]
/comment=REFSEQ INFORMATION: The reference sequence is identical to
AE017046.1.
The annota

In [34]:
# Search the nucleotide database for SARS-CoV-2 complete genomes and keep the
# returned ids. The search handle is closed once the result has been parsed.
handle = Entrez.esearch(db="nucleotide", term="SARS-CoV-2 AND complete genome")
try:
    record = Entrez.read(handle)
finally:
    handle.close()
IdList = record['IdList']

# Download those entries as GenBank text and stream them into a local file.
# The parser is lazy, so the handle stays open until SeqIO.write has consumed
# it; it is closed afterwards even if parsing or writing fails.
handle = Entrez.efetch(db="nucleotide", id=IdList, rettype="gb", retmode="text")
try:
    records = SeqIO.parse(handle, 'gb')
    n_written = SeqIO.write(records, 'sars-cov-2.gb', 'gb')
finally:
    handle.close()

# Trailing expression: display the number of records written as the cell output.
n_written

20

In [53]:
# Collect the surface glycoprotein translation from every genome stored in
# 'sars-cov-2.gb' and write them all to one FASTA file.
records = SeqIO.parse('sars-cov-2.gb', 'gb')
srecords = []
for record in records:
    for feature in record.features:
        if feature.type != "CDS":
            continue

        # Look the qualifiers up defensively: a CDS without /product or
        # /translation is skipped instead of aborting the cell with KeyError.
        product = (feature.qualifiers.get('product') or [''])[0]
        if product != 'surface glycoprotein':
            continue
        translations = feature.qualifiers.get('translation')
        if not translations:
            continue
        translation = translations[0]

        # Label the protein record with the id/description of its source genome.
        srecord = SeqRecord(Seq(translation))
        srecord.id = record.id
        srecord.description = record.description
        srecords.append(srecord)

        print(record.id, record.description)
        print(translation)
        print()

# Trailing expression: SeqIO.write returns the number of records written,
# which the notebook displays as the cell output.
SeqIO.write(srecords, 'sars-cov-2-sprotein.fasta', 'fasta')

MZ062216.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/GA-CDC-2-4225086/2021, complete genome
MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFANPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRGLPQGFSALEPLVDLPIGINITRFQTLHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVKGFNCYFPLQSYGFQPTYGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQGVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGVENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSA

20