# ACCESS BIOINFORMATICS DATABASES WITH BIOPYTHON


## Import module

In [1]:
# Install Biopython first if it is not already available in this environment:
# %pip install biopython

In [2]:
from Bio.Blast import NCBIWWW
from Bio import SeqIO, SearchIO

In [3]:
#help(NCBIWWW)

### 1. Nucleotide BLAST search
A nucleotide BLAST (Basic Local Alignment Search Tool) search compares a given DNA sequence (query) against a database of nucleotide sequences to identify regions of similarity.

In [None]:
# Load the single FASTA record in nuc_seq.fasta; it is the nucleotide BLAST query
record = SeqIO.read("nuc_seq.fasta", "fasta")
# Display the length of the loaded record
len(record)

MT598137.1
ATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGT


In [5]:
# Submit the query sequence to the NCBI online BLAST service:
# program "blastn", searched against the "nt" database.
result_handle = NCBIWWW.qblast(
    program="blastn",
    database="nt",
    sequence=record.seq,
)

In [6]:
# Parse the BLAST XML response into a result object that can be indexed by hit.
# Nothing is written to disk here. The handle can only be read once, so it is
# closed as soon as parsing has finished.
with result_handle:
    blast_result = SearchIO.read(result_handle, "blast-xml")

In [7]:
# Show the summary table for the first 10 hits
print(blast_result[:10])

Program: blastn (2.17.0+)
  Query: No (774)
         definition line
 Target: core_nt
   Hits: ----  -----  ----------------------------------------------------------
            #  # HSP  ID + description
         ----  -----  ----------------------------------------------------------
            0      1  gi|2549881539|emb|OY359284.1|  Severe acute respiratory...
            1      1  gi|2505160398|emb|OX602250.1|  Severe acute respiratory...
            2      1  gi|2633227592|emb|OY964483.1|  Severe acute respiratory...
            3      1  gi|2521543283|emb|OX665185.1|  Severe acute respiratory...
            4      1  gi|2506157127|emb|OX609575.1|  Severe acute respiratory...
            5      1  gi|2555098633|emb|OY614841.1|  Severe acute respiratory...
            6      1  gi|2633229606|emb|OY965041.1|  Severe acute respiratory...
            7      1  gi|2633194945|emb|OY957481.1|  Severe acute respiratory...
            8      1  gi|2565342413|emb|OY618720.1|  Severe acute

In [9]:
# Take the first hit of the nucleotide search and report its identifiers
seq = blast_result[0]
print(
    f"Sequence ID: {seq.id}",
    f"Sequence Description: {seq.description}",
    sep="\n",
)

# First HSP of that hit; the name `detials` is kept as-is because the
# alignment cell that follows reads it.
detials = seq[0]
print(f"E-value: {detials.evalue}")

Sequence ID: gi|2549881539|emb|OY359284.1|
Sequence Description: Severe acute respiratory syndrome coronavirus 2 genome assembly, complete genome: monopartite
E-value: 0.0


In [None]:
# Show the alignment between the query and the selected hit
print("alignment:", detials.aln, sep="\n")

alignment:
Alignment with 2 rows and 774 columns
ATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAA...GGT No
ATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAA...GGT gi|2549881539|emb|OY359284.1|


## 1.2. Protein BLAST


In [None]:
# Load the single FASTA record in prot_seq.fasta; it is the protein BLAST query
prot_record = SeqIO.read("prot_seq.fasta", "fasta")
# Display the length of the loaded record
len(prot_record)

258

In [12]:
# Run a protein BLAST (program "blastp") of the query against the "pdb"
# database and parse the XML response. The `with` block closes the response
# handle once parsing is done.
# Note: this overwrites `blast_result` from the nucleotide search above.
with NCBIWWW.qblast("blastp", "pdb", prot_record.seq) as result_handle:
    blast_result = SearchIO.read(result_handle, "blast-xml")

In [None]:
# Show the summary table for the first two protein hits
print(blast_result[:2])

In [None]:
# Take the first hit of the protein search and report its identifiers
Seq = blast_result[0]
print(
    f"Sequence ID: {Seq.id}",
    f"Sequence Description: {Seq.description}",
    sep="\n",
)

# First HSP of that hit; its E-value is printed here and its alignment is
# printed by the following cell.
details = Seq[0]
print(f"E-value: {details.evalue}")

In [None]:
# Show the alignment between the protein query and the selected hit.
# There is no space after the newline, so the first alignment line is not
# indented; this matches the format of the nucleotide alignment cell.
print(f"alignment:\n{details.aln}")