In [9]:
from Bio import SeqIO
from Bio.SeqIO import parse 
from Bio.SeqRecord import SeqRecord 
from Bio.Seq import Seq 


# Walk through every entry of the FASTA proteome file and print its
# identifier, name, full description line and sequence (nothing is stored).
with open("SARS_CoV-2_UP000464024_Proteome.fasta") as handle:
    for record in SeqIO.parse(handle, "fasta"):
        # Pair each output template with the record attribute it reports,
        # in the order the lines are printed.
        fields = (
            ("Id: %s", record.id),
            ("Name: %s", record.name),
            ("Description: %s", record.description),
            ("Sequence Data: %s", record.seq),
        )
        for template, value in fields:
            print(template % value)

Id: sp|P0DTC2|SPIKE_SARS2
Name: sp|P0DTC2|SPIKE_SARS2
Description: sp|P0DTC2|SPIKE_SARS2 Spike glycoprotein OS=Severe acute respiratory syndrome coronavirus 2 OX=2697049 GN=S PE=1 SV=1
Sequence Data: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGF

In [5]:
#blast using the gi accession
from Bio.Blast import NCBIWWW

# Submit a blastp search against the "nr" database over the network.
# NOTE: this contacts the NCBI servers and can take a while to return.
result_handle = NCBIWWW.qblast("blastp", "nr", "1796318600") #SARS-CoV-2 envelope

# Other SARS-CoV-2 proteins that can be queried the same way by swapping
# the GI number passed to qblast:
#   1798174255  SARS-CoV-2 nucleocapsid
#   1796318601  SARS-CoV-2 membrane glycoprotein

# Save the raw XML so later cells can parse it from disk without repeating
# the search; always release the result handle, even if the write fails.
try:
    with open("my_blast.xml", "w") as out_handle:
        out_handle.write(result_handle.read())
finally:
    result_handle.close()

    
    
    
    

In [6]:
from Bio.Blast import NCBIXML

# Parse the saved BLAST XML and report, for the first record only, every
# HSP whose E-value is below 0.01. The file is opened in a `with` block so
# the handle is closed once the report has been printed.
with open("my_blast.xml", "r") as result:
    records = NCBIXML.parse(result)
    # Only the first record in the file is inspected here.
    item = next(records)
    for alignment in item.alignments:
        for hsp in alignment.hsps:
            if hsp.expect < 0.01:
                print('****Alignment****')
                print('sequence:', alignment.title)
                print('length:', alignment.length)
                print('score:', hsp.score)
                print('gaps:', hsp.gaps)
                print('e value:', hsp.expect)
                # Show at most the first 90 columns of the aligned
                # query / match line / subject strings.
                print(hsp.query[0:90] + '...')
                print(hsp.match[0:90] + '...')
                print(hsp.sbjct[0:90] + '...')

****Alignment****
sequence: gb|QPK75943.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QQP46919.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QSW51860.1| envelope protein [Severe acute respiratory syndrome coronavirus 2]
length: 75
score: 363.0
gaps: 0
e value: 3.43657e-43
MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV...
MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVS+VKPSFYVYSRVKNLNSSRVPDLLV...
MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSJVKPSFYVYSRVKNLNSSRVPDLLV...
****Alignment****
sequence: gi|1796318600|ref|YP_009724392.1| envelope protein [Severe acute respiratory syndrome coronavirus 2]
length: 75
score: 362.0
gaps: 0
e value: 3.67065e-43
MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV...
MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV...
MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV...
****Alignment**

In [7]:
# HSPs with an E-value at or above this cutoff are not reported.
E_VALUE_THRESH = 1e-20

# For every record in the saved BLAST XML that has alignments, print the
# query (first 100 characters) followed by one "match" line per HSP that
# passes the cutoff. The file is opened in a `with` block so the handle is
# closed when parsing finishes.
with open("my_blast.xml") as blast_xml:
    for record in NCBIXML.parse(blast_xml):
        if record.alignments:
            print("\n")
            print("query: %s" % record.query[:100])
            for align in record.alignments:
                for hsp in align.hsps:
                    if hsp.expect < E_VALUE_THRESH:
                        # Titles can be very long; keep the first 100 characters.
                        print("match: %s " % align.title[:100])



query: envelope protein [Severe acute respiratory syndrome coronavirus 2]
match: gb|QPK75943.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QQP46919.1| en 
match: gi|1796318600|ref|YP_009724392.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] 
match: gb|QUE12501.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QUJ12162.1| en 
match: gb|QRG50750.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QRI47604.1| en 
match: gb|QQZ31248.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QRF77144.1| en 
match: gb|QRK19029.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QRO03437.1| en 
match: gb|QQX20997.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QRX26088.1| en 
match: gb|QLH58110.1| envelope protein [Severe acute respiratory syndrome coronavirus 2] >gb|QMU93458.1| en 
match: gb|QSG29993.1| envelope protein [Severe acute