# BLAST
Experimenting with BLAST in Python.

[Biopython documentation](https://biopython.readthedocs.io/en/latest/install/Installation.html#)

In [1]:
from Bio.Blast.NCBIWWW import qblast
from Bio.Blast import NCBIXML

# Experimenting with a single P450 sequence

In [2]:
# Protein query in one-letter amino-acid codes; submitted to BLAST (blastp) as `sequence` in a later cell.
query = "MTIKEMPQPKTFGELKNLPLLNTDKPVQALMKIADELGEIFKFEAPGRVTRYLSSQRLIKEACDEKRFDKNLGKGLQKVREFGGDGLATSWTHEPNWQKAHRILLPSFSQKAMKGYHSMMLDIAVQLIQKWARLNPNEAVDVPGDMTRLTLDTIGLCGFNYRFNSYYRETPHPFINSMVRALDEAMHQMQRLDVQDKLMVRTKRQFRYDIQTMFSLVDSIIAERRANGDQDEKDLLARMLNVEDPETGEKLDDENIRFQIITFLIAGHETTSGLLSFATYFLLKHPDKLKKAYEEVDRVLTDAAPTYKQVLELTYIRMILNESLRLWPTAPAFSLYAKEDTVLGGEYPLEKGDELMVLIPQLHRDKTIWGDDVEEFRPERFENPSAIPQHAFKPFGNGQRACIGQQFALHEATLVLGMILKYFTLIDHENYELDIKQTLTLKPGDFHISVQSRHQEAIHADVQAAE"

In [3]:
# Run a protein BLAST (blastp) of `query` against the "nr" database via
# Biopython's NCBIWWW interface. `out` is the result handle that is parsed
# in a later cell.
out = qblast(
    program="blastp",
    database="nr",
    sequence=query,
)

In [4]:
# Parse the BLAST result handle into a single record object.
# NOTE(review): reading presumably consumes `out`, so re-running this cell
# without re-running the qblast cell is expected to fail — confirm.
blast_record = NCBIXML.read(handle=out)

### Viewing the High-scoring Segment Pair (HSP) alignments for the query sequence

In [20]:
# Print a short preview of every HSP whose expect value is below the
# threshold, grouped under a banner line for each alignment (database hit).
E_VALUE_THRESH = 0.04  # HSPs with expect >= this value are skipped
PREVIEW_WIDTH = 50     # maximum number of characters shown per aligned line


def _preview(text, width=PREVIEW_WIDTH):
    """Return at most `width` leading characters of `text`.

    The '...' suffix is added only when `text` was actually cut, so a short
    alignment is not displayed as if it had been truncated.
    """
    return (text[:width] + '...') if len(text) > width else text


for alignment in blast_record.alignments:
    print("***** new alignment of length", alignment.length, "*****")
    for hsp in alignment.hsps:
        if hsp.expect < E_VALUE_THRESH:
            print('hsp e value:', hsp.expect)
            # Aligned query line, match line and subject line, in that order.
            print(_preview(hsp.query))
            print(_preview(hsp.match))
            print(_preview(hsp.sbjct))

***** new alignment of length 1061 *****
hsp e value: 0.0
MPQPKTFGELKNLPLLNTDKPVQALMKIADELGEIFKFEAPGRVTRYLSSQRLIKEACDEKRFDKNLGKGLQKVR...
+PQPKTFG L NLPL++ DKP  +L+K+A+E G IF+   P   T  +S   L+KE CDE+RFDK++   L+KVR...
IPQPKTFGPLGNLPLIDKDKPTLSLIKLAEEQGPIFQIHTPAGTTIVVSGHELVKEVCDEERFDKSIEGALEKVR...
***** new alignment of length 1061 *****
hsp e value: 0.0
MPQPKTFGELKNLPLLNTDKPVQALMKIADELGEIFKFEAPGRVTRYLSSQRLIKEACDEKRFDKNLGKGLQKVR...
+PQPKTFG L NLPL++ DKP  +L+K+A+E G IF+   P   T  +S   L+KE CDE+RFDK++   L+KVR...
IPQPKTFGPLGNLPLIDKDKPTLSLIKLAEEQGPIFQIHTPAGTTIVVSGHELVKEVCDEERFDKSIEGALEKVR...
***** new alignment of length 1061 *****
hsp e value: 0.0
MPQPKTFGELKNLPLLNTDKPVQALMKIADELGEIFKFEAPGRVTRYLSSQRLIKEACDEKRFDKNLGKGLQKVR...
+PQPKTFG L NLPL++ DKP  +L+K+A+E G IF+   P   T  +S   L+KE CDE+RFDK++   L+KVR...
IPQPKTFGPLGNLPLIDKDKPTLSLIKLAEEQGPIFQIHTPAGTTIVVSGHELVKEVCDEERFDKSIEGALEKVR...
***** new alignment of length 1061 *****
hsp e value: 0.0
MPQPKTFGELKNLPLLNTDKPVQALMKIADELGEIFKFEAPGRVTRYLSSQRLIKEA