--- Script fornecido pelo Grupo 7 e adaptado para o nosso trabalho ---

# *Klebsiella pneumoniae*
## BLAST individuais

# KPC-2

In [11]:
from Bio import SeqIO
from Bio.Blast import NCBIWWW 

# Gene name; it is also the stem of the input FASTA file and of the XML
# file that receives the BLAST result.
x = "KPC-2"

# Read the single query record. The context manager closes the FASTA file,
# which the previous version left open.
with open(f"{x}.fasta") as f:
    seq = SeqIO.read(f, "fasta")

# Submit the query through NCBIWWW.qblast (program "blastn", database "nt")
# and save the raw XML response so the following cells can parse it from disk.
result_handle = NCBIWWW.qblast("blastn", "nt", seq.format("fasta"))
try:
    with open(f"{x}_BLAST.xml", "w") as save_file:
        save_file.write(result_handle.read())
finally:
    # Release the result handle even if writing the file fails.
    result_handle.close()

## Análise do ficheiro XML

In [14]:
from Bio.Blast import NCBIXML
import json

# Collect one entry per unique hit from the BLAST XML file written for `x`.
allhits = []  # one dict per hit, used for the JSON dump below
hits = []     # one list per hit, same values in column order (DataFrame rows)
ids = []      # hit identifiers already recorded

# NCBIXML.parse is lazy, so the whole loop has to run while the file is open.
# The context manager closes the handle afterwards (it used to stay open).
with open(f"{x}_BLAST.xml") as result_handle:
    blast_records = NCBIXML.parse(result_handle)
    for blast_record in blast_records:
        for alignment in blast_record.alignments:
            # Alignments without HSPs contribute nothing.
            if not alignment.hsps:
                continue

            # Fourth "|"-separated field of hit_id is used as the identifier.
            # NOTE(review): this raises IndexError if hit_id has fewer than
            # four fields - confirm the format returned by the BLAST service.
            hit_identifier = alignment.hit_id.split("|")[3]
            if hit_identifier in ids:
                continue
            ids.append(hit_identifier)

            # Only the first HSP of each hit is reported; once the identifier
            # is recorded, every later HSP of the same hit is skipped.
            hsp = alignment.hsps[0]

            dict_all = {
                "Info": alignment.hit_def,
                "Hit": hit_identifier,
                "Length": alignment.length,
                "E-value": hsp.expect,
                "Score": hsp.score,
                "Accession Number": alignment.accession,
            }
            info_hit = [
                alignment.hit_def,
                hit_identifier,
                alignment.length,
                hsp.expect,
                hsp.score,
                alignment.accession,
            ]
            allhits.append(dict_all)
            hits.append(info_hit)

print(f"Number of hits: {len(allhits)}")

print(json.dumps(allhits, indent = 3))

Number of hits: 50
[
   {
      "Info": "Pseudomonas aeruginosa strain HdC plasmid unnamed, complete sequence",
      "Hit": "OL780449.1",
      "Length": 42750,
      "E-value": 0.0,
      "Score": 1764.0,
      "Accession Number": "OL780449"
   },
   {
      "Info": "Klebsiella pneumoniae strain WCHKP115011 plasmid pKPC2_115011, complete sequence",
      "Hit": "CP089954.1",
      "Length": 157388,
      "E-value": 0.0,
      "Score": 1764.0,
      "Accession Number": "CP089954"
   },
   {
      "Info": "Klebsiella pneumoniae strain 37 plasmid pZZDX37-KPC, complete sequence",
      "Hit": "MT810376.1",
      "Length": 91216,
      "E-value": 0.0,
      "Score": 1764.0,
      "Accession Number": "MT810376"
   },
   {
      "Info": "Klebsiella pneumoniae strain 2014052905 plasmid p52905-KPC, complete sequence",
      "Hit": "MT810375.1",
      "Length": 177735,
      "E-value": 0.0,
      "Score": 1764.0,
      "Accession Number": "MT810375"
   },
   {
      "Info": "Klebsiella pneumon

## Obtenção do Dataframe para melhor análise do BLAST de KPC-2

In [9]:
import numpy as np
import pandas as pd
from tabulate import tabulate

# Column labels, in the same order as the values stored in each row of `hits`.
info_firstline = ["Organism", "Identifier", "Length", "E-Value", "Score", "Accession Number"]

# Build the frame straight from the list of rows. Going through
# np.array(hits) turned every value into a string because the rows mix text
# and numbers, which made Length, E-Value and Score useless for numeric
# sorting or filtering. The reshape step is no longer needed either.
info_genes = pd.DataFrame(data = hits, columns = info_firstline)
print(tabulate(info_genes, headers='keys', tablefmt='psql'))

+----+--------------------------------------------------------------------------------------------------+--------------+----------+-----------+---------+--------------------+
|    | Organism                                                                                         | Identifier   |   Length |   E-Value |   Score | Accession Number   |
|----+--------------------------------------------------------------------------------------------------+--------------+----------+-----------+---------+--------------------|
|  0 | Pseudomonas aeruginosa strain HdC plasmid unnamed, complete sequence                             | OL780449.1   |    42750 |         0 |    1764 | OL780449           |
|  1 | Klebsiella pneumoniae strain WCHKP115011 plasmid pKPC2_115011, complete sequence                 | CP089954.1   |   157388 |         0 |    1764 | CP089954           |
|  2 | Klebsiella pneumoniae strain 37 plasmid pZZDX37-KPC, complete sequence                           | MT810376.1   |    9

# CTX-M-14

In [10]:
from Bio import SeqIO
from Bio.Blast import NCBIWWW 

# Gene name; it is also the stem of the input FASTA file and of the XML
# file that receives the BLAST result.
x = "CTX-M-14"

# Read the single query record. The context manager closes the FASTA file,
# which the previous version left open.
with open(f"{x}.fasta") as f:
    seq = SeqIO.read(f, "fasta")

# Submit the query through NCBIWWW.qblast (program "blastn", database "nt")
# and save the raw XML response so the following cells can parse it from disk.
result_handle = NCBIWWW.qblast("blastn", "nt", seq.format("fasta"))
try:
    with open(f"{x}_BLAST.xml", "w") as save_file:
        save_file.write(result_handle.read())
finally:
    # Release the result handle even if writing the file fails.
    result_handle.close()

## Análise do ficheiro XML

In [11]:
from Bio.Blast import NCBIXML
import json

# Collect one entry per unique hit from the BLAST XML file written for `x`.
allhits = []  # one dict per hit, kept for the optional JSON dump below
hits = []     # one list per hit, same values in column order (DataFrame rows)
ids = []      # hit identifiers already recorded

# NCBIXML.parse is lazy, so the whole loop has to run while the file is open.
# The context manager closes the handle afterwards (it used to stay open).
with open(f"{x}_BLAST.xml") as result_handle:
    blast_records = NCBIXML.parse(result_handle)
    for blast_record in blast_records:
        for alignment in blast_record.alignments:
            # Alignments without HSPs contribute nothing.
            if not alignment.hsps:
                continue

            # Fourth "|"-separated field of hit_id is used as the identifier.
            # NOTE(review): this raises IndexError if hit_id has fewer than
            # four fields - confirm the format returned by the BLAST service.
            hit_identifier = alignment.hit_id.split("|")[3]
            if hit_identifier in ids:
                continue
            ids.append(hit_identifier)

            # Only the first HSP of each hit is reported; once the identifier
            # is recorded, every later HSP of the same hit is skipped.
            hsp = alignment.hsps[0]

            dict_all = {
                "Info": alignment.hit_def,
                "Hit": hit_identifier,
                "Length": alignment.length,
                "E-value": hsp.expect,
                "Score": hsp.score,
                "Accession Number": alignment.accession,
            }
            info_hit = [
                alignment.hit_def,
                hit_identifier,
                alignment.length,
                hsp.expect,
                hsp.score,
                alignment.accession,
            ]
            allhits.append(dict_all)
            hits.append(info_hit)

# Uncomment to inspect the parsed hits:
#print(f"Number of hits: {len(allhits)}")
#print(json.dumps(allhits, indent = 3))

## Obtenção do Dataframe para melhor análise do BLAST de CTX-M-14

In [12]:
import numpy as np
import pandas as pd
from tabulate import tabulate

# Column labels, in the same order as the values stored in each row of `hits`.
info_firstline = ["Organism", "Identifier", "Length", "E-Value", "Score", "Accession Number"]

# Build the frame straight from the list of rows. Going through
# np.array(hits) turned every value into a string because the rows mix text
# and numbers, which made Length, E-Value and Score useless for numeric
# sorting or filtering. The reshape step is no longer needed either.
info_genes = pd.DataFrame(data = hits, columns = info_firstline)
print(tabulate(info_genes, headers='keys', tablefmt='psql'))

+----+-------------------------------------------------------------------------------+--------------+----------+-----------+---------+--------------------+
|    | Organism                                                                      | Identifier   |   Length |   E-Value |   Score | Accession Number   |
|----+-------------------------------------------------------------------------------+--------------+----------+-----------+---------+--------------------|
|  0 | Klebsiella pneumoniae strain 2014052905 plasmid p52905-KPC, complete sequence | MT810375.1   |   177735 |         0 |    1704 | MT810375           |
|  1 | Klebsiella pneumoniae strain A1825 plasmid pA1825-KPC, complete sequence      | MT810360.1   |   261717 |         0 |    1704 | MT810360           |
|  2 | Proteus mirabilis strain PM1162 chromosome, complete genome                   | CP089317.1   |  4202509 |         0 |    1704 | CP089317           |
|  3 | Klebsiella pneumoniae KpWEA4-2 plasmid P2 DNA, complete s

# MdtC

In [4]:
from Bio import SeqIO
from Bio.Blast import NCBIWWW 

# Gene name; it is also the stem of the input FASTA file and of the XML
# file that receives the BLAST result.
x = "MdtC"

# Read the single query record. The context manager closes the FASTA file,
# which the previous version left open.
with open(f"{x}.fasta") as f:
    seq = SeqIO.read(f, "fasta")

# Submit the query through NCBIWWW.qblast (program "blastn", database "nt")
# and save the raw XML response so the following cells can parse it from disk.
result_handle = NCBIWWW.qblast("blastn", "nt", seq.format("fasta"))
try:
    with open(f"{x}_BLAST.xml", "w") as save_file:
        save_file.write(result_handle.read())
finally:
    # Release the result handle even if writing the file fails.
    result_handle.close()

## Análise do ficheiro XML

In [5]:
from Bio.Blast import NCBIXML
import json
# Gene name, set again here so this cell can run without the query cell.
x = "MdtC"

# Collect one entry per unique hit from the BLAST XML file written for `x`.
allhits = []  # one dict per hit, kept for the optional JSON dump below
hits = []     # one list per hit, same values in column order (DataFrame rows)
ids = []      # hit identifiers already recorded

# NCBIXML.parse is lazy, so the whole loop has to run while the file is open.
# The context manager closes the handle afterwards (it used to stay open).
with open(f"{x}_BLAST.xml") as result_handle:
    blast_records = NCBIXML.parse(result_handle)
    for blast_record in blast_records:
        for alignment in blast_record.alignments:
            # Alignments without HSPs contribute nothing.
            if not alignment.hsps:
                continue

            # Fourth "|"-separated field of hit_id is used as the identifier.
            # NOTE(review): this raises IndexError if hit_id has fewer than
            # four fields - confirm the format returned by the BLAST service.
            hit_identifier = alignment.hit_id.split("|")[3]
            if hit_identifier in ids:
                continue
            ids.append(hit_identifier)

            # Only the first HSP of each hit is reported; once the identifier
            # is recorded, every later HSP of the same hit is skipped.
            hsp = alignment.hsps[0]

            dict_all = {
                "Info": alignment.hit_def,
                "Hit": hit_identifier,
                "Length": alignment.length,
                "E-value": hsp.expect,
                "Score": hsp.score,
                "Accession Number": alignment.accession,
            }
            info_hit = [
                alignment.hit_def,
                hit_identifier,
                alignment.length,
                hsp.expect,
                hsp.score,
                alignment.accession,
            ]
            allhits.append(dict_all)
            hits.append(info_hit)

# Uncomment to inspect the parsed hits:
#print(f"Number of hits: {len(allhits)}")
#print(json.dumps(allhits, indent = 3))

## Obtenção do Dataframe para melhor análise do BLAST de MdtC

In [6]:
import numpy as np
import pandas as pd
from tabulate import tabulate

# Column labels, in the same order as the values stored in each row of `hits`.
info_firstline = ["Organism", "Identifier", "Length", "E-Value", "Score", "Accession Number"]

# Build the frame straight from the list of rows. Going through
# np.array(hits) turned every value into a string because the rows mix text
# and numbers, which made Length, E-Value and Score useless for numeric
# sorting or filtering. The reshape step is no longer needed either.
info_genes = pd.DataFrame(data = hits, columns = info_firstline)
print(tabulate(info_genes, headers='keys', tablefmt='psql'))

+----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------+----------+-----------+---------+--------------------+
|    | Organism                                                                                                                                                                                              | Identifier   |   Length |   E-Value |   Score | Accession Number   |
|----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------+----------+-----------+---------+--------------------|
|  0 | Klebsiella pneumoniae strain F17KP0054 chromosome, complete genome                                                                                                   