# [DataFrames](#data)
[CARD](https://card.mcmaster.ca/)<br/>
[ARG-Annot]()<br/>
[Resfinder](https://cge.cbs.dtu.dk/services/ResFinder/)<br/>
[MegaRes](https://megares.meglab.org/)<br/>
[NCBI](https://www.ncbi.nlm.nih.gov/bioproject/PRJNA313047)<br/>

---

# Código <a class="anchor" id="code"></a>

In [71]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np 
import customfunc
import matrix
#from venn import venn

# ARG-ANNOT

In [72]:
#!abricate ~/documentos/sentinel/genomasbrasil/*.gbff --db argannot --noheader --threads 12 --quiet --minid 50 > ~/documentos/github/ARG-Sentinel/spreadsheets/argannot_br.csv

In [73]:
# Load the genome metadata catalogue from CSV and preview its first row.
catalogo_bacterias_br = pd.read_csv(
    filepath_or_buffer="/home/tiago/documents/github/ARG-Sentinel/spreadsheets/catalogo_teste.csv"
)
catalogo_bacterias_br.head(n=1)

Unnamed: 0,accession,colection_date,host,source,coord,country,organism,strain,plasmid
0,GCF_002113285.1_ASM211328v1_genomic.gbff,May-2014,Na,bulk soil from sugarcane field,22.7 S 47.6 W,Brazil,Pseudomonas sp. B11(2017),B11(2017),Na


**Lidando com valores ausentes ou errados**

In [74]:
# Tally how many catalogue rows carry each distinct `country` string.
catalogo_bacterias_br.loc[:, 'country'].value_counts()

Brazil                                      209
Brazil: Hospital das Clinicas, Sao Paulo    131
USA                                          67
Brazil: Sao Paulo                            42
Na                                           31
                                           ... 
Brazil: Seropedica, Rio de Janeiro            1
Brazil: Lavras                                1
Brazil:South                                  1
Brazil: Tucurui Power Plant Reservoir         1
Brazil: Arraial do Cabo                       1
Name: country, Length: 98, dtype: int64

In [75]:
# Keep only the records whose `country` text mentions "Brazil".
# na=False makes rows with a missing country count as non-matching; without it
# the mask would contain NaN and boolean indexing would raise.
catalogo_bacterias_br = catalogo_bacterias_br[
    catalogo_bacterias_br["country"].str.contains("Brazil", na=False)
]

In [76]:
# Case-insensitive tally of isolation sources, with the 'NA' placeholder bucket removed.
# errors="ignore" keeps the cell from raising KeyError when no 'NA' bucket exists.
catalogo_bacterias_br["source"].str.upper().value_counts().drop('NA', errors="ignore")

BLOOD                                      72
URINE                                      52
BRAIN                                      43
BULK SOIL FROM SUGARCANE FIELD             40
RHIZOSPHERE SOIL FROM SUGARE CANE FIELD    36
                                           ..
NASAL FOSSA                                 1
PLANT (MAIZE/SORGHUM/RICE)                  1
FRESHWATER                                  1
MANGROVE SWAMP                              1
VACCINE STRAIN                              1
Name: source, Length: 134, dtype: int64

In [77]:
# Read the tab-separated, headerless result table and assign its column names.
argannot_database = pd.read_csv(
    "/home/tiago/documents/github/ARG-Sentinel/spreadsheets/argannot_br.csv",
    sep="\t",
    names=[
        'file', 'sequence', 'start', 'end', 'strand', 'gene', 'coverage',
        'coverage_map', 'gaps', 'coverage_pec', 'identity_perc', 'database',
        'accession', 'product', 'resistance',
    ],
)
# Reduce each path in `file` to its last component (the text after the final "/").
argannot_database['file'] = argannot_database["file"].map(
    lambda caminho: caminho.rsplit("/", 1)[-1]
)
# Label every row with the database it came from.
argannot_database['tag'] = "ARG-Annot"
# NOTE(review): presumably filldf joins the catalogue metadata onto the hits -- confirm in customfunc.
argannot_database = customfunc.filldf(argannot_database, catalogo_bacterias_br).reset_index()

---

# Perguntas sobre diferenças nos dataframes

In [78]:
import glob

# Collect every *.gbff file in the local genome directory, then report how many
# were found next to the number of rows left in the filtered catalogue.
total_arquivos_gbff = glob.glob("/home/tiago/documents/genomasbrasil/*.gbff")
print(
    f"Foram baixados: {len(total_arquivos_gbff)} genomas brasileiros do NCBI",
    f"{len(catalogo_bacterias_br)} contem informações sobre sua coleta no Brasil",
    sep="\n",
)

Foram baixados: 701 genomas brasileiros do NCBI
567 contem informações sobre sua coleta no Brasil


---

## Filogenias

In [79]:
from Bio import Phylo, SeqIO, AlignIO
import os
import shutil
import glob

In [80]:
def acessar_coordenadas(dataframe):
    """Return the Brazilian rows of ``dataframe`` that have date and location data.

    A row is kept only when ``colection_date``, ``coordenates`` and ``country``
    all differ from the literal placeholder ``'Na'`` and the ``country`` text
    contains "Brazil". ``customfunc.SplitCoords`` is then called on the result
    for "lat" and for "lon", and every value of the ``lat`` and ``lon`` columns
    is passed through ``customfunc.NumCoord``.

    NOTE(review): presumably SplitCoords adds the ``lat``/``lon`` columns in
    place and NumCoord turns each into a number -- confirm in customfunc.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns ``colection_date``, ``coordenates`` and
        ``country``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with a fresh integer index and ``country`` moved to
        the first column.
    """
    # One combined filter that drops rows holding the 'Na' placeholder.
    sem_placeholder = dataframe.query(
        "colection_date != 'Na' and coordenates != 'Na' and country != 'Na'"
    )
    # Select by country label, then restore `country` as a regular column.
    dataframe_com_coordenadas = (
        sem_placeholder
        .set_index("country")
        .filter(like="Brazil", axis=0)
        .reset_index()
    )
    for eixo in ("lat", "lon"):
        customfunc.SplitCoords(dataframe_com_coordenadas, eixo)
    for eixo in ("lat", "lon"):
        dataframe_com_coordenadas[eixo] = dataframe_com_coordenadas[eixo].apply(customfunc.NumCoord)
    return dataframe_com_coordenadas

In [81]:
# Restrict the ARG-Annot hits to the rows acessar_coordenadas keeps (Brazilian, with date and coordinates).
argannot_com_coordenadas = acessar_coordenadas(argannot_database)

**_K. pneumoniae_**

In [82]:
# Distinct genome file names whose organism label contains "Klebsiella".
klebsiellas_do_argannot = set(
    argannot_com_coordenadas.loc[
        argannot_com_coordenadas['organism'].str.contains("Klebsiella"),
        'file',
    ]
)

In [83]:
# NOTE(review): presumably copies the listed genome files from `origem` into a directory named by `nome_diretorio` -- confirm in customfunc.
customfunc.transferir_genomas(origem = "/mnt/c/Users/tiago/Google Drive/lab/fapesp/Mestrado/projetospython/abricate/genomasbrasil/", nome_diretorio = "klebsiella_br", genomas = klebsiellas_do_argannot)

In [84]:
# NOTE(review): presumably converts the .gbff files found in `diretorio` to FASTA -- confirm in customfunc.
customfunc.converter_gbff_fasta(diretorio = "/home/tiago/documents/github/ARG-Sentinel/klebsiella_br")

---

**_Escherichia coli_**

In [85]:
# Distinct genome file names whose organism label contains "Escherichia".
escherichia_coli_do_argannot = set(
    argannot_com_coordenadas.loc[
        argannot_com_coordenadas['organism'].str.contains("Escherichia"),
        'file',
    ]
)

In [86]:
# NOTE(review): presumably copies the listed genome files from `origem` into a directory named by `nome_diretorio` -- confirm in customfunc.
customfunc.transferir_genomas(origem = "/mnt/c/Users/tiago/Google Drive/lab/fapesp/Mestrado/projetospython/abricate/genomasbrasil/", nome_diretorio = "ecoli_br", genomas = escherichia_coli_do_argannot)

In [87]:
# NOTE(review): presumably converts the .gbff files found in `diretorio` to FASTA -- confirm in customfunc.
customfunc.converter_gbff_fasta(diretorio = "/home/tiago/documents/github/ARG-Sentinel/ecoli_br")

---

**_P. aeruginosa_**

In [88]:
# Distinct genome file names whose organism label contains "aeruginosa".
pseudomonas_aeruginosa_do_argannot = set(
    argannot_com_coordenadas.loc[
        argannot_com_coordenadas['organism'].str.contains("aeruginosa"),
        'file',
    ]
)

In [89]:
# NOTE(review): presumably copies the listed genome files from `origem` into a directory named by `nome_diretorio` -- confirm in customfunc.
customfunc.transferir_genomas(origem = "/mnt/c/Users/tiago/Google Drive/lab/fapesp/Mestrado/projetospython/abricate/genomasbrasil/", nome_diretorio = "peruginosa_br", genomas = pseudomonas_aeruginosa_do_argannot)

In [90]:
# Path corrected: this cell pointed at "/home/tiago/document/..." (missing "s"),
# unlike every other cell, which uses "/home/tiago/documents/github/ARG-Sentinel/".
# NOTE(review): presumably converts the .gbff files found in `diretorio` to FASTA -- confirm in customfunc.
customfunc.converter_gbff_fasta(diretorio = "/home/tiago/documents/github/ARG-Sentinel/peruginosa_br")

---

**_S. aureus_**

In [91]:
# Distinct genome file names whose organism label contains "aureus".
esph_aureus_do_argannot = set(
    argannot_com_coordenadas.loc[
        argannot_com_coordenadas['organism'].str.contains("aureus"),
        'file',
    ]
)

In [92]:
# NOTE(review): presumably copies the listed genome files from `origem` into a directory named by `nome_diretorio` -- confirm in customfunc.
customfunc.transferir_genomas(origem = "/mnt/c/Users/tiago/Google Drive/lab/fapesp/Mestrado/projetospython/abricate/genomasbrasil/", nome_diretorio = "eaureus_br", genomas = esph_aureus_do_argannot)

In [93]:
# NOTE(review): presumably converts the .gbff files found in `diretorio` to FASTA -- confirm in customfunc.
customfunc.converter_gbff_fasta(diretorio = "/home/tiago/documents/github/ARG-Sentinel/eaureus_br")

---

**_Acinetobacter spp_**

In [94]:
# Distinct genome file names whose organism label contains "Acinetobacter".
acinetobacter_do_argannot = set(
    argannot_com_coordenadas.loc[
        argannot_com_coordenadas['organism'].str.contains("Acinetobacter"),
        'file',
    ]
)

In [95]:
# NOTE(review): presumably copies the listed genome files from `origem` into a directory named by `nome_diretorio` -- confirm in customfunc.
customfunc.transferir_genomas(origem = "/mnt/c/Users/tiago/Google Drive/lab/fapesp/Mestrado/projetospython/abricate/genomasbrasil/", nome_diretorio = "acinetobacter_br", genomas = acinetobacter_do_argannot)

In [96]:
# NOTE(review): presumably converts the .gbff files found in `diretorio` to FASTA -- confirm in customfunc.
customfunc.converter_gbff_fasta(diretorio = "/home/tiago/documents/github/ARG-Sentinel/acinetobacter_br")

---

**_Enterobacter spp_**

In [97]:
# Distinct genome file names whose organism label contains "Enterobacter".
enterobacter_do_argannot = set(
    argannot_com_coordenadas.loc[
        argannot_com_coordenadas['organism'].str.contains("Enterobacter"),
        'file',
    ]
)

In [98]:
# NOTE(review): presumably copies the listed genome files from `origem` into a directory named by `nome_diretorio` -- confirm in customfunc.
customfunc.transferir_genomas(origem = "/mnt/c/Users/tiago/Google Drive/lab/fapesp/Mestrado/projetospython/abricate/genomasbrasil/", nome_diretorio = "enterobacter_br", genomas = enterobacter_do_argannot)

In [99]:
# NOTE(review): presumably converts the .gbff files found in `diretorio` to FASTA -- confirm in customfunc.
customfunc.converter_gbff_fasta(diretorio = "/home/tiago/documents/github/ARG-Sentinel/enterobacter_br")

In [100]:
#!parsnp -c -r ! -d /home/tiago/documentos/github/ARG-Sentinel/enterobacter_br -o /home/tiago/documentos/github/ARG-Sentinel/enterobacter_br

---

## **Figura 3**. Mapas <a class="anchor" id="Figure3"></a>

In [None]:
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [None]:
# Create the figure object
fig = plt.figure(figsize = (20,10))

# Left panel: PlateCarree map cropped to longitudes [-80, -32] and latitudes [-35, 6]
ax1 = fig.add_subplot(121, projection = ccrs.PlateCarree())
ax1.stock_img()
ax1.set_extent([-80, -32, -35, 6], crs=ccrs.PlateCarree())
# Add features to the map
ax1.add_feature(cfeature.LAND)
ax1.add_feature(cfeature.STATES.with_scale('10m'),linestyle='--')
ax1.add_feature(cfeature.OCEAN)
ax1.add_feature(cfeature.COASTLINE)
ax1.add_feature(cfeature.BORDERS, linestyle='-', linewidth = 2)
ax1.add_feature(cfeature.LAKES, alpha=0.5)
ax1.gridlines(draw_labels=True)

# Add the genes: one marker per row at (lon, lat), drawn on the map axes
# explicitly instead of relying on pyplot's "current axes" state.
ax1.plot(argannot_com_coordenadas["lon"],
         argannot_com_coordenadas["lat"],
         linestyle = '',
         marker = 'o',
         color = '#0000FF',
         transform = ccrs.Geodetic())

# Right panel: phylogeny. Phylo.draw opens a brand-new figure unless it is
# given axes, so the subplot is passed in to keep the tree beside the map.
# NOTE(review): `tree` is not defined in any visible cell -- it must be loaded
# (e.g. with Phylo.read) before this cell can run on a fresh kernel.
ax2 = fig.add_subplot(122)
Phylo.draw(tree, axes = ax2)




---

## **Figura 4**. Grafos <a class="anchor" id="Figure4"></a>

In [None]:
# Genome-by-gene count matrix: one indicator column per gene name, summed
# over all hits that share the same file, with rows ordered by file name.
argannot_adj_matrix = (
    argannot_database
    .set_index("file")["gene"]
    .str.get_dummies()
    .groupby(level=0)
    .sum()
    .sort_index()
)
argannot_adj_matrix.head(2)

In [None]:
# Organism label of each distinct (file, organism) pair, ordered by file name.
argannot_adj_matrix_new_index = (
    argannot_database
    .loc[:, ["file", "organism"]]
    .sort_values(by='file')
    .drop_duplicates()
    .organism
    .values
)

In [None]:
# Relabel the matrix rows with the organism names computed from the file-sorted pairs.
argannot_adj_matrix = argannot_adj_matrix.set_index(argannot_adj_matrix_new_index)
argannot_adj_matrix.head()

In [None]:
import networkx as nx

In [None]:
# Start from an empty networkx MultiGraph; no nodes or edges are added in this cell.
ArgannotGraph = nx.MultiGraph()

---

## **Figura 5** <a class="anchor" id="Figure5"></a>

---

# Flash talks 18/09/2020

<p style='text-align: justify;'> 
    Como pesquisar a movimentação dos ARGs pelo território nacional?<br />
    Preciso unir as informações: ano de coleta da amostra, coordenada e linhagens onde esses ARGs aparecem. <br />
    Figura resultante: um mapa do Brasil com grafo sobreposto.
</p>

In [None]:
# This cell used to read `args_com_coordenadas`, a name no visible cell defines
# (NameError on a fresh kernel). It now reads `argannot_com_coordenadas`, the
# frame returned by acessar_coordenadas earlier in the notebook.
# NOTE(review): confirm that this is the intended source frame.
# Keep the (file, gene) pairs of records whose organism name contains "Klebsiella".
klebsiella_pneumoniae_classes = argannot_com_coordenadas.set_index("organism").filter(like = "Klebsiella", axis = 0)[['file','gene']].set_index("file")
# One indicator column per gene, summed per genome file.
klebsiella_pneumoniae_classes_dummies = pd.get_dummies(klebsiella_pneumoniae_classes).groupby(level = 0).sum()
klebsiella_pneumoniae_classes_dummies

In [None]:
# Heatmap of the transposed dummy matrix: its columns become rows and its file index becomes columns.
sns.heatmap(data = klebsiella_pneumoniae_classes_dummies.T, cmap = "RdBu_r")
#plt.savefig("/home/tiago/documentos/github/ARG-Sentinel/Klebsiella_br/figuras/heatmap_kpneu_br_class.png")

In [None]:
# This cell used to read `args_com_coordenadas`, a name no visible cell defines
# (NameError on a fresh kernel). It now reads `argannot_com_coordenadas`, the
# frame returned by acessar_coordenadas earlier in the notebook.
# NOTE(review): confirm that this is the intended source frame.
# (file, gene) pairs for Klebsiella records, ordered by gene name.
klebsiella_pneumoniae_bla = argannot_com_coordenadas.set_index("organism").filter(like = "Klebsiella", axis = 0)[['file','gene']].set_index("file").sort_values('gene')
# One indicator column per gene, summed per genome file.
klebsiella_pneumoniae_bla_dummies = pd.get_dummies(klebsiella_pneumoniae_bla).groupby(level = 0).sum()
# Cluster rows only (col_cluster=False), using just the columns whose name contains '(Bla)'.
sns.clustermap(klebsiella_pneumoniae_bla_dummies.filter(like = '(Bla)', axis = 1), cmap = "RdBu_r", col_cluster=False)
#plt.savefig("/home/tiago/documentos/github/ARG-Sentinel/Klebsiella_br/figuras/clustermap_kpneu_br.png",dpi = 300)

In [None]:
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [None]:
# Create the figure object
fig = plt.figure(figsize = (20,20))
# Add the map axes to the figure
ax1 = fig.add_subplot(121, projection=ccrs.PlateCarree())
ax1.stock_img()
ax1.set_extent([-80, -32, -35, 6], crs=ccrs.PlateCarree())
# Add features to the map
ax1.add_feature(cfeature.LAND)
ax1.add_feature(cfeature.STATES.with_scale('10m'),linestyle='--')
ax1.add_feature(cfeature.OCEAN)
ax1.add_feature(cfeature.COASTLINE)
ax1.add_feature(cfeature.BORDERS, linestyle='-', linewidth = 2)
ax1.add_feature(cfeature.LAKES, alpha=0.5)
ax1.gridlines(draw_labels=True)

# Add the genes: one red marker per row at (lon, lat), drawn on the map axes explicitly.
# This cell used to read `args_com_coordenadas`, a name no visible cell defines;
# it now reads `argannot_com_coordenadas`, the frame returned by acessar_coordenadas.
# NOTE(review): confirm that this is the intended source frame.
ax1.plot(argannot_com_coordenadas["lon"],
         argannot_com_coordenadas["lat"],
         linestyle = '',
         marker = 'o',
         color = 'r',
         transform = ccrs.Geodetic())


#plt.savefig("/home/tiago/documentos/github/ARG-Sentinel/Klebsiella_br/figuras//map_Kpneumoniae_br.png",dpi = 400)

In [None]:
# geopandas is used here but not imported by any visible cell, so import it locally.
import geopandas

# Load the bundled low-resolution Natural Earth countries layer.
# NOTE(review): `geopandas.datasets` appears to have been removed in geopandas 1.0 --
# confirm the installed version, or point read_file at a local Natural Earth file.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

In [None]:
# Draw the loaded `world` layer with its default plot settings.
world.plot()

In [None]:
import pandas