In [3]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [27]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
import subprocess as sp
import sys
sys.path.insert(0, '..')

from src import circlize_etl

In [7]:
# Build one semicolon-delimited lineage string per 16S OTU.
sixteenS_tax = pd.read_csv('../data/input/16S_AUV/taxonomy.csv')
sixteenS = pd.read_csv('../data/input/16S_AUV/NO-plastid-dada2-table.csv')
# Default (inner) merge: only OTUIDs present in both tables are kept.
sixteenS_tax = sixteenS.merge(sixteenS_tax, on='OTUID')
# astype(str) turns unassigned ranks into the literal text 'nan'. Strip those
# placeholders only when they form a whole rank token (followed by ';' or the
# end of the string). The previous pattern '(;nan)*' also matched the empty
# string everywhere and would have eaten the start of any real rank name that
# begins with "nan".
sixteenS_tax['Taxonomy'] = sixteenS_tax['Domain'].str.cat(
    sixteenS_tax[['Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']].astype(str), sep=";").str.replace(
    r';nan(?=;|$)', '', regex=True
)
# Downstream cells only need the OTU identifier and its lineage string.
sixteenS_tax = sixteenS_tax[['OTUID', 'Taxonomy']]
sixteenS_tax.head(5)

Unnamed: 0,OTUID,Taxonomy
0,9ca38c3d39e04cb5b8df99612854db5a,Bacteria;Bacteroidota;Bacteroidia;Flavobacteri...
1,5a64f7f0dca0044d5382daaaa1c07302,Bacteria;Proteobacteria;Alphaproteobacteria;Rh...
2,b96ac4d2af2d09c41326188c8529716b,Bacteria;Proteobacteria;Gammaproteobacteria;Al...
3,0db63e23987f0ca4d689e7ae0f114ab2,Bacteria;Proteobacteria;Alphaproteobacteria
4,dc8bc8d09bf3fcb0f7010677c5c7022e,Bacteria;Proteobacteria;Alphaproteobacteria;Rh...


In [8]:
# Build one semicolon-delimited lineage string per 18S V9 OTU.
eighteenS_tax = pd.read_csv('../data/input/18SV9_AUV/taxonomy_AUV18SV9.csv')
eighteenS = pd.read_csv('../data/input/18SV9_AUV/dada2-table.csv')
# Default (inner) merge on OTUID, then keep only the identifier and rank columns.
eighteenS_tax = eighteenS.merge(eighteenS_tax, on='OTUID')[['OTUID', 'Domain', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']]
# astype(str) turns unassigned ranks into the literal text 'nan'. Strip those
# placeholders only when they form a whole rank token (followed by ';' or the
# end of the string). The previous pattern '(;nan)*' also matched the empty
# string everywhere and would have eaten the start of any real rank name that
# begins with "nan".
eighteenS_tax['Taxonomy'] = eighteenS_tax['Domain'].str.cat(
    eighteenS_tax[['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']].astype(str), sep=";").str.replace(
    r';nan(?=;|$)', '', regex=True
)
# Downstream cells only need the OTU identifier and its lineage string.
eighteenS_tax = eighteenS_tax[['OTUID', 'Taxonomy']]
eighteenS_tax.head(5)

Unnamed: 0,OTUID,Taxonomy
0,b183e6fbfef6e343cb656326c468382c,Eukaryota;Stramenopiles;Ochrophyta;Bacillariop...
1,38b73295285fc2e17a8f6915533541cf,Eukaryota;Hacrobia;Haptophyta;Prymnesiophyceae...
2,f103458f35af71ff3b14aa6479ee8408,Eukaryota;Alveolata;Dinoflagellata;Dinophyceae...
3,6d2848220b25d52137b36be6a85c69d9,Eukaryota;Stramenopiles;Ochrophyta;Bacillariop...
4,43fc0f0172e7aaac61d17259daa5beb4,Eukaryota;Opisthokonta;Metazoa;Arthropoda;Crus...


## Network with 18S v9


In [11]:
# Load the 18S V9 network from its GML file and report how many nodes it has.
v9_gml_path = '../data/graph/AUV_network_output_v9.gml'
gph = nx.read_gml(v9_gml_path)
len(gph.nodes)

1718

### Pseudo-nitzschia

In [17]:
# Build the edge list for the V9 network from the two taxonomy tables and the
# GML file, then save it as CSV without the index column.
v9_network_gml = '../data/graph/AUV_network_output_v9.gml'
edgelist = circlize_etl.get_edgelist(sixteenS_tax, eighteenS_tax, v9_network_gml)
edgelist.to_csv(
    '../data/edgelist/AUV_network_output_v9_edgelist.csv',
    index=False,
)

In [18]:
# Export the edges whose taxonomy string matches Pseudo-nitzschia on either
# endpoint.
focus_taxon = 'Pseudo-nitzschia'
node1_hit = edgelist['node1Taxonomy'].str.contains(focus_taxon)
node2_hit = edgelist['node2Taxonomy'].str.contains(focus_taxon)
edgelist[node1_hit | node2_hit].to_csv(
    '../data/edgelist/AUV_network_v9_edgelist_pseudo-nitzschia.csv',
    index=False,
)

In [21]:
# Prepare the circlize input for the Pseudo-nitzschia subnetwork.
# NOTE(review): the meaning of the trailing 20 is defined in src/circlize_etl,
# which is not visible here — presumably a cap on the number of classes; confirm.
auv_v9_focus_edges_df = circlize_etl.circlize_etl(
    sixteenS_tax,
    eighteenS_tax,
    '../data/graph/AUV_network_output_v9.gml',
    'Pseudo-nitzschia',
    20,
)

In [22]:
# Save the Pseudo-nitzschia circlize edge table (index column omitted); the
# Rscript call later in the notebook is given this same file name.
auv_v9_focus_edges_df.to_csv(
    '../data/circlize/AUV_network_v9_circlize_edgelist_pseudo-nitzschia.csv',
    index=False,
)

### Bacillariophyta

In [23]:
# Prepare the circlize input for the Bacillariophyta subnetwork.
# NOTE(review): the meaning of the trailing 20 is defined in src/circlize_etl,
# which is not visible here — presumably a cap on the number of classes; confirm.
auv_v9_bacillariophyta_df = circlize_etl.circlize_etl(
    sixteenS_tax,
    eighteenS_tax,
    '../data/graph/AUV_network_output_v9.gml',
    'Bacillariophyta',
    20,
)

In [25]:
# Export the edges whose taxonomy string matches Bacillariophyta on either
# endpoint. This cell previously filtered on 'Pseudo-nitzschia' (copied from
# the section above), so the bacillariophyta CSV held the wrong subset.
bacillariophyta_edges = edgelist[
    edgelist['node1Taxonomy'].str.contains('Bacillariophyta')
    | edgelist['node2Taxonomy'].str.contains('Bacillariophyta')
]
bacillariophyta_edges.to_csv(
    '../data/edgelist/AUV_network_v9_edgelist_bacillariophyta.csv',
    index=False,
)

In [26]:
# Save the Bacillariophyta circlize edge table (index column omitted); the
# Rscript call later in the notebook is given this same file name.
auv_v9_bacillariophyta_df.to_csv(
    '../data/circlize/AUV_network_v9_circlize_edgelist_bacillariophyta.csv',
    index=False,
)

## Run circlize in R

#### Pseudo-nitzschia

In [31]:
# Run the R plotting script on the Pseudo-nitzschia circlize edge list.
# Arguments after the script path: input CSV, positive-plot PNG, negative-plot PNG.
sp.run(
    ['Rscript', 'src/circlize_plot.R',
     'data/circlize/AUV_network_v9_circlize_edgelist_pseudo-nitzschia.csv',
     'plots/circos/AUV_network_v9_pseudo-nitzschia_circos_positive.png',
     'plots/circos/AUV_network_v9_pseudo-nitzschia_circos_negative.png'],
    cwd='..',    # the script and file paths above are relative to the parent directory
    check=True,  # raise CalledProcessError on a non-zero exit instead of silently continuing
)

circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/

If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
  in R. Bioinformatics 2014.

This message can be suppressed by:
  suppressPackageStartupMessages(library(circlize))



[1] "Command line arguments: [Input Edgelist .csv] [Output Circos Positive .png] [Output Circos Negative .png]"
[1] "data/circlize/AUV_network_v9_circlize_edgelist_pseudo-nitzschia.csv plots/circos/AUV_network_v9_pseudo-nitzschia_circos_positive.png plots/circos/AUV_network_v9_pseudo-nitzschia_circos_negative.png"
[1] "Building circos plot with  9  classes"
null device 
          1 
[1] "Building circos plot with  9  classes"
null device 
          1 


CompletedProcess(args=['Rscript', 'src/circlize_plot.R', 'data/circlize/AUV_network_v9_circlize_edgelist_pseudo-nitzschia.csv', 'plots/circos/AUV_network_v9_pseudo-nitzschia_circos_positive.png', 'plots/circos/AUV_network_v9_pseudo-nitzschia_circos_negative.png'], returncode=0)

#### Bacillariophyta

In [35]:
# Run the R plotting script on the Bacillariophyta circlize edge list.
# Arguments after the script path: input CSV, positive-plot PNG, negative-plot PNG.
sp.run(
    ['Rscript', 'src/circlize_plot.R',
     'data/circlize/AUV_network_v9_circlize_edgelist_bacillariophyta.csv',
     'plots/circos/AUV_network_v9_bacillariophyta_circos_positive.png',
     'plots/circos/AUV_network_v9_bacillariophyta_circos_negative.png'],
    cwd='..',    # the script and file paths above are relative to the parent directory
    check=True,  # raise CalledProcessError on a non-zero exit instead of silently continuing
)

circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/

If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
  in R. Bioinformatics 2014.

This message can be suppressed by:
  suppressPackageStartupMessages(library(circlize))



[1] "Command line arguments: [Input Edgelist .csv] [Output Circos Positive .png] [Output Circos Negative .png]"
[1] "data/circlize/AUV_network_v9_circlize_edgelist_bacillariophyta.csv plots/circos/AUV_network_v9_bacillariophyta_circos_positive.png plots/circos/AUV_network_v9_bacillariophyta_circos_negative.png"
[1] "Building circos plot with  20  classes"
null device 
          1 
[1] "Building circos plot with  20  classes"
null device 
          1 


CompletedProcess(args=['Rscript', 'src/circlize_plot.R', 'data/circlize/AUV_network_v9_circlize_edgelist_bacillariophyta.csv', 'plots/circos/AUV_network_v9_bacillariophyta_circos_positive.png', 'plots/circos/AUV_network_v9_bacillariophyta_circos_negative.png'], returncode=0)