In [1]:
# Imports
import os

import bokeh
import bokeh.plotting
import bokeh.palettes
import numpy as np
import pandas as pd

# Configure Bokeh so that plots shown later are rendered inline in the notebook.
# NOTE(review): `bokeh.io` is never imported explicitly above; presumably it is
# loaded as a side effect of `import bokeh.plotting` — confirm.
bokeh.io.output_notebook()

In [2]:
# Directory holding the input tables and the file read by this cell.
data_path = "../data/clean/"
relative_known_function_file = os.path.join(
    data_path, "relative_known_function_with_metadata.csv"
)

# Read the table, discard its 'label' column and keep only the rows whose
# 'Environmental Feature' is something other than 'DCM/OMZ'.
OG_relative_df = (
    pd.read_csv(relative_known_function_file)
    .drop(columns='label')
    .loc[lambda frame: frame['Environmental Feature'] != 'DCM/OMZ']
)

In [3]:
# Distinct values of the 'Environmental Feature' column, sorted in place.
# NOTE(review): `features` is rebound to a hand-written list in a later cell,
# so this value appears to be used for inspection only — confirm.
feature_column = OG_relative_df['Environmental Feature']
features = feature_column.unique()
features.sort()

In [4]:
# Reduce the table to its first 21 columns plus the two metadata columns
# that the plot needs (temperature and environmental feature).
df1 = OG_relative_df.iloc[:, :21]
df2, df3 = (
    OG_relative_df[column_name]
    for column_name in ('Temperature (°C)', 'Environmental Feature')
)
OG_relative_df = pd.concat([df1, df2, df3], axis=1)

In [5]:
# Order the rows by temperature, then turn the temperature column into
# strings so its values can be used as categorical axis labels.
# NOTE: the sort must happen before the conversion; running this cell a
# second time would sort the already-stringified values as text.
OG_relative_df = (
    OG_relative_df
    .sort_values(by=['Temperature (°C)'])
    .astype({'Temperature (°C)': str})
)

In [6]:
# Per-row inputs for the x-axis factors: environmental feature and
# (stringified) temperature.
x1 = OG_relative_df['Environmental Feature'].values
x2 = OG_relative_df['Temperature (°C)'].values

# factor_count[label] is the number of earlier occurrences of `label`
# (0 once it has been seen a single time).
factor_count = {}
unique_x2 = []
for temperature_label in x2:
    repeats = factor_count.get(temperature_label, -1) + 1
    factor_count[temperature_label] = repeats
    # A repeated label gets one leading space per earlier occurrence, so
    # repeated temperatures yield different label strings.
    unique_x2.append(' ' * repeats + temperature_label)

# One (feature, label) tuple per row, stored on the frame as column 'x'.
x = list(zip(x1, unique_x2))
OG_relative_df['x'] = x

In [7]:
# Display order of the environmental features along the x-axis
features = ['SRF', 'DCM', 'MIX', 'MES/OMZ', 'MES']


def custom_sort(item):
    """Return the sort rank of an x-axis factor.

    `item` is indexed at position 0 to obtain the feature name; the rank is
    that name's position in `features`. `list.index` raises ValueError when
    the name is not listed.
    """
    feature_name = item[0]
    return features.index(feature_name)
# Factors ordered by feature rank; `sorted` is stable, so the existing row
# order is kept within each feature.
lst = sorted(OG_relative_df["x"], key=custom_sort)

In [27]:
# Stacked series: the first 21 columns of the table, one turbo-palette
# colour per series.
categories = OG_relative_df.columns[:21].tolist()
color = bokeh.palettes.turbo(len(categories))

# Figure with a categorical x-axis built from the ordered factors in `lst`.
p = bokeh.plotting.figure(
    x_range=bokeh.models.FactorRange(*lst),
    x_axis_label='Temperature (°C)',
    y_axis_label='Relative Abundance',
    frame_width=1700,
    frame_height=450,
)

# The legend is attached to the right of the frame before the bars are
# drawn, as in the original statement order.
p.add_layout(bokeh.models.Legend(), 'right')
p.vbar_stack(
    categories,
    x="x",
    source=OG_relative_df,
    width=0.9,
    color=color,
    legend_label=categories,
)

# y-axis: no padding around the auto-computed range, direction flipped.
p.y_range.range_padding = 0.0
p.y_range.flipped = True
# x-axis: tick labels rotated by a quarter turn.
p.xaxis.major_label_orientation = np.pi / 2

bokeh.io.show(p)

In [28]:
# Set path to export plot
# NOTE: this rebinds `data_path`, which other cells use for the input data
# directory; those cells assign it again before reading.
data_path = "../plots/"
# Create the output directory if it does not exist yet, so the export below
# cannot fail merely because the folder is absent.
os.makedirs(data_path, exist_ok=True)
plot_file = os.path.join(data_path, "function.svg")

# Switch the figure to the SVG backend and write it to disk; the call's
# return value is the cell output.
p.output_backend = "svg"
bokeh.io.export_svg(p, filename=plot_file)

['../plots/function.svg']

In [10]:
# Set path to collect data files
data_path = "../data/clean/"

# Input file locations (only the relative-phyla table is read in this cell)
taxonomy_file = os.path.join(data_path, "taxonomic_profiles.csv")
relative_phyla_file = os.path.join(data_path, "relative_phyla_with_metadata.csv")

# Read the table, discard its 'label' column and keep only the rows whose
# 'Environmental Feature' is something other than 'DCM/OMZ'.
phyla_relative_df = (
    pd.read_csv(relative_phyla_file)
    .drop(columns='label')
    .loc[lambda frame: frame['Environmental Feature'] != 'DCM/OMZ']
)

In [11]:
# Among the first 55 columns, find those whose every value is below 0.01
# and remove them from the table.
leading_columns = phyla_relative_df.iloc[:, :55]
always_below_threshold = leading_columns.lt(0.01).all()
columns_to_drop = leading_columns.columns[always_below_threshold]
phyla_relative_df = phyla_relative_df.drop(columns=columns_to_drop)

In [12]:
# First 12 columns of the filtered table (presumably the per-phylum relative
# abundances — confirm against the input file).
# An explicit copy is taken because a column is added below: writing to a
# bare `iloc` slice triggers pandas' SettingWithCopyWarning and leaves it
# ambiguous whether the parent frame is affected.
df1 = phyla_relative_df.iloc[:, :12].copy()

# 'Other' is the remainder each row needs for its columns to total 1.
row_sums = df1.sum(axis=1)
df1['Other'] = 1 - row_sums

# Re-attach the two metadata columns needed for plotting.
df2 = phyla_relative_df['Temperature (°C)']
df3 = phyla_relative_df['Environmental Feature']
phyla_relative_df = pd.concat([df1, df2, df3], axis=1)

In [13]:
# Order the rows by temperature, then turn the temperature column into
# strings so its values can be used as categorical axis labels.
# NOTE: the sort must happen before the conversion; running this cell a
# second time would sort the already-stringified values as text.
phyla_relative_df = (
    phyla_relative_df
    .sort_values(by=['Temperature (°C)'])
    .astype({'Temperature (°C)': str})
)

In [14]:
# Per-row inputs for the x-axis factors: environmental feature and
# (stringified) temperature.
x1 = phyla_relative_df['Environmental Feature'].values
x2 = phyla_relative_df['Temperature (°C)'].values

# factor_count[label] is the number of earlier occurrences of `label`
# (0 once it has been seen a single time).
factor_count = {}
unique_x2 = []
for temperature_label in x2:
    repeats = factor_count.get(temperature_label, -1) + 1
    factor_count[temperature_label] = repeats
    # A repeated label gets one leading space per earlier occurrence, so
    # repeated temperatures yield different label strings.
    unique_x2.append(' ' * repeats + temperature_label)

# One (feature, label) tuple per row, stored on the frame as column 'x'.
x = list(zip(x1, unique_x2))
phyla_relative_df['x'] = x

In [15]:
# Feature name -> rank, numbered from 0 in the listed order.
order = {
    feature_name: rank
    for rank, feature_name in enumerate(['SRF', 'DCM', 'MIX', 'MES/OMZ', 'MES'])
}

In [16]:
# Display order of the environmental features along the x-axis
features = ['SRF', 'DCM', 'MIX', 'MES/OMZ', 'MES']


def custom_sort(item):
    """Return the sort rank of an x-axis factor.

    `item` is indexed at position 0 to obtain the feature name; the rank is
    that name's position in `features`. `list.index` raises ValueError when
    the name is not listed.
    """
    feature_name = item[0]
    return features.index(feature_name)
# Factors ordered by feature rank; `sorted` is stable, so the existing row
# order is kept within each feature.
lst = sorted(phyla_relative_df["x"], key=custom_sort)

In [25]:
# Stacked series: the first 13 columns of the table, one turbo-palette
# colour per series.
categories = phyla_relative_df.columns[:13].tolist()
color = bokeh.palettes.turbo(len(categories))

# Figure with a categorical x-axis built from the ordered factors in `lst`.
p = bokeh.plotting.figure(
    x_range=bokeh.models.FactorRange(*lst),
    x_axis_label='Temperature (°C)',
    y_axis_label='Relative Abundance',
    frame_width=1700,
    frame_height=450,
)

# The legend is attached to the right of the frame before the bars are
# drawn, as in the original statement order.
p.add_layout(bokeh.models.Legend(), 'right')
p.vbar_stack(
    categories,
    x="x",
    source=phyla_relative_df,
    width=0.9,
    color=color,
    legend_label=categories,
)

# y-axis: no padding around the auto-computed range, direction flipped.
p.y_range.range_padding = 0.0
p.y_range.flipped = True
# The x-axis is hidden entirely in this figure.
p.xaxis.visible = False

bokeh.io.show(p)

In [26]:
# Set path to export plot
# NOTE: this rebinds `data_path`, which other cells use for the input data
# directory.
data_path = "../plots/"
# Create the output directory if it does not exist yet, so the export below
# cannot fail merely because the folder is absent.
os.makedirs(data_path, exist_ok=True)
plot_file = os.path.join(data_path, "phyla.svg")

# Switch the figure to the SVG backend and write it to disk; the call's
# return value is the cell output.
p.output_backend = "svg"
bokeh.io.export_svg(p, filename=plot_file)

['../plots/phyla.svg']