In [None]:
import os

import bokeh
import bokeh.io
import bokeh.palettes
import bokeh.plotting
import numpy as np
import pandas as pd

# `bokeh.io` and `bokeh.palettes` are imported explicitly above because this
# notebook uses them directly; relying on `import bokeh.plotting` to load them
# as a side effect is fragile.
# Route Bokeh output to inline notebook cells.
bokeh.io.output_notebook()

In [None]:
# Directory holding the cleaned input tables, relative to this notebook.
data_path = "../data/clean/"

# Full paths of the three CSV inputs: taxonomic profiles, sample metadata
# (companion table W1) and the nutrient/temperature table.
taxonomy_file, metadata_file, nutrient_file = (
    os.path.join(data_path, filename)
    for filename in (
        "taxonomic_profiles.csv",
        "companion_table_W1.csv",
        "nutrient_temp_table.csv",
    )
)

In [None]:
# Metadata and nutrient tables are read with pandas' default header handling.
meta_df, cond_df = (
    pd.read_csv(csv_path) for csv_path in (metadata_file, nutrient_file)
)
# The taxonomy table takes its column names from row index 1 (the second row).
taxa_df = pd.read_csv(taxonomy_file, header=[1])

In [None]:
# Column-wise totals over every column from position 7 onward.
# NOTE(review): presumably the first 7 columns are taxonomy annotations and
# the rest are per-sample abundances — confirm against the CSV.
total_sums = taxa_df.iloc[:, 7:].sum(axis=0)

In [None]:
# Restrict to rows whose Phylum is exactly "Cyanobacteria", then total the
# same column range (position 7 onward) that `total_sums` uses.
cyano_rows = taxa_df[taxa_df["Phylum"].eq("Cyanobacteria")]
cyanobacteria_sums = cyano_rows.iloc[:, 7:].sum()

In [None]:
# Cyanobacteria share of each column total, as a percentage. The index (the
# summed column names) is copied into a "label" column so it can be used as a
# merge key later.
cyano_df = (
    (100 * cyanobacteria_sums / total_sums)
    .to_frame(name="percent_cyano")
    .assign(label=lambda frame: frame.index)
)
cyano_df

In [None]:
# Pick three metadata columns by position and give them short names.
# `rename` silently ignores keys that do not match, so if the positional
# selection ever picks different columns the long names will simply remain.
id_df = (
    meta_df
    .iloc[:, [0, 4, 15]]
    .rename(
        columns={
            "Sample label [TARA_station#_environmental-feature_size-fraction]": "label",
            "PANGAEA sample identifier": "pangea_id",
            "Ocean and sea regions (IHO General Sea Areas 1953) [MRGID registered at www.marineregions.com]": "loc",
        }
    )
)
id_df

In [None]:
# Pick the sample-id and mean-temperature columns (positions 0 and 5) from the
# nutrient table and shorten their names; "pangea_id" matches the key used in
# `id_df` so the two frames can be merged.
temp_df = (
    cond_df
    .iloc[:, [0, 5]]
    .rename(
        columns={
            "PANGAEA Sample ID": "pangea_id",
            "Mean_Temperature [deg C]*": "temp_C",
        }
    )
)
temp_df

In [None]:
# Left-join temperatures (on "pangea_id") and then cyanobacteria percentages
# (on "label") onto the sample identifiers, so every row of `id_df` is kept
# even when a match is missing.
cyano_temp_df = pd.merge(
    pd.merge(id_df, temp_df, how="left", on="pangea_id"),
    cyano_df,
    how="left",
    on="label",
)

In [None]:
# Display the merged table (identifiers, region, temperature, % cyanobacteria).
cyano_temp_df

In [None]:
# Assign one fixed colour to each ocean/sea region so the scatter plot and its
# legend stay consistent.
#
# The regions are read directly from `cyano_temp_df` (first 242 rows, the same
# window the plotting cell uses) rather than from `locs`, which is only
# defined in a later cell; the earlier form raised NameError on a fresh
# Restart Kernel -> Run All.
colors = bokeh.palettes.Dark2[8]
plotted_locs = np.array(cyano_temp_df["loc"])[:242]

# `np.unique` returns the distinct regions in sorted order. The palette holds
# 8 colours, so more than 8 distinct regions raises IndexError here.
color_dict = {
    loc: colors[idx] for idx, loc in enumerate(np.unique(plotted_locs))
}

In [None]:
# Display the region -> colour mapping for a quick visual check.
color_dict

In [None]:
# Scatter plot of cyanobacteria percentage against mean temperature, coloured
# by ocean/sea region, with a clickable legend placed outside the plot area.
p = bokeh.plotting.figure(width=900, height=500)

# Only the first 242 merged rows are plotted.
# NOTE(review): 242 is a hard-coded row count — confirm why later rows are excluded.
temps = np.array(cyano_temp_df["temp_C"])[:242]
# Missing percentages (NaN after the left merge) are drawn as 0 %.
cyano = np.nan_to_num(np.array(cyano_temp_df["percent_cyano"])[:242])
locs = np.array(cyano_temp_df["loc"])[:242]

# Draw one glyph renderer per region instead of one per data point: the
# figure is the same but far lighter. `pd.unique` keeps first-appearance
# order, so the legend entries stay in the order the per-point loop produced.
# `scatter` (default marker: circle) replaces `circle(size=...)`, which is
# deprecated in recent Bokeh releases.
for loc in pd.unique(locs):
    in_region = locs == loc
    p.scatter(
        temps[in_region],
        cyano[in_region],
        legend_label=loc,
        color=color_dict[loc],
        size=5,
    )

# Move the legend out of the plotting area so it does not cover data points.
p.add_layout(p.legend[0], 'right')
p.xaxis.axis_label = "temperature (C)"
p.yaxis.axis_label = "% cyanobacteria"
p.title = "% cyanobacteria vs. temperature for various ocean sites"
# Clicking a legend entry toggles the visibility of that region's points.
p.legend.click_policy = "hide"

bokeh.io.show(p)