In [3]:
# Directory that holds the cleaned input tables.
data_path = "../data/clean/"

# Resolve both input CSV locations up front.
taxonomy_file = os.path.join(data_path, "taxonomic_profiles.csv")
relative_taxonomy_file = os.path.join(
    data_path, "relative_taxonomies_with_metadata.csv"
)

# Taxonomic profiles: row index 1 of the file is used for the column names.
taxa_df = pd.read_csv(taxonomy_file, header=[1])

# Relative abundances joined with metadata, without the 'label' column.
taxa_relative_df = pd.read_csv(relative_taxonomy_file).drop(columns='label')

# Keep only the columns from position 35651 onward and drop every row whose
# 'Environmental Feature' is 'DCM/OMZ'.
# NOTE(review): 35651 is a hard-coded positional offset; presumably it marks
# where the metadata columns start -- confirm if the input file changes.
meta_df = (
    taxa_relative_df
    .iloc[:, 35651:]
    .loc[lambda frame: frame['Environmental Feature'] != 'DCM/OMZ']
)

In [25]:
# Score tables, one per feature set, listed in the order they are stacked.
score_file_names = (
    "phyla_model_scores.csv",
    "function_model_scores.csv",
    "OG_model_scores.csv",
    "taxa_model_scores.csv",
)

# Full path of each score table under the current data directory.
(phyla_score_file,
 function_score_file,
 OG_score_file,
 taxa_score_file) = [os.path.join(data_path, name) for name in score_file_names]

# Load each table into its own frame.
(phyla_score_df,
 function_score_df,
 OG_score_df,
 taxa_score_df) = [
    pd.read_csv(path)
    for path in (phyla_score_file, function_score_file, OG_score_file, taxa_score_file)
]

# Stack all score tables into one long frame with a fresh 0..n-1 index.
score_df = pd.concat(
    [phyla_score_df, function_score_df, OG_score_df, taxa_score_df],
    ignore_index=True,
)

In [67]:
# Bubble chart of model scores: one marker per (Model, Data) pair, sized and
# coloured by the mean score, with a translucent halo whose extra size grows
# with the standard deviation of the score.

# Categorical axis factors. `models` is reversed so the first entry is drawn
# at the top of the y axis.
# NOTE(review): the 'Model' column is plotted on the `variables` axis and the
# 'Data' column on the `models` axis; presumably 'Model' holds the predicted
# variable and 'Data' the feature set -- confirm against the score CSVs.
models = ['Taxa', 'OG', 'Phyla', 'Function'][::-1]
variables = ['Latitude', 'Longitude', 'Depth (m)', 'Temperature (°C)', 'Salinity (PSU)',
       'Oxygen (µmol/kg)', 'Nitrates (µmol/L)', 'NO2 (µmol/L)',
       'PO4 (µmol/L)', 'Environmental Feature', 'Biome', 'Region']

scale = 50      # marker size = Score**2 * scale
std_scale = 20  # extra halo size = sqrt(std of Score) * std_scale

# Group once and reuse the grouping for both aggregations.
score_groups = score_df.groupby(['Model', 'Data'])

# Mean score per (Model, Data).
mean_score_df = score_groups.mean().reset_index()
# Any score that is not >= 0 is replaced by 0.1. This covers negative scores
# and also NaN, because the comparison is False for NaN.
mean_score_df['Score'] = mean_score_df['Score'].where(mean_score_df['Score'] >= 0, 0.1)
mean_score_df['Scale'] = mean_score_df['Score']**2*scale
mean_source = bokeh.models.ColumnDataSource(mean_score_df)

# Standard deviation per (Model, Data). After reset_index() every key pair is
# already unique, so no further aggregation is needed.
std_score_df = score_groups.std().reset_index()
# Both frames come from the same grouping, so their 0..n-1 indexes line up
# row for row and the addition below pairs matching (Model, Data) entries.
std_score_df['Scale'] = mean_score_df['Scale'] + np.sqrt(std_score_df['Score'])*std_scale
std_source = bokeh.models.ColumnDataSource(std_score_df)

p = bokeh.plotting.figure(frame_height=200, frame_width=800, x_range=variables, y_range=models)

# Colour range spans the (clipped) mean scores.
color_mapper = bokeh.models.LinearColorMapper(
    palette=bokeh.palettes.Viridis256,
    low=mean_score_df['Score'].min(),
    high=mean_score_df['Score'].max(),
)
color_bar = bokeh.models.ColorBar(
    color_mapper=color_mapper,
    location=(0, 0),
    ticker=bokeh.models.BasicTicker(),
    title='Score',
)
p.add_layout(color_bar, 'right')

score_color = bokeh.transform.transform('Score', color_mapper)

# Halo first so the solid mean marker is drawn on top of it.
# NOTE(review): in std_source the 'Score' column holds the standard deviation,
# so the halo colour is the std mapped through the mean-score colour range --
# confirm this is intended.
p.scatter(x='Model', y='Data', size='Scale', color=score_color, source=std_source, alpha=0.5)
p.scatter(x='Model', y='Data', size='Scale', color=score_color, source=mean_source)

p.xaxis.major_label_orientation = np.pi/4
bokeh.io.show(p)

In [57]:
# Write the figure to ../plots/scores.svg.
# NOTE(review): this rebinds data_path, which the loading cells also use as
# the input data directory. Re-running a loading cell that does not reset
# data_path after this cell will look for its CSVs under ../plots/.
data_path = "../plots/"
plot_file = os.path.join(data_path, "scores.svg")

# Switch the figure to the SVG backend, then export it.
p.output_backend = "svg"
bokeh.io.export_svg(p, filename=plot_file)

['../plots/scores.svg']