In [103]:
from pathlib import Path
from bokeh.plotting import output_notebook, show, figure, ColumnDataSource
from bokeh.models import HoverTool, glyphs, FactorRange
from bokeh.transform import jitter
from src.utils import get_data_dir
import pandas as pd
import numpy as np


# force bokeh plot output to jupyter notebook
output_notebook()

# convenient access to data directories
DIRS = get_data_dir(str(Path('data').resolve()))

# Class Harmonization

In [99]:
src = pd.read_csv(str(DIRS.ana / 'class_harmon.csv'))

# clean up
src.rename(columns=lambda x: x.upper() if x[:2] == 'jc' else x, inplace=True)
src.drop('smc0 smc10 smc20 smc30'.split(), axis=1, inplace=True)
src.drop(src.columns[0], axis=1, inplace=True)
src.dropna(axis=0, how='any', inplace=True)

src.head()

Unnamed: 0,tile,region,JC0,JC10,JC20,JC30
0,10N_114E,Asia,0.8391,0.8382,0.8353,0.8318
1,20N_084E,Asia,0.6087,0.5962,0.5611,0.5083
2,05N_048E,Africa,0.0293,0.0,0.0,0.0
3,05N_018E,Africa,0.6003,0.6006,0.6107,0.6266
4,20N_114E,Asia,0.5078,0.5024,0.496,0.4863


In [190]:
# Colorcode the classes and provide a legend
melted = src.melt(id_vars='tile region'.split(), var_name='jc_class', value_name='score')

source = ColumnDataSource({'x': list(zip(melted.region, melted.jc_class)),
                           'y': melted.score,
                           'id': melted.tile,})
hover = HoverTool(tooltips=[('Region/Class', '@x'),
                            ('Tile', '@id'),
                            ('JC-Score', '@y'),])
factors = [(reg, cls) 
           for reg in pd.unique(melted.region) 
           for cls in pd.unique(melted.jc_class)]

plot = figure(x_range=FactorRange(*factors), plot_width=950, plot_height=600,
              tools=[hover, 'pan', 'wheel_zoom', 'save', 'reset', 'box_zoom'],
              title="Jaccard index")

plot.x(x=jitter('x', width=0.6, range=plot.x_range), y='y', source=source)

plot.xgrid.grid_line_color = None
plot.xaxis.axis_label = "Region/Class"
plot.yaxis.axis_label = "Jaccard score"
plot.y_range.start = -0.01

show(plot)

# To refactor

In [185]:
src.describe()

Unnamed: 0,JC0,JC10,JC20,JC30
count,280.0,280.0,280.0,280.0
mean,0.51894,0.518309,0.501016,0.489995
std,0.317543,0.323564,0.336737,0.339287
min,0.0,0.0,0.0,0.0
25%,0.2229,0.2554,0.13465,0.097175
50%,0.60455,0.6064,0.60725,0.5856
75%,0.7849,0.788075,0.7899,0.7839
max,0.9787,0.9792,0.9796,0.9798


In [187]:
america = data[data.region == 'Americas']
asia = data[data.region == 'Asia'] 
africa = data[data.region == 'Africa']
oceania = data[data.region == 'Oceania']
sevenseas = data[data.region == 'Seven seas (open ocean)']

america.head()

Unnamed: 0,tile,region,jc0,jc10,jc20,jc30
6,00N_060W,Americas,0.924,0.926,0.9274,0.9288
7,05N_072W,Americas,0.9139,0.9153,0.9166,0.9178
11,05S_048W,Americas,0.2874,0.3055,0.3234,0.3435
13,10N_054W,Americas,0.9132,0.9145,0.915,0.9155
17,05S_072W,Americas,0.9241,0.9248,0.9257,0.9262


# Bokeh demo

In [157]:
data = america

In [158]:
boxplot = data.quantile(q=(0.25, 0.5, 0.75)).T
boxplot.columns = ['q1', 'q2', 'q3']
boxplot['iqr'] = boxplot.q3 - boxplot.q1
boxplot['tukey_lower_whisker'] = boxplot.q1 - 1.5 * boxplot.iqr
boxplot['tukey_upper_whisker'] = boxplot.q3 + 1.5 * boxplot.iqr
boxplot['q_lower_whisker'] = data.quantile(q=0.025)
boxplot['q_upper_whisker'] = data.quantile(q=0.975)
boxplot['min_whisker'] = data.min()
boxplot['max_whisker'] = data.max()
mean = data.mean()
boxplot['means'] = mean

boxplot

Unnamed: 0,q1,q2,q3,iqr,tukey_lower_whisker,tukey_upper_whisker,q_lower_whisker,q_upper_whisker,min_whisker,max_whisker,means
jc0,0.42395,0.6663,0.79755,0.3736,-0.13645,1.35795,0.0,0.92798,0,0.9787,0.603339
jc10,0.44285,0.6717,0.79895,0.3561,-0.0913,1.3331,0.0,0.92918,0,0.9792,0.605072
jc20,0.46085,0.6611,0.79545,0.3346,-0.04105,1.29735,0.0,0.93023,0,0.9796,0.603666
jc30,0.4522,0.6561,0.796,0.3438,-0.0635,1.3117,0.0,0.931365,0,0.9798,0.601632


In [159]:
hover = HoverTool(tooltips=[("Index", "@index"),
                            ("Q1", "@q1"),
                            ("Q2", "@q2"),
                            ("Q3", "@q3"),
                            ("IQR", "@iqr"),
                            ("lWhisker", "@min_whisker"),
                            ("uWhisker", "@max_whisker"),
                            ("Mean", "@means"),])
source = ColumnDataSource(boxplot)

# plotting
plot = figure(x_range=list(boxplot.index), tools=[hover, 'pan', 'wheel_zoom', 'save', 'reset', 'box_zoom'])

# box
plot.vbar(x='index', width=0.7, bottom='q1', top='q2',
          line_color='black', fill_color='#f7f7f7', fill_alpha=0.7, source=source)
plot.vbar(x='index', width=0.7, bottom='q2', top='q3',
          line_color='black', fill_color='#67a9cf', fill_alpha=0.7, source=source)

# whiskers
plot.rect(x='index', y='min_whisker', width=0.2, height=0.001,
          line_color="black", source=source)
plot.rect(x='index', y='max_whisker', width=0.2, height=0.001,
          line_color="black", source=source)

# stems
plot.segment(x0='index', y0='min_whisker', x1='index', y1='q1',
             line_color='black', source=source)
#plot.segment(x0='index', y0='q3', x1='index', y1='max_whisker',
#             color='black', source=source)
seg = glyphs.Segment(x0='index', y0='q3', x1='index', y1='max_whisker', line_color='black')

# mean cross
plot.x(x='index', y='means', color='#ef8a62', size=15, source=source)

plot.add_glyph(source, seg)
show(plot)