## First test

A first experiment to understand how Bokeh works.

In [17]:
from bokeh.plotting import figure, show
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, Slider, CustomJS

# Mock data: four points on a flat line; each slider rescales one of them.
initial_data = {'x': [1, 2, 3, 4], 'y': [1, 1, 1, 1]}

source = ColumnDataSource(data=initial_data)

plot = figure(title="Test interactive plot with sliders", x_axis_label='review number', y_axis_label='expertness score', width=600, height=400)
plot.line('x', 'y', source=source, line_width=2)

# One slider per data point; the slider value is the multiplier applied to that point.
slider1 = Slider(start=0, end=2, step=1, value=1, title="Slider 1")
slider2 = Slider(start=0, end=2, step=1, value=1, title="Slider 2")
slider3 = Slider(start=0, end=2, step=1, value=1, title="Slider 3")
slider4 = Slider(start=0, end=2, step=1, value=1, title="Slider 4")

# Browser-side callback shared by all four sliders. It runs in JavaScript, so it
# keeps working in the exported HTML file without a Python kernel.
callback = CustomJS(
    args=dict(source=source, s1=slider1, s2=slider2, s3=slider3, s4=slider4, original_data=initial_data),
    code="""
        // Every point is recomputed from the baseline values in original_data,
        // so moving a slider repeatedly never compounds the scaling.
        const y = source.data['y'];
        const original_y = original_data['y'];

        y[0] = s1.value * original_y[0];
        y[1] = s2.value * original_y[1];
        y[2] = s3.value * original_y[2];
        y[3] = s4.value * original_y[3];

        // The array was modified in place, so the change must be signalled explicitly.
        source.change.emit();
    """
)

slider1.js_on_change('value', callback)
slider2.js_on_change('value', callback)
slider3.js_on_change('value', callback)
slider4.js_on_change('value', callback)

# Plot on the left, sliders stacked in a column on the right.
sliders = column(slider1, slider2, slider3, slider4, width=200, sizing_mode="stretch_height")
layout = row(plot, sliders)

# show(layout)
# NOTE(review): show() did not render here, so the layout is exported to an HTML
# file instead. Possibly bokeh.io.output_notebook() was never called - TODO confirm.

In [18]:
from bokeh.embed import file_html
from bokeh.resources import CDN

# Render the slider demo as an HTML page that loads BokehJS from the CDN, then
# save it. The encoding is given explicitly so the written file does not depend
# on the platform's default text encoding.
html = file_html(layout, CDN, "Test Plot")
with open("test_plot.html", "w", encoding="utf-8") as f:
    f.write(html)

### Data

Let's load the actual data that we need for this graph.

In [1]:
import pandas as pd

# Load the per-review averages (mean and SEM columns for each metric) from disk.
df = pd.read_json(path_or_buf='Categorical_avg_evolution.json')

In [2]:
# Inspect the column labels: each label is the string form of a
# (metric, statistic) pair, with statistic being 'mean' or 'sem'.
df.columns

Index(['('flavor', 'mean')', '('flavor', 'sem')', '('mouthfeel', 'mean')',
       '('mouthfeel', 'sem')', '('brewing', 'mean')', '('brewing', 'sem')',
       '('technical', 'mean')', '('technical', 'sem')',
       '('appearance', 'mean')', '('appearance', 'sem')',
       '('off_flavors', 'mean')', '('off_flavors', 'sem')',
       '('expertness_score', 'mean')', '('expertness_score', 'sem')'],
      dtype='object')

In [3]:
# The SEM columns are intentionally kept: the selector plot further down reads
# them to draw the error band. The drop below is left disabled for reference only.
# df.drop(columns=["('flavor', 'sem')", "('mouthfeel', 'sem')", "('brewing', 'sem')", "('technical', 'sem')", "('appearance', 'sem')", "('off_flavors', 'sem')", "('expertness_score', 'sem')"], inplace=True)
# Preview the first rows to check the values loaded as expected.
df.head()

Unnamed: 0,"('flavor', 'mean')","('flavor', 'sem')","('mouthfeel', 'mean')","('mouthfeel', 'sem')","('brewing', 'mean')","('brewing', 'sem')","('technical', 'mean')","('technical', 'sem')","('appearance', 'mean')","('appearance', 'sem')","('off_flavors', 'mean')","('off_flavors', 'sem')","('expertness_score', 'mean')","('expertness_score', 'sem')"
0,0.320652,0.00125,0.280379,0.001414,0.218909,0.001528,0.229955,0.001368,0.316795,0.001525,0.056398,0.000886,0.478511,0.001118
1,0.323113,0.00124,0.282032,0.001422,0.2272,0.00153,0.228409,0.001362,0.321512,0.001523,0.057797,0.000894,0.482191,0.001104
2,0.325922,0.001243,0.284064,0.001418,0.231575,0.001536,0.227126,0.001362,0.326271,0.001525,0.058089,0.000902,0.484821,0.001101
3,0.328999,0.00124,0.285401,0.001418,0.238432,0.001542,0.230872,0.001369,0.330814,0.001526,0.059793,0.000913,0.489229,0.001085
4,0.333814,0.001233,0.289116,0.001424,0.241001,0.001547,0.230132,0.00137,0.33342,0.001528,0.060266,0.000922,0.491447,0.001097


## Selector

We now add a selector that shows different parts of the data.

In [25]:
from bokeh.plotting import figure, show
from bokeh.layouts import row
from bokeh.models import ColumnDataSource, Select, CustomJS, Range1d

# The x-axis is derived from the data instead of a hard-coded 1000, so the x, y
# and sem columns handed to Bokeh always have the same length.
n_reviews = len(df)
x_axis = list(range(n_reviews))

# Display label (shown in the selector) -> metric name used in the DataFrame
# column labels, which look like "('flavor', 'mean')" / "('flavor', 'sem')".
metric_columns = {
    "expertness score": "expertness_score",
    "flavor": "flavor",
    "mouthfeel": "mouthfeel",
    "brewing": "brewing",
    "technical": "technical",
    "appearance": "appearance",
    "off flavors": "off_flavors",
}

# One {'x', 'y', 'sem'} column set per metric; insertion order defines the
# order of the options in the selector.
datasets = {
    label: {
        'x': x_axis,
        'y': df[f"('{metric}', 'mean')"].to_list(),
        'sem': df[f"('{metric}', 'sem')"].to_list(),
    }
    for label, metric in metric_columns.items()
}

# Color map for datasets
colors = {
    "expertness score": "blue",
    "flavor": "green",
    "mouthfeel": "orange",
    "brewing": "purple",
    "technical": "red",
    "appearance": "brown",
    "off flavors": "cyan",
}

# Extent of mean +/- SEM across all datasets. Currently informational only: the
# plot below uses a fixed (0, 0.7) y-range and the data-driven range is disabled.
all_y_values = []
for data in datasets.values():
    all_y_values.extend(mean + err for mean, err in zip(data['y'], data['sem']))
    all_y_values.extend(mean - err for mean, err in zip(data['y'], data['sem']))

y_min = min(all_y_values)
y_max = max(all_y_values)


def _error_band(x, y, sem):
    """Return patch coordinates for the mean +/- SEM band.

    The polygon follows the upper edge (mean + sem) from left to right and then
    the lower edge (mean - sem) from right to left, so it closes on itself.
    """
    upper = [mean + err for mean, err in zip(y, sem)]
    lower = [mean - err for mean, err in zip(y, sem)]
    return {'x': list(x) + list(x)[::-1], 'y': upper + lower[::-1]}


# Initial dataset
initial_metric = "expertness score"
initial_data = datasets[initial_metric]
source = ColumnDataSource(data=initial_data)

# Create the plot. The title and x-range use the actual number of rows so they
# cannot drift apart from what is drawn.
plot = figure(
    title=f"Average standardized metric scores for the first {n_reviews} reviews of non-expert users",
    x_axis_label='Review number',
    y_axis_label='Y-axis', # TODO: Add appropriate label
    width=1200,
    height=800,
    x_range=Range1d(0, n_reviews),
    # y_range=Range1d(y_min, y_max),
    y_range=Range1d(0, 0.7)
)

# Main line
line_renderer = plot.line('x', 'y', source=source, line_width=2, color=colors[initial_metric], legend_label="Mean")

# Error band
error_band_data = _error_band(initial_data['x'], initial_data['y'], initial_data['sem'])
error_band_source = ColumnDataSource(data=error_band_data)
patch_renderer = plot.patch('x', 'y', source=error_band_source, color=colors[initial_metric], alpha=0.2, legend_label="SEM")

# Selector widget
select = Select(title="Select aspect of a beer to analyze", value=initial_metric, options=list(datasets.keys()))

# Callback for updating the main line, error band, and colors. It runs in the
# browser, so the exported HTML stays interactive without a Python kernel.
callback = CustomJS(
    args=dict(source=source, error_band_source=error_band_source, datasets=datasets, line_renderer=line_renderer,
              patch_renderer=patch_renderer, colors=colors),
    code="""
        const selected_dataset = datasets[cb_obj.value];
        source.data = selected_dataset;
        source.change.emit();

        // Update error band
        const x = selected_dataset.x;
        const y = selected_dataset.y;
        const sem = selected_dataset.sem;

        const upper = y.map((val, idx) => val + sem[idx]);
        const lower = y.map((val, idx) => val - sem[idx]);
        const error_band = {
            x: x.concat(x.slice().reverse()),  // x forwards and backwards
            y: upper.concat(lower.reverse()), // y upper and lower
        };

        error_band_source.data = error_band;
        error_band_source.change.emit();

        // Update colors. The patch was created with color=..., which sets both
        // its fill and its outline, so both must follow the selection.
        const selected_color = colors[cb_obj.value];
        line_renderer.glyph.line_color = selected_color;
        patch_renderer.glyph.fill_color = selected_color;
        patch_renderer.glyph.line_color = selected_color;
    """
)
select.js_on_change('value', callback)

# Layout
layout = row(plot, select)



In [26]:
from bokeh.embed import file_html
from bokeh.resources import CDN

# Render the selector plot as an HTML page that loads BokehJS from the CDN,
# then save it. The encoding is given explicitly so the written file does not
# depend on the platform's default text encoding.
html = file_html(layout, CDN, "Test Plot")
with open("metric_categories_plot.html", "w", encoding="utf-8") as f:
    f.write(html)