## First test

First test to understand how bokeh works.

In [17]:
from bokeh.plotting import figure, show
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, Slider, CustomJS

# Mock data: four points that all start at y = 1.
initial_data = {'x': [1, 2, 3, 4], 'y': [1, 1, 1, 1]}

source = ColumnDataSource(data=initial_data)

plot = figure(title="Test interactive plot with sliders", x_axis_label='review number', y_axis_label='expertness score', width=600, height=400)
plot.line('x', 'y', source=source, line_width=2)

# One slider per data point; slider i multiplies the original y value of point i
# by 0, 1 or 2. Building them in a loop keeps the count tied to the mock data.
point_sliders = [
    Slider(start=0, end=2, step=1, value=1, title=f"Slider {number}")
    for number in range(1, len(initial_data['y']) + 1)
]

# Callback shared by every slider. The untouched mock data is passed separately as
# `original_data`, and each update is computed from it rather than from the current
# (already scaled) y values.
callback = CustomJS(
    args=dict(source=source, sliders=point_sliders, original_data=initial_data),
    code="""
        const y = source.data['y'];
        const original_y = original_data['y'];

        // Slider i scales the original y value of point i.
        sliders.forEach((slider, i) => {
            y[i] = slider.value * original_y[i];
        });

        // y was modified in place, so the change has to be signalled explicitly.
        source.change.emit();
    """
)

for slider in point_sliders:
    slider.js_on_change('value', callback)

# have plot on the left, and sliders on the right
sliders = column(*point_sliders, width=200, sizing_mode="stretch_height")
layout = row(plot, sliders)

# show(layout) is left disabled: it did not render for the author, so the next cell
# exports the layout to a standalone HTML file instead.
# NOTE(review): inline display may need bokeh.io.output_notebook() to be called first — confirm.

In [18]:
from bokeh.embed import file_html
from bokeh.resources import CDN

# Render the layout as a standalone HTML page that loads BokehJS from the CDN.
html = file_html(layout, CDN, "Test Plot")

# Write with an explicit encoding so the result does not depend on the platform
# default (which is not UTF-8 everywhere) and non-ASCII labels cannot break the export.
with open("test_plot.html", "w", encoding="utf-8") as f:
    f.write(html)

### Data

Let's get the actual data that we need for this graph

In [20]:
import pandas as pd

# Load the score table from the JSON file stored next to the notebook (relative path).
df = pd.read_json(path_or_buf='avg_no_exp_scores.json')

In [21]:
# Show the column labels of the loaded frame to see which series are available.
df.columns

Index(['('flavor', 'mean')', '('flavor', 'sem')', '('mouthfeel', 'mean')',
       '('mouthfeel', 'sem')', '('brewing', 'mean')', '('brewing', 'sem')',
       '('technical', 'mean')', '('technical', 'sem')',
       '('appearance', 'mean')', '('appearance', 'sem')',
       '('off_flavors', 'mean')', '('off_flavors', 'sem')',
       '('expertness_score', 'mean')', '('expertness_score', 'sem')'],
      dtype='object')

In [22]:
# The 'sem' columns are intentionally kept; an earlier draft dropped them at this point.
# NOTE(review): the selector plot further down reads a 'sem' series per metric —
# presumably derived from these columns; confirm before re-enabling any drop.
df.head()

Unnamed: 0,"('flavor', 'mean')","('flavor', 'sem')","('mouthfeel', 'mean')","('mouthfeel', 'sem')","('brewing', 'mean')","('brewing', 'sem')","('technical', 'mean')","('technical', 'sem')","('appearance', 'mean')","('appearance', 'sem')","('off_flavors', 'mean')","('off_flavors', 'sem')","('expertness_score', 'mean')","('expertness_score', 'sem')"
0,-0.774329,0.01617,-0.679132,0.018501,-0.683452,0.022178,-0.47541,0.017846,-0.773374,0.020555,-0.26202,0.017749,-1.022608,0.015838
1,-0.760405,0.01627,-0.655368,0.018413,-0.679733,0.02212,-0.467862,0.018392,-0.726652,0.020956,-0.285993,0.0167,-0.998637,0.015046
2,-0.736056,0.016665,-0.698692,0.01859,-0.664428,0.022927,-0.479205,0.018867,-0.739444,0.020859,-0.285151,0.015486,-1.001258,0.015266
3,-0.718008,0.015981,-0.680837,0.018227,-0.610481,0.022352,-0.47749,0.018994,-0.656673,0.021968,-0.23831,0.018584,-0.952639,0.01497
4,-0.698993,0.016827,-0.649216,0.018673,-0.589076,0.022812,-0.43236,0.019018,-0.617202,0.021916,-0.261431,0.017191,-0.911484,0.015435


## Selector

We now add a selector widget that switches the plot between different parts of the data (one metric at a time).

In [32]:
# Select is used for the widget below; it was not imported in any earlier visible cell.
from bokeh.models import Range1d, Select

# NOTE(review): `datasets` is not defined in any cell shown in this notebook, so it must
# be built before this cell runs. From its use here it maps a display label (e.g.
# "expertness score") to a dict holding 'x', 'y' (plotted as the mean) and 'sem'
# sequences of equal length.

# Fixed y-axis range: the extremes of (y + sem) and (y - sem) over every dataset, so
# the axis does not rescale when another dataset is selected.
all_y_values = []
for data in datasets.values():
    for y_val, sem_val in zip(data['y'], data['sem']):
        all_y_values.extend((y_val + sem_val, y_val - sem_val))

y_min = min(all_y_values)
y_max = max(all_y_values)

# Initial dataset
initial_data = datasets["expertness score"]
source = ColumnDataSource(data=initial_data)

# Create the plot
plot = figure(
    title="Average standardized metric scores for the first 200 reviews of non-expert users",
    x_axis_label='Review number',
    y_axis_label='Mean standardized score',
    width=1200,
    height=800,
    y_range=Range1d(y_min, y_max),  # Fix the y-axis range
)

# Main line
plot.line('x', 'y', source=source, line_width=2, color='blue', legend_label="Mean")

# Error band (initially plotted for the first dataset). The polygon runs along the
# upper edge left-to-right and returns along the lower edge right-to-left. The x values
# come from the dataset itself, exactly as the JS callback below does, so the initial
# band and the updated bands are built the same way.
band_x = list(initial_data['x'])
band_pairs = list(zip(initial_data['y'], initial_data['sem']))
band_upper = [y_val + sem_val for y_val, sem_val in band_pairs]
band_lower = [y_val - sem_val for y_val, sem_val in band_pairs]
error_band_data = {
    'x': band_x + band_x[::-1],  # x-axis forwards and backwards
    'y': band_upper + band_lower[::-1],
}
error_band_source = ColumnDataSource(data=error_band_data)
plot.patch('x', 'y', source=error_band_source, color='blue', alpha=0.2, legend_label="SEM")

# Selector widget
select = Select(title="Select aspect of a beer to analyze", value="expertness score", options=list(datasets.keys()))

# Callback for updating the main line and error bands
callback = CustomJS(
    args=dict(source=source, error_band_source=error_band_source, datasets=datasets),
    code="""
        const selected_dataset = datasets[cb_obj.value];
        source.data = selected_dataset;
        source.change.emit();

        // Update error band
        const x = selected_dataset.x;
        const y = selected_dataset.y;
        const sem = selected_dataset.sem;

        const upper = y.map((val, idx) => val + sem[idx]);
        const lower = y.map((val, idx) => val - sem[idx]);
        const error_band = {
            x: x.concat(x.slice().reverse()),  // x forwards and backwards
            y: upper.concat(lower.reverse()), // y upper and lower
        };

        error_band_source.data = error_band;
        error_band_source.change.emit();
    """
)
select.js_on_change('value', callback)

# Layout
layout = row(plot, select)

In [33]:
from bokeh.embed import file_html
from bokeh.resources import CDN

# Render the selector layout as a standalone HTML page that loads BokehJS from the CDN.
html = file_html(layout, CDN, "Test Plot")

# Write with an explicit encoding so the result does not depend on the platform
# default (which is not UTF-8 everywhere) and non-ASCII labels cannot break the export.
with open("test_select_plot.html", "w", encoding="utf-8") as f:
    f.write(html)