In [4]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure


df = pd.read_csv("data/movies.csv")

facet_column = "Major Genre"
value_column = "Production Budget"

df[facet_column] = df[facet_column].fillna("Unknown")

facet_values = df[facet_column].unique()

grouped = df.groupby(facet_column)[value_column]
categories = list(grouped.groups.keys())

q1 = grouped.quantile(q=0.25)
q2 = grouped.quantile(q=0.5)
q3 = grouped.quantile(q=0.75)
iqr = q3 - q1
upper = q3 + 1.5 * iqr
lower = q1 - 1.5 * iqr

source = ColumnDataSource(
    pd.DataFrame(dict(cat=categories, q1=q1, q2=q2, q3=q3, upper=upper, lower=lower))
)

p = figure(x_range=facet_values)
p.segment(categories, upper, categories, q3, line_color="black")
p.segment(categories, lower, categories, q1, line_color="black")

p.vbar(x="cat", width=0.7, top="q3", bottom="q2", source=source, line_color="black")
p.vbar(x="cat", width=0.7, top="q2", bottom="q1", source=source, line_color="black")
output_notebook()
show(p)


In [3]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot
from bokeh.palettes import Category10

df = pd.read_csv("data/movies.csv")

group_column = "MPAA Rating"
facet_column = "Major Genre"
value_column = "Production Budget"

df[group_column] = df[group_column].fillna("Unknown")
df[facet_column] = df[facet_column].fillna("Unknown")
y_min = df[value_column].min()
y_max = df[value_column].max()

group_values = df[group_column].unique()
facet_values = df[facet_column].unique()
color_mapper = factor_cmap(group_column, palette=Category10[8], 
    factors=group_values)

plots = []

for value in facet_values:
    df_facet = df[df[facet_column] == value]
    grouped = df_facet.groupby(group_column)[value_column]
    categories = list(grouped.groups.keys())

    q1 = grouped.quantile(q=0.25)
    q2 = grouped.quantile(q=0.5)
    q3 = grouped.quantile(q=0.75)
    iqr = q3 - q1
    upper = q3 + 1.5 * iqr
    lower = q1 - 1.5 * iqr

    source = ColumnDataSource(
        pd.DataFrame(
            dict(cat=categories, q1=q1, q2=q2, q3=q3, upper=upper, lower=lower)
        )
    )

    p = figure(
        background_fill_color="#efefef", x_range=group_values, y_range=[y_min, y_max]
    )
    p.title.text = value

    p.segment(categories, upper, categories, q3)
    p.segment(categories, lower, categories, q1)

    p.vbar(
        x="cat",
        width=0.7,
        top="q3",
        bottom="q2",
        source=source,
        fill_color=color_mapper,
        line_color="black",
    )
    p.vbar(
        x="cat",
        width=0.7,
        top="q2",
        bottom="q1",
        source=source,
        fill_color=color_mapper,
        line_color="black",
    )

    plots.append(p)

# Arrange the plots in a grid and display them
grid = gridplot(plots, ncols=5, width=200, height=200)
output_notebook()
show(grid)


In [5]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot
from bokeh.palettes import Category10

df = pd.read_csv("data/movies.csv")

group_column = 'MPAA Rating'
facet_column = 'Major Genre'
value_column = 'Production Budget'

df[group_column] = df[group_column].fillna('Unknown')
df[facet_column] = df[facet_column].fillna('Unknown')
y_min = df[value_column].min()
y_max = df[value_column].max()

group_values = df[group_column].unique()
facet_values = df[facet_column].unique()
color_mapper = factor_cmap(group_column, palette=Category10[8], factors=group_values)

plots = []

for value in facet_values:    
    df_facet = df[df[facet_column] == value]
    grouped = df_facet.groupby([group_column, facet_column])[value_column]
    categories = list(grouped.groups.keys())
    
    q1 = grouped.quantile(q=0.25)
    q2 = grouped.quantile(q=0.5)
    q3 = grouped.quantile(q=0.75)
    iqr = q3 - q1
    upper = q3 + 1.5*iqr
    lower = q1 - 1.5*iqr

    # Create a new DataFrame to store the calculated values for each category
    source = ColumnDataSource(pd.DataFrame({
        x_label: [f"{cat[0]}_{cat[1]}" for cat in grouped.groups.keys()],
        'group': [cat[0] for cat in grouped.groups.keys()],
        'color_group': [cat[1] for cat in grouped.groups.keys()],
        q1=q1, q2=q2, q3=q3, upper=upper, lower = lower
    }))

    p = figure(background_fill_color="#efefef", x_range=group_values, y_range=[y_min, y_max])
    p.title.text = value

    p.segment(categories, upper, categories, q3)
    p.segment(categories, lower, categories, q1)

    p.vbar(x='cat', width=0.7, top='q3', bottom='q2', source=source, fill_color=color_mapper, line_color="black")
    p.vbar(x='cat', width=0.7, top='q2', bottom='q1', source=source, fill_color=color_mapper, line_color="black")

    plots.append(p)

# Arrange the plots in a grid and display them
grid = gridplot(plots, ncols=5, width=200, height=200)
output_notebook()
show(grid)


SyntaxError: ':' expected after dictionary key (2519916073.py, line 43)

In [6]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot
from bokeh.palettes import Category10


df = pd.read_csv("data/movies.csv")

group_column = "MPAA Rating"
facet_column = "Major Genre"
value_column = "Production Budget"

df[group_column] = df[group_column].fillna("Unknown")
df[facet_column] = df[facet_column].fillna("Unknown")
y_min = df[value_column].min()
y_max = df[value_column].max()

group_values = df[group_column].unique()
facet_values = df[facet_column].unique()
color_mapper = factor_cmap(group_column, palette=Category10[10], factors=group_values)

grouped = df.groupby(group_column)[value_column]

q1 = grouped.quantile(q=0.25)
q2 = grouped.quantile(q=0.5)
q3 = grouped.quantile(q=0.75)
iqr = q3 - q1
upper = q3 + 1.5 * iqr
lower = q1 - 1.5 * iqr

source = ColumnDataSource(
    pd.DataFrame(dict(cat=group_values, q1=q1, q2=q2, q3=q3, upper=upper, lower=lower))
)

p = figure(x_range=group_values, y_range=[y_min, y_max])
p.segment(group_values, upper, group_values, q3, line_color="black")
p.segment(group_values, lower, group_values, q1, line_color="black")

p.vbar(
    x="cat",
    width=0.7,
    top="q3",
    bottom="q2",
    source=source,
    fill_color=color_mapper,
    line_color="black",
)
p.vbar(
    x="cat",
    width=0.7,
    top="q2",
    bottom="q1",
    source=source,
    fill_color=color_mapper,
    line_color="black",
)
output_notebook()
show(p)
