In [4]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure


df = pd.read_csv("data/movies.csv")

facet_column = "Major Genre"
value_column = "Production Budget"

df[facet_column] = df[facet_column].fillna("Unknown")

facet_values = df[facet_column].unique()

grouped = df.groupby(facet_column)[value_column]
categories = list(grouped.groups.keys())

q1 = grouped.quantile(q=0.25)
q2 = grouped.quantile(q=0.5)
q3 = grouped.quantile(q=0.75)
iqr = q3 - q1
upper = q3 + 1.5 * iqr
lower = q1 - 1.5 * iqr

source = ColumnDataSource(
    pd.DataFrame(dict(cat=categories, q1=q1, q2=q2, q3=q3, upper=upper, lower=lower))
)

p = figure(x_range=facet_values)
p.segment(categories, upper, categories, q3, line_color="black")
p.segment(categories, lower, categories, q1, line_color="black")

p.vbar(x="cat", width=0.7, top="q3", bottom="q2", source=source, line_color="black")
p.vbar(x="cat", width=0.7, top="q2", bottom="q1", source=source, line_color="black")
output_notebook()
show(p)


In [3]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot
from bokeh.palettes import Category10

df = pd.read_csv("data/movies.csv")

group_column = "MPAA Rating"
facet_column = "Major Genre"
value_column = "Production Budget"

df[group_column] = df[group_column].fillna("Unknown")
df[facet_column] = df[facet_column].fillna("Unknown")
y_min = df[value_column].min()
y_max = df[value_column].max()

group_values = df[group_column].unique()
facet_values = df[facet_column].unique()
color_mapper = factor_cmap(group_column, palette=Category10[8], 
    factors=group_values)

plots = []

for value in facet_values:
    df_facet = df[df[facet_column] == value]
    grouped = df_facet.groupby(group_column)[value_column]
    categories = list(grouped.groups.keys())

    q1 = grouped.quantile(q=0.25)
    q2 = grouped.quantile(q=0.5)
    q3 = grouped.quantile(q=0.75)
    iqr = q3 - q1
    upper = q3 + 1.5 * iqr
    lower = q1 - 1.5 * iqr

    source = ColumnDataSource(
        pd.DataFrame(
            dict(cat=categories, q1=q1, q2=q2, q3=q3, upper=upper, lower=lower)
        )
    )

    p = figure(
        background_fill_color="#efefef", x_range=group_values, y_range=[y_min, y_max]
    )
    p.title.text = value

    p.segment(categories, upper, categories, q3)
    p.segment(categories, lower, categories, q1)

    p.vbar(
        x="cat",
        width=0.7,
        top="q3",
        bottom="q2",
        source=source,
        fill_color=color_mapper,
        line_color="black",
    )
    p.vbar(
        x="cat",
        width=0.7,
        top="q2",
        bottom="q1",
        source=source,
        fill_color=color_mapper,
        line_color="black",
    )

    plots.append(p)

# Arrange the plots in a grid and display them
grid = gridplot(plots, ncols=5, width=200, height=200)
output_notebook()
show(grid)


In [5]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot
from bokeh.palettes import Category10

df = pd.read_csv("data/movies.csv")

group_column = 'MPAA Rating'
facet_column = 'Major Genre'
value_column = 'Production Budget'

df[group_column] = df[group_column].fillna('Unknown')
df[facet_column] = df[facet_column].fillna('Unknown')
y_min = df[value_column].min()
y_max = df[value_column].max()

group_values = df[group_column].unique()
facet_values = df[facet_column].unique()
color_mapper = factor_cmap(group_column, palette=Category10[8], factors=group_values)

plots = []

for value in facet_values:    
    df_facet = df[df[facet_column] == value]
    grouped = df_facet.groupby([group_column, facet_column])[value_column]
    categories = list(grouped.groups.keys())
    
    q1 = grouped.quantile(q=0.25)
    q2 = grouped.quantile(q=0.5)
    q3 = grouped.quantile(q=0.75)
    iqr = q3 - q1
    upper = q3 + 1.5*iqr
    lower = q1 - 1.5*iqr

    # Create a new DataFrame to store the calculated values for each category
    source = ColumnDataSource(pd.DataFrame({
        x_label: [f"{cat[0]}_{cat[1]}" for cat in grouped.groups.keys()],
        'group': [cat[0] for cat in grouped.groups.keys()],
        'color_group': [cat[1] for cat in grouped.groups.keys()],
        q1=q1, q2=q2, q3=q3, upper=upper, lower = lower
    }))

    p = figure(background_fill_color="#efefef", x_range=group_values, y_range=[y_min, y_max])
    p.title.text = value

    p.segment(categories, upper, categories, q3)
    p.segment(categories, lower, categories, q1)

    p.vbar(x='cat', width=0.7, top='q3', bottom='q2', source=source, fill_color=color_mapper, line_color="black")
    p.vbar(x='cat', width=0.7, top='q2', bottom='q1', source=source, fill_color=color_mapper, line_color="black")

    plots.append(p)

# Arrange the plots in a grid and display them
grid = gridplot(plots, ncols=5, width=200, height=200)
output_notebook()
show(grid)


SyntaxError: ':' expected after dictionary key (2519916073.py, line 43)

In [6]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot
from bokeh.palettes import Category10


df = pd.read_csv("data/movies.csv")

group_column = "MPAA Rating"
facet_column = "Major Genre"
value_column = "Production Budget"

df[group_column] = df[group_column].fillna("Unknown")
df[facet_column] = df[facet_column].fillna("Unknown")
y_min = df[value_column].min()
y_max = df[value_column].max()

group_values = df[group_column].unique()
facet_values = df[facet_column].unique()
color_mapper = factor_cmap(group_column, palette=Category10[10], factors=group_values)

grouped = df.groupby(group_column)[value_column]

q1 = grouped.quantile(q=0.25)
q2 = grouped.quantile(q=0.5)
q3 = grouped.quantile(q=0.75)
iqr = q3 - q1
upper = q3 + 1.5 * iqr
lower = q1 - 1.5 * iqr

source = ColumnDataSource(
    pd.DataFrame(dict(cat=group_values, q1=q1, q2=q2, q3=q3, upper=upper, lower=lower))
)

p = figure(x_range=group_values, y_range=[y_min, y_max])
p.segment(group_values, upper, group_values, q3, line_color="black")
p.segment(group_values, lower, group_values, q1, line_color="black")

p.vbar(
    x="cat",
    width=0.7,
    top="q3",
    bottom="q2",
    source=source,
    fill_color=color_mapper,
    line_color="black",
)
p.vbar(
    x="cat",
    width=0.7,
    top="q2",
    bottom="q1",
    source=source,
    fill_color=color_mapper,
    line_color="black",
)
output_notebook()
show(p)


In [6]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.palettes import Category20
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource

output_notebook()

df = pd.read_csv("data/movies.csv").fillna("unknown")
grouped_df = df.groupby(['Major Genre', 'MPAA Rating']).size().reset_index(name='counts')

major_genre = (df['Major Genre'].unique())
mpaa_rating = (df['MPAA Rating'].unique())

source = ColumnDataSource(grouped_df)

palette = Category20[len(mpaa_rating)]

p = figure(x_range=major_genre, toolbar_location=None, title="Major Genre Counts by MPAA Rating")
p.vbar(x='Major Genre', top='counts', width=0.9, source=source,
       fill_color=factor_cmap('MPAA Rating', palette=palette, factors=mpaa_rating))

show(p)

In [13]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.core.properties import value
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category20

output_notebook()

df = pd.read_csv("data/movies.csv")

# Fill NaN values in 'Major Genre' and 'MPAA Rating' columns with 'Unknown'
df['Major Genre'] = df['Major Genre'].fillna('Unknown')
df['MPAA Rating'] = df['MPAA Rating'].fillna('Unknown')

grouped_df = df.groupby(['Major Genre', 'MPAA Rating']).size().reset_index(name='counts')

major_genre = sorted(df['Major Genre'].unique())
mpaa_rating = sorted(df['MPAA Rating'].unique())

# Pivot the DataFrame
pivot_df = grouped_df.pivot_table(index='Major Genre', columns='MPAA Rating', values='counts', fill_value=0).reset_index()

source = ColumnDataSource(pivot_df)

palette = Category20[len(mpaa_rating)]

p = figure(x_range=major_genre, toolbar_location=None, title="Major Genre Counts by MPAA Rating")

p.vbar_stack(mpaa_rating, x='Major Genre', width=0.9, color=palette, source=source, legend_label=mpaa_rating)

p.xgrid.grid_line_color = None
p.legend.title = 'MPAA Rating'
p.legend.location = "top_right"
p.legend.orientation = "vertical"

show(p)

In [20]:
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.palettes import Category20
from bokeh.transform import factor_cmap, dodge
from bokeh.models import ColumnDataSource

output_notebook()

df = pd.read_csv("data/movies.csv")

# Remove rows with NaN values in 'Major Genre' and 'MPAA Rating' columns
df = df.dropna(subset=['Major Genre', 'MPAA Rating'])

grouped_df = df.groupby(['Major Genre', 'MPAA Rating']).size().reset_index(name='counts')

major_genre = sorted(df['Major Genre'].unique())
mpaa_rating = sorted(df['MPAA Rating'].unique())

source = ColumnDataSource(grouped_df)

palette = Category20[len(mpaa_rating)]

p = figure(x_range=major_genre)

bar_width = 0.9 / len(mpaa_rating)

for idx, rating in enumerate(mpaa_rating):
    p.vbar(x=dodge('Major Genre', idx * bar_width - (0.9 / 2) + (bar_width / 2), range=p.x_range), top='counts', width=bar_width, source=source,
           legend_label=rating, fill_color=palette[idx])

p.legend.title = "MPAA Rating"
p.legend.location = "top_right"
p.legend.orientation = "vertical"

show(p)

In [24]:
grouped_df = df.pivot_table(index='MPAA Rating', columns='Major Genre', values='Production Budget', aggfunc='mean')
grouped_df

Major Genre,Action,Adventure,Black Comedy,Comedy,Concert/Performance,Documentary,Drama,Horror,Musical,Romantic Comedy,Thriller/Suspense,Western
MPAA Rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
G,,55091490.0,,57785710.0,10750000.0,41700000.0,27680000.0,,39825290.0,45000000.0,,
NC-17,,,,1000000.0,,2000000.0,20000000.0,375000.0,,,200000.0,
Not Rated,5442400.0,113500000.0,,4287286.0,,2058348.0,5973250.0,1307000.0,425000.0,4325000.0,2221400.0,
Open,,,,,,,2600000.0,,,,,
PG,52000000.0,68666670.0,,47907740.0,,675220.0,24160470.0,16000000.0,33200000.0,28000000.0,14000000.0,
PG-13,80381330.0,101125000.0,16000000.0,32719260.0,,2059286.0,31481740.0,32270000.0,31916670.0,32564290.0,45875850.0,41981820.0
R,52193870.0,56057140.0,13669350.0,15963310.0,3000000.0,5111111.0,21044870.0,21503190.0,13062500.0,23208330.0,32055560.0,30410000.0


In [4]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.transform import dodge
import numpy as np
import pandas as pd

df = pd.read_csv("data/movies.csv")
df2 = df.pivot_table(
    index="Major Genre",
    columns="MPAA Rating",
    values="Production Budget",
    aggfunc="mean",
)

genre = df2.index.tolist()
ratings = df2.columns.tolist()
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']

source = ColumnDataSource(df2)

p = figure(x_range=genre, y_range=(0, df2.max().max() + 10), 
           title="Average Production Budget by Genre and MPAA Rating",
           toolbar_location=None, tools="")

p.vbar(x=dodge('Major Genre', -0.4, range=p.x_range), top='G', width=0.2, source=source, 
       color=colors[0], legend_label="G")
p.vbar(x=dodge('Major Genre', -0.2, range=p.x_range), top='NC-17', width=0.2, source=source, 
       color=colors[1], legend_label="NC-17")
p.vbar(x=dodge('Major Genre', 0.0, range=p.x_range), top='PG', width=0.2, source=source, 
       color=colors[2], legend_label="PG")
p.vbar(x=dodge('Major Genre', 0.2, range=p.x_range), top='PG-13', width=0.2, source=source, 
       color=colors[3], legend_label="PG-13")
p.vbar(x=dodge('Major Genre', 0.4, range=p.x_range), top='R', width=0.2, source=source, 
       color=colors[4], legend_label="R")
p.vbar(x=dodge('Major Genre', 0.6, range=p.x_range), top='Unrated', width=0.2, source=source, 
       color=colors[5], legend_label="Unrated")

p.legend.location = "top_left"
p.legend.click_policy="hide"
p.xaxis.axis_label = "Major Genre"
p.yaxis.axis_label = "Average Budget"

show(p)


In [7]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Magma256
from bokeh.models import LinearColorMapper, ColorBar
import numpy as np
import pandas as pd

df = pd.read_csv("data/movies.csv")
df2 = df.pivot_table(
    index="MPAA Rating",
    columns="Major Genre",
    values="Production Budget",
    aggfunc="mean",
)
genre = df2.columns.tolist()
rating = df2.index.tolist()
colors = list(reversed(Magma256))

source = ColumnDataSource(df2.stack().reset_index(name='value'))

p = figure(x_range=genre, y_range=list(reversed(rating)), 
           title="Average Production Budget by MPAA Rating and Genre",
           toolbar_location=None, tools="", 
           sizing_mode="stretch_both")

p.rect(x="Major Genre", y="MPAA Rating", width=1, height=1, source=source,
       fill_color={'field': 'value', 'transform': LinearColorMapper(palette=colors, low=df2.min().min(), high=df2.max().max())},
       line_color=None)

p.xaxis.axis_label = "Major Genre"
p.yaxis.axis_label = "MPAA Rating"

color_bar = ColorBar(color_mapper=LinearColorMapper(palette=colors, low=df2.min().min(), high=df2.max().max()),
                     label_standoff=12, border_line_color=None, location=(0,0))

p.add_layout(color_bar, 'right')

show(p)


In [10]:
from bokeh.plotting import figure, show
from bokeh.palettes import Magma256
import pandas as pd

df = pd.read_csv("data/movies.csv")
df2 = df["MPAA Rating"].value_counts()
colors = Magma256[len(df2)]

p = figure(title="MPAA Rating Counts", 
           sizing_mode="stretch_both", 
           toolbar_location=None, 
           tools="")
p.wedge(x=0.5, y=0.5, radius=0.4, start_angle=0, end_angle=df2/df2.sum()*2*np.pi, 
        line_color='white', fill_color=colors, legend_field='index', source=df2.reset_index())
p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None

show(p)

RuntimeError: 

Expected end_angle to reference fields in the supplied data source.

When a 'source' argument is passed to a glyph method, values that are sequences
(like lists or arrays) must come from references to data columns in the source.

For instance, as an example:

    source = ColumnDataSource(data=dict(x=a_list, y=an_array))

    p.circle(x='x', y='y', source=source, ...) # pass column names and a source

Alternatively, *all* data sequences may be provided as literals as long as a
source is *not* provided:

    p.circle(x=a_list, y=an_array, ...)  # pass actual sequences and no source

