In [134]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.figure_factory as ff
from matplotlib.colors import LinearSegmentedColormap, to_hex
from datetime import timedelta
import ipywidgets as widgets
from IPython.display import display
import seaborn as sns
import matplotlib.pyplot as plt

# Display options:
pd.set_option("display.width", 1200)
pd.set_option("display.max_columns", 300)
pd.set_option("display.max_rows", 300)

In [200]:
# Read the data:
df_videos = pd.read_csv("data/videos_data.csv", sep = ";")

In [201]:
df_videos

Unnamed: 0,channel_title,channel_id,video_title,video_id,video_upload_date,views,likes,dislikes,comments
0,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,A few of the best math explainers from this su...,F3Qixy-r_rQ,2021-10-23T18:11:23Z,361195.0,24407.0,82.0,659.0
1,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,How a Mandelbrot set arises from Newton’s work,LqbZpur38nw,2021-10-15T16:41:50Z,554691.0,25722.0,144.0,1205.0
2,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,Newton's Fractal (which Newton knew nothing ab...,-RdOwhmqP5s,2021-10-07T02:19:39Z,1179922.0,61162.0,285.0,2792.0
3,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,The Summer of Math Exposition,ojjzXyQCzso,2021-07-16T15:37:16Z,609396.0,29267.0,214.0,1719.0
4,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,A quick trick for computing eigenvalues | Chap...,e50Bj7jn9IQ,2021-05-07T19:01:16Z,419586.0,16984.0,144.0,1145.0
...,...,...,...,...,...,...,...,...,...
203003,zefrank1,UCVpankR4HtoAVtYnFDUieYA,HardTimes :: Hole,LPfcGXMpKds,2009-06-06T22:16:56Z,90791.0,1791.0,23.0,75.0
203004,zefrank1,UCVpankR4HtoAVtYnFDUieYA,HardTimes :: Affordable Button,h3lvSflNixI,2009-05-30T00:13:46Z,103154.0,1566.0,47.0,18.0
203005,zefrank1,UCVpankR4HtoAVtYnFDUieYA,HardTimes :: Optical Illusion,8mS5RK0Yo6w,2009-05-23T22:36:58Z,1072148.0,9848.0,1201.0,
203006,zefrank1,UCVpankR4HtoAVtYnFDUieYA,HardTimes :: Outsource,zxIbQ6ZG4OI,2009-05-17T00:50:31Z,78193.0,1620.0,20.0,87.0


In [202]:
df_videos.dtypes

channel_title         object
channel_id            object
video_title           object
video_id              object
video_upload_date     object
views                float64
likes                float64
dislikes             float64
comments             float64
dtype: object

In [5]:
############################# Data wrangling #############################

In [203]:
### Age in days (until the last upload date)

# Change to datetime:
df_videos["video_upload_date"] = pd.to_datetime(df_videos["video_upload_date"])

# Filter the wrong upload dates (coming from the API as being the request date) considering a margin of 2 days:
last_date = max(df_videos["video_upload_date"])
age = []
for i in range(df_videos.shape[0]):
    date_i = df_videos.loc[i, "video_upload_date"]
    diff_days = last_date - date_i
    if diff_days < timedelta(days = 2):
        age += [np.nan]
    else:
        age += [diff_days.days]
df_videos["age_days"] = age

In [204]:
df_videos["age_days"].dropna().describe()

count    202896.000000
mean       1547.755387
std        1244.428607
min           2.000000
25%         444.000000
50%        1299.000000
75%        2219.000000
max        5734.000000
Name: age_days, dtype: float64

In [205]:
df_videos.dropna().sort_values("age_days").apply(lambda x: pd.concat([x.head(1), x.tail(1)]))

Unnamed: 0,channel_title,channel_id,video_title,video_id,video_upload_date,views,likes,dislikes,comments,age_days
0,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,A few of the best math explainers from this su...,F3Qixy-r_rQ,2021-10-23 18:11:23+00:00,361195.0,24407.0,82.0,659.0,2.0
92000,Jacques Slade,UCZ9l_6_f0PWRYXN5Y7Lcl2A,Get Your Hands Up,FXLXBfmlaOk,2006-02-12 20:31:24+00:00,28226.0,206.0,9.0,108.0,5734.0


In [206]:
### Likes-dislikes ratio

df_videos["likes_dislikes_ratio"] = df_videos["likes"]/df_videos["dislikes"]
df_videos["likes_dislikes_ratio"] = df_videos["likes_dislikes_ratio"].replace([np.inf, -np.inf], np.nan)

In [207]:
df_videos["likes_dislikes_ratio"].dropna().describe()

count    172162.000000
mean         42.583515
std          68.247050
min           0.000000
25%           7.573576
50%          20.666667
75%          49.333333
max        2635.000000
Name: likes_dislikes_ratio, dtype: float64

In [208]:
df_videos.dropna(subset = ["likes_dislikes_ratio"]).sort_values("likes_dislikes_ratio").apply(lambda x: pd.concat([x.head(1), x.tail(1)]))

Unnamed: 0,channel_title,channel_id,video_title,video_id,video_upload_date,views,likes,dislikes,comments,age_days,likes_dislikes_ratio
57657,CNBC,UCvJJ_dzjViJCoLf5uKUTwoA,Aetna CEO: Obamacare Risk-Adjustment Program I...,EK2lKEmSwLA,2016-10-27 20:07:55+00:00,559.0,0.0,1.0,1.0,1824.0,0.0
176509,TÁ NA MESA VEGG,UCSxFcMDGmwgDlSP9qmYVx1w,"COMO ARMAZENAR FOLHAS, ORGANIZAR FEIRA + SAL T...",GGqRCQmtx-U,2021-05-21 01:16:50+00:00,15330.0,2635.0,1.0,261.0,157.0,2635.0


In [209]:
### Comments-views ratio

df_videos["comments_views_ratio"] = df_videos["comments"]/df_videos["views"]
df_videos["comments_views_ratio"] = df_videos["comments_views_ratio"].replace([np.inf, -np.inf], np.nan)

# Here I decided to remove "Why do YouTube views freeze at 301?" from "Numberphile" because its views count was artificialy fixed:
df_videos = df_videos[df_videos["video_title"] != "Why do YouTube views freeze at 301?"]

In [225]:
df_videos["comments_views_ratio"].dropna().describe()

count    200742.000000
mean          0.003143
std           0.006700
min           0.000000
25%           0.000461
50%           0.001498
75%           0.003574
max           0.500000
Name: comments_views_ratio, dtype: float64

In [222]:
df_videos.sort_values("comments_views_ratio").dropna().apply(lambda x: pd.concat([x.head(1), x.tail(1)]))

Unnamed: 0,channel_title,channel_id,video_title,video_id,video_upload_date,views,likes,dislikes,comments,age_days,likes_dislikes_ratio,comments_views_ratio
34175,British Pathé,UCGp4u0WHLsK8OAxnvwiTyhA,British battlecruiser HMS Vanguard on the Rive...,EQ5uby5UiXU,2020-11-10 16:15:23+00:00,98.0,5.0,1.0,0.0,349.0,5.0,0.0
178158,UrAvgConsumer,UC9fSZHEh6XsRpX-xJc6lT3A,"Summer Giveaway Finale! (PS Vita, Nexus 7 2013...",T2LccukparY,2013-09-18 16:28:38+00:00,16217.0,2248.0,12.0,7141.0,2959.0,187.333333,0.44034


In [223]:
### Mean views per day

df_videos["mean_views_day"] = df_videos["views"]/df_videos["age_days"]
df_videos["mean_views_day"] = df_videos["mean_views_day"].replace([np.inf, -np.inf], np.nan)

In [224]:
df_videos["mean_views_day"].dropna().describe()

count    202839.000000
mean        703.974901
std        5960.601170
min           0.000000
25%           3.216458
50%          32.423693
75%         234.375439
max      791007.151515
Name: mean_views_day, dtype: float64

In [226]:
df_videos.sort_values("mean_views_day").dropna().apply(lambda x: pd.concat([x.head(1), x.tail(1)]))

Unnamed: 0,channel_title,channel_id,video_title,video_id,video_upload_date,views,likes,dislikes,comments,age_days,likes_dislikes_ratio,comments_views_ratio,mean_views_day
44332,Business Insider,UCcyq283he07B7_KUX07mmtA,IGNITION 2017 LIVE - Day One: Morning Session,mppUHGYL_Bk,2017-11-27 14:58:54+00:00,1.0,31.0,16.0,0.0,1428.0,1.9375,0.0,0.0007
88110,Guinness World Records,UCeSRjhfeeqIgr--AcP9qhyg,The fastest man on two hands - Guinness World ...,cEItmb_a20M,2021-09-22 14:18:34+00:00,26103236.0,593301.0,7578.0,40932.0,33.0,78.292557,0.001568,791007.151515


In [18]:
############################# Plots #############################

In [234]:
# 1D Density of views:
x_var_name = "views"
# x_vals = df_videos.loc[df_videos["channel_title"] == "Earthling Ed", ["age_days"]].replace([np.inf, -np.inf], np.nan).dropna().values.flatten()
x_vals = df_videos["views"].dropna()
# x_vals = x_vals[x_vals > 200]
# x_vals = x_vals[x_vals < 500]
nbins = 200
binsize = np.abs(np.max(x_vals) - np.min(x_vals))/nbins

fig = ff.create_distplot(
    hist_data = [x_vals],
    group_labels = ["x"],
    curve_type = "kde",
    show_curve = True,
    show_hist = True,
    bin_size = binsize,
    show_rug = False,
    colors = ["#813DDA"]
)
fig.update_traces(
    hovertemplate = "<b>Density: %{y:,}<br>" + 
                    x_var_name + ": %{x:,}</b><extra></extra>"
)
fig.update_layout(
    xaxis_title = "<b>" + x_var_name + "</b>" ,
    yaxis_title = "<b>Density</b>",
    font = dict(
        size = 18
    ),
    showlegend = False,
    plot_bgcolor = "white",
    hoverlabel = dict(
        font_size = 18,
        font_family = "Rockwell"
    )
)

In [231]:
# Scatter of Mean views per day and Likes-disikes ratio:
x_var_name = "Likes-dislikes ratio"
y_var_name = "Mean views per day"
color_var_name = "Age (days)"
x_var = "likes_dislikes_ratio"
y_var = "mean_views_day"
color_var = "age_days"

channel = "Business Insider"
df_plot = df_videos[df_videos["channel_title"] == channel]

n_colors = 100
my_colors = ["#000000", "#E008F8", "#F81D08", "#F88A08", "#F7FE04"]
cmap = LinearSegmentedColormap.from_list("my_palette", my_colors)
my_palette = [to_hex(j) for j in  [cmap(i/n_colors) for i in np.array(range(n_colors))]]

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x = df_plot[x_var],
        y = df_plot[y_var],
        mode = "markers",
        marker = {
            "size": 7,
            "color": df_plot[color_var],
            "colorscale": my_palette,
            "showscale": True,
            "colorbar": {
                "title": "<b>" + color_var_name + "</b>"
            }
        },        
        text = df_plot["video_title"],
        customdata = df_plot[color_var],
        hovertemplate = "<b>" + x_var_name + ": %{x:}<br>" +
                         y_var_name + ": %{y:}<br>" +
                         color_var_name + ": %{customdata}<br>" +
                         "Video: %{text}</b><extra></extra>"
    )
)
fig.update_layout(
    title = "<b>" + channel + "</b>" ,
    xaxis_title = "<b>" + x_var_name + "</b>" ,
    yaxis_title = "<b>" + y_var_name + "</b>",
    font = dict(
        size = 18
    ),
    plot_bgcolor = "white",
    hoverlabel = dict(
        font_size = 20,
        font_family = "Rockwell"
    )
)

In [78]:
# Scatter of likes and dislikes:
x_var_name = "Ages (days)"
y_var_name = "Comments-views ratio"
color_var_name = "Mean views per day"
size_var_name = "Likes-dislikes ratio"
x_var = "age_days"
y_var = "comments_views_ratio"
color_var = "mean_views_day"
size_var = "likes_dislikes_ratio"

channel = "Kurzgesagt – In a Nutshell"
df_plot = df_videos[df_videos["channel_title"] == channel]

n_colors = 100
my_colors = ["#000000", "#E008F8", "#F81D08", "#F88A08", "#F7FE04"]
cmap = LinearSegmentedColormap.from_list("my_palette", my_colors)
my_palette = [to_hex(j) for j in  [cmap(i/n_colors) for i in np.array(range(n_colors))]]

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x = df_plot[x_var],
        y = df_plot[y_var],
        mode = "markers",
        marker = {
            "color": df_plot[color_var],
            "colorscale": my_palette,
            "showscale": True,
            "colorbar": {
                "title": "<b>" + color_var_name + "</b>"
            },
            "size": df_plot[size_var],
            "opacity": 0.9,
            "sizemode": "area",
            "sizeref": 2.*max(df_plot[size_var])/(40.**2),
            "sizemin": 4            
        },
        text = df_plot["video_title"],
        customdata = df_plot[[color_var, size_var]],
        hovertemplate = "<b>" + x_var_name + ": %{x:.0f}<br>" +
                         y_var_name + ": %{y:.2g}<br>" +
                         color_var_name + ": %{customdata[0]:.0f}<br>" +
                         size_var_name + ": %{customdata[1]:.3f}<br>" +
                         "Video: %{text}</b><extra></extra>"
    )
)
fig.update_layout(
    title = "<b>" + channel + "</b>" ,
    xaxis_title = "<b>" + x_var_name + "</b>" ,
    yaxis_title = "<b>" + y_var_name + "</b>",
    font = dict(
        size = 18
    ),
    plot_bgcolor = "white",
    hoverlabel = dict(
        font_size = 20,
        font_family = "Rockwell"
    )
)

In [None]:
### Dynamic plots (choosing the channel, the variables and filters)

In [236]:
# Variables names:
numvars = {
    "views": "View",
    "likes": "Likes",
    "dislikes": "Dislikes",
    "comments": "Comments",
    "likes_dislikes_ratio": "Likes-dislikes ratio",
    "age_days": "Age (days)",
    "comments_views_ratio": "Comment-views ratio",
    "mean_views_day": "Mean views per day"
}
catvars = {
    "channel_title": "Channel",
    "video_title": "Video"
}

In [237]:
# Options:
opts_channel = ["All channels"] + np.sort(df_videos["channel_title"].unique()).tolist()
opts_vars = list(numvars.keys())
opts_vars = [i for i in numvars.keys()]
opts_vars.sort()

In [None]:
### 1D histogram with density



In [241]:
opts_channel

['All channels',
 '3Blue1Brown',
 'AWE me',
 'Adam Savage’s Tested',
 'Aero Por Trás da Aviação',
 'Anglo-Link',
 'Animalogic',
 'Ask a Mortician',
 'Astrum',
 'Atila Iamarino',
 'Aviões e Músicas com Lito Sousa',
 'Azusa Barbie',
 'BBC',
 'Bob Ross',
 'British Pathé',
 'Buenas Ideias',
 'Business Insider',
 'CNBC',
 'Canal do Schwarza',
 'CaspianReport',
 'Chaves Estranho',
 'Cheddar',
 'Classic Mr Bean',
 'Cole and Marmalade',
 'Computerphile',
 'Cradle Of Filth',
 'Curious Droid',
 'Código Fonte TV',
 'DW Euromaxx',
 'Dinosaurs',
 'Disney',
 'Disney Parks',
 'Earthling Ed',
 'European Space Agency, ESA',
 'Everyday Astronaut',
 'Fabio Chaves',
 'Fala Vegan',
 'Fancy Fairy Wings & Things',
 'Gary L. Francione',
 'Guinness World Records',
 'Hello Korea',
 'Huygens Optics',
 'Hydraulic Press Channel',
 'Integrando Conhecimento',
 'Intensivo Pedagógico',
 'Jacques Slade',
 'JamesRandiFoundation',
 'Jay Foreman',
 'Jessica in the Kitchen - Vegan Recipes',
 'Joe Scott',
 'Journey to the M

In [242]:
opts_vars

['age_days',
 'comments',
 'comments_views_ratio',
 'dislikes',
 'likes',
 'likes_dislikes_ratio',
 'mean_views_day',
 'views']

In [11]:
### 1D histogram with density



channel = opts_channel[0]
x_var = opts_vars[0]
nbins = 100
x_min = 10
x_max = 4000
plot_color = "#813DDA"

# Nice name:
x_var_name = numvars[x_var]

# Filter the channel:
if channel == "All channels":
    x_vals = df_videos[x_var].replace([np.inf, -np.inf], np.nan).dropna()
else:
    x_vals = df_videos.loc[df_videos["channel_title"] == channel, x_var].replace([np.inf, -np.inf], np.nan).dropna()

# Checks the range:
msng = ""
warning_msng = "*Choose valid filter values."
if x_min >= x_max or x_max < np.min(x_vals) or x_min > np.max(x_vals):
    msng = warning_msng
    pass
else:
    if x_min < np.min(x_vals):
        msng = warning_msng
        pass
    else:
        x_vals = x_vals[x_vals > x_min]
    if x_max > np.max(x_vals):
        msng = warning_msng
        pass
    else:
        x_vals = x_vals[x_vals < x_max]

# Bin size:
binsize = np.abs(np.max(x_vals) - np.min(x_vals))/nbins

# Plot:
fig = ff.create_distplot(
    hist_data = [x_vals],
    group_labels = ["x"],
    curve_type = "kde",
    show_curve = True,
    show_hist = True,
    bin_size = binsize,
    show_rug = False,
    colors = [plot_color]
)
fig.update_traces(
    hovertemplate = "<b>Density: %{y:.2g}<br>" + 
                    x_var_name + ": %{x:.0f}</b><extra></extra>"
)
fig.update_layout(
    title = msng,
    xaxis_title = "<b>" + x_var_name + "</b>" ,
    yaxis_title = "<b>Density</b>",
    font = dict(
        size = 18
    ),
    showlegend = False,
    plot_bgcolor = "white",
    hoverlabel = dict(
        font_size = 18,
        font_family = "Rockwell"
    )
)



In [245]:
plot_output = widgets.Output()

dropdown_channels = widgets.Dropdown(
    description = "Channel: ",
    options = opts_channel,
    value = opts_channel[0]
)

slide_likes = widgets.IntSlider(
    description = "Likes: ",
    min = 1000,
    max = 1000000,
    step = 1000,
    value = 50000
)

fig = go.FigureWidget(data = [go.Histogram(x = df_videos["likes"])])


def filtering(chosen_channel, chosen_likes):
    # plot_output.clear_output()

    if chosen_channel == "All channels":
        df_filtered = df_videos[df_videos["likes"] > chosen_likes]
    else:
        df_filtered = df_videos[(df_videos["likes"] > chosen_likes) & (df_videos["channel_title"] == chosen_channel)]    
    

        
def dropdown_channels_eventhandler(change):
    filtering(chosen_channel = change.new,
              chosen_likes = slide_likes.value)

def slide_likes_eventhandler(change):
    filtering(chosen_channel = dropdown_channels.value,
              chosen_likes = change.new)

dropdown_channels.observe(dropdown_channels_eventhandler,
                          names = "value")
slide_likes.observe(slide_likes_eventhandler,
                    names = "value")

display(dropdown_channels)
display(slide_likes)
display(plot_output)

Dropdown(description='Channel: ', options=('All channels', '3Blue1Brown', 'AWE me', 'Adam Savage’s Tested', 'A…

IntSlider(value=50000, description='Likes: ', max=1000000, min=1000, step=1000)

Output()

In [244]:
df

Unnamed: 0,year,month,day,dep_time,dep_delay,arr_time,arr_delay,carrier,tailnum,flight,origin,dest,air_time,distance,hour,minute
0,2013,1,1,517.0,2.0,830.0,11.0,UA,N14228,1545,EWR,IAH,227.0,1400,5.0,17.0
1,2013,1,1,533.0,4.0,850.0,20.0,UA,N24211,1714,LGA,IAH,227.0,1416,5.0,33.0
2,2013,1,1,542.0,2.0,923.0,33.0,AA,N619AA,1141,JFK,MIA,160.0,1089,5.0,42.0
3,2013,1,1,544.0,-1.0,1004.0,-18.0,B6,N804JB,725,JFK,BQN,183.0,1576,5.0,44.0
4,2013,1,1,554.0,-6.0,812.0,-25.0,DL,N668DN,461,LGA,ATL,116.0,762,5.0,54.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336771,2013,9,30,,,,,9E,,3393,JFK,DCA,,213,,
336772,2013,9,30,,,,,9E,,3525,LGA,SYR,,198,,
336773,2013,9,30,,,,,MQ,N535MQ,3461,LGA,BNA,,764,,
336774,2013,9,30,,,,,MQ,N511MQ,3572,LGA,CLE,,419,,


In [243]:
df = pd.read_csv('https://raw.githubusercontent.com/yankev/testing/master/datasets/nycflights.csv')
df = df.drop(df.columns[[0]], axis=1)

month = widgets.IntSlider(
    value=1.0,
    min=1.0,
    max=12.0,
    step=1.0,
    description='Month:',
    continuous_update=False
)

use_date = widgets.Checkbox(
    description='Date: ',
    value=True,
)

container = widgets.HBox(children=[use_date, month])

textbox = widgets.Dropdown(
    description='Airline:   ',
    value='DL',
    options=df['carrier'].unique().tolist()
)

origin = widgets.Dropdown(
    options=list(df['origin'].unique()),
    value='LGA',
    description='Origin Airport:',
)


# Assign an empty figure widget
trace = go.Histogram(
    x = df['arr_delay']
)
g = go.FigureWidget(
    data = [trace],
    layout = go.Layout(
        title = dict(
            text = 'NYC FlightDatabase'
        ),
        barmode = 'overlay'
                    ))

def validate():
    if origin.value in df['origin'].unique() and textbox.value in df['carrier'].unique():
        return True
    else:
        return False


def response(change):
    if validate():
        if use_date.value:
            filter_list = [i and j and k for i, j, k in
                           zip(df['month'] == month.value, 
                               df['carrier'] == textbox.value,
                               df['origin'] == origin.value)]
            temp_df = df[filter_list]

        else:
            filter_list = [i and j for i, j in
                           zip(df['carrier'] == 'DL', 
                               df['origin'] == origin.value)]
            temp_df = df[filter_list]
        x1 = temp_df['arr_delay']
        x2 = temp_df['dep_delay']
        with g.batch_update():
            g.data[0].x = x1
            g.data[1].x = x2
            g.layout.barmode = 'overlay'
            g.layout.xaxis.title = 'Delay in Minutes'
            g.layout.yaxis.title = 'Number of Delays'

# Observes:
origin.observe(response, names = "value")
textbox.observe(response, names = "value")
month.observe(response, names = "value")
use_date.observe(response, names = "value")

# container:
container2 = widgets.HBox(
    [
        origin,
        textbox
    ]
)

# Main box:
widgets.VBox(
    [
        container,
        container2,
        g
    ]
)

VBox(children=(HBox(children=(Checkbox(value=True, description='Date: '), IntSlider(value=1, continuous_update…

In [250]:
### 1D histogram

# Variable:
x_var = "age_days"
x_var_name = "Age (days)"

# Filters:
dropdown_channels = widgets.Dropdown(
    description = "Channel: ",
    options = opts_channel,
    value = opts_channel[0]
)

slide_bins = widgets.IntSlider(
    description = "Bins: ",
    min = 10,
    max = 1000,
    step = 10,
    value = 200
)

# Initialize an empty figure:
fig = go.FigureWidget(
    data = [
        go.Histogram(
            x = x_vals,
            histfunc = "count",
            nbinsx = 50,
            marker_color = "#813DDA",
            opacity = 0.9
        )
    ],
    layout = go.Layout(
        xaxis_title = "<b>" + x_var_name + "</b>" ,
        yaxis_title = "<b>Counts</b>",
        font = dict(
            size = 18
        ),
        showlegend = False,
        plot_bgcolor = "white",
        hoverlabel = dict(
            font_size = 18,
            font_family = "Rockwell"
        )
    )
)


def filtering(chosen_channel, chosen_bins):
    if chosen_channel == "All channels":
        df_filtered = df_videos.copy()
    else:
        df_filtered = df_videos.copy()[df_videos["channel_title"] == chosen_channel]
    
    x_vals = df_filtered[x_var]
    with fig.batch_update():
        fig.data[0].x = x_vals
        fig.data[0].nbinsx = chosen_bins


def dropdown_channels_eventhandler(change):
    filtering(chosen_channel = change.new,
              chosen_bins = slide_bins.value)

def slide_bins_eventhandler(change):
    filtering(chosen_channel = dropdown_channels.value,
              chosen_bins = change.new)

dropdown_channels.observe(dropdown_channels_eventhandler,
                          names = "value")
slide_bins.observe(slide_bins_eventhandler,
                   names = "value")


# Box of filters:
row_filters = widgets.HBox(
    [
        dropdown_channels,
        slide_bins
    ]
)

# Main box:
widgets.VBox(
    [
        row_filters,
        fig
    ]
)


VBox(children=(HBox(children=(Dropdown(description='Channel: ', options=('All channels', '3Blue1Brown', 'AWE m…

In [None]:
# Scatter with colors:

In [None]:
# Bubble with colors:

In [None]:
# 2D density:

In [None]:
# Heatmap numeric:

In [None]:
# Lines with 3 variables (the categoric being the channel, until a limit of say 5 channels):

In [None]:
# Correlation matrix continuous: