### *This notebook runs fine in Jupyter, but not so well in Google Colab, possibly due to a bug in Colab's interaction with ipywidgets and plotly.*

# Imports and options

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from matplotlib.colors import LinearSegmentedColormap, to_hex
import ipywidgets as widgets
from scipy.stats import skew
import plotly.express as px

# Display options: raise the pandas limits on printed width, columns and rows.
for _option_name, _option_value in {
    "display.width": 1200,
    "display.max_columns": 300,
    "display.max_rows": 300,
}.items():
    pd.set_option(_option_name, _option_value)

# Dataset

In [2]:
# Read the data (semicolon-separated file, path relative to the working directory):
df_videos = pd.read_csv(
    filepath_or_buffer = "data/videos_data.csv",
    sep = ";"
)

In [3]:
# Preview the first five rows of the loaded frame.
df_videos.head(5)

Unnamed: 0,channel_title,channel_id,video_title,video_id,video_upload_date,views,likes,dislikes,comments,age_days,likes_dislikes_ratio,comments_views_ratio,mean_views_day
0,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,How a Mandelbrot set arises from Newton’s work,LqbZpur38nw,2021-10-15 16:41:50+00:00,568689,26102,146,1216,8.0,178.780822,0.002138,71086.125
1,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,Newton's Fractal (which Newton knew nothing ab...,-RdOwhmqP5s,2021-10-07 02:19:39+00:00,1195545,61723,284,2810,16.0,217.334507,0.00235,74721.5625
2,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,The Summer of Math Exposition,ojjzXyQCzso,2021-07-16 15:37:16+00:00,610550,29300,215,1719,99.0,136.27907,0.002815,6167.171717
3,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,A quick trick for computing eigenvalues | Chap...,e50Bj7jn9IQ,2021-05-07 19:01:16+00:00,421914,17032,145,1146,169.0,117.462069,0.002716,2496.532544
4,3Blue1Brown,UCYO_jab_esuFRV4b17AJtAw,How (and why) to raise e to the power of a mat...,O85OWBJ2ayo,2021-04-01 06:38:35+00:00,1161654,46425,296,2586,205.0,156.841216,0.002226,5666.604878


In [4]:
# Int to float: cast the four count columns to float64 in a single call.
df_videos = df_videos.astype(
    dict.fromkeys(["views", "likes", "dislikes", "comments"], "float64")
)

In [5]:
# Check the column dtypes after the cast above.
df_videos.dtypes

channel_title            object
channel_id               object
video_title              object
video_id                 object
video_upload_date        object
views                   float64
likes                   float64
dislikes                float64
comments                float64
age_days                float64
likes_dislikes_ratio    float64
comments_views_ratio    float64
mean_views_day          float64
dtype: object

In [6]:
# Summary statistics of the numeric columns.
df_videos.describe()

Unnamed: 0,views,likes,dislikes,comments,age_days,likes_dislikes_ratio,comments_views_ratio,mean_views_day
count,109846.0,109846.0,109846.0,109846.0,109846.0,109846.0,109846.0,109846.0
mean,661028.6,13309.79,465.904,769.009704,1832.925086,56.087365,0.003138,971.0691
std,3376990.0,47332.49,7937.29,3076.690459,1226.744715,78.51294,0.006612,8986.223
min,1.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0007012623
25%,16229.25,227.0,10.0,24.0,883.0,12.797303,0.000647,9.232827
50%,67931.5,1356.0,40.0,120.0,1648.0,32.076923,0.001555,50.30444
75%,288042.5,7036.0,169.0,509.0,2570.0,68.630233,0.003415,294.6639
max,324479900.0,3061149.0,2384345.0,298356.0,5732.0,2635.0,0.440313,1030206.0


# Plots

## Barplot of the number of videos by channel

In [9]:
# Frequency of videos per channel:
video_counts = df_videos['channel_title'].value_counts()
df_plot = pd.DataFrame(
    {
        "channel": video_counts.index.tolist(),
        "freq": video_counts.tolist()
    }
)

# Relative frequency (percentage of all videos, rounded to 3 decimals).
# The grand total is computed once instead of being re-summed for every row;
# it is converted to a plain int so the rounding stays Python's built-in round().
total_videos = int(df_plot["freq"].sum())
df_plot["freq_rel"] = [round(i/total_videos*100, 3) for i in df_plot["freq"]]
df_plot["freq_rel_char"] = [str(i) + "%" for i in df_plot["freq_rel"]]

# Palette: one colour per channel, interpolated between the two hex colours.
n_levels = len(df_plot)
cmap = LinearSegmentedColormap.from_list("my_palette", ["#111539", "#97A1D9"])
my_palette = [to_hex(cmap(level / n_levels)) for level in range(n_levels)]

# Plot: one bar per channel on a log-scaled count axis, labelled with the
# channel's share of all videos.
fig = px.bar(
    df_plot,
    x = "channel",
    y = "freq",
    text = "freq_rel_char",
    color = "channel",
    color_discrete_sequence = my_palette,
    log_y = True
)
fig.update_traces(
    hovertemplate = "<b>Channel: %{x}<br>Frequency: %{y:}</b><extra></extra>",
    textposition = "outside",
    textfont_size = 15,
    # Labels use the colour from the middle of the palette.
    textfont_color = my_palette[n_levels//2]
)
# Last expression of the cell: update_layout returns the figure, which is displayed.
fig.update_layout(
    height = 600,
    xaxis_title = "<b>Channel</b>",
    yaxis_title = "<b>Number of Videos</b>",
    xaxis = {"tickangle": 40},
    font = {"size": 18},
    hoverlabel = {"font_size": 18, "font_family": "Rockwell"},
    margin = {"l": 20, "r": 20, "t": 50, "b": 20},
    plot_bgcolor = "white",
    showlegend = False
)

# Dynamic plots

## Variables and options

In [10]:
# Variables names: maps the label shown in the widgets (key) to the
# df_videos column it refers to (value). The insertion order matters: the
# dropdowns below pick their default values by position in this dict, and the
# hover template indexes `customdata` in the same order.
vars_names = {
    "Views": "views",
    "Likes": "likes",
    "Dislikes": "dislikes",
    "Comments": "comments",
    "Likes/dislikes": "likes_dislikes_ratio",
    "Age (days)": "age_days",
    "Comments/views": "comments_views_ratio",
    "Mean views/day": "mean_views_day"
}

# Options: sorted channel titles and sorted variable labels.
opts_channel = np.sort(df_videos["channel_title"].unique()).tolist()
# Built once; the earlier duplicate assignment was immediately overwritten.
opts_vars = sorted(vars_names)

# Custom hovers: the template shows the video title followed by one line per
# variable; line k reads column k of the trace's `customdata`, which is filled
# from the `custom_vars` columns (same order as `vars_names`).
custom_vars = list(vars_names.values())
custom_template = (
    "<b>Video title: %{text}<br>"
    + "".join(
        label + ": %{customdata[" + str(position) + "]:}<br>"
        for position, label in enumerate(vars_names)
    )
    + "</b><extra></extra>"
)

## 1D histogram

In [11]:
# Variable and filters widgets:
input_xvar = widgets.Dropdown(
    options = list(vars_names),
    value = next(iter(vars_names)),
    description = "Variable: "
)
input_channels = widgets.Dropdown(
    options = opts_channel,
    value = opts_channel[0],
    description = "Channel: "
)
input_bins = widgets.IntSlider(
    value = 200,
    min = 10,
    max = 1000,
    step = 10,
    description = "Bins: "
)
# Min/Max start at the extremes of the default variable over the whole dataset
# (all channels), not only over the initially selected channel.
input_xmin = widgets.FloatText(
    description = "Min: ",
    value = np.min(df_videos[vars_names[input_xvar.value]])
)
input_xmax = widgets.FloatText(
    description = "Max: ",
    value = np.max(df_videos[vars_names[input_xvar.value]])
)

# Statistics in the title:
def plot_title(x):
    """Return the HTML title string summarising the sample `x`.

    The title lists the mean, median, standard deviation (``np.std``, i.e.
    numpy's default ddof) and skewness (``scipy.stats.skew``) of `x`. Mean and
    median are coloured like the vertical lines drawn by `vert_lines`.
    """
    gap = " " * 6
    entries = [
        f"<b style = 'color: #900c3f'>Mean</b>: {np.mean(x):.7g}",
        f"<b style = 'color: #ffc300'>Median</b>: {np.median(x):.7g}",
        f"<b>Standard deviation</b>: {np.std(x):.7g}",
        f"<b>Skewness</b>: {skew(x):.3g}",
    ]
    return "Filtered sample statistics<br>" + gap.join(entries)

# Mean and median lines:
def vert_lines(x):
    """Return two plotly shape dicts: dashed vertical lines at mean and median.

    Each line spans the full plot height (`yref` is 'paper', y from 0 to 1) and
    sits at a data coordinate on the x axis. The first entry marks ``np.mean(x)``
    and the second ``np.median(x)``.
    """
    def _dashed_line(position, colour):
        # One vertical line: same x for both end points.
        return {
            'line': {
                'color': colour,
                'dash': 'dash',
                'width': 2
            },
            'type': 'line',
            'x0': position,
            'x1': position,
            'xref': 'x',
            'y0': 0,
            'y1': 1,
            'yref': 'paper'
        }

    return [
        _dashed_line(np.mean(x), '#900c3f'),
        _dashed_line(np.median(x), '#ffc300')
    ]

# Initialize the figure with the first channel and the default variable:
x_init = df_videos.loc[
    df_videos["channel_title"] == opts_channel[0],
    vars_names[input_xvar.value]
]
# Note: x_mean and x_median are not used by the figure below.
x_mean = np.mean(x_init)
x_median = np.median(x_init)
fig = go.FigureWidget(
    data = [
        go.Histogram(
            x = x_init,
            nbinsx = input_bins.value,
            histfunc = "count",
            opacity = 0.9,
            marker_color = "#00baad"
        )
    ],
    layout = go.Layout(
        height = 600,
        title = plot_title(x_init),
        shapes = vert_lines(x_init),
        xaxis_title = "<b>" + input_xvar.value + "</b>",
        yaxis_title = "<b>Counts</b>",
        plot_bgcolor = "white",
        showlegend = False,
        font = {"size": 18},
        hoverlabel = {"font_size": 18, "font_family": "Rockwell"},
        # Larger top margin leaves room for the two-line statistics title.
        margin = {"l": 20, "r": 20, "t": 100, "b": 20}
    )
)

# Filter and update function:
def filtering(chosen_xvar, chosen_channel, chosen_bins, chosen_xmin, chosen_xmax):
    """Redraw the histogram for one channel, variable, bin count and value range.

    Parameters
    ----------
    chosen_xvar : str
        Label of the variable to plot (a key of `vars_names`).
    chosen_channel : str
        Channel title used to select the rows of `df_videos`.
    chosen_bins : int
        Value assigned to the histogram's `nbinsx`.
    chosen_xmin, chosen_xmax : float
        Inclusive bounds of the values to keep. The range is ignored (all of
        the channel's values are kept) when it is empty or inverted, or when
        it lies entirely below or above the channel's data.

    Updates the global `fig` in place; returns nothing.
    """
    # Boolean indexing already returns a new object, so the whole DataFrame
    # does not need to be copied on every widget event.
    x_vals = df_videos.loc[
        df_videos["channel_title"] == chosen_channel,
        vars_names[chosen_xvar]
    ].dropna()

    # Filter the range. Bounds are inclusive: the Min/Max widgets start at the
    # dataset extremes, and strict comparisons would silently drop the
    # observations sitting exactly on those extremes.
    range_is_unusable = (
        chosen_xmin >= chosen_xmax
        or chosen_xmax < x_vals.min()
        or chosen_xmin > x_vals.max()
    )
    if not range_is_unusable:
        x_vals = x_vals[x_vals.between(chosen_xmin, chosen_xmax)]

    # Update the figure (single redraw for all property changes):
    with fig.batch_update():
        fig.data[0].x = x_vals
        fig.data[0].nbinsx = chosen_bins
        fig.layout.xaxis.title = "<b>" + chosen_xvar + "</b>"
        fig.layout.title = plot_title(x_vals)
        fig.layout.shapes = vert_lines(x_vals)

# Event handlers: each handler forwards the new value of its own widget
# (`change.new`) together with the current values of the other widgets.
# NOTE(review): `filtering`, `fig` and the `input_*` names are rebound by later
# cells of this notebook and are looked up when a handler runs, so this widget
# group should be used before those later cells are executed.
def eventhandler_xvar(change):
    """Handle a change of the variable dropdown."""
    filtering(
        chosen_xvar = change.new,
        chosen_channel = input_channels.value,
        chosen_bins = input_bins.value,
        chosen_xmin = input_xmin.value,
        chosen_xmax = input_xmax.value
    )

def eventhandler_channels(change):
    """Handle a change of the channel dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_channel = change.new,
        chosen_bins = input_bins.value,
        chosen_xmin = input_xmin.value,
        chosen_xmax = input_xmax.value
    )

def eventhandler_bins(change):
    """Handle a change of the bins slider."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_channel = input_channels.value,
        chosen_bins = change.new,
        chosen_xmin = input_xmin.value,
        chosen_xmax = input_xmax.value
    )

def eventhandler_xmin(change):
    """Handle a change of the lower bound."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_channel = input_channels.value,
        chosen_bins = input_bins.value,
        chosen_xmin = change.new,
        chosen_xmax = input_xmax.value
    )

def eventhandler_xmax(change):
    """Handle a change of the upper bound."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_channel = input_channels.value,
        chosen_bins = input_bins.value,
        chosen_xmin = input_xmin.value,
        chosen_xmax = change.new
    )

# Observes: react only to changes of each widget's `value` trait.
input_xvar.observe(eventhandler_xvar, names = "value")
input_channels.observe(eventhandler_channels, names = "value")
input_bins.observe(eventhandler_bins, names = "value")
input_xmin.observe(eventhandler_xmin, names = "value")
input_xmax.observe(eventhandler_xmax, names = "value")

# Row of filters:
row_filters = widgets.HBox(
    children = [input_xvar, input_channels, input_bins, input_xmin, input_xmax]
)

# Main box (last expression of the cell, so it is what gets displayed):
widgets.VBox(children = [row_filters, fig])

VBox(children=(HBox(children=(Dropdown(description='Variable: ', options=('Views', 'Likes', 'Dislikes', 'Comme…

## 2D Density

In [12]:
# Variables and filters widgets (x defaults to the first variable, y to the second):
input_xvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[0],
    description = "x: "
)
input_yvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[1],
    description = "y: "
)
input_channels = widgets.Dropdown(
    options = opts_channel,
    value = opts_channel[0],
    description = "Channel: "
)

# Palette: colour stops passed to the contour plot as its colorscale.
my_colors = ["#000000", "#E008F8", "#F81D08", "#F88A08", "#F7FE04"]

# Initialize the figure with the first channel and the default x/y variables:
df_plot = df_videos.loc[df_videos["channel_title"] == opts_channel[0]]
x_init = df_plot[vars_names[input_xvar.value]]
y_init = df_plot[vars_names[input_yvar.value]]
fig = go.FigureWidget(
    data = [
        go.Histogram2dContour(
            x = x_init,
            y = y_init,
            histnorm = "probability density",
            ncontours = 10,
            colorscale = my_colors,
            colorbar = {"title": "<b>Density</b>"}
        )
    ],
    layout = go.Layout(
        width = 1100,
        height = 600,
        xaxis_title = "<b>" + input_xvar.value + "</b>",
        yaxis_title = "<b>" + input_yvar.value + "</b>",
        plot_bgcolor = "white",
        showlegend = False,
        font = {"size": 18},
        hoverlabel = {"font_size": 18, "font_family": "Rockwell"},
        margin = {"l": 400, "r": 20, "t": 20, "b": 20}
    )
)

# Filter and update function:
def filtering(chosen_xvar, chosen_yvar, chosen_channel):
    """Redraw the 2D density for the chosen channel and x/y variables.

    `chosen_xvar` and `chosen_yvar` are keys of `vars_names`; `chosen_channel`
    is a channel title. Updates the global `fig` in place; returns nothing.
    """
    # Filter by channel. Boolean indexing already returns a new frame, so the
    # whole DataFrame is not copied on every widget event.
    df_filtered = df_videos[df_videos["channel_title"] == chosen_channel]

    # Variables:
    x_vals = df_filtered[vars_names[chosen_xvar]]
    y_vals = df_filtered[vars_names[chosen_yvar]]

    # Update the figure (single redraw for all property changes):
    with fig.batch_update():
        fig.data[0].x = x_vals
        fig.data[0].y = y_vals
        fig.layout.xaxis.title = "<b>" + chosen_xvar + "</b>"
        fig.layout.yaxis.title = "<b>" + chosen_yvar + "</b>"

# Event handlers: forward the changed value plus the other widgets' current values.
def eventhandler_xvar(change):
    """Handle a change of the x dropdown."""
    filtering(
        chosen_xvar = change.new,
        chosen_yvar = input_yvar.value,
        chosen_channel = input_channels.value
    )

def eventhandler_yvar(change):
    """Handle a change of the y dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = change.new,
        chosen_channel = input_channels.value
    )

def eventhandler_channels(change):
    """Handle a change of the channel dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_channel = change.new
    )

# Observes: react only to changes of each widget's `value` trait.
input_xvar.observe(eventhandler_xvar, names = "value")
input_yvar.observe(eventhandler_yvar, names = "value")
input_channels.observe(eventhandler_channels, names = "value")

# Row of filters:
row_filters = widgets.HBox(
    children = [input_xvar, input_yvar, input_channels]
)

# Main box (last expression of the cell, so it is what gets displayed):
widgets.VBox(children = [row_filters, fig])

VBox(children=(HBox(children=(Dropdown(description='x: ', options=('Views', 'Likes', 'Dislikes', 'Comments', '…

## Scatter with colors

In [17]:
# Variables and filters widgets (x, y and colour default to the first three variables):
input_xvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[0],
    description = "x: "
)
input_yvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[1],
    description = "y: "
)
input_cvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[2],
    description = "color: "
)
input_channels = widgets.Dropdown(
    options = opts_channel,
    value = opts_channel[0],
    description = "Channel: "
)

# Palette: n_colors hex colours sampled from a colormap built on my_colors.
n_colors = 100
my_colors = ["#000000", "#E008F8", "#F81D08", "#F88A08", "#F7FE04"]
cmap = LinearSegmentedColormap.from_list("my_palette", my_colors)
my_palette = [to_hex(cmap(step / n_colors)) for step in range(n_colors)]

# Initialize the figure with the first channel and the default variables:
df_plot = df_videos.loc[df_videos["channel_title"] == opts_channel[0]]
x_init = df_plot[vars_names[input_xvar.value]]
y_init = df_plot[vars_names[input_yvar.value]]
c_init = df_plot[vars_names[input_cvar.value]]
fig = go.FigureWidget(
    data = [
        go.Scatter(
            mode = "markers",
            x = x_init,
            y = y_init,
            marker = dict(
                size = 7,
                color = c_init,
                colorscale = my_palette,
                showscale = True,
                colorbar = dict(title = "<b>" + input_cvar.value + "</b>")
            ),
            # Hover text: video title plus every variable of `custom_vars`.
            text = df_plot["video_title"],
            customdata = df_plot[custom_vars],
            hovertemplate = custom_template
        )
    ],
    layout = go.Layout(
        height = 600,
        xaxis_title = "<b>" + input_xvar.value + "</b>",
        yaxis_title = "<b>" + input_yvar.value + "</b>",
        plot_bgcolor = "white",
        showlegend = False,
        font = {"size": 18},
        hoverlabel = {"font_size": 18, "font_family": "Rockwell"},
        margin = {"l": 20, "r": 20, "t": 20, "b": 20}
    )
)

# Filter and update function:
def filtering(chosen_xvar, chosen_yvar, chosen_cvar, chosen_channel):
    """Redraw the coloured scatter for the chosen channel and variables.

    `chosen_xvar`, `chosen_yvar` and `chosen_cvar` are keys of `vars_names`
    (position, position and marker colour); `chosen_channel` is a channel
    title. Updates the global `fig` in place; returns nothing.
    """
    # Filter by channel. Boolean indexing already returns a new frame, so the
    # whole DataFrame is not copied on every widget event.
    df_filtered = df_videos[df_videos["channel_title"] == chosen_channel]

    # Variables:
    x_vals = df_filtered[vars_names[chosen_xvar]]
    y_vals = df_filtered[vars_names[chosen_yvar]]
    c_vals = df_filtered[vars_names[chosen_cvar]]

    # Update the figure (single redraw for all property changes):
    with fig.batch_update():
        fig.data[0].x = x_vals
        fig.data[0].y = y_vals
        fig.layout.xaxis.title = "<b>" + chosen_xvar + "</b>"
        fig.layout.yaxis.title = "<b>" + chosen_yvar + "</b>"
        fig.data[0].marker.color = c_vals
        fig.data[0].marker.colorbar.title = "<b>" + chosen_cvar + "</b>"
        # Hover content must follow the rows currently plotted.
        fig.data[0].text = df_filtered["video_title"]
        fig.data[0].customdata = df_filtered[custom_vars]

# Event handlers: forward the changed value plus the other widgets' current values.
def eventhandler_xvar(change):
    """Handle a change of the x dropdown."""
    filtering(
        chosen_xvar = change.new,
        chosen_yvar = input_yvar.value,
        chosen_cvar = input_cvar.value,
        chosen_channel = input_channels.value
    )

def eventhandler_yvar(change):
    """Handle a change of the y dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = change.new,
        chosen_cvar = input_cvar.value,
        chosen_channel = input_channels.value
    )

def eventhandler_cvar(change):
    """Handle a change of the colour dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_cvar = change.new,
        chosen_channel = input_channels.value
    )

def eventhandler_channels(change):
    """Handle a change of the channel dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_cvar = input_cvar.value,
        chosen_channel = change.new
    )

# Observes: react only to changes of each widget's `value` trait.
input_xvar.observe(eventhandler_xvar, names = "value")
input_yvar.observe(eventhandler_yvar, names = "value")
input_cvar.observe(eventhandler_cvar, names = "value")
input_channels.observe(eventhandler_channels, names = "value")

# Row of filters:
row_filters = widgets.HBox(
    children = [input_xvar, input_yvar, input_cvar, input_channels]
)

# Main box (last expression of the cell, so it is what gets displayed):
widgets.VBox(children = [row_filters, fig])

VBox(children=(HBox(children=(Dropdown(description='x: ', options=('Views', 'Likes', 'Dislikes', 'Comments', '…

## Bubble with colors

In [16]:
# Variables and filters widgets (x, y, colour and size default to the first
# four variables):
input_xvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[0],
    description = "x: "
)
input_yvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[1],
    description = "y: "
)
input_cvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[2],
    description = "color: "
)
input_svar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[3],
    description = "size: "
)
input_channels = widgets.Dropdown(
    options = opts_channel,
    value = opts_channel[0],
    description = "Channel: "
)

# Palette: n_colors hex colours sampled from a colormap built on my_colors.
n_colors = 100
my_colors = ["#000000", "#E008F8", "#F81D08", "#F88A08", "#F7FE04"]
cmap = LinearSegmentedColormap.from_list("my_palette", my_colors)
my_palette = [to_hex(cmap(step / n_colors)) for step in range(n_colors)]

# Dynamic bubble size:
def size_func(s_vals):
    """Return the marker `sizeref` for the given size values.

    The result satisfies ``max(s_vals) / sizeref == 20**2``, i.e. it is derived
    from the values that are actually plotted (the argument), not from a
    global. NaN entries are ignored.

    Parameters
    ----------
    s_vals : sequence of numbers (list, numpy array or pandas Series)
        Values mapped to the marker size.

    Returns
    -------
    float
        ``max(s_vals) / 400``, or 1.0 when `s_vals` is empty, all-NaN, or its
        maximum is not a positive finite number (a zero or NaN `sizeref`
        cannot scale markers).
    """
    sizes = np.asarray(s_vals, dtype = float)
    finite_sizes = sizes[np.isfinite(sizes)]
    if finite_sizes.size == 0:
        return 1.0
    largest = float(finite_sizes.max())
    if largest <= 0:
        return 1.0
    ref_size = largest/(20**2)
    return(ref_size)

# Initialize the figure with the first channel and the default variables:
df_plot = df_videos.loc[df_videos["channel_title"] == opts_channel[0]]
x_init = df_plot[vars_names[input_xvar.value]]
y_init = df_plot[vars_names[input_yvar.value]]
c_init = df_plot[vars_names[input_cvar.value]]
s_init = df_plot[vars_names[input_svar.value]]
fig = go.FigureWidget(
    data = [
        go.Scatter(
            mode = "markers",
            x = x_init,
            y = y_init,
            marker = dict(
                color = c_init,
                colorscale = my_palette,
                showscale = True,
                colorbar = dict(title = "<b>" + input_cvar.value + "</b>"),
                opacity = 0.9,
                # Bubble area encodes the size variable, scaled by sizeref.
                size = s_init,
                sizemode = "area",
                sizeref = size_func(s_init),
                sizemin = 2
            ),
            # Hover text: video title plus every variable of `custom_vars`.
            text = df_plot["video_title"],
            customdata = df_plot[custom_vars],
            hovertemplate = custom_template
        )
    ],
    layout = go.Layout(
        height = 600,
        xaxis_title = "<b>" + input_xvar.value + "</b>",
        yaxis_title = "<b>" + input_yvar.value + "</b>",
        plot_bgcolor = "white",
        showlegend = False,
        font = {"size": 18},
        hoverlabel = {"font_size": 18, "font_family": "Rockwell"},
        margin = {"l": 20, "r": 20, "t": 20, "b": 20}
    )
)

# Filter and update function:
def filtering(chosen_xvar, chosen_yvar, chosen_cvar, chosen_svar, chosen_channel):
    """Redraw the bubble chart for the chosen channel and variables.

    `chosen_xvar`, `chosen_yvar`, `chosen_cvar` and `chosen_svar` are keys of
    `vars_names` (position, position, marker colour and marker size);
    `chosen_channel` is a channel title. Updates the global `fig` in place;
    returns nothing.
    """
    # Filter by channel. Boolean indexing already returns a new frame, so the
    # whole DataFrame is not copied on every widget event.
    df_filtered = df_videos[df_videos["channel_title"] == chosen_channel]

    # Variables:
    x_vals = df_filtered[vars_names[chosen_xvar]]
    y_vals = df_filtered[vars_names[chosen_yvar]]
    c_vals = df_filtered[vars_names[chosen_cvar]]
    s_vals = df_filtered[vars_names[chosen_svar]]

    # Update the figure (single redraw for all property changes):
    with fig.batch_update():
        fig.data[0].x = x_vals
        fig.data[0].y = y_vals
        fig.layout.xaxis.title = "<b>" + chosen_xvar + "</b>"
        fig.layout.yaxis.title = "<b>" + chosen_yvar + "</b>"
        fig.data[0].marker.color = c_vals
        fig.data[0].marker.colorbar.title = "<b>" + chosen_cvar + "</b>"
        # Hover content must follow the rows currently plotted.
        fig.data[0].text = df_filtered["video_title"]
        fig.data[0].customdata = df_filtered[custom_vars]
        # `size` keeps the per-point values; the scaling factor belongs in
        # `sizeref`. Writing it to `size` would replace every bubble's size
        # with one scalar.
        fig.data[0].marker.size = s_vals
        fig.data[0].marker.sizeref = size_func(s_vals)

# Event handlers: forward the changed value plus the other widgets' current values.
def eventhandler_xvar(change):
    """Handle a change of the x dropdown."""
    filtering(
        chosen_xvar = change.new,
        chosen_yvar = input_yvar.value,
        chosen_cvar = input_cvar.value,
        chosen_svar = input_svar.value,
        chosen_channel = input_channels.value
    )

def eventhandler_yvar(change):
    """Handle a change of the y dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = change.new,
        chosen_cvar = input_cvar.value,
        chosen_svar = input_svar.value,
        chosen_channel = input_channels.value
    )

def eventhandler_cvar(change):
    """Handle a change of the colour dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_cvar = change.new,
        chosen_svar = input_svar.value,
        chosen_channel = input_channels.value
    )

def eventhandler_svar(change):
    """Handle a change of the size dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_cvar = input_cvar.value,
        chosen_svar = change.new,
        chosen_channel = input_channels.value
    )

def eventhandler_channels(change):
    """Handle a change of the channel dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_cvar = input_cvar.value,
        chosen_svar = input_svar.value,
        chosen_channel = change.new
    )

# Observes: react only to changes of each widget's `value` trait.
input_xvar.observe(eventhandler_xvar, names = "value")
input_yvar.observe(eventhandler_yvar, names = "value")
input_cvar.observe(eventhandler_cvar, names = "value")
input_svar.observe(eventhandler_svar, names = "value")
input_channels.observe(eventhandler_channels, names = "value")

# Row of filters:
row_filters = widgets.HBox(
    children = [input_xvar, input_yvar, input_cvar, input_svar, input_channels]
)

# Main box (last expression of the cell, so it is what gets displayed):
widgets.VBox(children = [row_filters, fig])

VBox(children=(HBox(children=(Dropdown(description='x: ', options=('Views', 'Likes', 'Dislikes', 'Comments', '…

## Scatter to compare 2 channels

In [19]:
# Variables and filters widgets (the two channel dropdowns start on the first
# and second channel of the sorted list):
input_xvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[0],
    description = "x: "
)
input_yvar = widgets.Dropdown(
    options = list(vars_names),
    value = list(vars_names)[1],
    description = "y: "
)
input_channel1 = widgets.Dropdown(
    options = opts_channel,
    value = opts_channel[0],
    description = "Channel 1: "
)
input_channel2 = widgets.Dropdown(
    options = opts_channel,
    value = opts_channel[1],
    description = "Channel 2: "
)

# Initialize the figure: one scatter trace per channel, using the first two
# channels and the default x/y variables.
df_plot1 = df_videos[df_videos["channel_title"] == opts_channel[0]]
x_init1 = df_plot1[vars_names[input_xvar.value]]
y_init1 = df_plot1[vars_names[input_yvar.value]]
df_plot2 = df_videos[df_videos["channel_title"] == opts_channel[1]]
x_init2 = df_plot2[vars_names[input_xvar.value]]
y_init2 = df_plot2[vars_names[input_yvar.value]]

fig = go.FigureWidget(
    data = [
        go.Scatter(
            x = x_init1,
            y = y_init1,
            mode = "markers",
            marker = {
                "size": 7,
                "color": "#2a7b9b"
            },
            name = opts_channel[0],
            text = df_plot1["video_title"],
            customdata = df_plot1[custom_vars],
            hovertemplate = custom_template
        ),
        go.Scatter(
            # The second trace must plot channel 2's own coordinates so that
            # points, legend name and hover data all describe the same channel.
            x = x_init2,
            y = y_init2,
            mode = "markers",
            marker = {
                "size": 7,
                "color": "#ff8d1a"
            },
            name = opts_channel[1],
            text = df_plot2["video_title"],
            customdata = df_plot2[custom_vars],
            hovertemplate = custom_template
        )
    ],
    layout = go.Layout(
        xaxis_title = "<b>" + input_xvar.value + "</b>",
        yaxis_title = "<b>" + input_yvar.value + "</b>",
        font = dict(
            size = 18
        ),
        # The legend is shown here because it identifies the two channels.
        showlegend = True,
        plot_bgcolor = "white",
        hoverlabel = dict(
            font_size = 18,
            font_family = "Rockwell"
        ),
        margin = dict(
            l = 20,
            r = 20,
            t = 20,
            b = 20
        ),
        height = 600
    )
)

# Filter and update function:
def filtering(chosen_xvar, chosen_yvar, chosen_channel1, chosen_channel2):
    """Redraw both scatter traces for the chosen variables and channels.

    `chosen_xvar` and `chosen_yvar` are keys of `vars_names`; trace 0 shows
    `chosen_channel1` and trace 1 shows `chosen_channel2`. Updates the global
    `fig` in place; returns nothing.
    """
    x_column = vars_names[chosen_xvar]
    y_column = vars_names[chosen_yvar]

    # Filter by channel. Boolean indexing already returns a new frame, so the
    # whole DataFrame is not copied (twice) on every widget event.
    frames = [
        df_videos[df_videos["channel_title"] == channel]
        for channel in (chosen_channel1, chosen_channel2)
    ]

    # Update the figure (single redraw for all property changes):
    with fig.batch_update():
        fig.layout.xaxis.title = "<b>" + chosen_xvar + "</b>"
        fig.layout.yaxis.title = "<b>" + chosen_yvar + "</b>"
        for trace, channel, df_channel in zip(
            fig.data, (chosen_channel1, chosen_channel2), frames
        ):
            trace.x = df_channel[x_column]
            trace.y = df_channel[y_column]
            trace.name = channel
            # Hover content must follow the rows currently plotted.
            trace.text = df_channel["video_title"]
            trace.customdata = df_channel[custom_vars]

# Event handlers: forward the changed value plus the other widgets' current values.
def eventhandler_xvar(change):
    """Handle a change of the x dropdown."""
    filtering(
        chosen_xvar = change.new,
        chosen_yvar = input_yvar.value,
        chosen_channel1 = input_channel1.value,
        chosen_channel2 = input_channel2.value
    )

def eventhandler_yvar(change):
    """Handle a change of the y dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = change.new,
        chosen_channel1 = input_channel1.value,
        chosen_channel2 = input_channel2.value
    )

def eventhandler_channel1(change):
    """Handle a change of the first channel dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_channel1 = change.new,
        chosen_channel2 = input_channel2.value
    )

def eventhandler_channel2(change):
    """Handle a change of the second channel dropdown."""
    filtering(
        chosen_xvar = input_xvar.value,
        chosen_yvar = input_yvar.value,
        chosen_channel1 = input_channel1.value,
        chosen_channel2 = change.new
    )

# Observes: react only to changes of each widget's `value` trait.
input_xvar.observe(eventhandler_xvar, names = "value")
input_yvar.observe(eventhandler_yvar, names = "value")
input_channel1.observe(eventhandler_channel1, names = "value")
input_channel2.observe(eventhandler_channel2, names = "value")

# Row of filters:
row_filters = widgets.HBox(
    children = [input_xvar, input_yvar, input_channel1, input_channel2]
)

# Main box (last expression of the cell, so it is what gets displayed):
widgets.VBox(children = [row_filters, fig])

VBox(children=(HBox(children=(Dropdown(description='x: ', options=('Views', 'Likes', 'Dislikes', 'Comments', '…

## Correlation matrix

In [18]:
# Variables and filters widgets:
input_channels = widgets.Dropdown(
    options = opts_channel,
    value = opts_channel[0],
    description = "Channel: "
)

# Palette: n_colors hex colours sampled from a colormap built on my_colors.
n_colors = 100
my_colors = ["#000000", "#E008F8", "#F81D08", "#F88A08", "#F7FE04"]
cmap = LinearSegmentedColormap.from_list("my_palette", my_colors)
my_palette = [to_hex(cmap(step / n_colors)) for step in range(n_colors)]

# Initialize the figure with the correlation matrix of the first channel.
# Axis labels (keys of vars_names) and matrix columns (values of vars_names)
# come from the same dict, so they are in the same order.
xy_names = list(vars_names.keys())
df_plot = df_videos[df_videos["channel_title"] == opts_channel[0]]
df_plot = df_plot[list(vars_names.values())]
corr_init = df_plot.corr()
fig = go.FigureWidget(
    data = [
        go.Heatmap(
            x = xy_names,
            y = xy_names,
            z = corr_init,
            colorscale = my_palette,
            colorbar = dict(
                title = "<b>Pearson correlation </b>"
            ),
            # Fixed colour range so colours are comparable between channels.
            zmin = -1,
            zmax = 1,
            # `<br>` is the line-break tag plotly understands (`</br>` is not),
            # and `.3f` is a valid d3 format for the coefficient.
            hovertemplate = "<b>" +
                            "%{x}<br>" +
                            "%{y}<br>" +
                            "Correlation: %{z:.3f}</b><extra></extra>"
        )
    ],
    layout = go.Layout(
        font = dict(
            size = 18
        ),
        showlegend = False,
        plot_bgcolor = "white",
        hoverlabel = dict(
            font_size = 18,
            font_family = "Rockwell"
        ),
        margin = dict(
            l = 400,
            r = 20,
            t = 20,
            b = 20
        ),
        height = 600,
        width = 1100
    )
)

# Filter and update function:
def filtering(chosen_channel):
    """Recompute the correlation matrix for one channel and redraw the heatmap.

    `chosen_channel` is a channel title. The matrix is `DataFrame.corr()` of
    the columns listed in `vars_names`. Updates the global `fig` in place;
    returns nothing.
    """
    # Select the channel's rows and the plotted columns in one step. Boolean
    # indexing already returns a new frame, so the whole DataFrame is not
    # copied on every widget event.
    corr_vals = df_videos.loc[
        df_videos["channel_title"] == chosen_channel,
        list(vars_names.values())
    ].corr()

    # Update the figure:
    with fig.batch_update():
        fig.data[0].z = corr_vals

# Event handlers:
def eventhandler_channels(change):
    """Handle a change of the channel dropdown."""
    filtering(
        chosen_channel = change.new
    )

# Observes: react only to changes of the widget's `value` trait.
input_channels.observe(eventhandler_channels, names = "value")

# Row of filters:
row_filters = widgets.HBox(children = [input_channels])

# Main box (last expression of the cell, so it is what gets displayed):
widgets.VBox(children = [row_filters, fig])

VBox(children=(HBox(children=(Dropdown(description='Channel: ', options=('3Blue1Brown', 'AWE me', 'Adam Savage…