In [1]:
import dash
from dash import html
from dash import dcc
from dash.dependencies import Input, Output, State

import plotly.express as px
import plotly.figure_factory as ff

import pandas as pd
import numpy as np

In [2]:
# Plotly template name passed as `template=` to the Plotly Express figures below.
temp_col = "plotly_dark"
# Color list reused as `color_discrete_sequence` for the Plotly Express figures
# and as `colorscale` for the correlation heatmap.
# Alternative palette that was tried: px.colors.diverging.Geyser
fig_color = px.colors.cyclical.Twilight

In [3]:
# External stylesheet loaded by the page in addition to any local assets.
css_link = "https://codepen.io/chriddyp/pen/bWLwgP.css"

# The Dash application object; the layout and callbacks below are attached to it.
app = dash.Dash(
    __name__,
    external_stylesheets=[css_link],
)

Read Data

In [4]:
# Load the dataset; the path is relative to the notebook's working directory.
# NOTE(review): per the df.info() output below, trending_date and publish_time
# load as object (string) columns - confirm the date plots do not need datetimes.
df = pd.read_csv("../Files/data.csv")

In [5]:
# Print the column names, non-null counts, dtypes and memory usage of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120746 entries, 0 to 120745
Data columns (total 14 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   trending_date  120746 non-null  object
 1   category_id    120746 non-null  int64 
 2   publish_time   120746 non-null  object
 3   tags           120746 non-null  object
 4   views          120746 non-null  int64 
 5   likes          120746 non-null  int64 
 6   dislikes       120746 non-null  int64 
 7   comment_count  120746 non-null  int64 
 8   Country        120746 non-null  object
 9   category       120746 non-null  object
 10  Year           120746 non-null  int64 
 11  Month          120746 non-null  int64 
 12  Day            120746 non-null  int64 
 13  Weekday        120746 non-null  object
dtypes: int64(8), object(6)
memory usage: 12.9+ MB


Prepare Numerical columns

In [6]:
# Raw count columns, and the names of their log-scaled twins (same name, upper-cased).
numerical_cols = ['views', 'likes', 'dislikes', 'comment_count']
log_numerical_cols = [name.upper() for name in numerical_cols]

# Add log(x + 1) versions of each count column to df; the +1 keeps zero counts finite.
for raw_name, log_name in zip(numerical_cols, log_numerical_cols):
    df[log_name] = np.log(df[raw_name] + 1)


Heatmap of the correlation between important features

In [7]:
# Pairwise correlation of the raw count columns, rounded to two decimals for the
# cell annotations. Row i / column j of the matrix belong to numerical_cols[i] / [j].
z = df[numerical_cols].corr().to_numpy()
# Reverse the ROW order only (columns keep the numerical_cols order).
z = np.around(z, decimals=2)[::-1]
# create_annotated_heatmap labels column j of z with x[j] and row i with y[i].
# Because only the rows were reversed, the y labels must be the reversed list and
# the x labels the original list; passing them the other way round shows every
# off-diagonal value under the wrong pair of features.
cor_fig = ff.create_annotated_heatmap(
    z,
    x=numerical_cols,
    y=numerical_cols[::-1],
    colorscale=fig_color,
)

# White tick labels.
# NOTE(review): this figure does not set template=temp_col like the other figures;
# confirm the page styling gives it a dark background, otherwise white ticks are hard to read.
cor_fig['layout']['yaxis']['tickfont']['color'] = "white"
cor_fig['layout']['xaxis']['tickfont']['color'] = "white"

Line Plot of date vs (views, count)

In [8]:
# Total views per country for each trending date.
# Only "views" is plotted, so only that column is aggregated. Summing the whole
# frame would also aggregate the text columns (tags, category, ...), which newer
# pandas versions concatenate as strings and older ones drop with a warning.
df1 = df.groupby(["Country", "trending_date"], as_index=False)["views"].sum()

# One line per country: summed views against trending date.
fig_line = px.line(
    df1,
    x="trending_date",
    y="views",
    color="Country",
    template=temp_col,
    color_discrete_sequence=fig_color,
)

Scatter Plot between Publish_Time vs Trending Date

In [9]:
# One marker per row of df: trending date on the x axis, publish time on the y axis.
fig_pub = px.scatter(
    df,
    x='trending_date',
    y="publish_time",
    template=temp_col,
    color_discrete_sequence=fig_color,
)

HTML Page Code

In [10]:
def _build_layout():
    """Build the component tree of the dashboard page.

    The page is a title, three rows of chart panels (CSS class ``con``) and a
    footer (CSS class ``footer``). Graphs that carry an ``id`` are filled in by
    the callbacks registered on ``app``; graphs given a ``figure`` display the
    figures computed earlier in the notebook (cor_fig, fig_line, fig_pub).

    Returns:
        The root ``html.Div`` to assign to ``app.layout``.
    """
    # --- Row 1: distribution / correlation / time-scale panels -------------
    # Radio values are the upper-cased log column names used as the histogram
    # x column by update_dis_fig; 'all' selects every log column at once.
    distribution_panel = html.Div(
        [
            html.P("Features Distribution."),
            dcc.RadioItems(
                id='radio',
                options=[
                    {'label': 'views', 'value': 'VIEWS'},
                    {'label': 'likes', 'value': 'LIKES'},
                    {'label': 'dislikes', 'value': 'DISLIKES'},
                    {'label': 'comments count', 'value': 'COMMENT_COUNT'},
                    {'label': 'All', 'value': 'all'},
                ],
                value='all',
            ),
            dcc.Graph(id='graph'),
        ]
    )

    correlation_panel = html.Div(
        [
            # Heading text corrected (was misspelled "Correaltion").
            html.P("Correlation Between Numerical Features."),
            dcc.Graph(figure=cor_fig),
        ],
        className="heatmap",
    )

    # Dropdown values are df column names used as the histogram x column
    # by update_my_div.
    time_scale_panel = html.Div(
        [
            html.P("Time Scale."),
            dcc.Dropdown(
                id='dropdown1',
                options=[{'label': i, 'value': i} for i in ["Year", "Month", "Day", "Weekday"]],
                value="Year",
            ),
            html.P("Trending Videos Over Time."),
            dcc.Graph(id='my_figure'),
        ]
    )

    overview_row = html.Div(
        [distribution_panel, correlation_panel, time_scale_panel],
        className="con",
    )

    # --- Row 2: static time-series figures ----------------------------------
    views_panel = html.Div(
        [
            html.P("Views Over Time"),
            dcc.Graph(figure=fig_line),
        ]
    )

    publish_panel = html.Div(
        [
            html.P("Trending Date VS Publish Date"),
            dcc.Graph(figure=fig_pub),
        ]
    )

    time_series_row = html.Div(
        [views_panel, publish_panel],
        className="con",
    )

    # --- Row 3: per-category statistics --------------------------------------
    # "Count Videos" is the sentinel update_cat_div treats as a plain row count;
    # the other values are df column names aggregated per category.
    category_selector = html.Div(
        [
            html.P("Select Feature"),
            dcc.Dropdown(
                id='cat_dropdown',
                options=[
                    {'label': i, 'value': i}
                    for i in ["Count Videos", "views", "likes", "dislikes", 'comment_count']
                ],
                value="Count Videos",
            ),
        ],
        className="drop_con",
    )

    category_panel = html.Div(
        [
            html.P("Statistics About Each Category."),
            dcc.Graph(id="cat_figure"),
            category_selector,
        ]
    )

    category_row = html.Div(
        [category_panel],
        className="con",
    )

    # --- Footer ---------------------------------------------------------------
    authors = html.Div(
        [
            html.Span("copyright ©"),
            html.A("Noha Saeed", href="https://www.linkedin.com/in/noha-saeed-762142205/"),
            html.A("Sara Zaky", href="https://www.linkedin.com/in/sarazaky/"),
        ]
    )

    repository = html.Div(
        [
            html.Span("GitHub: "),
            html.A("Trending YouTube Video", href="https://github.com/sarazaky/Trending-YouTube-Video"),
        ]
    )

    footer = html.Div(
        [authors, repository],
        className="footer",
    )

    return html.Div(
        [
            html.H4("Trending YouTube Video Statistics"),
            overview_row,
            time_series_row,
            category_row,
            footer,
        ]
    )


app.layout = _build_layout()

Distribution of the (log-transformed) numerical features

In [11]:
@app.callback(
    Output(component_id='graph', component_property='figure'),
    [Input(component_id='radio', component_property='value')]
)
def update_dis_fig(radio_value):
    """Redraw the distribution histogram when the radio selection changes.

    Args:
        radio_value: the selected radio value - either the name of one
            log-scaled column of df, or "all" for every log-scaled column.

    Returns:
        A Plotly Express histogram figure for the 'graph' component.
    """
    # "all" plots every log column together (wide-form input); any other
    # value is used directly as the single x column.
    x_columns = log_numerical_cols if radio_value == "all" else radio_value
    return px.histogram(
        df,
        x=x_columns,
        color_discrete_sequence=fig_color,
        template=temp_col,
    )

No. of trending videos in each category for each country

In [12]:
@app.callback(
    Output(component_id="cat_figure", component_property="figure"),
    Input(component_id="cat_dropdown", component_property="value")
)
def update_cat_div(drop_value):
    """Redraw the per-category histogram when the feature dropdown changes.

    Args:
        drop_value: the selected dropdown value - "Count Videos" for a plain
            row count per category, otherwise the df column to aggregate.

    Returns:
        A Plotly Express histogram figure, colored by Country, for the
        'cat_figure' component.
    """
    # y=None is px.histogram's default and yields a row count per category;
    # any other selection is passed through as the y column.
    y_column = None if drop_value == "Count Videos" else drop_value
    return px.histogram(
        df,
        x='category',
        y=y_column,
        color='Country',
        color_discrete_sequence=fig_color,
        template=temp_col,
    )

No. of Rows for each Time Period

In [13]:
@app.callback(
    Output(component_id="my_figure", component_property="figure"),
    
    Input(component_id="dropdown1", component_property="value") 
)
def update_my_div(drop_value):
    """Redraw the rows-per-time-period histogram when the time scale changes.

    Args:
        drop_value: the df column selected in 'dropdown1' ("Year", "Month",
            "Day" or "Weekday"), or None when the dropdown has been cleared.

    Returns:
        A Plotly Express histogram figure colored by Country, or
        ``dash.no_update`` (keep the current figure) when nothing is selected.
    """
    # The dropdown can be cleared, which delivers None. Without an x column
    # px.histogram would try to interpret the whole frame as wide-form data,
    # so keep the figure that is already displayed instead.
    if drop_value is None:
        return dash.no_update

    fig = px.histogram(df, x=drop_value, color='Country', color_discrete_sequence=fig_color, template= temp_col)
    return fig

In [None]:
# Start the development server (blocks this cell until interrupted).
# Newer Dash releases expose `app.run` and have dropped `run_server`; older 2.x
# releases only have `run_server`. Prefer `run` when present so the cell works on both.
_run_app = getattr(app, "run", None) or app.run_server
_run_app()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [28/Oct/2021 15:40:14] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:15] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:15] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:16] "[37mGET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:16] "[37mGET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:16] "[37mGET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:17] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:17] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:40:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Oct/2021 15:41:58] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0