In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import streamlit as st
from datetime import datetime

In [6]:
# NOTE(review): st.cache is deprecated in newer Streamlit releases in favour of
# st.cache_data / st.cache_resource; kept unchanged because the installed
# Streamlit version is not visible from this notebook - confirm before upgrading.
@st.cache
def load_data():
    """Load the four dataframes used by the notebook and do light feature engineering.

    Reads CSV files from the relative ``data/`` directory.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_agg, df_agg_sub, df_comments, df_time)``:

        * ``df_agg`` - per-video metrics with the first row dropped, columns
          renamed, ``Video publish time`` and ``Average view duration`` parsed
          to datetimes, plus the derived columns ``Avg_duration_sec``,
          ``Engagement_ratio`` and ``Views / sub gained``; sorted by publish
          time, newest first.
        * ``df_agg_sub`` - unmodified read of the country / subscriber-status file.
        * ``df_comments`` - unmodified read of the per-video metrics file
          (see the review note in the body).
        * ``df_time`` - performance-over-time file with ``Date`` parsed to datetime.
    """
    # The first data row is dropped before renaming - presumably a totals/summary
    # row in the export; confirm against the raw CSV.
    df_agg = pd.read_csv('data/Aggregated_Metrics_By_Video.csv').iloc[1:, :]
    # Columns are renamed purely by position, so the CSV must keep this exact column order.
    df_agg.columns = ['Video','Video title','Video publish time','Comments added','Shares','Dislikes','Likes',
                      'Subscribers lost','Subscribers gained','RPM(USD)','CPM(USD)','Average % viewed','Average view duration',
                      'Views','Watch time (hours)','Subscribers','Your estimated revenue (USD)','Impressions','Impressions ctr(%)']
    df_agg['Video publish time'] = pd.to_datetime(df_agg['Video publish time'])

    # Vectorised parse of "H:MM:SS" strings. Like datetime.strptime, this anchors
    # the time on 1900-01-01, but a missing value becomes NaT instead of raising
    # TypeError and aborting the whole load.
    view_duration = pd.to_datetime(df_agg['Average view duration'], format='%H:%M:%S')
    df_agg['Average view duration'] = view_duration
    df_agg['Avg_duration_sec'] = (
        view_duration.dt.hour * 3600
        + view_duration.dt.minute * 60
        + view_duration.dt.second
    )

    # Share of views that produced any interaction (comment, share, dislike or like).
    df_agg['Engagement_ratio'] = (
        df_agg['Comments added'] + df_agg['Shares'] + df_agg['Dislikes'] + df_agg['Likes']
    ) / df_agg['Views']
    # Pandas division: rows with zero 'Subscribers gained' yield inf (or NaN for 0/0).
    df_agg['Views / sub gained'] = df_agg['Views'] / df_agg['Subscribers gained']

    # Reassign instead of inplace=True; the result is the same newest-first ordering.
    df_agg = df_agg.sort_values('Video publish time', ascending=False)

    df_agg_sub = pd.read_csv('data/Aggregated_Metrics_By_Country_And_Subscriber_Status.csv')
    # NOTE(review): this reads the same file as df_agg (without the row drop or
    # renaming). The variable name suggests a dedicated comments export was
    # intended - confirm the correct file name before changing the path.
    df_comments = pd.read_csv('data/Aggregated_Metrics_By_Video.csv')
    df_time = pd.read_csv('data/Video_Performance_Over_Time.csv')
    df_time['Date'] = pd.to_datetime(df_time['Date'])
    return df_agg, df_agg_sub, df_comments, df_time

# Run the loader once and unpack its four returned frames into notebook-level names.
df_agg, df_agg_sub, df_comments, df_time = load_data()

In [7]:
# Display the engineered per-video frame; the rendered output is truncated
# (first and last rows, with elided columns) rather than shown in full.
df_agg

Unnamed: 0,Video,Video title,Video publish time,Comments added,Shares,Dislikes,Likes,Subscribers lost,Subscribers gained,RPM(USD),...,Average view duration,Views,Watch time (hours),Subscribers,Your estimated revenue (USD),Impressions,Impressions ctr(%),Avg_duration_sec,Engagement_ratio,Views / sub gained
111,0jTtHYie3CU,Should You Be Excited About Web 3? (As a Data ...,2022-01-17,37,43,8,267,14,18,4.055,...,1900-01-01 00:02:38,4383,192.5779,4,16.549,65130,2.95,158,0.080995,243.500000
187,2RWwN5ZT4tA,Should @Luke Barousse Take This Data Analyst ...,2022-01-14,12,2,3,78,1,1,1.882,...,1900-01-01 00:00:38,2401,25.9375,0,1.720,25094,2.64,38,0.039567,2401.000000
64,rEWPqw6rMGI,The Only Data Science Explanation You Need,2022-01-10,62,141,5,722,28,136,5.971,...,1900-01-01 00:04:40,10277,801.5549,108,60.498,215491,2.22,280,0.090493,75.566176
59,o-wsyxWbPOw,We Need to Talk About The LinkedIn Machine Lea...,2022-01-03,65,36,12,592,10,78,5.321,...,1900-01-01 00:02:46,11808,545.6332,68,62.568,166915,3.32,166,0.059705,151.384615
32,xpIFS6jZbe8,How I Would Learn Data Science in 2022 (If I H...,2021-12-27,109,767,53,4413,46,2553,6.836,...,1900-01-01 00:04:29,79283,5945.5420,2507,528.286,1420968,3.31,269,0.067379,31.054837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,RRSRKf9eQxc,Should You Get A Masters in Data Science?,2018-11-14,56,41,10,276,2,81,7.398,...,1900-01-01 00:02:32,18488,782.5937,79,136.708,173610,8.40,152,0.020716,228.246914
190,IFceyuL6GZY,How I Became A Data Scientist From a Business ...,2018-11-12,11,33,4,168,0,81,4.419,...,1900-01-01 00:03:57,5515,363.4858,81,24.358,58816,5.72,237,0.039166,68.086420
204,Y_SMU701qlA,Predicting Season Long NBA Wins Using Multiple...,2018-07-10,7,45,2,159,1,34,2.883,...,1900-01-01 00:02:25,6863,276.7257,33,19.772,53865,4.03,145,0.031036,201.852941
138,qfRhKHV8-t4,Predicting Crypto-Currency Price Using RNN lST...,2017-11-18,28,114,18,247,1,111,1.326,...,1900-01-01 00:01:45,16558,487.2194,110,21.944,168508,5.65,105,0.024580,149.171171
