# Basic data description and analysis

New dataset (25/01/2024)

In [32]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
import sys

sys.path.append("..")

from toxmon.utils import generate_latex_table, format_with_commas

In [34]:
import pandas as pd

# Load the inputs for this notebook: two pickled objects and the JSON channel
# list. `channel_list` is later indexed by its "name" column to look up
# "drama_category"; `df_channels` is the frame the summary table is built from.
# NOTE(review): unpickling can execute arbitrary code — only load .pkl files
# produced by this project / a trusted source.
df_channels = pd.read_pickle("../data/current_data/df_channels.pkl")
df_videos = pd.read_pickle("../data/current_data/df_videos.pkl")
channel_list = pd.read_json("../data/channels_list.json")

In [35]:
# Build a name -> drama_category lookup from the channel list, then tag every
# channel row with the category registered under its title. Titles that have
# no entry in the lookup end up as NaN in the "category" column.
category_lookup = channel_list.set_index("name")["drama_category"].to_dict()
df_channels["category"] = df_channels["title"].map(category_lookup)

In [40]:
# Build the per-channel summary table and hand it to the project's LaTeX
# table helper.
channel_desc = df_channels[
    ["title", "category", "view_count", "sub_count", "video_count"]
].copy()

# Column headers for the LaTeX output. Raw strings are used because "\#" is not
# a valid Python escape sequence: a plain literal emits a DeprecationWarning
# (SyntaxWarning on Python 3.12+). The resulting text — a backslash followed by
# '#' — is unchanged.
columns_rename_map = {
    "title": "Channel",
    "category": "Category",
    "view_count": r"\# Views",
    "sub_count": r"\# Subscribers",
    "video_count": r"\# Videos",
    # "reddit_videos": r"\# Videos with Reddit Threads",
}

# NOTE(review): not used anywhere in this cell — confirm whether a later cell
# needs it before removing.
index_rename_map = {"title": "Channel"}

# Cast the count columns to int so they format without a decimal part, order
# the channels by subscriber count (largest first), then turn the counts into
# display strings.
numeric_columns = ["view_count", "sub_count", "video_count"]
channel_desc = channel_desc.astype({col: int for col in numeric_columns})
channel_desc_table = channel_desc.sort_values("sub_count", ascending=False)
# Element-wise formatting, column by column. `DataFrame.applymap` is deprecated
# since pandas 2.1; `Series.map` inside `DataFrame.apply` gives the same result
# on both older and newer pandas versions.
channel_desc_table[numeric_columns] = channel_desc_table[numeric_columns].apply(
    lambda column: column.map(format_with_commas)
)

# NOTE(review): the caption and label say "Top 10", but no row limit is applied
# here and the recorded output lists 20 channels. Either add `.head(10)` above
# or update the caption/label (and any \ref to it) — confirm which is intended.
generate_latex_table(
    channel_desc_table,
    "Top 10 YouTube Channels with Reddit Threads",
    "tab:top_10_channels",
    columns_rename_map=columns_rename_map,
)

In [None]:
\begin{table}
\centering
\caption{Top 10 YouTube Channels with Reddit Threads}
\label{tab:top_10_channels}
\begin{tabular}{lccccc}
\toprule
\bfseries Channel & \bfseries Category & \bfseries \# Views & \bfseries \# Subscribers & \# Videos \\
\midrule
SSSniperWolf & Spike & 24,182,326,701 & 34,100,000 & 3,452 \\
James Charles & Popular & 4,213,732,824 & 23,900,000 & 538 \\
Logan Paul & Popular & 5,997,708,386 & 23,600,000 & 716 \\
Jake Paul & Popular & 7,438,794,846 & 20,500,000 & 1,148 \\
JennaMarbles & Popular & 1,816,664,262 & 19,700,000 & 250 \\
shane & Popular & 4,346,185,264 & 19,100,000 & 571 \\
David Dobrik & Popular & 7,208,182,928 & 17,700,000 & 536 \\
jeffreestar & Popular & 2,585,316,202 & 15,800,000 & 433 \\
Colleen Ballinger & Spike & 1,918,346,683 & 8,410,000 & 1,091 \\
The Gabbie Show & Popular & 134,197,836 & 5,150,000 & 46 \\
blndsundoll4mj & Popular & 953,678,678 & 5,100,000 & 2,259 \\
boogie2988 & Spike & 928,068,656 & 4,020,000 & 2,339 \\
Nikocado Avocado & Popular & 778,692,146 & 3,730,000 & 706 \\
The Completionist & Spike & 335,850,138 & 1,620,000 & 699 \\
iilluminaughtii & Spike & 254,152,195 & 1,310,000 & 633 \\
JessiSmiles & Spike & 107,469,141 & 995,000 & 189 \\
Yumi King & Spike & 147,378,377 & 851,000 & 1,087 \\
nickisnotgreen & Spike & 48,417,915 & 690,000 & 128 \\
Life Plus Cindy & Spike & 3,763,757 & 23,600 & 226 \\
lil lunchbox & Spike & 1,072,254 & 6,840 & 308 \\
\bottomrule
\end{tabular}
\end{table}
