In [10]:
# Import reusable path setup
from setup_paths import setup_project_paths

# Get project paths
paths = setup_project_paths()

RAW_DIR = paths["RAW_DIR"]
PROCESSED_DIR = paths["PROCESSED_DIR"]

In [2]:
# 03_data_analysis.ipynb
import os
import pandas as pd


In [3]:
# Load cleaned data safely
clean_csv_path = 'data/processed/trending_videos_clean.csv'
if not os.path.exists(clean_csv_path):
    raise FileNotFoundError(f"{clean_csv_path} not found. Please run 02_data_cleaning.ipynb first.")

df = pd.read_csv(clean_csv_path)

In [4]:
# Descriptive statistics
print("Summary Statistics:")
print(df.describe())

Summary Statistics:
              views         likes      comments  category_id  title_length  \
count  5.000000e+01      50.00000     50.000000    50.000000     50.000000   
mean   6.726700e+05   28570.92000   1737.540000    16.040000     42.280000   
std    1.072167e+06   52379.81878   2708.224972     6.809237     15.502521   
min    1.603800e+04     541.00000      3.000000     1.000000     15.000000   
25%    1.545320e+05    4791.50000    356.000000    10.000000     32.000000   
50%    2.839725e+05   11416.50000    861.000000    20.000000     40.000000   
75%    6.040080e+05   25365.25000   2345.500000    20.000000     50.750000   
max    5.230773e+06  318573.00000  16980.000000    24.000000     89.000000   

       engagement_ratio  
count         50.000000  
mean           0.058766  
std            0.049100  
min            0.002040  
25%            0.018538  
50%            0.042257  
75%            0.083288  
max            0.238994  


In [5]:
# Top 10 videos by views
top_views = df.sort_values('views', ascending=False).head(10)
print("\nTop 10 Videos by Views:")
print(top_views[['title', 'channel', 'views']])


Top 10 Videos by Views:
                                                title               channel  \
1                 Reminders of Him | Official Trailer    Universal Pictures   
27  BOYFRIEND(MUSIC VIDEO) KARAN AUJLA| SUNANDA | ...           Karan Aujla   
5                   never should've played this again          CoryxKenshin   
2           Taco Tuesday Admin Abuse + New BRAINROTS!            CaylusBlox   
23                       steal a brainrot admin abuse            KreekCraft   
29                                 No, I Am Not Human            Markiplier   
24  Ayra Starr, Rema - Who’s Dat Girl (Official Mu...         AyraStarrVEVO   
8                    Steal A Brainrot WANTS ME DEAD..                Foltyn   
15  Madison Beer - bittersweet (Official Music Video)  MadisonBeerMusicVEVO   
37                      People are freaking out now..       Asmongold Clips   

      views  
1   5230773  
27  4461926  
5   3733843  
2   2008316  
23  1782526  
29  1474018  
24  100

In [6]:
# Top 10 videos by engagement ratio
top_engagement = df.sort_values('engagement_ratio', ascending=False).head(10)
print("\nTop 10 Videos by Engagement Ratio:")
print(top_engagement[['title', 'channel', 'engagement_ratio']])


Top 10 Videos by Engagement Ratio:
                                                title               channel  \
30              Jezzy, Arcangel - Que Sensación Remix         Jezzy El Chef   
40                      Upchurch - Throw Away (Audio)         Ryan Upchurch   
34                   “Gabriela” with Laufey | KATSEYE               KATSEYE   
42    Liltwin - Letter to Jack [Official Music Video]               Liltwin   
18          LE SSERAFIM (르세라핌) 'SPAGHETTI' MV TEASER🍅           HYBE LABELS   
9   Blo Ft Lil Baby - Kiss & Tell  (prod. By 2havinn)                   Blo   
10  Character Trailer - "Nefer: Shadowbearing Serp...        Genshin Impact   
21       T-Pain - Club Husband (Official Music Video)                T Pain   
32   Puscifer - "Self Evident" (Official Music Video)        pusciferdotcom   
15  Madison Beer - bittersweet (Official Music Video)  MadisonBeerMusicVEVO   

    engagement_ratio  
30          0.238994  
40          0.171415  
34          0.154961  
42

In [7]:
# Correlation analysis
print("\nCorrelation Matrix:")
print(df[['views','likes','comments','title_length','engagement_ratio']].corr())


Correlation Matrix:
                     views     likes  comments  title_length  engagement_ratio
views             1.000000  0.639756  0.563285      0.035233         -0.178753
likes             0.639756  1.000000  0.947053      0.073061          0.208136
comments          0.563285  0.947053  1.000000      0.066119          0.215105
title_length      0.035233  0.073061  0.066119      1.000000         -0.018781
engagement_ratio -0.178753  0.208136  0.215105     -0.018781          1.000000
