In [10]:
# 1) Load env vars and create engine
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Read connection settings from a local .env file (if present) into os.environ.
load_dotenv()

pg_user     = os.getenv("PG_USER")
pg_password = os.getenv("PG_PASSWORD")
pg_host     = os.getenv("PG_HOST")
pg_db       = os.getenv("PG_DB")

# Fail fast with a readable message: os.getenv returns None for an unset
# variable, which would otherwise end up in the URL as the literal text "None".
missing_vars = [
    name
    for name, value in (("PG_USER", pg_user), ("PG_HOST", pg_host), ("PG_DB", pg_db))
    if not value
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_vars)
    )

# Percent-encode the credentials so characters such as "@", ":", "/" or "%"
# are not parsed as URL delimiters. quote(..., safe="") is used rather than
# quote_plus so that spaces become "%20" instead of "+", which decodes
# correctly whether the consumer uses unquote or unquote_plus.
credentials = quote(pg_user, safe="")
if pg_password:
    credentials += ":" + quote(pg_password, safe="")

conn_str = f"postgresql+psycopg2://{credentials}@{pg_host}/{pg_db}"
engine   = create_engine(conn_str)

# 2) Show all rows in results
pd.set_option("display.max_rows", None)


## API Descriptive Analytics
**Business question:** Which five trending videos have gained views fastest (views per day since publication)?

In [11]:
# Descriptive query: top five rows of raw.yt_trending by view velocity,
# where velocity = view_count / whole days elapsed since the publish date.
api_desc_sql = """
WITH video_age AS (
  SELECT
    video_id,
    title,
    channel_name,
    view_count,
    published_at::date            AS pub_date,
    (CURRENT_DATE - published_at::date) AS days_online
  FROM raw.yt_trending
),
velocity AS (
  SELECT
    video_id,
    title,
    channel_name,
    days_online,
    -- when days_online is not positive (e.g. published today) the total
    -- view count is used as-is, which also avoids a division by zero
    CASE 
      WHEN days_online > 0 
      THEN view_count::float / days_online 
      ELSE view_count 
    END AS views_per_day
  FROM video_age
)
SELECT
  video_id,
  title,
  channel_name,
  days_online,
  -- cast to numeric so round(numeric,2) is available
  ROUND(views_per_day::numeric, 2) AS views_per_day
FROM velocity
-- PostgreSQL sorts NULLs first under DESC; NULLS LAST keeps rows with an
-- unknown view_count out of the top five. video_id breaks ties so the
-- result is deterministic across runs.
ORDER BY views_per_day DESC NULLS LAST, video_id
LIMIT 5;
"""
df_api_desc = pd.read_sql(api_desc_sql, engine)
df_api_desc


Unnamed: 0,video_id,title,channel_name,days_online,views_per_day
0,vaGf8fmtBr4,Maroon 5 - Priceless ft. LISA (Official Video),Maroon5VEVO,5,2159795.0
1,Lf566pMNCOw,AMISTA 💔 (VIDEO OFICIAL) | BLESSD ❌ OVY ON THE...,BLESSD EL BENDITO 💙,12,1468357.42
2,CVt4nCsJnzg,Anuel AA - BUGATTI (Video Oficial),Anuel AA,5,1399616.6
3,R2-yomhYAj4,"KATSEYE (캣츠아이) ""Gnarly"" Official MV",HYBE LABELS,7,1220691.57
4,F7dJY0jkpyU,YoungBoy Never Broke Again - Where I Been / Sh...,YoungBoy Never Broke Again,4,1202722.5


**Insight:** 

- Maroon5VEVO’s “Maroon 5 – Priceless ft. LISA” has the highest velocity at ~2.16 million views/day, roughly 47% faster than the next fastest video.

- BLESSD EL BENDITO’s “AMISTA…” follows at ~1.47 million/day, showing strong sustained interest over its 12-day run.


**Recommendation:** 

- Emulate Maroon 5’s release strategy: coordinate heavy promotion in the first 48 hours and optimize thumbnail/title to capture early momentum.

- Stagger content drops over a longer window (10–12 days) like “AMISTA…” to smooth out view velocity and maintain trending visibility.


**Prediction:**

- If we front‐load ad spend on Maroon 5–style launches and extend post-release promotion, we can boost initial velocity by 20–30% across new releases.

## API Diagnostic Analytics
**Business question:** For each video-length category (Short: < 60s, Medium: 60–300s, Long: > 300s), which video achieves the highest engagement rate (likes + comments per view)?

In [12]:
# Diagnostic query: the single most-engaging video in each duration bucket,
# where engagement_rate = (like_count + comment_count) / view_count.
# NOTE(review): this cell reassigns api_desc_sql / df_api_desc, the same names
# the descriptive cell uses, so the earlier result is overwritten. The names
# are kept here in case later cells read them; consider renaming to
# api_diag_sql / df_api_diag.
api_desc_sql = """
WITH binned AS (
  -- assign each video to a length bucket and compute its engagement rate
  SELECT
    video_id,
    title,
    duration_s,
    -- NULLIF turns a zero view_count into NULL so the division yields NULL
    -- instead of raising a division-by-zero error
    (like_count + comment_count)::numeric
      / NULLIF(view_count, 0) AS engagement_rate,
    CASE
      WHEN duration_s < 60   THEN 'SHORT'
      WHEN duration_s <= 300 THEN 'MEDIUM'
      ELSE 'LONG'
    END AS duration_category
  FROM raw.yt_trending
  -- a NULL duration would otherwise fall through to the ELSE branch and be
  -- mislabelled as 'LONG'
  WHERE duration_s IS NOT NULL
),
ranked_videos AS (
  -- rank videos within each bucket by engagement_rate
  SELECT
    video_id,
    title,
    duration_s,
    duration_category,
    engagement_rate,
    ROW_NUMBER() OVER (
      PARTITION BY duration_category
      -- PostgreSQL sorts NULLs first under DESC; NULLS LAST stops a video
      -- with an undefined rate from taking rank 1. video_id breaks ties.
      ORDER BY engagement_rate DESC NULLS LAST, video_id
    ) AS cat_rank
  FROM binned
)
SELECT
  video_id,
  title,
  duration_category,
  duration_s,
  ROUND(engagement_rate, 4) AS engagement_rate
FROM ranked_videos
WHERE cat_rank = 1
ORDER BY duration_category;
"""
df_api_desc = pd.read_sql(api_desc_sql, engine)
df_api_desc


Unnamed: 0,video_id,title,duration_category,duration_s,engagement_rate
0,F7dJY0jkpyU,YoungBoy Never Broke Again - Where I Been / Sh...,LONG,448,0.0675
1,T2XTfmx7pCw,DDG - what i prefer (Official Music Video),MEDIUM,129,0.0772


**Insight:** 

- The top performer is DDG – what i prefer (129 s, MEDIUM), with an engagement rate of 0.0772 (≈7.7 likes+comments per 100 views).

- The LONG category leader, YoungBoy Never Broke Again – Where I Been / Shot Callin, clocks in at 0.0675, showing medium-length content punches above its weight.

**Recommendation:** 

- Prioritize MEDIUM-length videos (60–300 s): they deliver the highest engagement per view.

- Incorporate DDG’s style cues—concise storytelling, pacing, and end-screen CTAs—to boost reaction rates.

**Prediction:**

- Shifting 70% of new uploads into the 2–4 minute range and mimicking DDG’s engagement drivers should uplift overall engagement by 10–15% within the next month.