<a href="https://colab.research.google.com/github/sahil9022-crypto/data-science-project-all-/blob/main/youtube_trending_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Create the project directory together with its data/ subdirectory.
# -p creates missing parents and does not fail if the directories already
# exist, so this cell can be re-run safely.
!mkdir -p youtube_trending/data

# Create the empty placeholder files, all inside the project directory
!touch youtube_trending/app.py
!touch youtube_trending/requirements.txt
!touch youtube_trending/utils.py
!touch youtube_trending/youtube_model.py

In [23]:
# Change the notebook's working directory to the project folder.
%cd youtube_trending
# Install the packages listed in requirements.txt. The scaffold cell above only
# creates this file with `touch`, so nothing is installed unless it has been filled in.
!pip install -r requirements.txt

/content/youtube_trending/youtube_trending


In [24]:
pip install gradio pandas requests plotly scikit-learn


In [25]:
#!/usr/bin/env python3
"""
YouTube Trending Dashboard (Gradio)
Single-file app. Save as app_gradio.py and run: python app_gradio.py
"""

import time
import requests
import pandas as pd
import numpy as np
import gradio as gr
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.dummy import DummyRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

# Simple in-memory cache to avoid repeated API calls while testing
CACHE = {}
CACHE_TTL = 300  # seconds


def get_category_map(api_key: str, region: str):
    """Look up the YouTube category id -> title table for one region.

    This is a best-effort helper: any failure (network error, non-2xx status,
    unexpected payload shape) results in an empty dict rather than an exception.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/videoCategories"
    query = {"part": "snippet", "regionCode": region, "key": api_key}
    try:
        response = requests.get(endpoint, params=query, timeout=12)
        response.raise_for_status()
        mapping = {}
        for entry in response.json().get("items", []):
            # Entries without an id cannot be keyed, so they are skipped.
            if "id" in entry:
                mapping[entry["id"]] = entry["snippet"]["title"]
        return mapping
    except Exception:
        return {}


def fetch_trending_videos(api_key: str, region: str = "IN", max_results: int = 20, use_cache: bool = True):
    """Fetch trending videos from YouTube Data API (videos.list with chart=mostPopular).

    Args:
        api_key: YouTube Data API v3 key.
        region: Region code sent as ``regionCode``.
        max_results: Value sent as ``maxResults``.
        use_cache: When true, reuse a result stored in ``CACHE`` for the same
            (api_key, region, max_results) if it is younger than ``CACHE_TTL`` seconds.

    Returns:
        A ``(DataFrame, error)`` tuple on every path. ``error`` is ``None`` on
        success (live or cached); on failure the DataFrame is empty and
        ``error`` is a human-readable message.
    """
    cache_key = (api_key, region, max_results)
    now = time.time()
    if use_cache and cache_key in CACHE:
        ts, data = CACHE[cache_key]
        if now - ts < CACHE_TTL:
            # Same (df, error) shape as a live fetch, so callers that unpack
            # two values keep working on cache hits.
            return data.copy(), None

    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet,statistics",
        "chart": "mostPopular",
        "regionCode": region,
        "maxResults": max_results,
        "key": api_key,
    }

    try:
        r = requests.get(url, params=params, timeout=12)
        r.raise_for_status()
        payload = r.json()
        items = payload.get("items", [])
    except Exception as e:
        detail = str(e)
        # The exception text can include the request URL (and thus the key in
        # its query string); mask it before the message reaches the UI.
        if api_key:
            detail = detail.replace(str(api_key), "***")
        return pd.DataFrame(), f"Error fetching from YouTube API: {detail}"

    videos = []
    for item in items:
        snippet = item.get("snippet", {})
        stats = item.get("statistics", {})
        thumbnails = snippet.get("thumbnails", {})
        vid = {
            "videoId": item.get("id"),
            "title": snippet.get("title", ""),
            "channel": snippet.get("channelTitle", ""),
            "categoryId": snippet.get("categoryId", None),
            "publish_time": snippet.get("publishedAt", None),
            # Counters may be absent (or null); treat those as 0.
            "views": int(stats.get("viewCount") or 0),
            "likes": int(stats.get("likeCount") or 0),
            "comments": int(stats.get("commentCount") or 0),
            # Prefer the high-resolution thumbnail, fall back to the default one.
            "thumbnail": (
                thumbnails.get("high", {}).get("url")
                or thumbnails.get("default", {}).get("url")
            ),
        }
        videos.append(vid)

    df = pd.DataFrame(videos)
    # Store a copy so later mutation of the returned frame cannot alter the cache.
    CACHE[cache_key] = (now, df.copy())
    return df, None


def preprocess_df(df: pd.DataFrame, api_key: str = None, region: str = "IN"):
    """Derive the analysis columns from a raw trending-videos frame.

    Parses ``publish_time``, adds the publish ``hour`` (-1 when the timestamp
    cannot be parsed), computes ``engagement_score`` as
    ``(likes + comments) / views`` and, when an API key is supplied, tries to
    translate ``categoryId`` into a category name. The result is sorted by
    views (descending) and restricted to a fixed column order. ``None`` or an
    empty input yields an empty DataFrame.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    out = df.copy()

    # Timestamps: unparseable values become NaT, which maps to hour -1.
    out["publish_time"] = pd.to_datetime(out["publish_time"], errors="coerce")
    publish_hour = out["publish_time"].dt.hour
    out["hour"] = publish_hour.fillna(-1).astype(int)

    # Engagement: a zero view count is replaced by 1 to keep the division safe.
    out["views_safe"] = out["views"].replace({0: 1})
    interactions = out["likes"] + out["comments"]
    out["engagement_score"] = interactions / out["views_safe"]

    # Category names: only looked up when a key is available; ids that are
    # missing from the lookup (or a failed lookup) keep the raw categoryId.
    cat_map = get_category_map(api_key, region) if api_key else None
    if cat_map:
        out["category"] = out["categoryId"].map(lambda cid: cat_map.get(str(cid), cid))
    else:
        out["category"] = out["categoryId"]

    # Most-viewed first, then keep only the presentation columns.
    ranked = out.sort_values("views", ascending=False).reset_index(drop=True)
    columns = [
        "videoId",
        "title",
        "channel",
        "category",
        "publish_time",
        "hour",
        "views",
        "likes",
        "comments",
        "engagement_score",
        "thumbnail",
    ]
    return ranked[columns]


def train_predict_model(df: pd.DataFrame):
    """Train a model to predict engagement_score and score the first row.

    Args:
        df: Preprocessed frame with ``views``, ``hour``, ``channel`` and
            ``engagement_score`` columns. The first row is treated as the
            "top video" to predict for.

    Returns:
        ``(summary, prediction)``. ``prediction`` is ``None`` when there is no
        data; with fewer than 5 rows, or if training fails, it is the mean
        engagement score instead of a model output.
    """
    if df is None or df.empty:
        return "No data to train on.", None

    df_model = df.copy()
    # Too few rows to split and train meaningfully: fall back to the mean.
    if len(df_model) < 5:
        mean_eng = float(df_model["engagement_score"].mean())
        return (
            f"Not enough records to train a RandomForest (n={len(df_model)}). "
            f"Using mean engagement = {mean_eng:.6f} as prediction for top video.",
            mean_eng,
        )

    # Encode channel (categorical) as integer labels.
    le_channel = LabelEncoder()
    df_model["channel_enc"] = le_channel.fit_transform(df_model["channel"].astype(str))

    feature_cols = ["views", "hour", "channel_enc"]
    X = df_model[feature_cols]
    y = df_model["engagement_score"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.18, random_state=42)
    try:
        model = RandomForestRegressor(n_estimators=150, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # Take the square root explicitly: the `squared=False` keyword is not
        # accepted by recent scikit-learn releases, which made this branch
        # always fall through to the except-handler.
        rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
        # Reuse the already-encoded first row as a DataFrame so the feature
        # names match those seen during fit.
        feat_top = df_model.iloc[[0]][feature_cols]
        pred_top = float(model.predict(feat_top)[0])
        summary = f"RandomForest trained. Test RMSE = {rmse:.6f}. Predicted engagement for top video = {pred_top:.6f}."
        return summary, pred_top
    except Exception as e:
        # Best-effort fallback: report the failure and predict the training mean.
        mean_eng = y_train.mean()
        return f"Model training failed ({e}). Using mean engagement = {mean_eng:.6f} as prediction for top video.", float(mean_eng)


def make_plots(df: pd.DataFrame):
    """Build the three Plotly figures shown in the dashboard.

    Returns ``(views_by_hour, engagement_histogram, top_channels)``, or three
    ``None`` values when there is no data to plot.
    """
    if df is None or df.empty:
        return None, None, None

    # Figure 1: mean views for each publish hour, ordered by hour.
    hourly = df.groupby("hour", as_index=False)["views"].mean()
    hourly = hourly.sort_values("hour")
    hourly_fig = px.bar(
        hourly,
        x="hour",
        y="views",
        labels={"hour": "Hour of day", "views": "Avg views"},
        title="⏰ Average Views by Publish Hour",
    )

    # Figure 2: histogram of the engagement score.
    engagement_fig = px.histogram(
        df,
        x="engagement_score",
        nbins=25,
        title="📊 Engagement Score Distribution",
        labels={"engagement_score": "Engagement Score (likes+comments)/views"},
    )

    # Figure 3: the ten channels with the highest summed views.
    per_channel = df.groupby("channel", as_index=False)["views"].sum()
    leaders = per_channel.sort_values("views", ascending=False).head(10)
    channels_fig = px.bar(leaders, x="channel", y="views", title="⭐ Top 10 Channels by Total Views")
    channels_fig.update_layout(xaxis={"categoryorder": "total descending"})

    return hourly_fig, engagement_fig, channels_fig


def run_all(api_key: str, region: str, max_results: int, use_cache: bool):
    """Main pipeline used by the Gradio button.

    Validates the key, fetches and preprocesses the trending videos, builds the
    plots, trains the small model and assembles a Markdown summary.

    Returns:
        ``(DataFrame, fig1, fig2, fig3, markdown)``. On any failure the
        DataFrame is empty, the figures are ``None`` and the Markdown string
        carries the reason.
    """

    def _failure(message: str):
        # One place that builds the "nothing to show" output tuple.
        return pd.DataFrame(), None, None, None, message

    key = (api_key or "").strip()
    if not key:
        return _failure("❗ Please provide a valid YouTube Data API v3 key.")

    try:
        df_raw, error = fetch_trending_videos(api_key=key, region=region, max_results=int(max_results), use_cache=use_cache)
        if error:
            # Report the fetch failure itself instead of letting it surface
            # as a misleading "no trending videos" message.
            return _failure(f"❗ {error}")

        df = preprocess_df(df_raw, api_key=key, region=region)
        msg = f"Fetched {len(df)} records from region {region}."
        if df.empty:
            return _failure(f"No trending videos returned for region {region}. {msg}")

        fig1, fig2, fig3 = make_plots(df)
        model_summary, pred_top = train_predict_model(df)

        # Markdown summary; the first row of df is used as the top video.
        top_video = df.iloc[0]
        video_url = f"https://www.youtube.com/watch?v={top_video['videoId']}" if top_video.get("videoId") else ""
        md = f"**{msg}**\n\n{model_summary}\n\n**Top video (by views):** [{top_video['title']}]({video_url}) — views: {top_video['views']:,}\n"
        if pd.notna(top_video.get("engagement_score")):
            md += f"- Actual engagement score: {top_video['engagement_score']:.6f}\n"
        if pred_top is not None:
            md += f"- Predicted engagement score: **{pred_top:.6f}**\n"

        return df, fig1, fig2, fig3, md
    except Exception as e:
        # UI boundary: show the problem in the page rather than crash the handler.
        return _failure(f"Unexpected error: {str(e)}")


# === Gradio App ===
# Introductory Markdown rendered at the top of the page.
title_md = """
# 🎥 YouTube Trending Video Stats (Gradio)
Enter your **YouTube Data API v3** key (create one in Google Cloud Console), choose a region and number of videos,
then click **Fetch**. The app will display a table, three plots, and a small ML summary/prediction.
"""

# Page layout: one row with an input column (scale 1) and an output column (scale 2).
with gr.Blocks(css=".gradio-container { max-width: 1200px; }") as demo:
    gr.Markdown(title_md)
    with gr.Row():
        # Inputs: API key, region, result count, cache toggle and the fetch button.
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(label="YouTube Data API v3 Key", placeholder="Paste your API key here", lines=1, type="password")
            region_input = gr.Dropdown(label="Region (ISO country code)", choices=["IN", "US", "GB", "CA", "JP", "DE", "FR", "KR", "BR", "AU"], value="IN")
            max_results_input = gr.Slider(label="Number of videos to fetch", minimum=5, maximum=50, value=20, step=1)
            cache_checkbox = gr.Checkbox(label="Use short in-memory cache (reduces repeated API calls while testing)", value=True)
            fetch_btn = gr.Button("Fetch Trending Videos", variant="primary")
            gr.Markdown("**Notes:** You may hit API quota limits if you query too often. Likes/comments may be missing on some videos.")
            gr.Markdown("To obtain an API key: Google Cloud Console → enable YouTube Data API v3 → create API key.")

        # Outputs: the table headers mirror the column order produced by preprocess_df.
        with gr.Column(scale=2):
            df_out = gr.Dataframe(headers=["videoId","title","channel","category","publish_time","hour","views","likes","comments","engagement_score","thumbnail"], label="Trending Videos (table)")
            plot1_out = gr.Plot(label="Avg Views by Hour")
            plot2_out = gr.Plot(label="Engagement Distribution")
            plot3_out = gr.Plot(label="Top Channels by Views")
            md_out = gr.Markdown("")

    # The order of `outputs` must match the 5-tuple returned by run_all.
    fetch_btn.click(fn=run_all, inputs=[api_key_input, region_input, max_results_input, cache_checkbox],
                    outputs=[df_out, plot1_out, plot2_out, plot3_out, md_out])

# Start the server only when executed as a script: host 0.0.0.0, port 7860, no share link.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

In [27]:
!pip install gradio requests pandas

import requests
import pandas as pd
import gradio as gr

# -------------------------------
# Function to fetch trending videos
# -------------------------------
def fetch_trending(api_key, region="IN", max_results=10):
    """Fetch the most popular videos for a region as a display-ready table.

    Args:
        api_key: YouTube Data API v3 key.
        region: Region code sent as ``regionCode``.
        max_results: Value sent as ``maxResults``.

    Returns:
        A DataFrame with one row per video, or a one-row DataFrame with a
        single ``Error`` column when the request fails.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet,statistics",
        "chart": "mostPopular",
        "regionCode": region,
        "maxResults": max_results,
        "key": api_key
    }

    try:
        # Bounded wait so an unresponsive endpoint cannot hang the UI callback.
        response = requests.get(url, params=params, timeout=12)
    except requests.RequestException as exc:
        detail = str(exc)
        # The exception text can contain the request URL, including the key.
        if api_key:
            detail = detail.replace(str(api_key), "***")
        return pd.DataFrame([{"Error": f"Request failed: {detail}"}])

    if response.status_code != 200:
        try:
            detail = response.json()
        except ValueError:
            # Error body was not JSON; show the raw text instead of raising.
            detail = response.text
        return pd.DataFrame([{"Error": detail}])

    data = response.json()

    videos = []
    for item in data.get("items", []):
        snippet = item["snippet"]
        stats = item.get("statistics", {})
        videos.append({
            "Title": snippet["title"],
            "Channel": snippet["channelTitle"],
            "Published": snippet["publishedAt"],
            # Counters are kept as the strings the API returns; "0" when absent.
            "Views": stats.get("viewCount", "0"),
            "Likes": stats.get("likeCount", "0"),
            "Comments": stats.get("commentCount", "0"),
            "Video URL": f"https://www.youtube.com/watch?v={item['id']}"
        })

    return pd.DataFrame(videos)

# -------------------------------
# Gradio App
# -------------------------------
# Single-page UI: three inputs, a button, and a table showing the result.
with gr.Blocks() as demo:
    gr.Markdown("## 📊 YouTube Trending Video Stats")

    # Inputs passed positionally to fetch_trending(api_key, region, max_results).
    api_key = gr.Textbox(label="🔑 Enter YouTube API Key", type="password", placeholder="Paste your API key here")
    region = gr.Dropdown(choices=["IN", "US", "GB", "JP", "BR"], value="IN", label="🌍 Region")
    max_results = gr.Slider(5, 50, step=5, value=10, label="Number of Videos")

    btn = gr.Button("🚀 Fetch Trending Videos")
    # Headers mirror the dict keys built in fetch_trending.
    output = gr.Dataframe(headers=["Title", "Channel", "Published", "Views", "Likes", "Comments", "Video URL"],
                          interactive=False)

    # The DataFrame returned by fetch_trending is rendered in the table.
    btn.click(fn=fetch_trending, inputs=[api_key, region, max_results], outputs=output)

# Launch app with public link
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://59eeb24866289eb1e8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [28]:
!pip install gradio requests pandas seaborn matplotlib

import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import gradio as gr

# -------------------------------
# Function to fetch trending videos
# -------------------------------
def fetch_trending(api_key, region="IN", max_results=10):
    """Fetch the most popular videos for a region, for both table and charts.

    Args:
        api_key: YouTube Data API v3 key.
        region: Region code sent as ``regionCode``.
        max_results: Value sent as ``maxResults``.

    Returns:
        ``(df, df)`` on success (the same frame for the table and for
        plotting), or ``(error_df, None)`` on failure, where ``error_df`` has a
        single ``Error`` column.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet,statistics",
        "chart": "mostPopular",
        "regionCode": region,
        "maxResults": max_results,
        "key": api_key
    }

    try:
        # Bounded wait so an unresponsive endpoint cannot hang the UI callback.
        response = requests.get(url, params=params, timeout=12)
    except requests.RequestException as exc:
        detail = str(exc)
        # The exception text can contain the request URL, including the key.
        if api_key:
            detail = detail.replace(str(api_key), "***")
        return pd.DataFrame([{"Error": f"Request failed: {detail}"}]), None

    if response.status_code != 200:
        try:
            detail = response.json()
        except ValueError:
            # Error body was not JSON; show the raw text instead of raising.
            detail = response.text
        return pd.DataFrame([{"Error": detail}]), None

    data = response.json()

    videos = []
    for item in data.get("items", []):
        snippet = item["snippet"]
        stats = item.get("statistics", {})
        videos.append({
            "Title": snippet["title"],
            "Channel": snippet["channelTitle"],
            "Published": snippet["publishedAt"],
            # Counters are converted to int; absent ones count as 0.
            "Views": int(stats.get("viewCount", 0)),
            "Likes": int(stats.get("likeCount", 0)),
            "Comments": int(stats.get("commentCount", 0)),
            "Video URL": f"https://www.youtube.com/watch?v={item['id']}"
        })

    df = pd.DataFrame(videos)
    return df, df  # return twice (one for table, one for plotting)

# -------------------------------
# Function to generate Seaborn charts
# -------------------------------
def plot_charts(df):
    """Draw the two-panel Seaborn figure for the fetched videos.

    Args:
        df: Frame with ``Channel``, ``Views``, ``Likes`` and ``Title`` columns,
            an error frame (single ``Error`` column), or ``None``.

    Returns:
        The Matplotlib figure, or ``None`` when there is nothing to plot.
    """
    # An empty frame (zero items returned) has no "Channel"/"Views" columns, so
    # grouping would raise KeyError; treat it like the error cases.
    if df is None or df.empty or "Error" in df.columns:
        return None

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Chart 1: Top Channels by Views
    views_per_channel = df.groupby("Channel")["Views"].sum().reset_index()
    top_channels = views_per_channel.sort_values("Views", ascending=False).head(5)
    sns.barplot(data=top_channels, x="Views", y="Channel", palette="Blues_r", ax=axes[0])
    axes[0].set_title("🔥 Top 5 Channels by Total Views")

    # Chart 2: Top 5 Videos by Likes
    top_likes = df.sort_values("Likes", ascending=False).head(5)
    sns.barplot(data=top_likes, x="Likes", y="Title", palette="Greens_r", ax=axes[1])
    axes[1].set_title("👍 Top 5 Videos by Likes")

    plt.tight_layout()
    return fig

# -------------------------------
# Gradio App
# -------------------------------
# UI with two tabs: the data table and the Seaborn charts.
with gr.Blocks() as demo:
    gr.Markdown("## 📊 YouTube Trending Video Stats + Charts")

    # Inputs passed positionally to process(api_key, region, max_results).
    api_key = gr.Textbox(label="🔑 Enter YouTube API Key", type="password", placeholder="Paste your API key here")
    region = gr.Dropdown(choices=["IN", "US", "GB", "JP", "BR"], value="IN", label="🌍 Region")
    max_results = gr.Slider(5, 50, step=5, value=10, label="Number of Videos")

    btn = gr.Button("🚀 Fetch Trending Videos")

    with gr.Tab("📋 Data Table"):
        # Headers mirror the dict keys built in fetch_trending.
        output_table = gr.Dataframe(headers=["Title", "Channel", "Published", "Views", "Likes", "Comments", "Video URL"],
                                    interactive=False)

    with gr.Tab("📈 Charts"):
        output_chart = gr.Plot()

    def process(api_key, region, max_results):
        """Fetch the videos once and return (table frame, chart figure)."""
        # On a fetch error df_plot is None, so plot_charts returns None.
        df, df_plot = fetch_trending(api_key, region, max_results)
        fig = plot_charts(df_plot)
        return df, fig

    # Output order matches the (df, fig) tuple returned by process.
    btn.click(fn=process, inputs=[api_key, region, max_results], outputs=[output_table, output_chart])

# Launch app with public link
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://33965f4f562f7f8ad0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [29]:
!pip install gradio requests pandas seaborn matplotlib

import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import gradio as gr

# -------------------------------
# Function to fetch trending videos
# -------------------------------
def fetch_trending(api_key, region="IN", max_results=10):
    """Fetch the most popular videos for a region, for both table and charts.

    Args:
        api_key: YouTube Data API v3 key.
        region: Region code sent as ``regionCode``.
        max_results: Value sent as ``maxResults``.

    Returns:
        ``(df, df)`` on success (the same frame for the table and for
        plotting; ``Published`` is parsed to a timestamp), or
        ``(error_df, None)`` on failure, where ``error_df`` has a single
        ``Error`` column.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet,statistics",
        "chart": "mostPopular",
        "regionCode": region,
        "maxResults": max_results,
        "key": api_key
    }

    try:
        # Bounded wait so an unresponsive endpoint cannot hang the UI callback.
        response = requests.get(url, params=params, timeout=12)
    except requests.RequestException as exc:
        detail = str(exc)
        # The exception text can contain the request URL, including the key.
        if api_key:
            detail = detail.replace(str(api_key), "***")
        return pd.DataFrame([{"Error": f"Request failed: {detail}"}]), None

    if response.status_code != 200:
        try:
            detail = response.json()
        except ValueError:
            # Error body was not JSON; show the raw text instead of raising.
            detail = response.text
        return pd.DataFrame([{"Error": detail}]), None

    data = response.json()

    videos = []
    for item in data.get("items", []):
        snippet = item["snippet"]
        stats = item.get("statistics", {})
        videos.append({
            "Title": snippet["title"],
            "Channel": snippet["channelTitle"],
            # Parsed so the time-trend chart can sort and plot by date.
            "Published": pd.to_datetime(snippet["publishedAt"]),
            # Counters are converted to int; absent ones count as 0.
            "Views": int(stats.get("viewCount", 0)),
            "Likes": int(stats.get("likeCount", 0)),
            "Comments": int(stats.get("commentCount", 0)),
            "Video URL": f"https://www.youtube.com/watch?v={item['id']}"
        })

    df = pd.DataFrame(videos)
    return df, df  # one for table, one for plotting

# -------------------------------
# Function to generate Seaborn charts
# -------------------------------
def plot_charts(df):
    """Draw the three-panel Seaborn figure for the fetched videos.

    Args:
        df: Frame with ``Channel``, ``Views``, ``Likes``, ``Title`` and
            ``Published`` columns, an error frame (single ``Error`` column),
            or ``None``.

    Returns:
        The Matplotlib figure, or ``None`` when there is nothing to plot.
    """
    # An empty frame (zero items returned) has no "Channel"/"Views" columns, so
    # grouping would raise KeyError; treat it like the error cases.
    if df is None or df.empty or "Error" in df.columns:
        return None

    fig, axes = plt.subplots(1, 3, figsize=(20, 6))

    # Chart 1: Top Channels by Views
    views_per_channel = df.groupby("Channel")["Views"].sum().reset_index()
    top_channels = views_per_channel.sort_values("Views", ascending=False).head(5)
    sns.barplot(data=top_channels, x="Views", y="Channel", palette="Blues_r", ax=axes[0])
    axes[0].set_title("🔥 Top 5 Channels by Total Views")

    # Chart 2: Top 5 Videos by Likes
    top_likes = df.sort_values("Likes", ascending=False).head(5)
    sns.barplot(data=top_likes, x="Likes", y="Title", palette="Greens_r", ax=axes[1])
    axes[1].set_title("👍 Top 5 Videos by Likes")

    # Chart 3: Time Trend (Views vs Published Date)
    sns.lineplot(data=df.sort_values("Published"), x="Published", y="Views", marker="o", ax=axes[2], color="red")
    axes[2].set_title("⏳ Views vs Published Date")
    axes[2].tick_params(axis="x", rotation=30)

    plt.tight_layout()
    return fig

# -------------------------------
# Gradio App
# -------------------------------
# UI with two tabs: the data table and the three-panel chart figure.
with gr.Blocks() as demo:
    gr.Markdown("## 📊 YouTube Trending Video Stats Dashboard")

    # Inputs passed positionally to process(api_key, region, max_results).
    api_key = gr.Textbox(label="🔑 Enter YouTube API Key", type="password", placeholder="Paste your API key here")
    region = gr.Dropdown(choices=["IN", "US", "GB", "JP", "BR"], value="IN", label="🌍 Region")
    max_results = gr.Slider(5, 50, step=5, value=10, label="Number of Videos")

    btn = gr.Button("🚀 Fetch Trending Videos")

    with gr.Tab("📋 Data Table"):
        # Headers mirror the dict keys built in fetch_trending.
        output_table = gr.Dataframe(headers=["Title", "Channel", "Published", "Views", "Likes", "Comments", "Video URL"],
                                    interactive=False)

    with gr.Tab("📈 Charts"):
        output_chart = gr.Plot()

    def process(api_key, region, max_results):
        """Fetch the videos once and return (table frame, chart figure)."""
        # On a fetch error df_plot is None, so plot_charts returns None.
        df, df_plot = fetch_trending(api_key, region, max_results)
        fig = plot_charts(df_plot)
        return df, fig

    # Output order matches the (df, fig) tuple returned by process.
    btn.click(fn=process, inputs=[api_key, region, max_results], outputs=[output_table, output_chart])

# Launch app with public link
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c4fe2bfc9ea88768e0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [30]:
!pip install gradio requests pandas seaborn matplotlib textblob

import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from textblob import TextBlob
import gradio as gr

# -------------------------------
# Function to fetch trending videos
# -------------------------------
def fetch_trending(api_key, region="IN", max_results=10):
    """Fetch the most popular videos for a region and tag each title's sentiment.

    Args:
        api_key: YouTube Data API v3 key.
        region: Region code sent as ``regionCode``.
        max_results: Value sent as ``maxResults``.

    Returns:
        ``(df, df)`` on success (the same frame for the table and for
        plotting, including a ``Sentiment`` label derived from the title), or
        ``(error_df, None)`` on failure, where ``error_df`` has a single
        ``Error`` column.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet,statistics",
        "chart": "mostPopular",
        "regionCode": region,
        "maxResults": max_results,
        "key": api_key
    }

    try:
        # Bounded wait so an unresponsive endpoint cannot hang the UI callback.
        response = requests.get(url, params=params, timeout=12)
    except requests.RequestException as exc:
        detail = str(exc)
        # The exception text can contain the request URL, including the key.
        if api_key:
            detail = detail.replace(str(api_key), "***")
        return pd.DataFrame([{"Error": f"Request failed: {detail}"}]), None

    if response.status_code != 200:
        try:
            detail = response.json()
        except ValueError:
            # Error body was not JSON; show the raw text instead of raising.
            detail = response.text
        return pd.DataFrame([{"Error": detail}]), None

    data = response.json()

    videos = []
    for item in data.get("items", []):
        snippet = item["snippet"]
        stats = item.get("statistics", {})
        title = snippet["title"]

        # Sentiment Analysis on Title: the sign of TextBlob's polarity picks the label.
        polarity = TextBlob(title).sentiment.polarity
        if polarity > 0:
            sentiment = "Positive 😊"
        elif polarity < 0:
            sentiment = "Negative 😡"
        else:
            sentiment = "Neutral 😐"

        videos.append({
            "Title": title,
            "Channel": snippet["channelTitle"],
            # Parsed so the time-trend chart can sort and plot by date.
            "Published": pd.to_datetime(snippet["publishedAt"]),
            # Counters are converted to int; absent ones count as 0.
            "Views": int(stats.get("viewCount", 0)),
            "Likes": int(stats.get("likeCount", 0)),
            "Comments": int(stats.get("commentCount", 0)),
            "Sentiment": sentiment,
            "Video URL": f"https://www.youtube.com/watch?v={item['id']}"
        })

    df = pd.DataFrame(videos)
    return df, df  # one for table, one for plotting

# -------------------------------
# Function to generate Seaborn charts
# -------------------------------
def plot_charts(df):
    """Draw the 2x2 Seaborn figure for the fetched videos.

    Args:
        df: Frame with ``Channel``, ``Views``, ``Likes``, ``Title``,
            ``Published`` and ``Sentiment`` columns, an error frame (single
            ``Error`` column), or ``None``.

    Returns:
        The Matplotlib figure, or ``None`` when there is nothing to plot.
    """
    # An empty frame (zero items returned) has no "Channel"/"Views" columns, so
    # grouping would raise KeyError; treat it like the error cases.
    if df is None or df.empty or "Error" in df.columns:
        return None

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # Chart 1: Top Channels by Views
    views_per_channel = df.groupby("Channel")["Views"].sum().reset_index()
    top_channels = views_per_channel.sort_values("Views", ascending=False).head(5)
    sns.barplot(data=top_channels, x="Views", y="Channel", palette="Blues_r", ax=axes[0, 0])
    axes[0, 0].set_title("🔥 Top 5 Channels by Total Views")

    # Chart 2: Top 5 Videos by Likes
    top_likes = df.sort_values("Likes", ascending=False).head(5)
    sns.barplot(data=top_likes, x="Likes", y="Title", palette="Greens_r", ax=axes[0, 1])
    axes[0, 1].set_title("👍 Top 5 Videos by Likes")

    # Chart 3: Time Trend (Views vs Published Date)
    sns.lineplot(data=df.sort_values("Published"), x="Published", y="Views", marker="o", ax=axes[1, 0], color="red")
    axes[1, 0].set_title("⏳ Views vs Published Date")
    axes[1, 0].tick_params(axis="x", rotation=30)

    # Chart 4: Sentiment Distribution of Titles
    sns.countplot(data=df, x="Sentiment", palette="Set2", ax=axes[1, 1])
    axes[1, 1].set_title("💬 Sentiment Distribution of Video Titles")

    plt.tight_layout()
    return fig

# -------------------------------
# Gradio App
# -------------------------------
# UI with two tabs: the data table (now with a Sentiment column) and the 2x2 chart figure.
with gr.Blocks() as demo:
    gr.Markdown("## 📊 YouTube Trending Video Stats + NLP Dashboard")

    # Inputs passed positionally to process(api_key, region, max_results).
    api_key = gr.Textbox(label="🔑 Enter YouTube API Key", type="password", placeholder="Paste your API key here")
    region = gr.Dropdown(choices=["IN", "US", "GB", "JP", "BR"], value="IN", label="🌍 Region")
    max_results = gr.Slider(5, 50, step=5, value=10, label="Number of Videos")

    btn = gr.Button("🚀 Fetch Trending Videos")

    with gr.Tab("📋 Data Table"):
        # Headers mirror the dict keys built in fetch_trending.
        output_table = gr.Dataframe(headers=["Title", "Channel", "Published", "Views", "Likes", "Comments", "Sentiment", "Video URL"],
                                    interactive=False)

    with gr.Tab("📈 Charts"):
        output_chart = gr.Plot()

    def process(api_key, region, max_results):
        """Fetch the videos once and return (table frame, chart figure)."""
        # On a fetch error df_plot is None, so plot_charts returns None.
        df, df_plot = fetch_trending(api_key, region, max_results)
        fig = plot_charts(df_plot)
        return df, fig

    # Output order matches the (df, fig) tuple returned by process.
    btn.click(fn=process, inputs=[api_key, region, max_results], outputs=[output_table, output_chart])

# Launch app with public link
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2f558979f42104673a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# 📊 YouTube Trending Video Stats + NLP Dashboard  

🚀 An advanced **Data Science + NLP + Visualization project** that dynamically fetches **YouTube Trending Videos** using the YouTube Data API.  
It provides **interactive dashboards, charts, sentiment analysis, and auto-refreshing live stats** — all inside a **Gradio app with a public link**.  

---

## ✨ Features  
- ✅ Fetch **live trending videos** by region (India, US, UK, Japan, Brazil etc.)  
- ✅ 📋 Interactive **Data Table** → Title, Channel, Published Date, Views, Likes, Comments, Sentiment, Video URL  
- ✅ 📈 **Charts Dashboard** with Seaborn + Matplotlib:  
   - 🔥 Top 5 Channels by Views  
   - 👍 Top 5 Videos by Likes  
   - ⏳ Views vs Published Date (Time Trend)  
   - 💬 Sentiment Distribution (Positive / Negative / Neutral)  
- ✅ 🧠 **NLP Sentiment Analysis** on video titles (TextBlob)  

---

## 🛠️ Tech Stack  
- **Python** (Data Processing)  
- **YouTube Data API v3** (Data Source)  
- **Pandas** (Data Handling)  
- **Seaborn + Matplotlib** (Data Visualization)  
- **TextBlob** (NLP Sentiment Analysis)  
- **Gradio** (Frontend UI)  

---

## 🚀 Setup Instructions  

### 1️⃣ Clone Repository  
```bash
git clone https://github.com/yourusername/youtube-trending-dashboard.git
cd youtube-trending-dashboard
```

### 2️⃣ Install Dependencies
```bash
pip install -r requirements.txt
```

### 3️⃣ Run the App
```bash
python app.py
```


Or if using Google Colab, just paste the full code and run.

## 🔑 YouTube API Key Setup

1. Go to the Google Cloud Console.
2. Enable the YouTube Data API v3.
3. Generate an API key.
4. Paste the key into the Gradio app's input field.

📸 Screenshots
Dashboard Overview

(Add a screenshot of your app here)

Example Charts

(Add chart images here, e.g. Top Channels, Sentiment Distribution)

📂 Project Structure
📦 youtube-trending-dashboard
 ┣ 📜 app.py                # Main Gradio app
 ┣ 📜 requirements.txt      # Dependencies
 ┣ 📜 README.md             # Documentation
 ┣ 📂 outputs               # CSV & Excel downloads

🎯 Use Cases

Internship / Job Portfolio Project

Real-time Social Media Analytics

Data Science + NLP + Visualization Case Study

Business Intelligence Dashboard

🤝 Contribution

Pull requests and suggestions are welcome. Feel free to fork this repo and build more features (e.g., topic classification, regional comparison, deep NLP analysis).

📧 Contact

👨‍💻 Sahil Pawar
📍 Sangli, Maharashtra, India
📩 Email: publichacker9999@gmail.com

🔗 LinkedIn: linkedin.com/in/sahilpawar

💻 GitHub: github.com/sahilpawar

🏆 Acknowledgements

YouTube Data API v3

Gradio for interactive dashboards

Seaborn & Matplotlib for charts

TextBlob for sentiment analysis

👉 This project is internship-ready and highlights skills in Python, Data Science, NLP, Visualization, API Integration, and Dashboard Development.


---

⚡ Pro tip: In **Colab**, you can create this as a separate **README.md cell** by using:  

```python
%%writefile README.md
# 📊 YouTube Trending Video Stats + NLP Dashboard
...