<a href="https://colab.research.google.com/github/sathviksr2001/YouTube-Trend-Analyzer-with-Sentiment-and-Privacy-Risk-Detection/blob/main/YouTube_Trend_Analyzer_with_Sentiment_and_Privacy_Risk_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install required libraries
!pip install --quiet gradio google-api-python-client textblob matplotlib

import base64
import io
import os
import re
from datetime import datetime, timedelta
from getpass import getpass

import gradio as gr
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
from googleapiclient.discovery import build
from textblob import TextBlob

In [3]:
# YouTube API setup
# The key is read from the YOUTUBE_API_KEY environment variable; if that is not
# set, the user is prompted for it without echoing. This keeps the credential
# out of the notebook source and out of any saved or shared copy of it.
api_key = os.environ.get("YOUTUBE_API_KEY") or getpass("YouTube Data API key: ")
youtube = build('youtube', 'v3', developerKey=api_key)


In [4]:
# Fetch trending videos from YouTube API
def fetch_trending_videos(region='IN', max_results=10):
    """Request the 'mostPopular' video chart for one region.

    Uses the module-level ``youtube`` client created in the API setup cell.

    Args:
        region: Value sent as the ``regionCode`` request parameter.
        max_results: Value sent as the ``maxResults`` request parameter.

    Returns:
        Whatever ``execute()`` returns for the ``videos().list`` request
        (the snippet and statistics parts are requested).
    """
    query = {
        'part': 'snippet,statistics',
        'chart': 'mostPopular',
        'regionCode': region,
        'maxResults': max_results,
    }
    return youtube.videos().list(**query).execute()

In [5]:
# Privacy Leakage Detection Function
def detect_privacy_leakage(row):
    """Classify one video's metadata into a heuristic privacy-risk bucket.

    Scoring (only the description and tags are inspected):
      * +2 if the description contains a standalone 10-digit number
      * +2 if the description contains something shaped like an e-mail address
      * +1 if the description contains one of the location keywords
      * +1 if the tags contain one of the "personal" keywords

    Args:
        row: A mapping-like object with a ``.get`` method (dict or pandas
            Series) holding ``'description'`` and ``'tags'``. Missing keys,
            ``None``/NaN values and list-valued tags are tolerated and treated
            as empty text or joined text respectively.

    Returns:
        ``"High Risk"`` for a score of 3 or more, ``"Medium Risk"`` for exactly
        2, otherwise ``"Low Risk"``.
    """
    def _lowered(key):
        # Fields can be absent, None, NaN (float) or a raw list of tags when the
        # row was not produced by parse_response; never call .lower() blindly.
        value = row.get(key, "")
        if isinstance(value, str):
            return value.lower()
        if isinstance(value, (list, tuple, set)):
            return ", ".join(str(item) for item in value).lower()
        return ""

    # NOTE(review): the title was previously read but never scored; it is still
    # not scored here so existing classifications stay the same.
    description = _lowered('description')
    tags = _lowered('tags')
    score = 0

    # Standalone 10-digit number, e.g. a phone number.
    if re.search(r"\b\d{10}\b", description):
        score += 2
    # Anything shaped like local@domain.tld.
    if re.search(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", description):
        score += 2
    # Substring match on location-related keywords.
    if any(word in description for word in ["address", "home", "location", "live at"]):
        score += 1
    # Substring match on tags that hint at personal content.
    if any(word in tags for word in ["personal", "private", "diary", "secrets"]):
        score += 1

    if score >= 3:
        return "High Risk"
    if score == 2:
        return "Medium Risk"
    return "Low Risk"

In [6]:
# Parse and engineer features
def _sentiment_label(polarity):
    """Map a polarity score to 'Positive', 'Negative' or 'Neutral' (exactly 0)."""
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'


def parse_response(response):
    """Turn a trending-videos API response into a feature DataFrame.

    One row is produced per entry in ``response['items']``. Each row carries the
    raw metadata (title, channel, counts, description, tags), simple engineered
    features (lengths, tag count, publish hour and weekday), TextBlob sentiment
    labels for title and description, a simulated ``momentum`` value and the
    ``privacy risk`` label from ``detect_privacy_leakage``.

    Args:
        response: Mapping with an ``'items'`` list; each item is expected to
            have a ``'snippet'`` and (optionally) a ``'statistics'`` mapping.
            # assumes the shape of a YouTube videos.list response — confirm

    Returns:
        A pandas DataFrame with one row per item. When there are no items, an
        empty DataFrame with the same columns is returned instead of failing.
    """
    columns = [
        'title', 'channel', 'published at', 'views', 'likes', 'comments',
        'title length', 'description', 'tags', 'description length',
        'tags count', 'published hour', 'published day', 'title sentiment',
        'description sentiment', 'momentum',
    ]

    records = []
    for item in response.get('items', []):
        snippet = item['snippet']
        # Statistics may be missing entirely; fall back to zeros below.
        stats = item.get('statistics', {})
        published_at = snippet['publishedAt']
        published_dt = pd.to_datetime(published_at)

        title = snippet['title']
        description = snippet.get('description', '')
        tags = snippet.get('tags', [])

        title_polarity = TextBlob(title).sentiment.polarity
        desc_polarity = TextBlob(description).sentiment.polarity

        current_views = int(stats.get('viewCount', 0))
        # There is no real previous-day figure: momentum is SIMULATED by
        # assuming yesterday's views were 80% of today's (i.e. 20% growth).
        prev_day_views = int(current_views * 0.8)
        momentum = current_views - prev_day_views

        records.append({
            'title': title,
            'channel': snippet['channelTitle'],
            'published at': published_at,
            'views': current_views,
            'likes': int(stats.get('likeCount', 0)),
            'comments': int(stats.get('commentCount', 0)),
            'title length': len(title),
            'description': description,
            # Stored as one comma-separated string so it displays in a table cell.
            'tags': ", ".join(tags),
            'description length': len(description),
            'tags count': len(tags),
            'published hour': published_dt.hour,
            'published day': published_dt.weekday(),
            'title sentiment': _sentiment_label(title_polarity),
            'description sentiment': _sentiment_label(desc_polarity),
            'momentum': momentum
        })

    if not records:
        # A row-wise apply on a frame with no rows/columns yields a column-less
        # DataFrame that cannot be assigned to a single column, so short-circuit.
        return pd.DataFrame(columns=columns + ['privacy risk'])

    df = pd.DataFrame(records)
    df['privacy risk'] = df.apply(detect_privacy_leakage, axis=1)
    return df

In [7]:
# Create matplotlib charts and return base64 HTML images
def plot_momentum(df):
    """Render the per-video momentum bar chart as an inline HTML image.

    Args:
        df: DataFrame with a ``'title'`` column (used as tick labels) and a
            ``'momentum'`` column (bar heights).

    Returns:
        An ``<img>`` tag whose source is the chart as a base64-encoded PNG
        data URI.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Plot by position rather than by title so that two videos with the
        # same title still get one bar each instead of sharing a category.
        positions = list(range(len(df)))
        ax.bar(positions, df['momentum'], color='skyblue')
        ax.set_xticks(positions)
        ax.set_xticklabels(df['title'].tolist(), rotation=45, ha='right')
        ax.set_title("Momentum (Recent View Gain)", fontsize=14)
        ax.set_ylabel("View Gain")
        ax.set_xlabel("Video Title")
        fig.tight_layout()

        # Work on this figure explicitly: pyplot's "current figure" is global
        # state and the function can be called concurrently by the web UI.
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
        buf.close()
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    return f"<img src='data:image/png;base64,{img_base64}'/>"

def plot_privacy_risk_distribution(df):
    """Render the share of each privacy-risk label as an inline HTML pie chart.

    Args:
        df: DataFrame with a ``'privacy risk'`` column.

    Returns:
        An ``<img>`` tag whose source is the chart as a base64-encoded PNG
        data URI. If the column has no values, the image shows a
        "No data" placeholder instead of a pie.
    """
    # Colours are tied to the label, not to the wedge position: value_counts()
    # orders by frequency, so a positional colour list would paint whichever
    # class is most common green, even when that class is "High Risk".
    risk_colors = {
        'Low Risk': '#4CAF50',
        'Medium Risk': '#FFC107',
        'High Risk': '#F44336',
    }

    fig, ax = plt.subplots()
    try:
        counts = df['privacy risk'].value_counts()
        if counts.empty:
            # Plotting an empty Series raises, so draw a placeholder instead.
            ax.text(0.5, 0.5, "No data", ha='center', va='center',
                    transform=ax.transAxes)
            ax.axis('off')
        else:
            wedge_colors = [risk_colors.get(label, '#9E9E9E') for label in counts.index]
            counts.plot.pie(autopct='%1.1f%%', colors=wedge_colors, ax=ax)
            ax.set_ylabel('')
        ax.set_title("Privacy Risk Distribution")

        # Save and close this specific figure rather than pyplot's global
        # "current figure", which is shared across concurrent calls.
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
        buf.close()
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    return f"<img src='data:image/png;base64,{img_base64}'/>"

In [8]:
# Gradio function
def get_trending_videos(region='IN', max_results=10):
    """Gradio callback: fetch trending videos and build the table and charts.

    Args:
        region: Region code as typed by the user. Surrounding whitespace is
            removed and the code is upper-cased; an empty value falls back
            to ``'IN'``.
        max_results: Requested number of videos. Coerced to ``int`` and clamped
            to the 1-50 range accepted by the videos.list endpoint.

    Returns:
        A tuple ``(df, combined_html)``: the feature DataFrame from
        ``parse_response`` and one HTML string holding the momentum chart
        followed by the privacy-risk chart.
    """
    # Free-text and slider values come straight from the UI; normalise them
    # before they reach the API request.
    region = (region or '').strip().upper() or 'IN'
    max_results = max(1, min(int(max_results), 50))

    response = fetch_trending_videos(region, max_results)
    df = parse_response(response)
    chart_html = plot_momentum(df)
    risk_html = plot_privacy_risk_distribution(df)
    combined_html = chart_html + "<br><br>" + risk_html
    return df, combined_html

In [9]:
# Gradio Interface
# Build and launch the UI: a region code and a result count go in; the feature
# table and the two charts come out.
# NOTE: share=True publishes a temporary public URL, and every visitor's request
# is made with the API key configured in this notebook.
gr.Interface(
    fn=get_trending_videos,
    inputs=[
        # Region codes are ISO 3166-1 alpha-2, so the United Kingdom is "GB" ("UK" is not a valid code).
        gr.Textbox(label="Region Code (e.g., IN, US, GB)", value="IN"),
        gr.Slider(1, 20, value=10, step=1, label="Number of Trending Videos")
    ],
    outputs=[
        gr.Dataframe(label="Trending YouTube Videos with Features & Privacy Risk"),
        gr.HTML(label="Visualizations: Momentum + Privacy Risk")
    ],
    title="📊 YouTube Trending Video Analyzer with Privacy Detection",
    description="Fetches real-time trending videos from YouTube, analyzes metadata for sentiment, momentum, and privacy risk using heuristics."
).launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f87eeebab8119cf84e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


