In [None]:
# Cell 1: Installations (YouTube Focused)
!pip install nltk spacy -q # Keep spacy only if actively used, otherwise remove
!pip install requests beautifulsoup4 youtube-transcript-api google-api-python-client google-auth google-auth-oauthlib google-auth-httplib2 -q # Google/YT APIs
!pip install scikit-learn -q # For TF-IDF
!pip install transformers[torch] sentencepiece -q # Transformers + PyTorch backend
!pip install python-telegram-bot --upgrade -q # Telegram Bot library
!pip install nest_asyncio -q # For Colab async compatibility
!pip install vaderSentiment -q # Keep VADER for potential sentence scoring or fallback

print("--- Required packages installation attempted (YouTube Focus) ---")

# Verify telegram bot version
import telegram
print(f"Using python-telegram-bot version: {telegram.__version__}")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m55.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m72.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m48.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Cell 2: NLTK Downloads & Core Imports (YouTube Focus)
import nltk
# import spacy # Only if you actually use it in YT functions
import os
import logging
import re
import string
import html
import pickle
import asyncio
import nest_asyncio

# --- Setup Logging ---
# `logger` is the module-level logger used by every later cell in this notebook.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- NLTK Downloads ---
# `import nltk` alone does not bind the name `stopwords`. Referencing it unbound
# raised NameError inside the try-block below, which was swallowed by the broad
# except and left `stop_words_nltk` permanently empty. Import it explicitly.
from nltk.corpus import stopwords

print("Downloading NLTK resources (punkt, stopwords, vader_lexicon)...")
try:
    # Force download punkt for Colab robustness
    nltk.download('punkt', download_dir='/root/nltk_data/', force=True, quiet=False, raise_on_error=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('vader_lexicon', quiet=True) # Keep VADER lexicon
    print("NLTK downloads completed/verified.")
    # English stopword set consumed by preprocess_text() when remove_stopwords=True.
    stop_words_nltk = set(stopwords.words('english'))
    logger.info("NLTK stopwords loaded.")
except Exception as e:
    logger.error(f"ERROR downloading/loading NLTK data: {e}. Some features might fail.")
    stop_words_nltk = set() # Fallback: stopword filtering degrades to the custom list only

# --- spaCy Download (Remove if not used) ---
# model_name_spacy = 'en_core_web_sm'
# if not spacy.util.is_package(model_name_spacy):
#     print(f"Downloading spaCy model: {model_name_spacy}...")
#     spacy.cli.download(model_name_spacy, "-q")

# Apply nest_asyncio for Colab compatibility
nest_asyncio.apply()
logger.info("nest_asyncio applied.")

print("--- NLTK setup complete ---")

Downloading NLTK resources (punkt, stopwords, vader_lexicon)...


[nltk_data] Downloading package punkt to /root/nltk_data/...
[nltk_data]   Unzipping tokenizers/punkt.zip.
ERROR:__main__:ERROR downloading/loading NLTK data: name 'stopwords' is not defined. Some features might fail.


NLTK downloads completed/verified.
--- NLTK setup complete ---


In [None]:
# Cell 3: Configuration, Global Variables & Initializations (YouTube Focused)

# --- Library Imports needed for this cell ---
from google.colab import userdata # For Colab secrets
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from transformers import pipeline
import torch

# --- YouTube Configuration ---
# Maps YouTube category-id strings (the `categoryId` value read from a video's
# snippet in get_video_details) to display names. get_category_name() looks ids
# up here and reports ids missing from this table as "Unknown/Other".
YOUTUBE_CATEGORY_MAP = {
    "1": "Film & Animation", "2": "Autos & Vehicles", "10": "Music", "15": "Pets & Animals",
    "17": "Sports", "19": "Travel & Events", "20": "Gaming", "22": "People & Blogs",
    "23": "Comedy", "24": "Entertainment", "25": "News & Politics", "26": "Howto & Style",
    "27": "Education", "28": "Science & Technology", "29": "Nonprofits & Activism",
    "30": "Movies", "43": "Shows"
}

# --- Global Variables (Placeholders) ---
# Every name below starts as None and is assigned by the initialisation steps
# later in this cell. The helper functions check these for truthiness and
# degrade gracefully when one is still None.
youtube_service = None
vader_analyzer = None
sentiment_pipeline_yt = None # Pipeline for YouTube comment sentiment
sentence_tokenizer = None      # Explicitly loaded NLTK sentence tokenizer
TELEGRAM_BOT_TOKEN = None
YOUTUBE_API_KEY = None

# --- Load Secrets ---
# Read both credentials from Colab's secret store via `userdata.get`.
# A missing Telegram token is logged as an error, a missing YouTube key only as
# a warning; neither aborts the cell. Any exception raised while reading is
# logged, and whichever variables were not yet assigned keep their None value.
logger.info("Loading secrets...")
try:
    TELEGRAM_BOT_TOKEN = userdata.get('TELEGRAM_BOT_TOKEN')
    YOUTUBE_API_KEY = userdata.get('YOUTUBE_API_KEY')
    if not TELEGRAM_BOT_TOKEN: logger.error("TELEGRAM_BOT_TOKEN not found in Colab Secrets.")
    else: logger.info("Telegram Bot Token loaded.")
    if not YOUTUBE_API_KEY: logger.warning("YOUTUBE_API_KEY not found. YouTube analysis may fail.")
    else: logger.info("YouTube API Key loaded.")
except Exception as e:
    logger.error(f"Error loading secrets: {e}")

# --- Initialize VADER Analyzer ---
# Build the shared VADER analyzer used by extract_example_comments().
# On failure the error is logged and `vader_analyzer` stays None.
# NOTE(review): presumably depends on the `vader_lexicon` download in Cell 2 - confirm.
logger.info("Initializing VADER Analyzer...")
try:
    vader_analyzer = SentimentIntensityAnalyzer()
    logger.info("VADER Analyzer initialized.")
except Exception as e:
    logger.error(f"Failed to initialize VADER: {e}")

# --- Load NLTK Sentence Tokenizer Explicitly ---
# Load the English punkt tokenizer directly from the pickle that Cell 2's
# forced `punkt` download unpacks under /root/nltk_data/.
# summarize_description() uses the resulting object's .tokenize() method.
# If the file is missing or unpickling fails, the problem is logged and
# `sentence_tokenizer` stays None.
logger.info("Loading NLTK Sentence Tokenizer explicitly...")
# NOTE(review): absolute, Colab-specific path - this cell will not find the file elsewhere.
punkt_english_pickle_path = '/root/nltk_data/tokenizers/punkt/english.pickle'
try:
    if os.path.exists(punkt_english_pickle_path):
        with open(punkt_english_pickle_path, 'rb') as f:
            # SECURITY: pickle.load executes arbitrary code from the file; only
            # acceptable because the file comes from the NLTK download above.
            sentence_tokenizer = pickle.load(f)
        logger.info("Explicitly loaded sentence tokenizer.")
    else:
        logger.error(f"Sentence tokenizer file not found: {punkt_english_pickle_path}. Ensure Cell 2 ran.")
except Exception as e_load:
     logger.error(f"Failed to explicitly load sentence tokenizer: {e_load}")

# --- Initialize YouTube API Service ---
# Create the YouTube Data API v3 client shared by the search/details/comments
# helpers. It is only attempted when an API key was loaded above; on a missing
# key or a build failure `youtube_service` is (re)set to None so those helpers
# return their "service unavailable" results instead of raising.
logger.info("Initializing YouTube API Service...")
if YOUTUBE_API_KEY:
    try:
        youtube_service = build("youtube", "v3", developerKey=YOUTUBE_API_KEY, cache_discovery=False)
        logger.info("YouTube API service object created.")
    except Exception as e:
        logger.error(f"Error building YouTube service: {e}")
        youtube_service = None
else:
    logger.warning("YouTube API Key not loaded, skipping YouTube service initialization.")
    youtube_service = None

# --- Initialize Transformer Sentiment Pipeline ---
# Load the Hugging Face sentiment pipeline used by get_sentiment_label().
logger.info("Initializing Transformer Sentiment Pipeline for YouTube...")
# Device index passed to `pipeline`: 0 when CUDA is available, otherwise -1
# (which the log line below reports as CPU).
device_num = 0 if torch.cuda.is_available() else -1
device_name = 'GPU' if device_num == 0 else 'CPU'
logger.info(f"Attempting to load pipeline on device: {device_name}")

try:
    # Using RoBERTa model - good for comments/reviews
    # get_sentiment_label() maps this model's LABEL_0/1/2 outputs to
    # Negative/Neutral/Positive.
    model_name_sent = "cardiffnlp/twitter-roberta-base-sentiment"
    logger.info(f"Loading sentiment pipeline: {model_name_sent}...")
    sentiment_pipeline_yt = pipeline("sentiment-analysis", model=model_name_sent, device=device_num)
    logger.info("Sentiment analysis pipeline loaded successfully.")
except Exception as e:
    # Logged with traceback; the global is reset so callers see "unavailable".
    logger.error(f"ERROR loading sentiment pipeline: {e}. Sentiment analysis will fail.", exc_info=True)
    sentiment_pipeline_yt = None

# --- Removed Product Data Loading ---

print("--- Configuration, Globals, and Initializations Complete (YouTube Focus) ---")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cpu


--- Configuration, Globals, and Initializations Complete (YouTube Focus) ---


In [None]:
# Cell 4: NLP Utility & YouTube Helper Functions

# --- Library Imports needed ---
import re
import string
import logging
import html
import torch # For type hints if needed, and checking GPU
from sklearn.feature_extraction.text import TfidfVectorizer
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from youtube_transcript_api._errors import RequestBlocked

# Ensure nltk and stop_words_nltk (global) are available from Cell 2
# Ensure sentence_tokenizer (global) is available from Cell 3
# Ensure vader_analyzer (global) is available from Cell 3 (if needed)
# Ensure sentiment_pipeline_yt (global) is available from Cell 3
# Ensure youtube_service (global) is available from Cell 3
# Ensure YOUTUBE_CATEGORY_MAP (global) is available from Cell 3


# --- Text Preprocessing ---
def preprocess_text(text, remove_stopwords=False):
    """Normalise free text for keyword extraction.

    The input is lower-cased, then URLs, ASCII punctuation and digit runs are
    removed and whitespace is collapsed to single spaces.

    Args:
        text: Raw text. Anything that is not a ``str`` yields ``""``.
        remove_stopwords: When true, drop tokens that appear in the
            notebook-level ``stop_words_nltk`` set or in a small list of
            YouTube-specific filler words, and tokens of two characters or fewer.

    Returns:
        The cleaned, space-joined text.
    """
    if not isinstance(text, str):
        return ""

    lowered = text.lower()
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', lowered, flags=re.MULTILINE)
    without_punct = without_urls.translate(str.maketrans('', '', string.punctuation))
    tokens = re.sub(r'\d+', '', without_punct).split()

    if remove_stopwords:
        # Words that are frequent in YouTube comments but carry no topic signal.
        youtube_noise = {'video', 'channel', 'youtube', 'comment', 'watch', 'thanks', 'subscriber', 'like'} # YT specific
        blocked = stop_words_nltk.union(youtube_noise)
        tokens = [token for token in tokens if len(token) > 2 and token not in blocked]

    return re.sub(r'\s+', ' ', " ".join(tokens)).strip()

# --- Sentiment Wrapper (using YT pipeline) ---
# In Cell 4

def get_sentiment_label(text):
    """Classify one piece of text with the shared transformer pipeline.

    Args:
        text: Text to classify. Only the first 500 characters are sent to the
            pipeline, which is additionally asked to truncate at 512 tokens.

    Returns:
        "Positive", "Negative" or "Neutral" on success. "Neutral" is also
        returned (with a warning logged) when the pipeline is not loaded or the
        input is not a non-blank string, and for any unrecognised raw label.
        "Error (Input Length)" is returned for a RuntimeError/ValueError whose
        message mentions both "size" and "match"; every other failure yields
        "Error (Analysis)".
    """
    pipeline_ready = bool(sentiment_pipeline_yt)
    usable_text = isinstance(text, str) and bool(text.strip())
    if not (pipeline_ready and usable_text):
        logger.warning("Sentiment pipeline unavailable or invalid input.")
        return "Neutral"

    # Raw labels emitted by the model configured in Cell 3.
    names_by_raw_label = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}
    try:
        with torch.no_grad():
            # Character slice first, then token-level truncation inside the pipeline.
            prediction = sentiment_pipeline_yt(text[:500], truncation=True, max_length=512)[0]
        return names_by_raw_label.get(prediction['label'], "Neutral")
    except (RuntimeError, ValueError) as err:
        message = str(err).lower()
        if "size" not in message or "match" not in message:
            logger.error(f"Error during shared sentiment analysis (Non-size): {err}")
            return "Error (Analysis)" # Generic analysis error
        # Tensor-shape mismatch: distinguished so callers can tell it apart.
        logger.error(f"Tensor size mismatch error during sentiment analysis: {err}. Input text likely too long/complex after tokenization. Text: '{text[:100]}...'")
        return "Error (Input Length)" # Specific error label
    except Exception as err: # Catch other unexpected errors
        logger.error(f"Unexpected error during shared sentiment analysis: {err}", exc_info=True)
        return "Error (Analysis)"
# --- Summarize Description (using explicit sentence tokenizer) ---
def summarize_description(description, num_sentences=5):
    """Return the leading sentences of a video description as a summary.

    Sentences are split with the notebook-level ``sentence_tokenizer``.

    Args:
        description: Description text to summarise.
        num_sentences: Maximum number of leading sentences to keep.

    Returns:
        The first ``num_sentences`` sentences joined by spaces, with "..."
        appended when sentences were left out. If that result is shorter than
        30 characters the original description is returned unchanged. A fixed
        explanatory message is returned when the tokenizer is not loaded, the
        description is empty or not a string, nothing remains after joining, or
        tokenisation raises.
    """
    if not sentence_tokenizer:
        return "Could not generate summary (Tokenizer unavailable)."
    if not (description and isinstance(description, str)):
        return "No description available to summarize."

    try:
        all_sentences = sentence_tokenizer.tokenize(description)
        lead = " ".join(all_sentences[:num_sentences]).strip()
        if not lead:
            return "Description too short to summarize."
        if len(all_sentences) > num_sentences:
            lead += "..."
        # A very short lead is less useful than the (short) description itself.
        if len(lead) < 30:
            return description
        return lead
    except Exception as err:
        logger.error(f"Error during description summarization: {err}")
        return "Could not generate summary from description."

# --- YouTube Category Name Lookup ---
def get_category_name(category_id):
    """Translate a YouTube category id into its display name.

    Args:
        category_id: Category id as a string or number; it is converted with
            ``str()`` before the lookup in ``YOUTUBE_CATEGORY_MAP``.

    Returns:
        The mapped name, "Unknown/Other" for an id missing from the map, or
        "Unknown" when ``category_id`` is falsy (None, "", 0).
    """
    if category_id:
        return YOUTUBE_CATEGORY_MAP.get(str(category_id), "Unknown/Other")
    return "Unknown"

# --- Extract Video ID ---
def extract_video_id(url):
    """Extract the 11-character YouTube video id from a URL or bare id.

    Recognised forms (scheme and ``www.``/``m.``/``music.`` prefix optional):
      * ``youtube.com/watch?v=ID`` - also when ``v`` is not the first query
        parameter, e.g. ``watch?feature=share&v=ID``
      * ``youtu.be/ID``
      * ``youtube.com/embed/ID`` and ``youtube-nocookie.com/embed/ID``
      * ``youtube.com/shorts/ID``, ``youtube.com/live/ID``, ``youtube.com/v/ID``
      * a bare 11-character id, ignoring surrounding whitespace

    Args:
        url: Candidate URL or id. Non-string input yields ``None``.

    Returns:
        The video id, or ``None`` when no pattern matches.
    """
    if not isinstance(url, str): return None
    candidate = url.strip()  # pasted links and ids often carry stray whitespace
    host = r'(?:https?:\/\/)?(?:(?:www|m|music)\.)?'
    video_id = r'([a-zA-Z0-9_-]{11})'
    # Order matters: the first pattern that matches wins.
    patterns = (
        # The lazy optional group lets `v=` directly after `?` win first and
        # otherwise skips over earlier `key=value&` pairs.
        host + r'youtube\.com\/watch\?(?:[^\s#]*?&)??v=' + video_id,
        host + r'youtu\.be\/' + video_id,
        host + r'youtube\.com\/embed\/' + video_id,
        host + r'youtube\.com\/shorts\/' + video_id,
        host + r'youtube-nocookie\.com\/embed\/' + video_id,
        host + r'youtube\.com\/(?:live|v)\/' + video_id,
        r'^' + video_id + r'$',
    )
    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match: return match.group(1)
    return None

# --- Search YouTube Videos ---
def search_youtube_video(query, max_results=1):
    """Search YouTube for videos matching ``query`` via the shared API client.

    Args:
        query: Search text.
        max_results: Passed to the API as ``maxResults``.

    Returns:
        ``None`` when the API client is not initialised; ``[]`` when ``query``
        is empty or not a string; otherwise a list of dicts with the keys
        ``video_id``, ``title`` and ``channel_title`` (missing titles default to
        "N/A"). If the request or result parsing raises, the error is logged
        and whatever was collected so far is returned.
    """
    if not youtube_service:
        logger.error("YT service not available for search.")
        return None
    if not (query and isinstance(query, str)):
        return []

    matches = []
    try:
        response = youtube_service.search().list(
            part="id,snippet", q=query, type="video",
            maxResults=max_results, relevanceLanguage="en",
        ).execute()
        for entry in response.get('items', []):
            id_info = entry.get('id', {})
            if id_info.get('kind') != 'youtube#video':
                continue
            video_id = id_info.get('videoId')
            if not video_id:
                continue
            snippet = entry.get('snippet', {})
            matches.append({
                'video_id': video_id,
                'title': snippet.get('title', 'N/A'),
                'channel_title': snippet.get('channelTitle', 'N/A'),
            })
    except Exception as err:
        logger.error(f"YouTube search failed for '{query}': {err}")
    return matches

# --- Get Video Details (incl. Category) ---
def get_video_details(video_id):
    """Fetch title, description, channel and category id for one video.

    Args:
        video_id: YouTube video id; converted with ``str()`` for the request.

    Returns:
        A dict with the keys ``title``, ``description``, ``channel_title``,
        ``category_id`` and ``error``. On success ``error`` is ``None``;
        otherwise the other fields stay ``None`` and ``error`` holds one of
        "No Video ID", "API service unavailable", "Video not found" or
        "API Error fetching details: <ExceptionName>". This function does not
        raise for API failures.
    """
    details = {'title': None, 'description': None, 'channel_title': None, 'category_id': None, 'error': None}

    if not video_id:
        details['error'] = "No Video ID"
        return details
    if not youtube_service:
        details['error'] = "API service unavailable"
        return details

    # Result key -> field name inside the API's `snippet` object.
    snippet_fields = {
        'title': 'title',
        'description': 'description',
        'channel_title': 'channelTitle',
        'category_id': 'categoryId',
    }
    try:
        response = youtube_service.videos().list(part="snippet", id=str(video_id)).execute()
        items = response.get('items')
        if not items:
            details['error'] = "Video not found"
        else:
            snippet = items[0].get('snippet', {})
            details.update({key: snippet.get(source) for key, source in snippet_fields.items()})
    except Exception as err:
        details['error'] = f"API Error fetching details: {type(err).__name__}"
        logger.error(f"YT Details Error {video_id}: {err}")
    return details

# --- Get YouTube Comments ---
def get_youtube_comments(video_id, max_comments=75):
    """Fetch up to ``max_comments`` top-level comments, most relevant first.

    The API returns at most 100 comment threads per request, so larger
    requests are served by following ``nextPageToken`` until enough comments
    are collected or the API has no further page. (Previously any value above
    100 was silently capped at a single page.)

    Args:
        video_id: YouTube video id; converted with ``str()`` for the request.
        max_comments: Maximum number of comments to return. A non-positive
            value performs no request and returns ``([], None)``.

    Returns:
        ``(comments, error_msg)`` where ``comments`` is a list of plain-text
        comment strings and ``error_msg`` is ``None`` on success, or one of
        "No Video ID", "API service unavailable", "No comments found",
        "Comments disabled", "API Error <status>" or
        "Comment Error: <ExceptionName>". If a later page fails, the comments
        gathered so far are still returned alongside the error message.
    """
    global youtube_service
    comments, error_msg = [], None
    if not video_id: return [], "No Video ID"
    if not youtube_service: return [], "API service unavailable"
    page_token = None
    try:
        while len(comments) < max_comments:
            params = {
                'part': "snippet", 'videoId': str(video_id), 'textFormat': "plainText",
                'order': "relevance",
                # Never ask for more than is still needed, nor more than the per-page cap.
                'maxResults': min(max_comments - len(comments), 100),
            }
            if page_token:
                params['pageToken'] = page_token
            response = youtube_service.commentThreads().list(**params).execute()
            items = response.get('items', [])
            for item in items:
                try:
                    comments.append(item['snippet']['topLevelComment']['snippet']['textDisplay'])
                except KeyError as missing:
                    # Best effort: skip malformed threads, but leave a trace.
                    logger.debug(f"Skipping comment thread without {missing} for {video_id}")
            if not comments and 'items' not in response: error_msg = "No comments found"
            page_token = response.get('nextPageToken')
            # Stop on the last page; an empty page also stops to rule out endless loops.
            if not items or not page_token:
                break
    except HttpError as e:
        err_content = e.content.decode('utf-8','ignore').lower()
        error_msg = "Comments disabled" if e.resp.status == 403 and "disabled comments" in err_content else f"API Error {e.resp.status}"
        logger.error(f"YT Comment fetch {error_msg} for {video_id}")
    except Exception as e:
        error_msg = f"Comment Error: {type(e).__name__}"
        logger.error(f"{error_msg} for {video_id}")
    return comments, error_msg

# --- Get YouTube Transcript (Placeholder/Unreliable) ---
def get_youtube_transcript(video_id):
    """Fetch the English transcript of a video as one space-joined string.

    Supports both generations of ``youtube-transcript-api``: releases that
    expose the instance method ``YouTubeTranscriptApi().fetch`` (the static
    ``get_transcript`` helper is not available in newer releases) and older
    releases that only provide the static ``get_transcript``. The library is
    installed unpinned, so relying on a single form fails on one of them.

    Args:
        video_id: YouTube video id; converted with ``str()`` for the request.

    Returns:
        ``(text, None)`` on success, otherwise ``(None, reason)`` where
        ``reason`` is a short human-readable message. This function does not
        raise.
    """
    languages = ['en', 'en-US']
    try:
        if hasattr(YouTubeTranscriptApi, 'fetch'):
            # Newer API: iterating the fetched transcript yields snippets with `.text`.
            fetched = YouTubeTranscriptApi().fetch(str(video_id), languages=languages)
            parts = [snippet.text for snippet in fetched]
        else:
            # Older API: a list of dicts, each with a 'text' entry.
            transcript_list = YouTubeTranscriptApi.get_transcript(str(video_id), languages=languages)
            parts = [item['text'] for item in transcript_list]
        return " ".join(parts), None
    except RequestBlocked: return None, "Request blocked by YouTube (Colab issue)"
    except TranscriptsDisabled: return None, "Transcripts disabled"
    except NoTranscriptFound: return None, "No English transcript found"
    except Exception as e:
        logger.warning(f"YT Transcript Error {video_id}: {e}")
        return None, f"Transcript Error: {type(e).__name__}"

# --- Keyword Extraction ---
def extract_youtube_keywords(title, description, comments, num_keywords=7):
    """Extract the highest-scoring TF-IDF terms from a video's text.

    The title, the description and every comment are each treated as one
    document after cleaning with ``preprocess_text(..., remove_stopwords=True)``.
    Unigrams and bigrams are ranked by their TF-IDF score summed over all
    documents.

    Args:
        title: Video title, or a falsy value to skip it.
        description: Video description, or a falsy value to skip it.
        comments: List of comment strings, or a falsy value to skip them.
        num_keywords: Maximum number of keywords to return.

    Returns:
        Up to ``num_keywords`` terms, best first. An empty list is returned when
        there is no input text or when vectorisation fails (the error is logged).
    """
    text_for_keywords = []
    if title: text_for_keywords.append(title)
    if description: text_for_keywords.append(description)
    if comments: text_for_keywords.extend(comments)
    if not text_for_keywords: return []
    processed_texts = [preprocess_text(text, remove_stopwords=True) for text in text_for_keywords if isinstance(text, str) and text]
    if not processed_texts: return []

    # With fewer than three documents the usual thresholds cannot be met:
    # min_df=2 needs a term in two documents, while max_df=0.90 permits fewer
    # than two, so the vectorizer raises and keywords were always empty (for
    # example title + description with no comments). Relax both in that case;
    # larger corpora keep the original thresholds.
    if len(processed_texts) < 3:
        min_df, max_df = 1, 1.0
    else:
        min_df, max_df = 2, 0.90

    try:
        vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, max_features=3000, stop_words='english', ngram_range=(1,2))
        tfidf_matrix = vectorizer.fit_transform(processed_texts)
        feature_names = vectorizer.get_feature_names_out()
        # Column sums: one aggregate score per term across all documents.
        sum_tfidf = tfidf_matrix.sum(axis=0)
        scores = [(feature_names[col], sum_tfidf[0, col]) for col in range(sum_tfidf.shape[1])]
        sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)
        return [keyword for keyword, _ in sorted_scores[:num_keywords]]
    except Exception as e:
        logger.error(f"TF-IDF Error: {e}")
        return []

print("--- YouTube Helper & NLP Utility Functions Defined ---")
def extract_example_comments(comments, num_examples=2):
    """Pick the most strongly positive and negative comments by VADER score.

    Only string comments with more than 5 and fewer than 150
    whitespace-separated words are considered. Each is scored with the
    notebook-level ``vader_analyzer``; comments whose scoring raises are
    skipped with a warning.

    Args:
        comments: List of comment strings.
        num_examples: Maximum number of examples per polarity.

    Returns:
        ``(pos_examples, neg_examples)``: positives have a compound score of at
        least 0.5, ordered most positive first; negatives have a compound score
        of at most -0.4, ordered most negative first. Both lists are empty when
        there are no comments or the analyzer is unavailable.
    """
    if not comments or not vader_analyzer:
        if not vader_analyzer:
            logger.error("VADER analyzer missing for example comments.")
        return [], []

    scored = []  # (compound score, comment text)
    for comment in comments:
        if not isinstance(comment, str):
            continue
        if not 5 < len(comment.split()) < 150:
            continue
        try:
            scored.append((vader_analyzer.polarity_scores(comment)['compound'], comment))
        except Exception as err:
            logger.warning(f"VADER scoring failed for comment: {comment[:50]}... Error: {err}")

    # Stable sort, highest score first; ties keep their input order.
    ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)

    def _take(candidates, qualifies):
        """Collect qualifying texts in order until num_examples is reached."""
        chosen = []
        for score, text in candidates:
            if len(chosen) >= num_examples:
                break
            if qualifies(score):
                chosen.append(text)
        return chosen

    pos_examples = _take(ranked, lambda score: score >= 0.5)
    neg_examples = _take(reversed(ranked), lambda score: score <= -0.4)
    logger.info(f"Extracted {len(pos_examples)} pos and {len(neg_examples)} neg comment examples.")
    return pos_examples, neg_examples

print("--- NLP Utility & YT Helper Functions Defined (with Example Comments) ---")

--- YouTube Helper & NLP Utility Functions Defined ---
--- NLP Utility & YT Helper Functions Defined (with Example Comments) ---


In [None]:
'''# Cell 5: YouTube Orchestrator & Verdict Formatter

import html # Ensure imported
import logging # Ensure imported
# Assumes functions from Cell 4 are defined
# Assumes sentiment_pipeline_yt (global) is loaded from Cell 3

# --- Overall Verdict Calculator (Needed for YT comments too) ---
def calculate_overall_verdict(sentiments):
    """Calculates an overall verdict based on sentiment labels."""
    if not sentiments: return "Not Enough Data"
    total = len(sentiments)
    pos_count = sentiments.count("Positive")
    neg_count = sentiments.count("Negative")
    pos_ratio = pos_count / total
    neg_ratio = neg_count / total
    if pos_ratio >= 0.65 and neg_ratio < 0.15: return "Overwhelmingly Positive 👍"
    elif pos_ratio > neg_ratio + 0.15 and pos_ratio >= 0.40: return "Generally Positive 🙂"
    elif neg_ratio >= 0.65 and pos_ratio < 0.15: return "Overwhelmingly Negative 👎"
    elif neg_ratio > pos_ratio + 0.15 and neg_ratio >= 0.40: return "Generally Negative 🙁"
    elif abs(pos_ratio - neg_ratio) < 0.20 and (pos_count + neg_count) / total > 0.5: return "Mixed Reviews 🤔"
    else: return "Neutral / Balanced 😐"

# --- YouTube Verdict Formatter ---
def format_youtube_verdict(details, comments_count, sentiment_results, keywords, category_name, summary, transcript_error=None):
    """Formats YouTube analysis results into HTML."""
    verdict = []
    verdict.append(f"🔎 **YouTube Video Analysis**")
    verdict.append(f"🎬 <b>Title:</b> {html.escape(details.get('title','N/A'))}")
    verdict.append(f"👤 <b>Channel:</b> {html.escape(details.get('channel_title','N/A'))}")
    verdict.append(f"📊 <b>Category:</b> {html.escape(category_name)}")
    verdict.append("---")

    verdict.append("📝 <b>Summary (from Description):</b>")
    verdict.append(f"<i>{html.escape(summary)}</i>")
    verdict.append("---")

    verdict.append("💬 **Comment Analysis:**")
    verdict.append(f"  Comments Found: {comments_count}")
    if sentiment_results.get('error'):
        verdict.append(f"  Sentiment: <i>Error - {html.escape(sentiment_results['error'])}</i>")
    elif comments_count > 0:
        label = sentiment_results.get('label', 'Neutral')
        pos_count = sentiment_results.get('pos', 0)
        neg_count = sentiment_results.get('neg', 0)
        neu_count = sentiment_results.get('neu', 0)
        emoji = "😊" if "Positive" in label else ("😠" if "Negative" in label else "😐") # Check label substring
        verdict.append(f"  Overall Sentiment (Model): {emoji} <b>{label}</b>")
        verdict.append(f"     <i>(Pos: {pos_count}, Neg: {neg_count}, Neu: {neu_count})</i>")
    else:
         verdict.append(f"  Sentiment: <i>No comments found or analysis failed</i>")
    verdict.append("---")

    verdict.append("🔑 **Keywords:**")
    if keywords:
        verdict.append(f"  <code>{html.escape(', '.join(keywords))}</code>")
    else:
         verdict.append("  <i>Could not extract keywords.</i>")

    # Add transcript status if error occurred
    if transcript_error:
         verdict.append("---")
         verdict.append(f"<i>Transcript Status: {html.escape(transcript_error)}</i>")

    return "\n".join(verdict)

# --- YouTube Orchestrator ---
def analyze_youtube_video_orchestrator(video_id):
    """Orchestrates YouTube analysis."""
    global sentiment_pipeline_yt # Ensure access to the loaded pipeline

    if not video_id: return "<b>Error:</b> No video ID provided."
    logger.info(f"Orchestrating YouTube analysis for ID: {video_id}")

    # 1. Get Details
    video_details = get_video_details(video_id)
    if video_details.get('error') and not video_details.get('title'):
        return f"<b>Error:</b> Could not fetch video details. {html.escape(video_details.get('error','Unknown Error'))}"

    # 2. Get Comments
    comments, comments_error = get_youtube_comments(video_id)

    # 3. Get Transcript Status
    _, transcript_error = get_youtube_transcript(video_id)

    # 4. Analyze Comment Sentiment
    sentiment_results_yt = {'label': 'Neutral', 'pos': 0, 'neg': 0, 'neu': 0, 'error': comments_error}
    comment_sentiments_list = []
    if comments:
        if sentiment_pipeline_yt:
             comment_sentiments_list = [get_sentiment_label(c) for c in comments]
             sentiment_results_yt['pos'] = comment_sentiments_list.count("Positive")
             sentiment_results_yt['neg'] = comment_sentiments_list.count("Negative")
             sentiment_results_yt['neu'] = comment_sentiments_list.count("Neutral")
             sentiment_results_yt['label'] = calculate_overall_verdict(comment_sentiments_list)
             sentiment_results_yt['error'] = None # Clear fetch error if analysis ran
             logger.info(f"YT sentiment analysis complete for {len(comments)} comments.")
        else:
            sentiment_results_yt['error'] = "Sentiment Analyzer not available."
            logger.error(sentiment_results_yt['error'])
    elif comments_error: # If comments failed to fetch, report that as the sentiment error
         sentiment_results_yt['error'] = comments_error

    # 5. Extract Keywords
    keywords = extract_youtube_keywords(video_details.get('title'), video_details.get('description'), comments)

    # 6. Get Category Name
    category_name = get_category_name(video_details.get('category_id'))

    # 7. Summarize Description
    summary = summarize_description(video_details.get('description'))

    # 8. Generate Verdict
    verdict = format_youtube_verdict(
        video_details,
        len(comments) if comments else 0, # Pass comment count
        sentiment_results_yt,
        keywords,
        category_name,
        summary,
        transcript_error # Pass transcript status
    )

    logger.info(f"YouTube analysis orchestration complete for ID: {video_id}")
    return verdict


print("--- YouTube Orchestrator & Formatter Defined ---")'''
'''# Cell 5: YouTube Orchestrator & Verdict Formatter (Updated)

import html # Ensure imported
import logging # Ensure imported
# Assumes functions from Cell 4 are defined
# Assumes sentiment_pipeline_yt (global) is loaded from Cell 3

# --- Overall Verdict Calculator (Keep as before) ---
def calculate_overall_verdict(sentiments):
    # ... (definition remains the same) ...
    if not sentiments: return "Not Enough Data"
    total = len(sentiments); pos_count = sentiments.count("Positive"); neg_count = sentiments.count("Negative")
    pos_ratio = pos_count / total; neg_ratio = neg_count / total
    if pos_ratio >= 0.65 and neg_ratio < 0.15: return "Overwhelmingly Positive 👍"
    elif pos_ratio > neg_ratio + 0.15 and pos_ratio >= 0.40: return "Generally Positive 🙂"
    elif neg_ratio >= 0.65 and pos_ratio < 0.15: return "Overwhelmingly Negative 👎"
    elif neg_ratio > pos_ratio + 0.15 and neg_ratio >= 0.40: return "Generally Negative 🙁"
    elif abs(pos_ratio - neg_ratio) < 0.20 and (pos_count + neg_count) / total > 0.5: return "Mixed Reviews 🤔"
    else: return "Neutral / Balanced 😐"

# --- YouTube Verdict Formatter (Updated) ---
def format_youtube_verdict(details, comments_count, sentiment_results, keywords, category_name, single_line_summary, transcript_error=None, pos_examples=None, neg_examples=None): # Added examples
    """Formats YouTube analysis results into HTML, including example comments."""
    verdict = []
    verdict.append(f"🔎 **YouTube Video Analysis**")
    verdict.append(f"🎬 <b>Title:</b> {html.escape(details.get('title','N/A'))}")
    verdict.append(f"👤 <b>Channel:</b> {html.escape(details.get('channel_title','N/A'))}")
    verdict.append(f"📊 <b>Category:</b> {html.escape(category_name)}")
    verdict.append("---")

    verdict.append("📝 <b>Content Snippet (from Description):</b>") # Changed Title
    verdict.append(f"<i>{html.escape(single_line_summary)}</i>") # Display single line
    verdict.append("---")

    verdict.append("💬 **Comment Analysis:**")
    verdict.append(f"  Comments Found: {comments_count}")
    if sentiment_results.get('error'):
        verdict.append(f"  Sentiment: <i>Error - {html.escape(sentiment_results['error'])}</i>")
    elif comments_count > 0:
        label = sentiment_results.get('label', 'Neutral')
        pos_count = sentiment_results.get('pos', 0)
        neg_count = sentiment_results.get('neg', 0)
        neu_count = sentiment_results.get('neu', 0)
        emoji = "😊" if "Positive" in label else ("😠" if "Negative" in label else "😐")
        verdict.append(f"  Overall Sentiment (Model): {emoji} <b>{label}</b>")
        verdict.append(f"     <i>(Pos: {pos_count}, Neg: {neg_count}, Neu: {neu_count})</i>")
    else:
         verdict.append(f"  Sentiment: <i>No comments found or analysis failed</i>")

    # --- Add Example Comments Section ---
    if pos_examples:
        verdict.append("\n👍 <b>Example Positive Comments:</b>")
        for i, comment in enumerate(pos_examples):
            verdict.append(f"  {i+1}. <i>{html.escape(comment[:200])}...</i>") # Limit length
    if neg_examples:
        verdict.append("\n👎 <b>Example Negative Comments:</b>")
        for i, comment in enumerate(neg_examples):
            verdict.append(f"  {i+1}. <i>{html.escape(comment[:200])}...</i>") # Limit length
    # --- End Example Comments Section ---

    verdict.append("---")

    verdict.append("🔑 **Keywords:**")
    if keywords:
        verdict.append(f"  <code>{html.escape(', '.join(keywords))}</code>")
    else:
         verdict.append("  <i>Could not extract keywords.</i>")

    if transcript_error:
         verdict.append("---")
         verdict.append(f"<i>Transcript Status: {html.escape(transcript_error)}</i>") # Using italics

    return "\n".join(verdict)

# --- YouTube Orchestrator (Updated) ---
def analyze_youtube_video_orchestrator(video_id):
    """Orchestrates YouTube analysis, including example comments and single line summary."""
    global sentiment_pipeline_yt # Ensure access to the loaded pipeline

    if not video_id: return "<b>Error:</b> No video ID provided."
    logger.info(f"Orchestrating YouTube analysis for ID: {video_id}")

    # 1. Get Details
    video_details = get_video_details(video_id) # Uses global service
    if video_details.get('error') and not video_details.get('title'):
        return f"<b>Error:</b> Could not fetch video details. {html.escape(video_details.get('error','Unknown Error'))}"

    # 2. Get Comments
    comments, comments_error = get_youtube_comments(video_id) # Uses global service

    # 3. Get Transcript Status
    _, transcript_error = get_youtube_transcript(video_id)

    # 4. Analyze Comment Sentiment
    sentiment_results_yt = {'label': 'Neutral', 'pos': 0, 'neg': 0, 'neu': 0, 'error': comments_error}
    comment_sentiments_list = []
    if comments: # Only proceed if comments were found
        if sentiment_pipeline_yt:
             comment_sentiments_list = [get_sentiment_label(c) for c in comments]
             sentiment_results_yt['pos'] = comment_sentiments_list.count("Positive")
             sentiment_results_yt['neg'] = comment_sentiments_list.count("Negative")
             sentiment_results_yt['neu'] = comment_sentiments_list.count("Neutral")
             sentiment_results_yt['label'] = calculate_overall_verdict(comment_sentiments_list)
             sentiment_results_yt['error'] = None
             logger.info(f"YT sentiment analysis complete for {len(comments)} comments.")
        else:
            sentiment_results_yt['error'] = "Sentiment Analyzer not available."
            logger.error(sentiment_results_yt['error'])
    # Keep comments_error if fetching failed initially
    elif comments_error:
         sentiment_results_yt['error'] = comments_error

    # 5. Extract Example Comments (Use raw comments before potential filtering)
    pos_example_comments, neg_example_comments = extract_example_comments(comments, num_examples=2)

    # 6. Extract Keywords
    keywords = extract_youtube_keywords(video_details.get('title'), video_details.get('description'), comments)

    # 7. Get Category Name
    category_name = get_category_name(video_details.get('category_id'))

    # 8. Get Single Line Summary
    single_line_summary = summarize_description(video_details.get('description'), num_sentences=1) # Request only 1 sentence

    # 9. Generate Verdict
    verdict = format_youtube_verdict(
        video_details,
        len(comments) if comments else 0,
        sentiment_results_yt,
        keywords,
        category_name,
        single_line_summary,
        transcript_error,
        pos_examples=pos_example_comments, # Pass examples
        neg_examples=neg_example_comments  # Pass examples
    )

    logger.info(f"YouTube analysis orchestration complete for ID: {video_id}")
    return verdict

print("--- YouTube Orchestrator & Formatter Defined (with Examples & Single Line Summary) ---")'''
# Cell 5: YouTube Orchestrator & Verdict Formatter (Restoring Multi-Sentence Summary)

import html
import logging
# Assumes functions from Cell 4 are defined
# Assumes sentiment_pipeline_yt is loaded globally

# --- Overall Verdict Calculator (Keep as before) ---
def calculate_overall_verdict(sentiments):
    """Collapse per-comment sentiment labels into a single overall verdict.

    Args:
        sentiments: List of label strings. Only the exact labels "Positive"
            and "Negative" are counted; every other entry contributes to the
            total but to neither share.

    Returns:
        One of the fixed verdict strings below, or "Not Enough Data" when
        ``sentiments`` is empty or None.
    """
    if not sentiments:
        return "Not Enough Data"

    n_total = len(sentiments)
    n_pos = sentiments.count("Positive")
    n_neg = sentiments.count("Negative")
    pos_share = n_pos / n_total
    neg_share = n_neg / n_total

    # The rules are checked in priority order; the first match wins.
    if pos_share >= 0.65 and neg_share < 0.15:
        return "Overwhelmingly Positive 👍"
    if pos_share > neg_share + 0.15 and pos_share >= 0.40:
        return "Generally Positive 🙂"
    if neg_share >= 0.65 and pos_share < 0.15:
        return "Overwhelmingly Negative 👎"
    if neg_share > pos_share + 0.15 and neg_share >= 0.40:
        return "Generally Negative 🙁"

    # Close split where opinionated comments make up more than half the total.
    shares_are_close = abs(pos_share - neg_share) < 0.20
    mostly_opinionated = (n_pos + n_neg) / n_total > 0.5
    if shares_are_close and mostly_opinionated:
        return "Mixed Reviews 🤔"

    return "Neutral / Balanced 😐"


# --- YouTube Verdict Formatter (Updated) ---
def _escape_or_default(value, default="N/A"):
    """HTML-escape ``value``; use ``default`` when it is None or an empty string."""
    if value is None or value == "":
        return html.escape(default)
    return html.escape(str(value))


def _comment_excerpt(comment, limit=200):
    """Return an HTML-escaped excerpt of ``comment``, adding '...' only if it was cut."""
    text = str(comment)
    # Truncate BEFORE escaping so an entity such as '&amp;' is never cut in half.
    if len(text) > limit:
        return html.escape(text[:limit]) + "..."
    return html.escape(text)


def format_youtube_verdict(details, comments_count, sentiment_results, keywords, category_name,
                           single_line_snippet, multi_sentence_summary,
                           transcript_error=None, pos_examples=None, neg_examples=None):
    """Build the HTML report for one analysed YouTube video.

    The result is meant to be sent with an HTML parse mode, so every heading
    uses ``<b>`` tags (Markdown asterisks would be shown literally) and every
    dynamic value is HTML-escaped.

    Args:
        details: Dict of video metadata; the keys 'title' and 'channel_title'
            are read. Missing or None values are rendered as "N/A".
        comments_count: Number of comments that were fetched.
        sentiment_results: Dict with the keys 'label', 'pos', 'neg', 'neu'
            and 'error'. A truthy 'error' is shown instead of the counts.
        keywords: Iterable of keywords, or an empty/None value.
        category_name: Category name; None is rendered as "N/A".
        single_line_snippet: One-sentence snippet of the description.
        multi_sentence_summary: Longer summary of the description.
        transcript_error: Optional transcript status message; when truthy it
            is appended as a footer.
        pos_examples: Optional list of example positive comments.
        neg_examples: Optional list of example negative comments.

    Returns:
        The report as a single newline-joined HTML string.
    """
    verdict = [
        "🔎 <b>YouTube Video Analysis</b>",
        f"🎬 <b>Title:</b> {_escape_or_default(details.get('title'))}",
        f"👤 <b>Channel:</b> {_escape_or_default(details.get('channel_title'))}",
        f"📊 <b>Category:</b> {_escape_or_default(category_name)}",
        "---",
        # Single-line snippet
        "📝 <b>Content Snippet (from Description):</b>",
        f"<i>{_escape_or_default(single_line_snippet)}</i>",
        "---",
        # Multi-sentence summary
        "📋 <b>Summary (from Description):</b>",
        f"<i>{_escape_or_default(multi_sentence_summary)}</i>",
        "---",
        "💬 <b>Comment Analysis:</b>",
        f"  Comments Found: {comments_count}",
    ]

    sentiment_error = sentiment_results.get('error')
    if sentiment_error:
        verdict.append(f"  Sentiment: <i>Error - {html.escape(str(sentiment_error))}</i>")
    elif comments_count > 0:
        label = str(sentiment_results.get('label', 'Neutral'))
        pos_count = sentiment_results.get('pos', 0)
        neg_count = sentiment_results.get('neg', 0)
        neu_count = sentiment_results.get('neu', 0)
        emoji = "😊" if "Positive" in label else ("😠" if "Negative" in label else "😐")
        verdict.append(f"  Overall Sentiment (Model): {emoji} <b>{html.escape(label)}</b>")
        verdict.append(f"     <i>(Pos: {pos_count}, Neg: {neg_count}, Neu: {neu_count})</i>")
    else:
        verdict.append("  Sentiment: <i>No comments found or analysis failed</i>")

    # Example comments: each section is emitted only when examples were supplied.
    for heading, examples in (
        ("\n👍 <b>Example Positive Comments:</b>", pos_examples),
        ("\n👎 <b>Example Negative Comments:</b>", neg_examples),
    ):
        if examples:
            verdict.append(heading)
            for position, comment in enumerate(examples, start=1):
                verdict.append(f"  {position}. <i>{_comment_excerpt(comment)}</i>")

    verdict.append("---")

    verdict.append("🔑 <b>Keywords:</b>")
    if keywords:
        joined_keywords = ', '.join(str(keyword) for keyword in keywords)
        verdict.append(f"  <code>{html.escape(joined_keywords)}</code>")
    else:
        verdict.append("  <i>Could not extract keywords.</i>")

    if transcript_error:
        verdict.append("---")
        verdict.append(f"<i>Transcript Status: {html.escape(str(transcript_error))}</i>")

    return "\n".join(verdict)


# --- YouTube Orchestrator (Updated) ---
def analyze_youtube_video_orchestrator(video_id, num_summary_sentences=3):
    """Run every analysis step for one video and return the formatted HTML report.

    Args:
        video_id: YouTube video ID; a falsy value yields an error string.
        num_summary_sentences: Sentence count requested for the longer
            description summary (the snippet always requests one sentence).

    Returns:
        The string produced by ``format_youtube_verdict``, or a short HTML
        error string when the ID is missing or the video details could not
        be fetched.
    """
    if not video_id:
        return "<b>Error:</b> No video ID provided."
    logger.info(f"Orchestrating YouTube analysis for ID: {video_id}")

    # Step 1 - video metadata. Abort only when an error was reported AND no
    # title came back; partial details are still analysed.
    video_details = get_video_details(video_id)
    if video_details.get('error') and not video_details.get('title'):
        escaped_reason = html.escape(video_details.get('error','Unknown Error'))
        return f"<b>Error:</b> Could not fetch video details. {escaped_reason}"

    # Step 2 - comments, plus any error reported while fetching them.
    comments, comments_error = get_youtube_comments(video_id)

    # Step 3 - the transcript itself is discarded; only its status is reported.
    _unused_transcript, transcript_status = get_youtube_transcript(video_id)

    # Step 4 - comment sentiment. The fetch error (if any) is the default
    # 'error' value and is replaced only when comments are actually present.
    sentiment_summary = {'label': 'Neutral', 'pos': 0, 'neg': 0, 'neu': 0, 'error': comments_error}
    if comments and sentiment_pipeline_yt:
        labels = [get_sentiment_label(text) for text in comments]
        sentiment_summary.update(
            pos=labels.count("Positive"),
            neg=labels.count("Negative"),
            neu=labels.count("Neutral"),
            label=calculate_overall_verdict(labels),
            error=None,
        )
        logger.info(f"YT sentiment analysis complete for {len(comments)} comments.")
    elif comments:
        sentiment_summary['error'] = "Sentiment Analyzer not available."
        logger.error(sentiment_summary['error'])

    # Step 5 - example comments for the report.
    positive_samples, negative_samples = extract_example_comments(comments, num_examples=2)

    # Step 6 - keywords from title, description and comments.
    keywords = extract_youtube_keywords(
        video_details.get('title'),
        video_details.get('description'),
        comments,
    )

    # Step 7 - category name from the category id.
    category_name = get_category_name(video_details.get('category_id'))

    # Step 8 - two summaries of the same description: a one-sentence snippet
    # and a longer one whose length the caller controls.
    description = video_details.get('description')
    snippet = summarize_description(description, num_sentences=1)
    summary = summarize_description(description, num_sentences=num_summary_sentences)

    # Step 9 - assemble the report.
    comment_total = len(comments) if comments else 0
    report = format_youtube_verdict(
        video_details,
        comment_total,
        sentiment_summary,
        keywords,
        category_name,
        snippet,
        summary,
        transcript_status,
        pos_examples=positive_samples,
        neg_examples=negative_samples
    )

    logger.info(f"YouTube analysis orchestration complete for ID: {video_id}")
    return report


# Confirmation banner printed when this cell finishes executing, i.e. after
# the Cell 5 functions above have been (re)defined in the kernel.
print("--- YouTube Orchestrator & Formatter Defined (with Snippet, Summary & Examples) ---")

--- YouTube Orchestrator & Formatter Defined (with Snippet, Summary & Examples) ---


In [None]:
# Cell 6: Telegram Bot Code (YouTube Only)

# --- Imports ---
import logging
import asyncio
import html
# Ensure nest_asyncio was imported and applied in Cell 2
from telegram import Update, constants
from telegram.ext import (
    Application,
    CommandHandler,
    MessageHandler,
    filters,
    ContextTypes,
    ApplicationBuilder
)
from telegram.error import BadRequest
# --- Assumes previous cells (1-5) defining functions/globals ran ---
# --- Includes analyze_youtube_video_orchestrator from Cell 5 ---

# --- Bot Command Handlers ---

async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply to /start with a greeting and the usage line for /youtube.

    Args:
        update: Incoming Telegram update.
        context: Handler context (unused here).
    """
    # effective_message also covers edited messages, which CommandHandler
    # accepts by default; update.message would be None for those.
    message = update.effective_message
    if message is None:
        return

    # The first name is user-controlled and goes into an HTML message, so it
    # must be escaped; otherwise a name containing '<' or '&' breaks parsing.
    user = update.effective_user
    user_name = html.escape(user.first_name) if user and user.first_name else "there"

    cmd1_escaped = html.escape("/youtube <YouTube URL or Video Title>")
    await message.reply_html(
        f"Hi {user_name}!\n\n"
        f"I can analyze YouTube videos.\n\n"
        f"Use:\n"
        f"<code>{cmd1_escaped}</code>\n\n"
        f"Use /help for more info."
    )

async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply to /help with a description of the /youtube command.

    Args:
        update: Incoming Telegram update.
        context: Handler context (unused here).
    """
    # effective_message also covers edited messages, which CommandHandler
    # accepts by default; update.message would be None for those.
    message = update.effective_message
    if message is None:
        return

    # The usage line contains literal '<' and '>' and must be escaped for HTML.
    cmd1_escaped = html.escape("/youtube <YouTube URL or Video Title>")
    await message.reply_html(
        f"<b>YouTube Video Analyzer Bot</b>\n\n"
        f"Send the command:\n"
        f"<code>{cmd1_escaped}</code>\n\n"
        f"I will analyze the video's details (Title, Channel, Category), summarize its description, analyze comment sentiment, and extract keywords.\n\n"
        f"<i>Note: Transcript fetching from Colab is often blocked by YouTube.</i>"
    )


async def handle_youtube(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /youtube: resolve the argument to a video ID, analyse it, reply.

    The text after the command is first passed to ``extract_video_id``. If no
    ID is found it is used as a search query and the top search result is
    analysed. The blocking search and analysis calls run in worker threads
    via ``asyncio.to_thread``.

    Args:
        update: Incoming Telegram update.
        context: Handler context; ``context.args`` holds the command arguments.
    """
    # effective_message also covers edited messages, which CommandHandler
    # accepts by default; update.message would be None for those.
    message = update.effective_message
    if message is None:
        return

    user_input = " ".join(context.args) if context.args else None
    if not user_input:
        await message.reply_text("Please provide a YouTube URL or Video Title after /youtube.")
        return

    logger.info(f"Received /youtube command with input: '{user_input}'")
    await message.reply_html("<i>Okay, looking for that YouTube video...</i>")
    await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=constants.ChatAction.TYPING)

    video_id = None
    video_title_search = None

    potential_id = extract_video_id(user_input)
    if potential_id:
        video_id = potential_id
        # The ID is derived from user input, so escape it before embedding it in HTML.
        await message.reply_html(f"Analyzing YouTube video with ID: <code>{html.escape(str(video_id))}</code>...")
    else:
        # No ID found: fall back to a title search, which needs the API service.
        if not youtube_service:
            await message.reply_html("<b>Error:</b> YouTube search unavailable (API Key/Service missing?). Please use a direct URL.")
            return
        await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=constants.ChatAction.TYPING)
        search_results = await asyncio.to_thread(search_youtube_video, user_input, max_results=1)
        if search_results is None:
            # None is reported to the user as a failed search.
            await message.reply_html("<b>Error:</b> YouTube search failed (API issue?).")
            return
        elif not search_results:
            # An empty result is reported as "nothing matched".
            await message.reply_html(f"<b>Sorry:</b> Couldn't find YouTube videos matching '<i>{html.escape(user_input)}</i>'.")
            return
        else:
            top_result = search_results[0]
            video_id = top_result['video_id']
            video_title_search = top_result['title']
            channel = top_result['channel_title']
            await message.reply_html(f"Found: <b>{html.escape(video_title_search)}</b> by <i>{html.escape(channel)}</i>.\nAnalyzing...")

    if video_id:
        await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=constants.ChatAction.TYPING)
        try:
            if not youtube_service:  # Check again before the analysis call
                raise ConnectionError("YouTube API Service not available for analysis.")

            # Run the orchestrator in a worker thread; its result is sent as HTML.
            verdict = await asyncio.to_thread(analyze_youtube_video_orchestrator, video_id)
            await message.reply_html(verdict)

        except ConnectionError as ce:
            logger.error(f"YouTube analysis connection error for ID {video_id}: {ce}")
            await message.reply_html(f"<b>Error:</b> Cannot perform YouTube analysis. {html.escape(str(ce))}")
        except Exception as e:
            # Log the full traceback, but show the user only the exception type.
            logger.error(f"Error during YouTube analysis orchestration for ID {video_id}: {e}", exc_info=True)
            await message.reply_html(f"<b>Error:</b> An unexpected error occurred during YouTube analysis: <i>{html.escape(type(e).__name__)}</i>")
    else:
        await message.reply_html("<b>Error:</b> Could not determine the YouTube video to analyze.")


async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Log errors raised while handling updates.

    Two kinds of ``BadRequest`` are downgraded to a warning and otherwise
    ignored: "message is not modified" (matched case-insensitively) and
    "Can't parse entities" (matched case-sensitively). Every other error is
    logged at error level together with its traceback.
    """
    err = context.error

    if isinstance(err, BadRequest):
        description = str(err)
        if "message is not modified" in description.lower():
            logger.warning(f"Ignoring 'Message is not modified' error: {err}")
            return
        if "Can't parse entities" in description:
            logger.warning(f"Ignoring HTML parse error: {err}")
            return

    logger.error(f"Update {update} caused error {err}", exc_info=err)


# --- Main Function to Run the Bot ---
def run_bot():
    """Build the Telegram application, register the handlers and start polling.

    Reads the module-level names ``TELEGRAM_BOT_TOKEN``, ``youtube_service``,
    ``sentiment_pipeline_yt`` and ``sentence_tokenizer``, which earlier cells
    are expected to define. Returns immediately when the token is missing;
    otherwise the call stays in ``run_polling`` until polling ends.
    """
    if not TELEGRAM_BOT_TOKEN:
        logger.critical("FATAL: Telegram Bot Token not found. Cannot start.")
        return

    # Optional components: warn but still start, so the bot can report the
    # problem to the user at request time.
    if not youtube_service:
        logger.warning("YouTube service not initialized at bot start.")
    if not sentiment_pipeline_yt:
        logger.warning("Sentiment pipeline not loaded at bot start.")
    if not sentence_tokenizer:
        logger.warning("Sentence tokenizer not loaded at bot start.")

    builder = Application.builder().token(TELEGRAM_BOT_TOKEN)
    application = builder.build()

    # Register command handlers (YouTube only)
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(CommandHandler("youtube", handle_youtube))

    application.add_error_handler(error_handler)

    logger.info("Starting YouTube Analyzer bot polling in Colab...")
    print("--- Bot is starting (YouTube Analyzer ONLY) ---")
    print("--- Make sure API Key & Token are in Colab Secrets ---")
    print("--- Ensure Cells 1-6 have run successfully ---")
    print("Send commands to your bot in Telegram.")
    print("Interrupt the kernel (Stop button) to stop the bot.")
    try:
        # allowed_updates must be a list of update-type strings; a bare string
        # such as Update.MESSAGE is not a valid value for this parameter.
        application.run_polling(allowed_updates=[Update.MESSAGE])
    except KeyboardInterrupt:
        print("\nBot stopped manually.")
        logger.info("Bot stopped manually.")
    except Exception as e:
        print(f"\nAn critical error occurred running the bot: {e}")
        logger.critical(f"An critical error occurred running the bot application: {e}", exc_info=True)
    finally:
        print("Bot polling finished.")
        logger.info("Bot polling finished.")


# --- Start the bot ---
# Executing this cell calls run_bot() straight away (the captured output below
# shows the start-up banner). Per the messages run_bot prints, the bot is
# stopped by interrupting the kernel.
if __name__ == "__main__":
    run_bot()

--- Bot is starting (YouTube Analyzer ONLY) ---
--- Make sure API Key & Token are in Colab Secrets ---
--- Ensure Cells 1-6 have run successfully ---
Send commands to your bot in Telegram.
Interrupt the kernel (Stop button) to stop the bot.
