<a href="https://colab.research.google.com/github/manjunatharao26/genai-capstone/blob/main/Copy_of_POCmanju.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
!pip install  -Uqq "google-genai==1.7.0"
!pip install -Uqq newspaper3k lxml_html_clean
!pip install -Uqq youtube-transcript-api newspaper3k

In [10]:
from google import genai
from google.genai import types


genai.__version__

'1.7.0'

In [11]:
from google.colab import userdata
# Look up the Gemini API key by the name 'GOOGLE_API_KEY' through Colab's
# `userdata` helper so the key itself is never written into the notebook.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

In [14]:
# NOTE(review): this import rebinds the name `genai`. An earlier cell bound it
# with `from google import genai`; from here on `genai` refers to the
# `google.generativeai` module, which is what `call_llm` below relies on.
import google.generativeai as genai
import os


# Hand the API key read in the previous cell to the google.generativeai module.
genai.configure(api_key=GOOGLE_API_KEY)

# Gemini LLM call
def call_llm(prompt: str) -> str:
    """Send ``prompt`` to the 'gemini-2.0-flash' model and return its reply text.

    Any exception raised while building the model, generating content, or
    reading the reply is caught and returned as a bracketed
    ``[Gemini API Error: ...]`` string rather than propagated to the caller.
    """
    try:
        # Everything stays inside the try so failures at any step are reported
        # the same way.
        return genai.GenerativeModel('gemini-2.0-flash').generate_content(prompt).text
    except Exception as err:
        return f"[Gemini API Error: {err}]"


In [18]:
# News & YouTube Summarizer - Starter Notebook

import re
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from newspaper import Article
from IPython.display import display, Markdown




# Helper to check if URL is YouTube
def is_youtube_url(url: str) -> bool:
    """Return True when ``url`` contains a YouTube watch-page or short-link marker."""
    youtube_markers = ('youtube.com/watch', 'youtu.be/')
    return any(marker in url for marker in youtube_markers)

# Extract video ID from YouTube URL
def extract_video_id(url: str) -> "str | None":
    """Return the video ID embedded in a YouTube URL, or None if none is found.

    Two URL shapes are recognised:

    * short links, ``https://youtu.be/<id>`` -- the ID is the path segment
      right after the host, so trailing query strings such as ``?t=42`` are
      not included in the result;
    * watch pages, ``...?v=<id>`` -- ``v`` must be a query parameter of its
      own (preceded by ``?`` or ``&``), so parameters whose names merely end
      in ``v`` (e.g. ``rev=1``) are not mistaken for the video ID.

    Args:
        url: The URL to inspect.

    Returns:
        The ID (letters, digits, ``_`` and ``-``), or None when neither shape
        matches.
    """
    id_patterns = (
        r"youtu\.be/([\w-]+)",   # short link: ID follows the host
        r"[?&]v=([\w-]+)",       # watch page: ID is the `v` query parameter
    )
    for pattern in id_patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def get_youtube_transcript(video_id: str) -> str:
    """Fetch the transcript of a YouTube video and return it as one string.

    The text of every transcript entry is joined with single spaces.

    Args:
        video_id: The YouTube video ID to fetch the transcript for.

    Returns:
        The joined transcript text. If anything goes wrong, a bracketed
        message starting with ``[Transcript unavailable`` is returned instead
        of raising; it names the actual exception so the cause is not
        misreported. The ``Transcript unavailable`` phrase is kept because the
        summarize helpers in this file look for it to detect a missing
        transcript.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry['text'] for entry in transcript)
    except Exception as e:
        # Deliberately best-effort: the notebook should keep running, but the
        # real failure (blocked IP, disabled captions, API change, ...) is
        # surfaced instead of always blaming IP restrictions.
        return (
            f"[Transcript unavailable ({type(e).__name__}: {e}). "
            "Please provide transcript manually.]"
        )



# Get article text from news URL
def get_news_article_text(url: str) -> str:
    """Download and parse the article at ``url`` and return its body text.

    Any exception raised along the way is caught and returned as a bracketed
    ``[Error fetching article: ...]`` string instead of being raised.
    """
    try:
        page = Article(url)
        # download() must run before parse(); the text is read afterwards.
        page.download()
        page.parse()
        body_text = page.text
        return body_text
    except Exception as err:
        return f"[Error fetching article: {err}]"




# --- Manual Transcript Fallback ---
def summarize_manual_transcript(transcript: str) -> str:
    """Ask the LLM for a TL;DR plus key takeaways of a YouTube transcript.

    Returns the fixed ``[No transcript available to summarize.]`` marker,
    without calling the LLM, when ``transcript`` is empty/None or contains the
    phrase "Transcript unavailable". Otherwise returns whatever ``call_llm``
    returns for the assembled prompt.
    """
    usable = bool(transcript) and "Transcript unavailable" not in transcript
    if not usable:
        return "[No transcript available to summarize.]"

    # Instructions first, then a blank line, then the transcript itself.
    request = "".join([
        "You're a helpful assistant. Summarize the following transcript from a YouTube video. ",
        "Provide a TL;DR followed by 3-5 bullet points for key takeaways.\n\n",
        f"{transcript}",
    ])
    return call_llm(request)

# --- Summarize fetched content (YouTube transcript or article text) ---
def summarize_content(transcript: str) -> str:
    """Ask the LLM for a TL;DR plus key takeaways of a transcript or article.

    Inputs that are not real content are short-circuited so they are never
    sent to the LLM as if they were text to summarize:

    * empty, None, or whitespace-only input, or input containing the phrase
      "Transcript unavailable" -> ``[No transcript available to summarize.]``
    * an ``[Error fetching article: ...]`` marker (the string
      ``get_news_article_text`` returns on failure) -> returned unchanged
      (apart from surrounding whitespace) so the caller sees the real error.

    Args:
        transcript: The transcript or article text to summarize.

    Returns:
        The LLM's summary, or one of the bracketed markers described above.
    """
    text = (transcript or "").strip()
    if not text or "Transcript unavailable" in text:
        return "[No transcript available to summarize.]"
    if text.startswith("[Error fetching article:"):
        # Summarizing an error message would yield a confident-looking but
        # meaningless summary; pass the error through instead.
        return text

    prompt = (
        "You're a helpful assistant. Summarize the following transcript from a YouTube video/an article data. "
        "Provide a TL;DR followed by 3-5 bullet points for key takeaways.\n\n"
        f"{transcript}"
    )
    return call_llm(prompt)

def parse_srt_file(file_path: str) -> str:
    """Read an SRT subtitle file and return only its spoken text.

    Cue index lines and ``HH:MM:SS,mmm --> ...`` timecode lines are removed,
    runs of blank lines are collapsed, and the result is stripped, leaving the
    subtitle text lines separated by single newlines.

    Args:
        file_path: Path to the ``.srt`` file (read as UTF-8).

    Returns:
        The cleaned subtitle text.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 and also drops a leading byte-order
    # mark. With plain 'utf-8' the BOM would stay as '\ufeff' at the start of
    # the returned text, because str.strip() does not treat it as whitespace.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        srt_content = f.read()

    # Remove SRT index and timecodes
    cleaned = re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> .*?\n", "", srt_content)
    # Remove empty lines
    cleaned = re.sub(r"\n{2,}", "\n", cleaned)
    return cleaned.strip()

# Main function to handle URL
def summarize_url(url: str):
    """Fetch the content behind ``url``, summarize it, and display the result.

    YouTube URLs are resolved to a video ID and their transcript is fetched;
    every other URL is treated as a news article. The fetched text is passed
    to ``summarize_content`` and the summary is rendered as Markdown under a
    "Summary" heading. Returns None; if a YouTube URL yields no video ID, a
    message is printed and nothing is summarized.
    """
    if not is_youtube_url(url):
        print("Fetching news article text...")
        source_text = get_news_article_text(url)
    else:
        vid = extract_video_id(url)
        if not vid:
            # Nothing to fetch without an ID; bail out before any network call.
            print("Invalid YouTube URL.")
            return
        print("Fetching YouTube transcript...")
        source_text = get_youtube_transcript(vid)

    print("Generating summary with LLM...")
    rendered = Markdown(f"## ✨ Summary\n{summarize_content(source_text)}")
    display(rendered)

# Example usage (replace with any YouTube or news URL)
# summarize_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
# summarize_url("https://www.bbc.com/news/world-asia-68944499")


In [22]:
# Driver cell: summarize a single URL.
# To be prompted for the URL interactively, uncomment the input() line below
# and remove the hard-coded assignment that follows it.

#url = input("Enter the full path to your YT video/news article: ")
url="https://www.youtube.com/watch?v=TQd3eGjaTOo"
summarize_url(url)

Fetching YouTube transcript...
Generating summary with LLM...


## ✨ Summary
Here's a summary of the transcript:

**TL;DR:** Coding transcends technical skills, offering profound mental transformation. It fosters a growth mindset, clarity, confidence, and disciplined thinking, liberating individuals from limiting circumstances and empowering them to address global challenges.

**Key Takeaways:**

*   **Coding's transformative power:** It's more than just a job skill, it's an activity that can drastically reshape one's mindset and perceptions, fostering confidence and a growth-oriented approach.
*   **Neuroplasticity and coding:** Learning and debugging code actively restructures the brain, creating new neural pathways that enhance cognitive flexibility and problem-solving abilities in various aspects of life.
*   **Discipline and Logic:** Coding necessitates precision and adherence to rules, cultivating a meticulous and orderly mindset applicable to real-world situations beyond the digital realm.
*   **Breaking cycles:** Coding can break cycles of poverty and mediocrity by empowering people to think differently and see new possibilities.
*   **Beyond Personal Growth:** By cultivating problem-solving skills, it positions individuals to tackle significant global and healthcare challenges, contributing to societal improvement.


In [23]:
#transcript = parse_srt_file("/content/sample_data/How coding can change your life_TEDx.srt")
#summary = summarize_manual_transcript(transcript)
#display(Markdown(summary))