In [1]:
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType

In [4]:
def parse_url(url: str) -> str:
    """
    Extract video ID from URL.

    Args: 
        url(str): youtube video url

    Returns:
        Youtube video's video ID
    
    """
    if "=" in url:
        return url.split("=")[-1]

    return url


def get_text_from_video(url: str) -> str:
    """
    Get transcript text from YouTube video.

    Args:
        url(str): youtube video url

    Returns:
        Youtube video's transcripted text
    
    """
    video_id = parse_url(url)
    
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join([entry["text"] for entry in transcript])
        transcript_text = transcript_text.replace("\n", " ").replace("'", "")
        return transcript_text
    except Exception as e:
        return f"Failed to retrieve transcript: {str(e)}"

In [6]:
dimon = get_text_from_video("https://www.youtube.com/watch?v=4-PXch3ZR0Y")

In [7]:
with open('dimon.txt',"w") as f:
    f.write(dimon)

In [11]:
len(dimon)

4282

In [3]:
#load text from file
with open("From Surviving to Thriving: My Midlife Reset.txt", "r") as f:
    transcript = f.read()

In [4]:
def create_chunks(transcript_text: str) -> list:
    """
    Split transcript text into processable chunks.

    Args:
        transcript_text (str): Youtube video's transcripted text

    Returns:
        processable chunks
    
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_text(transcript_text)
    return chunks

In [5]:
chunks = create_chunks(transcript)

In [6]:
chunks

["Hello YouTube friends and as you will notice this video is looking a little bit different for me and I'm about to embark on something new and if I'm being honest I'm actually a little bit scared about it but let me explain. Life seems to have this way of throwing curveballs, sometimes gentle ones, sometimes the kind that kind of just knock the wind right out of you. Since turning 50 I've faced some of the hardest moments of my life. Going through a divorce, navigating life on my own as a single parent, selling the home I thought I'd grow old in, buying and renovating a new one for me and my two sons and basically doing everything pretty much on my own. Now I'm not saying this is a woe is me, I'm just trying to set the scene. Oh and somewhere in the middle of all of that I did something I swore I wouldn't do, I adopted a puppy and let me tell you he's been a handful but I wouldn't trade him for the world. But recently I felt something shift, maybe it was all the unexpected twists and"

In [7]:
chunks[0]

"Hello YouTube friends and as you will notice this video is looking a little bit different for me and I'm about to embark on something new and if I'm being honest I'm actually a little bit scared about it but let me explain. Life seems to have this way of throwing curveballs, sometimes gentle ones, sometimes the kind that kind of just knock the wind right out of you. Since turning 50 I've faced some of the hardest moments of my life. Going through a divorce, navigating life on my own as a single parent, selling the home I thought I'd grow old in, buying and renovating a new one for me and my two sons and basically doing everything pretty much on my own. Now I'm not saying this is a woe is me, I'm just trying to set the scene. Oh and somewhere in the middle of all of that I did something I swore I wouldn't do, I adopted a puppy and let me tell you he's been a handful but I wouldn't trade him for the world. But recently I felt something shift, maybe it was all the unexpected twists and"

In [8]:
def get_summary(chunks: list) -> str:
    """
    Summarize text chunks and create a single summary.
    
    Args:
        chunks (list): processable chunks of transcriptted text

    Returns:
        A single summary for youtube video
    """
    llm = OllamaLLM(model="llama3")

    template = """Text: {text}
    Goal: Summarize given text.
    Answer: """

    prompt = ChatPromptTemplate.from_template(template)
    chain = prompt | llm

    summaries = [chain.invoke({"text": chunk}) for chunk in chunks]
    
    combined_summary = " ".join(summaries)
    
    # Create final summary
    final_summary_prompt = ChatPromptTemplate.from_template(
        "Multiple summaries: {summaries}\nGoal: Create a coherent single summary.\nAnswer: "
    )
    final_summary_chain = final_summary_prompt | llm
    final_summary = final_summary_chain.invoke({"summaries": combined_summary})
    
    return final_summary

In [9]:
ds_summ = get_summary(chunks)

In [10]:
ds_summ

"Here's a concise summary:\n\nA 50-year-old individual embarks on a new journey, transitioning from mere survival to thriving. After facing various life challenges, including divorce, single parenthood, and significant life changes, they've realized the importance of living intentionally. To share their experiences and connect with others who may be facing similar struggles, they're starting a vlog that documents their self-discovery, adventure, and growth. The speaker aims to inspire others by showcasing everyday moments, sharing lessons learned from parenting, and exploring healthy living practices. By doing so, they hope to demonstrate that even small moments can hold beauty and significance, and that it's never too late to start making positive changes in life."

In [11]:
with open("from_surviving.txt", "w") as f:
    f.write(ds_summ)

In [12]:
def extract_topics(chunks:list) -> list:
    """
    Extract main topics from text chunks.
    
    Args:
        chunks (list): processable chunks of transcriptted text
    
    Returns:
        Main topic list
    """
    llm = OllamaLLM(model="llama3")

    template = """Text: {text}
    Goal: Extract main topics from the given text.
    Answer: List the key topics separated by commas."""

    prompt = ChatPromptTemplate.from_template(template)
    chain = prompt | llm

    topics_list = [chain.invoke({"text": chunk}) for chunk in chunks]

    # Combine topics from different chunks
    all_topics = set()
    for topics in topics_list:
        # Split comma-separated topics and clean whitespace
        topic_items = [t.strip() for t in topics.split(",")]
        all_topics.update(topic_items)

    # Remove empty elements
    all_topics = {topic for topic in all_topics if topic}
    
    return list(all_topics)

In [13]:
tops = extract_topics(chunks)

In [14]:
len(tops)

63

In [15]:
def extract_quotes(chunks:list) -> list:
    """
    Extract important quotes from text chunks.
    
    Args:
        chunks (list): processable chunks of transcriptted text
    
    Returns:
        important quotes list
    """
    llm = OllamaLLM(model="llama3")
    template = """Text: {text}
    Goal: Extract the most important quote from this text.
    Answer: Provide the quote as plain text."""

    prompt = ChatPromptTemplate.from_template(template)
    chain = prompt | llm

    quotes = [chain.invoke({"text": chunk}) for chunk in chunks]
    
    # Filter duplicate or empty quotes
    unique_quotes = []
    seen_quotes = set()
    
    for quote in quotes:
        # Normalize quote (clean whitespace and compare lowercase)
        normalized = quote.strip().lower()
        if normalized and normalized not in seen_quotes:
            unique_quotes.append(quote.strip())
            seen_quotes.add(normalized)
    
    return unique_quotes

In [16]:
quotes = extract_quotes(chunks)

In [17]:
quotes

['"I\'m not saying this is a woe is me, I\'m just trying to set the scene."',
 '"In 2025 I\'m making a conscious decision to not just survive but to thrive."',
 '"I want to look back on this time in my life and feel like I gave it everything I had, whether that be emotionally, physically, spiritually, just every way."',
 '"So many women reach this point in life and wonder what now..."\n\n(This is the most important quote mentioned in the text, which highlights the theme of the vlog about navigating different stages of life and finding a sense of purpose.)',
 'Here is the most important quote from the text:\n\n"Maybe the real magic is in these everyday moments, the little things we overlook, the lessons hidden in the ordinary..."\n\nLet me know if you need anything else!',
 '"Not that I\'ve ever really played small, but I have spent time kind of waiting for the perfect time to..."',
 '"So let\'s start thriving together."']