## Aim

Goal: Convert YouTube video podcasts into concise audio summaries.

Plan : 

- Task 1 : Retrieve Transcripts: Download transcripts from YouTube video podcasts
- Task 2 : Summarise Transcripts: Generate concise summaries from downloaded transcripts
- Task 3 : Convert Summaries to Audio: Use text-to-speech to create audio summaries 

## Tasks

=> TODO: Add token usage reporting to `wrapper` for both cases (with and without a `response_model`).

In [1]:
# imports

import enum
import instructor
import os
import re
from abc import ABC, abstractmethod
from dotenv import load_dotenv
from openai import OpenAI
from pathlib import Path
from pprint import pprint as pp
from pydantic import BaseModel, Field, StringConstraints, conlist, field_validator
from typing import Any, Iterable, List, Optional, Union
from typing_extensions import Annotated
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter, TextFormatter

In [2]:
# Load environment variables (including the OpenAI API key) from a local .env file.
# NOTE(review): the path is an absolute, machine-specific Windows path.

dotenv_path = Path(r"C:\Storage\python_projects\ashvin\.env")
load_dotenv(dotenv_path=dotenv_path)
# Read back for reference; OPENAI_API_KEY is not passed explicitly to any client
# in the visible cells — presumably OpenAI() picks the key up from the environment.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Main constants

GPT_MODEL = "gpt-4o" # model name used for the chat completion calls below

# Instantiate clients:
#   client       - instructor-patched client (TOOLS mode) used by wrapper()
#   audio_client - plain OpenAI client; not referenced in the visible cells,
#                  presumably reserved for the text-to-speech step
client = instructor.from_openai(OpenAI(), mode=instructor.Mode.TOOLS)
audio_client = OpenAI()

In [3]:
# AbstractTool and Tool classes


class AbstractTool(ABC):
    """
    Abstract base class for tools.

    Attributes:
        name (str): The name of the tool.
        description (str): A brief description of what the tool does.
        system_prompt (str | None): The system prompt used by the tool, if applicable.
    """

    name: str
    description: str
    system_prompt: str | None = None

    @abstractmethod
    def run(self, input_data: Any) -> Any:
        pass

class Tool(BaseModel, AbstractTool):
    """
    Concrete base tool: a Pydantic model that also fulfils the AbstractTool interface.

    Pydantic supplies validation of the declared fields, while AbstractTool
    requires that a ``run`` method exists. Subclasses normally override
    ``run`` and give the fields default values.

    Attributes:
        name (str): The name of the tool.
        description (str): A brief description of what the tool does.
        system_prompt (str | None): The system prompt used by the tool, if applicable.
    """

    name: str
    description: str
    system_prompt: str | None = None

    def run(self, input_data: Any) -> Any:
        """Default implementation: return a string reporting the received input."""
        report = f"Running with {input_data}"
        return report




In [4]:
# wrapper

def wrapper(
    system_prompt: str | None = None,
    user_prompt: Union[str, list] | None = None,
    response_model: type[BaseModel] | None = None,
    max_retries: int = 3,
    additional_messages: Union[str, List[str]] | None = None,
):
    """
    Build a chat message list and request an LLM completion.

    Messages are assembled in this order: the system prompt (if any), every
    entry of ``additional_messages`` and then every entry of ``user_prompt``.
    All entries other than the system prompt are sent with the "user" role.

    Args:
        system_prompt: Optional system message.
        user_prompt: A single user message or a list of user messages.
        response_model: Pydantic model class passed to the client as
            ``response_model``; None requests a plain-text answer.
        max_retries: Forwarded to the client as ``max_retries``.
        additional_messages: A single message or a list of messages that are
            placed before ``user_prompt``.

    Returns:
        When ``response_model`` is None, the stripped text of the first choice
        (an empty string if the reply carries no text content). Otherwise the
        object returned by the client for ``response_model``.

    Raises:
        ValueError: If no system prompt, user prompt or additional message is
            supplied, because there would be nothing to send.
    """

    def _as_list(value):
        # None -> no messages, list -> its items, anything else -> one message
        if value is None:
            return []
        return value if isinstance(value, list) else [value]

    messages = []
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})

    messages.extend(
        {"role": "user", "content": content}
        for content in _as_list(additional_messages) + _as_list(user_prompt)
    )

    # Fail fast instead of sending an empty conversation to the API.
    if not messages:
        raise ValueError(
            "wrapper() needs at least one of system_prompt, user_prompt or additional_messages."
        )

    completion = client.chat.completions.create(
        model=GPT_MODEL,
        response_model=response_model,
        max_retries=max_retries,
        messages=messages,
    )

    if response_model is not None:
        return completion

    # Plain-text mode: the message content may be missing, so guard before stripping.
    content = completion.choices[0].message.content
    return content.strip() if content is not None else ""

In [None]:
class SimpleRouterAgent:
    """
    Simple router agent that selects and runs the appropriate tool based on the message.

    Tools are indexed by their ``name``; the LLM is shown each tool's name and
    description and asked to answer with the name of the tool to use.
    """

    def __init__(self, tools: List[AbstractTool]):
        """
        Args:
            tools (List[AbstractTool]): The tools the agent can choose from.
                If two tools share a name, the later one replaces the earlier.
        """
        self.tools = {tool.name: tool for tool in tools}
        self.system_prompt: str | None = "You are an intelligent agent that selects the best tool for the given task."

    def _select_tool(self, message: str) -> AbstractTool:
        """
        Uses the LLM to select the appropriate tool based on the provided message.

        Args:
            message (str): The message describing the task to be performed.

        Returns:
            AbstractTool: The selected tool for performing the task.

        Raises:
            ValueError: If no tools are registered or the LLM answer does not
                correspond to any registered tool name.
        """
        if not self.tools:
            raise ValueError("No matching tool found for: (no tools registered)")

        # The LLM must see the tool names, not only the descriptions, because
        # its answer is used as the lookup key into self.tools.
        tool_catalogue = [
            f"Tool name: {tool.name}\nDescription: {tool.description}"
            for tool in self.tools.values()
        ]
        user_prompt = (
            f"Which tool should be used for the following message: '{message}'? "
            "Reply with the exact tool name only."
        )

        # With response_model=None, wrapper() returns the reply text as a str.
        reply = wrapper(
            system_prompt=self.system_prompt,
            user_prompt=user_prompt,
            additional_messages=tool_catalogue,
            response_model=None,
            max_retries=3
        )

        # Tolerate surrounding whitespace/quotes that LLMs tend to add.
        tool_name = reply.strip().strip("'\"`")

        selected_tool = self.tools.get(tool_name)
        if selected_tool is None:
            # Fall back to a case-insensitive comparison of the names.
            wanted = tool_name.casefold()
            selected_tool = next(
                (tool for name, tool in self.tools.items() if name.casefold() == wanted),
                None,
            )
        if selected_tool is None:
            raise ValueError(f"No matching tool found for: {tool_name}")

        return selected_tool

    def run(self, message: str) -> str:
        """
        Selects and runs the appropriate tool based on the message.

        Args:
            message (str): The message describing the task to be performed.

        Returns:
            str: The result of running the tool.
        """
        tool = self._select_tool(message)
        return tool.run(message)

In [5]:
class Transcript(Tool):
    """
    A tool for extracting and formatting YouTube video transcripts.

    This class inherits from Tool and overrides the run method to extract
    the YouTube video ID from a given URL, retrieve the transcript, and
    format it as a JSON string.
    """

    name: str = "YouTube Transcript Extractor"
    description: str = "Extracts the YouTube video ID from a URL, retrieves the transcript, and formats it as JSON."
    system_prompt: str | None = None

    def run(self, url: str) -> Optional[str]:
        """
        Extract the YouTube video ID from a given URL, retrieve the transcript,
        and format it as a JSON string.

        Recognised URL shapes: ``youtube.com/watch?...v=<id>`` (``v`` may be any
        query parameter), ``youtu.be/<id>`` and the ``/embed/``, ``/shorts/``,
        ``/live/`` and ``/v/`` paths on youtube.com or youtube-nocookie.com.

        Parameters:
            url (str): The YouTube URL (or text containing it) from which to
                extract the video ID.

        Returns:
            Optional[str]: The JSON formatted transcript if the video ID is valid and the
                           transcript is available, otherwise None.
        """
        # Regular expression to find the 11-character video ID in a YouTube URL
        pattern = (
            r'(?:youtube(?:-nocookie)?\.com/'
            r'(?:watch\?(?:[^#\s]*?&)?v=|embed/|shorts/|live/|v/)'
            r'|youtu\.be/)'
            r'([a-zA-Z0-9_-]{11})'
        )
        # A non-string input (e.g. None) cannot contain a video ID.
        match = re.search(pattern, url) if isinstance(url, str) else None
        if not match:
            print("No valid YouTube video ID found in the provided URL.")
            return None

        video_id = match.group(1)

        try:
            # Retrieve the transcript
            # NOTE(review): get_transcript may be unavailable in newer
            # youtube-transcript-api releases — confirm against the installed version.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)

            # Format the transcript as JSON
            return JSONFormatter().format_transcript(transcript)
        except Exception as e:
            # Deliberately broad: any retrieval/formatting failure is reported
            # and mapped to None, as stated in the Returns section above.
            print(f"Error retrieving or formatting transcript: {e}")
            return None



In [12]:
# summary tool

class Summary(Tool):
    """
    A tool for summarizing a given text.

    This class inherits from Tool and provides functionality to
    summarize input text.
    """

    # tool properties
    name: str = "Text Summarizer"
    description: str = "Summarizes the input text into a concise version."
    system_prompt: str | None = """
    You are an expert podcast summariser, condensing information into digestible summaries with appropriate signposting.
    Provide a concise, clear, and understandable summary of the given text.
    Include upfront a one-sentence TL;DR.
    """

    # output property
    # The None default lets the tool be created with Summary() before any
    # summary exists; the LLM fills this field when the class is used as the
    # response model in run().
    summary: str = Field(None, description="The summarised version of the input text.")

    def run(self, text: str) -> Any:
        """
        Summarize the input text.

        Parameters:
            text (str): The input text to summarize.

        Returns:
            Any: The object returned by wrapper() for response_model=Summary;
                 the summarised text is available on its ``summary`` attribute.

        Raises:
            ValueError: If ``text`` is None, empty or only whitespace, since
                        there is nothing to summarise.
        """
        # Without this guard only the system prompt would be sent and the
        # model would be asked to summarise nothing.
        if not text or (isinstance(text, str) and not text.strip()):
            raise ValueError("Cannot summarise empty text.")

        completion = wrapper(
            system_prompt=self.system_prompt,
            user_prompt=text,
            response_model=Summary,
            max_retries=3
        )

        return completion

In [7]:
# text to speech








In [8]:
# YouTube video whose transcript is summarised in the cells below
url = "https://www.youtube.com/watch?v=Mn0nBjqgBcs&t"

In [9]:
# Fetch the transcript for `url`. Per Transcript.run, the result is a JSON
# string, or None when no video ID is found or retrieval fails.
transcript_tool = Transcript()
transcript = transcript_tool.run(url=url)

In [16]:
# Summarise the transcript with the LLM; the summarised text is read from
# the result's `summary` attribute in the next cell.
summary_tool = Summary()
summary = summary_tool.run(transcript)

In [17]:
# Display the generated summary text (recorded cell output follows).
summary.summary

"The review discusses the board game adaptation of 'Slay the Spire', a video game known for its exceptional deck-building mechanics. Initially skeptical about the adaptation, the reviewer was impressed by how well it translates to the board game format, maintaining core game elements and cooperative features while simplifying for physical play. Despite its high quality and new cooperative mode, the reviewer remains unconvinced of its necessity, noting that the digital version offers a similar experience for less money and less admin work. The game is lauded for its faithful adaptation and fun gameplay but is seen as potentially redundant for those already enjoying the digital version."

In [18]:
# Convert the summary text to speech and print what the tool returns
# (the recorded output below is a path to an .mp3 file).
# NOTE(review): `Audio` is not defined in any cell visible here — confirm it is defined elsewhere.
audio_tool = Audio()
audio = audio_tool.run(text=summary.summary)
print(audio)

c:\Storage\python_projects\ashvin\sandbox\pydantic\audio.mp3


In [53]:
from __future__ import annotations


# Candidate response shape for the router experiment: a person.
# `age` is typed as a string; in the recorded output further down, the model
# filled it with 'unknown' when no age was given.
class User(BaseModel):
    name: str
    age: str

# Candidate response shape for the router experiment: a country with its capital.
class City(BaseModel):
    country: str
    capital: str

# Candidate response shape for the router experiment: the seven taxonomic ranks.
# The capitalised field names are the keys of the model's schema, so renaming
# them would change what the LLM is asked to produce.
class Taxonomy(BaseModel):
    Kingdom: str
    Phylum: str
    Class: str
    Order: str
    Family: str
    Genus: str
    Species: str

# Response model for the router experiment. Despite its name, `tool_list`
# holds a single value: exactly one of User, City or Taxonomy.
class Router(BaseModel):
    tool_list: Union[User, City, Taxonomy]

# Separate instructor-patched client for this experiment; it is built with the
# same arguments as `client` in the setup cell.
new_client = instructor.from_openai(
    OpenAI(), mode=instructor.Mode.TOOLS
)



# Prompts for the experiment: the system prompt asks the model to pick one
# tool, the user prompt is the free-text input to be routed.
test_system_prompt = "You are an intelligent router. Select and return the single right tool for the user."
test_user_prompt = "I'm Ashvin"
test_messages = [
    {"role": "system", "content": test_system_prompt},
    {"role": "user", "content": test_user_prompt},
]

# Request a completion with Router as the response model (the recorded result
# is shown in the next cell).
completion = new_client.chat.completions.create(
    model=GPT_MODEL,
    response_model=Router, 
    messages=test_messages
)


In [54]:
# Display the Router result (recorded cell output follows).
completion

Router(tool_list=User(name='Ashvin', age='unknown'))