In [1]:
# goal
goal = "Convert a long YouTube transcript into structured notes and diagrams for a local material-mkdocs site."

# tasks
task_1 = "Download the complete transcript from the YouTube video."
task_2 = "Collect video metadata (title, author, date) from YouTube."
task_3 = "Identify key topics, keywords and questions from the transcript."
task_4 = "Draft an outline with clear sections and subsections based on the concatenated content from the previous tasks."
task_5 = "Populate each section and subsection in the outline with detailed content."
task_6 = "Create relevant mermaid diagrams and integrate them into the content."
task_7 = "Assemble each section into individual Markdown files, including relevant diagrams and metadata."
task_8 = "Compile all Markdown files into the material-mkdocs site structure."
task_9 = "Launch the local mkdocs site to verify the structure and content."

In [2]:
# urls

url_1 = "https://www.youtube.com/watch?v=hvAPnpSfSGo" # langraph

## Things I want to try

- [X] cost decorator
- [ ] try different models
- [X] improve my video_id function
- [ ] like Josh's idea of replace the website domain and get a custom site
- [ ] try Surya
- [X] make url a const
- [ ] router run everything in sequence


In [3]:
# imports

import enum
import instructor
import json
import os
import re
import uuid
from abc import ABC, abstractmethod
from datetime import datetime
from dotenv import load_dotenv
from googleapiclient.discovery import build
from openai import OpenAI
from pathlib import Path
from pprint import pprint as pp
from pydantic import BaseModel, Field, StringConstraints, UUID4, conlist, constr, field_validator
import tiktoken
import time
from typing import Any, Callable, ClassVar, Dict, Iterable, List, Optional, Type, Union
from typing_extensions import Annotated, Literal
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter, TextFormatter

In [4]:
# load API key

dotenv_path = Path(r"C:\Storage\python_projects\ashvin\.env")
load_dotenv(dotenv_path=dotenv_path)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

# main constants

GPT_MODEL = "gpt-4o" # points to latest GPT model
URL = url_1


#instantiate client
client = instructor.from_openai(OpenAI(), mode=instructor.Mode.TOOLS)
audio_client = OpenAI()

In [5]:
# cost decorator

def cost(function: Callable) -> Callable:
    """
    Decorator to calculate and add the cost of token usage based on predefined model pricing.
    
    This decorator enriches the output of the decorated function by calculating the cost
    based on the number of prompt and completion tokens used. The costs are computed
    according to a hardcoded pricing table for supported models.

    Args:
        function (Callable): The function to be decorated, expected to return an instance
                             of a model with token counts included.

    Returns:
        Callable: A decorator that enhances the function's output with cost calculations.
    """

    # Define the pricing table within the decorator
    pricing = {
        'gpt-4o': {
            'input': 5.00 / 1000000,  # $5.00 per 1M tokens
            'output': 15.00 / 1000000  # $15.00 per 1M tokens
        }
    }

    def decorated_function(*args, **kwargs) -> Dict[str, Any]:
        # Call the original function and capture its output
        result = function(*args, **kwargs)
        
        # Extract token counts using dot notation
        prompt_tokens = result.token_counts.prompt_tokens
        completion_tokens = result.token_counts.completion_tokens

        # Determine the model used; default to 'gpt-4o' for now
        model = 'gpt-4o'  # This could be dynamically determined based on args/kwargs if needed

        # Calculate costs based on the price table for the specific model
        input_cost = prompt_tokens * pricing[model]['input']
        output_cost = completion_tokens * pricing[model]['output']
        total_cost = input_cost + output_cost
        
        # Format and assign cost details
        result.cost_details = {
            'input_cost': f"${input_cost:.6f}",
            'output_cost': f"${output_cost:.6f}",
            'total_cost': f"${total_cost:.6f}"
        }

        # Optionally print cost details for transparency
        print(f"Cost Details: {result.cost_details}")
        return result

    return decorated_function

In [18]:
# v2 cost decorator

class CostDetails(BaseModel):
    input_cost: float
    output_cost: float
    total_cost: float

    def formatted_input_cost(self):
        return f"${self.input_cost:.6f}"

    def formatted_output_cost(self):
        return f"${self.output_cost:.6f}"

    def formatted_total_cost(self):
        return f"${self.total_cost:.6f}"

def cost(function: Callable) -> Callable:
    """
    Decorator to calculate and add the cost of token usage based on predefined model pricing.
    
    This decorator enriches the output of the decorated function by calculating the cost
    based on the number of prompt and completion tokens used. The costs are computed
    according to a hardcoded pricing table for supported models.

    Args:
        function (Callable): The function to be decorated, expected to return an instance
                             of a model with token counts included.

    Returns:
        Callable: A decorator that enhances the function's output with cost calculations.
    """

    # Define the pricing table within the decorator
    pricing = {
        'gpt-4o': {
            'input': 5.00 / 1000000,  # $5.00 per 1M tokens
            'output': 15.00 / 1000000  # $15.00 per 1M tokens
        }
    }

    def decorated_function(*args, **kwargs) -> Any:
        # Call the original function and capture its output
        result = function(*args, **kwargs)
        
        # Extract token counts using dot notation
        prompt_tokens = result.token_counts.prompt_tokens
        completion_tokens = result.token_counts.completion_tokens

        # Determine the model used; default to 'gpt-4o' for now
        model = 'gpt-4o'  # This could be dynamically determined based on args/kwargs if needed

        # Calculate costs based on the price table for the specific model
        input_cost = prompt_tokens * pricing[model]['input']
        output_cost = completion_tokens * pricing[model]['output']
        total_cost = input_cost + output_cost
        
        # Assign cost details using the CostDetails model
        result.cost_details = CostDetails(
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost
        )

        # Optionally print formatted cost details for transparency
        print(f"Cost Details: Input: {result.cost_details.formatted_input_cost()}, Output: {result.cost_details.formatted_output_cost()}, Total: {result.cost_details.formatted_total_cost()}")
        return result

    return decorated_function

In [19]:
# wrapper

@cost
def wrapper(
    system_prompt: str | None = None, 
    user_prompt: Union[str, List[str]] | None = None, 
    response_model: BaseModel | None = None, 
    max_retries: int = 3, 
    additional_messages: Union[str, List[str]] | None = None
) -> 'WrapperOutput':
    """
    Generates LLM completions using provided parameters and collects token usage information.
    
    This function dynamically constructs a message array for the LLM based on input parameters,
    handles the completion process using either standard or model-based completions depending on 
    the presence of a response model, and returns structured outputs including both the completion 
    response and token usage statistics.

    Args:
        system_prompt (str, optional): System-level initial prompt or instruction.
        user_prompt (Union[str, List[str]], optional): User-provided content or context as a single string or list of strings.
        response_model (BaseModel, optional): Pydantic model to structure the response when using model-specific completions.
        max_retries (int): Maximum number of retries for the LLM request.
        additional_messages (Union[str, List[str]], optional): Additional messages to precede the user prompt.

    Returns:
        WrapperOutput: A Pydantic model containing the LLM response and detailed token counts.

    Classes Defined Inside:
        TokenCounts: A Pydantic model detailing the counts of different types of tokens.
        WrapperOutput: A Pydantic model encapsulating the response and TokenCounts model.
    """

    class TokenCounts(BaseModel):
        completion_tokens: int
        prompt_tokens: int
        total_tokens: int

    class WrapperOutput(BaseModel):
        response: Union[str, BaseModel]
        token_counts: TokenCounts
        cost_details: Optional[Dict[str, str]] = None

    messages = []

    # Construct the messages list based on provided inputs
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    if additional_messages:
        # Can handle both list of messages or a single string
        if isinstance(additional_messages, List):
            messages.extend([{"role": "user", "content": message} for message in additional_messages])
        else:
            messages.append({"role": "user", "content": additional_messages})

    if user_prompt:
        # Similarly, handles both single and multiple user prompts
        if isinstance(user_prompt, List):
            messages.extend([{"role": "user", "content": context} for context in user_prompt])
        else:
            messages.append({"role": "user", "content": user_prompt})

    # Generate the completion and extract token counts based on the presence of a response model
    if response_model is None:
        # Standard completion process without a structured model
        completion = client.chat.completions.create(
            model=GPT_MODEL,
            response_model=None,
            max_retries=max_retries,
            messages=messages
        )
        response_content = completion.choices[0].message.content.strip()
        token_counts = TokenCounts(
            completion_tokens=completion.usage.completion_tokens,
            prompt_tokens=completion.usage.prompt_tokens,
            total_tokens=completion.usage.total_tokens
        )
    else:
        # Model-based completion that structures the response as per the specified BaseModel
        structured_response, raw_completion = client.chat.completions.create_with_completion(
            model=GPT_MODEL,
            response_model=response_model,
            max_retries=max_retries,
            messages=messages
        )
        response_content = structured_response
        token_counts = TokenCounts(
            completion_tokens=raw_completion.usage.completion_tokens,
            prompt_tokens=raw_completion.usage.prompt_tokens,
            total_tokens=raw_completion.usage.total_tokens
        )

    return WrapperOutput(response=response_content, token_counts=token_counts)

In [20]:
completion = wrapper(
    system_prompt="Write me a short story of 100 words",
    response_model=None
)

Cost Details: Input: $0.000080, Output: $0.001995, Total: $0.002075


Input Cost: $0.000080000000000000006544244313


In [9]:
pp(f"The completion type is : {type(completion)}")
pp(f"The completion is : {completion}")
print("\n")
print(f"The completion response type is : {type(completion.response)}")
print(f"The completion response is : {completion.response}")
print("\n")
print(f" the completion token counts type is : {type(completion.token_counts)}")
print(f" the completion token counts is : {(completion.token_counts)}")
print("\n")
print(f" the completion token counts - completion tokens type is : {type(completion.token_counts.completion_tokens)}")
print(f" the completion token counts - completion tokens is : {completion.token_counts.completion_tokens}")

"The completion type is : <class '__main__.wrapper.<locals>.WrapperOutput'>"
("The completion is : response='In the heart of Willowbrook, young Emma found "
 'a diary in an abandoned trunk. The pages, yellowed with age, told the story '
 'of Lina, a girl who lived a century ago. Emma read about Lina’s dreams, '
 'adventures, and unfulfilled wish to see the world. Inspired, Emma decided to '
 'travel, carrying Lina’s diary. She visited places Lina had only imagined, '
 'jotting down her own experiences alongside Lina’s faded ink. Emma’s final '
 'stop was the cliff Lina often dreamed of. She released the diary into the '
 'wind, whispering a promise that Lina’s stories would live on, carried by the '
 "breeze across distant lands.' "
 'token_counts=TokenCounts(completion_tokens=129, prompt_tokens=16, '
 "total_tokens=145) cost_details={'input_cost': '$0.000080', 'output_cost': "
 "'$0.001935', 'total_cost': '$0.002015'}")


The completion response type is : <class 'str'>
The completion 

In [None]:

class UserExtract(BaseModel):
    name: str
    age: int


completion = wrapper(
    system_prompt="extract the user details",
    user_prompt="Jason is a 25 year old asian male",
    response_model=UserExtract
)



In [None]:
pp(f"The completion type is : {type(completion)}")
pp(f"The completion is : {completion}")
print("\n")
print(f"The completion response type is : {type(completion.response)}")
print(f"The completion response is : {completion.response}")
print("\n")
print(f" the completion token counts type is : {type(completion.token_counts)}")
print(f" the completion token counts is : {(completion.token_counts)}")
print("\n")
print(f" the completion token counts - completion tokens type is : {type(completion.token_counts.completion_tokens)}")
print(f" the completion token counts - completion tokens is : {completion.token_counts.completion_tokens}")


In [None]:
print(completion)

In [None]:
print(completion.cost_details
      )