In [1]:
# goal
goal = "Create structured notes from a long YouTube video transcript."

# tasks
task_1 = "Download Transcript: Obtain the complete transcript from the YouTube video."
task_2 = "Extract Video Metadata: Collect essential metadata (video title, author, date) from YouTube."
task_3 = "Extract Text Metadata: Identify key topics and keywords from the transcript."
task_4 = "Create Outline: Draft an outline with clear sections and subsections based on the transcript."
task_5 = "Detail Subsections: Fill each subsection using specific content from the transcript."
task_6 = "Compile Document: Assemble sections into one Markdown file, including all metadata for context."

# wishlist

future = [
    "convert audio to transcript", 
    "Literal for toolbox?", 
    "audio longer than 4 mins?", 
    "some sort of exa search",
    "use Surya for arxiv pdf"
    ]

In [2]:
# imports

import enum
import instructor
import json
import os
import re
import uuid
from abc import ABC, abstractmethod
from datetime import datetime
from dotenv import load_dotenv
from googleapiclient.discovery import build
from openai import OpenAI
from pathlib import Path
from pprint import pprint as pp
from pydantic import BaseModel, Field, StringConstraints, UUID4, conlist, constr, field_validator
import tiktoken
import time
from typing import Any, ClassVar, Dict, Iterable, List, Optional, Type, Union
from typing_extensions import Annotated, Literal
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter, TextFormatter

In [3]:
# load API key

dotenv_path = Path(r"C:\Storage\python_projects\ashvin\.env")
load_dotenv(dotenv_path=dotenv_path)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

# main constants

GPT_MODEL = "gpt-4o" # points to latest GPT model

#instantiate client
client = instructor.from_openai(OpenAI(), mode=instructor.Mode.TOOLS)
audio_client = OpenAI()

In [4]:
url = "https://www.youtube.com/watch?v=P1ww1IXRfTA"
url_2 = "https://www.youtube.com/watch?v=OY6ywiMknvs&t"
url_3 = "https://www.youtube.com/watch?v=MXPYbjjyHXc"
url_4 = "https://www.youtube.com/watch?v=q1XFm21I-VQ"
url_5 = "https://www.youtube.com/watch?v=gA0tDvwcbcI"
url_6 = "https://www.youtube.com/watch?v=FzK1hdXFhR8" # voicenotes

In [5]:
# wrapper

def wrapper(
    system_prompt: str | None = None, 
    user_prompt: Union[str, list] | None = None, 
    response_model: BaseModel | None = None, 
    max_retries: int = 3, 
    additional_messages: Union[str, List[str]] | None = None
):
    """Wrapper function to generate LLM completion"""
    messages = []

    # Add system prompt if provided
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})

    # Add additional messages before user_prompt
    if additional_messages is not None:
        if isinstance(additional_messages, list):
            for message in additional_messages:
                messages.append({"role": "user", "content": message})
        else:
            messages.append({"role": "user", "content": additional_messages})

    # Add user context if provided
    if user_prompt is not None:
        if isinstance(user_prompt, list):
            for context in user_prompt:
                messages.append({"role": "user", "content": context})
        else:
            messages.append({"role": "user", "content": user_prompt})

    # Generate the completion
    completion = client.chat.completions.create(
        model=GPT_MODEL,
        response_model=response_model,
        max_retries=max_retries,
        messages=messages
    )
    
    # Check if response_model is None and return appropriate result
    if response_model is None:
        return completion.choices[0].message.content.strip()
    else:
        return completion

In [6]:
def count_tokens(text: str, print_length: bool = True, token_type: str = 'input') -> int:
    """
    Count the number of tokens in a given text string using a specific tokenization model, print the token count,
    calculate and print the cost of tokens based on a pricing table.

    Parameters:
        text (str): The text string to tokenize and count.
        print_length (bool): If True, prints the length of the tokens. Default is True.
        token_type (str): Specifies whether to use 'input' or 'output' token pricing. Default is 'input'.

    Returns:
        int: The number of tokens in the text.
    """
    # Encode the transcript to count tokens
    # Assume tiktoken.get_encoding and tiktoken.encode are available from previous imports or context
    encoding = tiktoken.get_encoding("cl100k_base")
    tokens = encoding.encode(text)
    token_count = len(tokens)

    # Print the token length if required
    if print_length:
        print(f"Token count: {token_count}")

    # Pricing table
    pricing = {
        'input': 5 / 1_000_000,  # $5 per 1 million tokens
        'output': 15 / 1_000_000  # $15 per 1 million tokens
    }

    # Calculate cost
    token_price_per_unit = pricing.get(token_type, pricing['input'])
    cost = token_price_per_unit * token_count
    print(f"Cost for {token_type} tokens: ${cost:.6f}")

    return token_count


In [7]:
# Youtube details tool

class YoutubeDetails(BaseModel):
    """
    This tool extracts the YouTube video ID from a given URL, retrieves key metadata,
    and formats it as a dictionary.
    
    The metadata extracted includes:
        - Title: The title of the video.
        - Description: The description of the video.
        - Published At: The date and time when the video was published.
        - Channel Title: The title of the channel that uploaded the video.
        - Views: The number of views the video has received.
        - Likes: The number of likes the video has received.
        - Dislikes: The number of dislikes the video has received.
        - Comments: The number of comments on the video.
        - Duration: The duration of the video in ISO 8601 format.
        - Tags: A list of tags associated with the video.
    """

    def run(self, url: str) -> Optional[Dict[str, str]]:
        """
        Extract the YouTube video ID from a given URL, retrieve key metadata,
        and format it as a dictionary.

        Parameters:
            url (str): The YouTube URL from which to extract the video ID.

        Returns:
            Optional[Dict[str, str]]: The video metadata if the video ID is valid and the
                                      metadata is available, otherwise None.
        """
        # Regular expression to find the video ID in a YouTube URL
        pattern = r'(?:https?://)?(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]{11})'
        match = re.search(pattern, url)
        if not match:
            print("No valid YouTube video ID found in the provided URL.")
            return None

        video_id = match.group(1)

        try:
            if not YOUTUBE_API_KEY:
                print("API key not found in environment variables.")
                return None

            youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
            request = youtube.videos().list(part='snippet,contentDetails,statistics', id=video_id)
            response = request.execute()

            if not response['items']:
                print("No video found with the provided video ID.")
                return None

            video_details = response['items'][0]
            metadata = {
                "Title": video_details['snippet']['title'],
                "Description": video_details['snippet']['description'],
                "Published At": video_details['snippet']['publishedAt'],
                "Channel Title": video_details['snippet']['channelTitle'],
                "Views": video_details['statistics'].get('viewCount', 'N/A'),
                "Likes": video_details['statistics'].get('likeCount', 'N/A'),
                "Dislikes": video_details['statistics'].get('dislikeCount', 'N/A'),
                "Comments": video_details['statistics'].get('commentCount', 'N/A'),
                "Duration": video_details['contentDetails']['duration'],
                "Tags": video_details['snippet'].get('tags', [])
            }

            return metadata
        except Exception as e:
            print(f"Error retrieving video metadata: {e}")
            return None

In [8]:
youtube_metadata_tool = YoutubeDetails()
youtube_metadata = youtube_metadata_tool.run(url=url_6)
_ = count_tokens(json.dumps(youtube_metadata))

Token count: 846
Cost for input tokens: $0.004230


In [9]:
# transcript tool

class Transcript(BaseModel):
    """
    This tool extracts the YouTube video ID from a given URL, retrieves the transcript, and 
    formats it as a JSON string.
    """

    def run(self, url: str) -> Optional[str]:
        """
        Extract the YouTube video ID from a given URL, retrieve the transcript,
        and format it as a JSON string.

        Parameters:
            url (str): The YouTube URL from which to extract the video ID.

        Returns:
            Optional[str]: The JSON formatted transcript if the video ID is valid and the
                           transcript is available, otherwise None.
        """
        # Regular expression to find the video ID in a YouTube URL
        pattern = r'(?:https?://)?(?:www\.)?youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})'
        match = re.search(pattern, url)
        if not match:
            print("No valid YouTube video ID found in the provided URL.")
            return None

        video_id = match.group(1)

        try:
            # Retrieve the transcript
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en-GB', 'en'])

            # Format the transcript as JSON
            formatter = JSONFormatter()
            json_formatted_transcript = formatter.format_transcript(transcript)

            return json_formatted_transcript
        except Exception as e:
            print(f"Error retrieving or formatting transcript: {e}")
            return None

In [10]:
transcript_tool = Transcript()
transcript = transcript_tool.run(url=url_6)
_ = count_tokens(json.dumps(transcript))

Token count: 35280
Cost for input tokens: $0.176400


In [11]:
# extract metadata from text

class MetaData(BaseModel):
    """
    Extract various metadata properties from a text - PrimarySource - and generate a MetaData instance by processing the text.
    Each property is described by a specific prompt, and this class can generate and return a metadata report
    based on the input text.
    """

    documentation_names: conlist(constr(max_length=100), min_length=1, max_length=10) = Field(
        default=None,
        description="Generate up to 10 possible names for documentation we want to build, for the data in PrimarySource.",
        example=["Documentation1", "Guide", "Manual"]
    )

    intended_audience: conlist(constr(max_length=100), min_length=1, max_length=10) = Field(
        default=None,
        description="Generate up to 10 words describing the intended audience for creating documentation from the data in PrimarySource.",
        example=["Beginners", "Advanced Users", "Developers", "Managers"]
    )

    description: constr(max_length=500) = Field(
        default=None,
        description="Provide a three-sentence description of the information in PrimarySource.",
        example="This is an article about data science best practices."
    )

    keywords: conlist(constr(max_length=50), min_length=1, max_length=10) = Field(
        default=None,
        description="Generate up to 10 possible keywords referring to industries, technologies, people or other themes for the data in PrimarySource.",
        example=["Data Science", "Machine Learning"]
    )

    clarification_questions: conlist(constr(max_length=200), min_length=1, max_length=10) = Field(
    default=None,
    description="Generate a list of insightful questions that target ambiguities or complex areas within PrimarySource, prompting deeper exploration or explanation.",
    example=[
        "Can you explain the significance of the statistical methods used in this analysis?",
        "What are the implications of these findings for the broader field of study?",
        "How does this information compare with prior established studies?",
        "What methodologies were used in the data collection process?",
        "Could you detail the reasoning behind the conclusions drawn in this document?",
        "What are the potential biases in this study, and how were they addressed?",
        "Can you clarify the terms used in the discussion of the results?",
        "Are there any assumptions in this study that need more detailed justification?",
        "What are the limitations of this study, and how do they affect the results?",
        "How can the findings be applied in practical scenarios?"
    ]
)

    def run(self, text: str) -> 'MetaData':
        """
        Process the input text to extract metadata and generate a MetaData instance.

        Parameters:
            text (str): The input text from which to extract metadata.

        Returns:
            MetaData: An instance of MetaData class filled with extracted metadata.
        """

        metadata = wrapper(
            system_prompt="Extract metadata from the provided text.",
            user_prompt=text,
            response_model=MetaData,  # Assuming the wrapper can fill a MetaData instance
            max_retries=3
        )

        return metadata


In [12]:
metadata_tool = MetaData()
metadata = metadata_tool.run(transcript)
_ = count_tokens(metadata.model_dump_json(), token_type='output')

Token count: 380
Cost for output tokens: $0.005700


In [13]:
def print_metadata_details(metadata_instance):
    """
    Prints details of each field in the MetaData instance, including list counts.

    Parameters:
        metadata_instance (MetaData): The instance of MetaData to be detailed.
    """
    for field_name, value in metadata_instance.dict().items():
        if value is not None:
            if isinstance(value, list):
                print(f"{field_name} (Count: {len(value)}): {value}")
            else:
                print(f"{field_name}: {value}")
        else:
            print(f"{field_name}: None")


In [14]:
print_metadata_details(metadata_instance=metadata)

documentation_names (Count: 10): ['Voice Notes and AI Integration', 'Personal Usage of Voice Notes', 'In-Depth Interview with D Joe on Voice Notes', 'Building Voice Notes: A User-Centric Approach', 'Privacy and AI: Insights from Voice Notes', 'Voice Notes: Features and Future Plans', 'Exploring Voice Notes with D Joe', 'User Feedback and Development Strategies for Voice Notes', 'Understanding the Impact of Voice Notes', 'Voice Notes: Privacy, Features, and User Engagement']
intended_audience (Count: 10): ['developers', 'product managers', 'tech enthusiasts', 'voice note users', 'AI researchers', 'privacy advocates', 'content creators', 'startup founders', 'software testers', 'UX designers']
description: This interview with D Joe explores the development and features of Voice Notes, focusing on the product's user-centric design and integration with AI. The discussion highlights the team's approach to privacy, user engagement, and future development plans. It also addresses the potential

In [15]:
string_channel_metadata = json.dumps(youtube_metadata)
string_text_metadata = metadata.model_dump_json()
string_transcript = transcript

In [16]:
concatenated_text = f"""

This is the programmatic metadata of the PrimarySource : {string_channel_metadata}

---

This is the extracted metadata from the PrimarySource : {string_text_metadata}

---

This is the text from the PrimarySource : {string_transcript}

"""

_ = count_tokens(concatenated_text)

Token count: 35392
Cost for input tokens: $0.176960


In [36]:


class OutlineNode(BaseModel):
    """
    Represents a node in the outline. Each node can represent a section or a subsection.

    Attributes:
        id (int): Unique identifier for the node.
        title (str): Title of the node.
        description (str): A single sentence description of the node.
        key_points (List[str]): A list of very short strings outlining key points to cover in this node.
        parent_id (Union[int, None]): Identifier of the parent node, if any.
    """
    id: int = Field(None, description="Unique identifier for the node.")
    title: str = Field(None, description="Title of the node.")
    description: str = Field(None, description="A single sentence description of the node.")
    key_points: List[str] = Field(default_factory=list, description="A list of very short strings outlining key points to cover in this node.")
    parent_id: Union[int, None] = Field(default=None, description="Identifier of the parent node, if any.")


class OutlineEdge(BaseModel):
    """
    Represents an edge in the outline, connecting two nodes.

    Attributes:
        source (int): Identifier of the source node.
        target (int): Identifier of the target node.
        label (str): Label describing the relationship between the nodes. Be expressive and precise in label selection.
    """
    source: int = Field(None, description="Identifier of the source node.")
    target: int = Field(None, description="Identifier of the target node.")
    label: str = Field(None, description="Label describing the relationship between the nodes.Be expressive and precise in label selection.")


class Outline(BaseModel):
    """
    Represents the overall structure of a document outline, containing multiple nodes and edges.

    The outline can have sections, subsections, and subsubsections. A section can have many subsections,
    and each subsection can have many subsubsections. Ideally the first section doesn't have any subsections.
    The levels of nestedness are limited to a maximum of three levels:
        - Level 1: Section - corresponds to ## in markdown
        - Level 2: Subsection - corresponds to ### in markdown
        - Level 3: Subsubsection - corresponds to #### in markdown

    Attributes:
        title (str): Title of the document for which the outline is generated.
        nodes (List[OutlineNode]): List of nodes in the document outline.
        edges (List[OutlineEdge]): List of edges connecting the nodes in the document outline.
    """
    title: str = Field(None, description="Title of the document for which the outline is generated.")
    nodes: List[OutlineNode] = Field(default_factory=list, description="List of nodes in the document outline.")
    edges: List[OutlineEdge] = Field(default_factory=list, description="List of edges connecting the nodes in the document outline.")

    def run(self, text: str) -> 'Outline':
        """
        Generate an outline based on the input text.

        Parameters:
            text (str): The input text to generate the outline from.

        Returns:
            Outline: An instance of the Outline class with the generated outline.
        """
        outline = wrapper(
            system_prompt="""
            Generate a detailed, structured outline of the document based on the provided text.
            The outline should have appropriate sections, subsections and subsubsections.
            Ideally the first section doesn't have any subsections.
            """,
            user_prompt=text,
            response_model=Outline,
            max_retries=3
        )
        return outline



In [37]:
outline_tool = Outline()
outline = outline_tool.run(concatenated_text)

_ = count_tokens(outline.model_dump_json(), token_type='input')
_ = count_tokens(outline.model_dump_json(), token_type='output')
print("/n")
print("These are the outline nodes :")
pp(outline.nodes)
print("These are the outline edges :")
pp(outline.edges)

Token count: 865
Cost for input tokens: $0.004325
Token count: 865
Cost for output tokens: $0.012975
/n
These are the outline nodes :
[OutlineNode(id=1, title='Introduction', description='Initial greeting and discussion about sponsorship.', key_points=['Sponsor mention', 'Affiliate link', 'Introduction of guest', 'Gratitude for transparency'], parent_id=None),
 OutlineNode(id=2, title='Who is Jijo Sunny?', description='Background story and previous ventures of Jijo Sunny.', key_points=['Background on Jijo Sunny', 'Early projects', 'Involvement with his brother'], parent_id=None),
 OutlineNode(id=3, title='The Genesis of Voicenotes', description='Discussion on the inspiration and early ideas behind Voicenotes.', key_points=['Inspiration for Voicenotes', 'Development timeline', 'Challenges and learning', 'Initial usage of Voice Memos'], parent_id=None),
 OutlineNode(id=4, title='Features and Innovations in Voicenotes', description='Detailed discussion about the unique features and future

In [41]:
task = f"""
This is the primary source : {transcript}

---
This is the outline of notes about the primary source expressed as a knowledge graph of nodes and edges : {outline.model_dump_json()}

---
Task 1 :

Given this knowledge graph, extract the outline in markdown with appropriate formatting for sections and subsections

---

Task 2 : 

Follow the structure established in the outline, extract and then infer the content from the primary source and your knowledge.
For each section or subsection fill in the content in great detail.
Each section must be at least 300 words long.
Write in markdown, with appropriate formatting (bold, italics, headings, bullet points, etc).
Write it as an expert in the themes.

---
ONLY RETURN THE OUTPUT FROM TASK 2. DO NOT ENCLOSE THE OUTPUT IN CODE BLOCKS.
"""

In [39]:
print(f"Input tokens")
print("---")
_ = count_tokens(task)
response = wrapper(
    user_prompt=task
)
print(f"Output tokens")
print("---")
_ = count_tokens(response, token_type='output')

Input tokens
---
Token count: 35141
Cost for input tokens: $0.175705
Output tokens
---
Token count: 3903
Cost for output tokens: $0.058545


In [40]:
print(response)

```markdown
# VCP 6 - Voicenotes: The Inspiring Story & Future Plans (with: Jijo Sunny)

## Introduction
**Initial greeting and discussion about sponsorship.**

### Key Points:
- Sponsor mention
- Affiliate link
- Introduction of guest
- Gratitude for transparency

*Today's video is sponsored by you if you like what I do here in the channel and you are planning to try voice notes, please use my affiliate link vadcampus.com/slvn voice notes. Did you get it? And of course you can also buy me a coffee at vladcampus.com/coffee.*

*Hello, D! Thank you for being here with us today.*
*Hey Vlad, thank you for having me. Look, I love that you guys are being so transparent about the voice notes' numbers and statistics.*

In this episode, we dive into the fascinating journey and future plans of Voicenotes, founded by Jijo Sunny. We'll explore his background, the genesis of Voicenotes, its unique features, the role of AI, plans for future enhancements, and also crucial aspects concerning privacy a

In [45]:
new_task = f"""
This is the primary source : {transcript}

---
This is the initial draft of detailed and structured notes extracted from the transcript: {response}

---
Task:

Preserve the markdown structure of the initial draft.
Extend, update and revise the content of each section and subsection to:
    - provide more detail to each section and subsection by incorporating relevant information from the primary source
    - selectively incorporate callouts where appropriate. Use direct quotes in callouts where appropriate.
The added detail is of the same writing clarity and quality as previously. 
Return a revised set of notes in markdown

---
ONLY RETURN THE OUTPUT FROM THE TASK. DO NOT ENCLOSE THE OUTPUT IN CODE BLOCKS.
"""

In [46]:
print(f"Input tokens")
print("---")
_ = count_tokens(new_task)
new_response = wrapper(
    user_prompt=new_task
)
print(f"Output tokens")
print("---")
_ = count_tokens(new_response, token_type='output')

Input tokens
---
Token count: 38166
Cost for input tokens: $0.190830
Output tokens
---
Token count: 4177
Cost for output tokens: $0.062655


In [47]:
print(new_response)

# VCP 6 - Voicenotes: The Inspiring Story & Future Plans (with: Jijo Sunny)

## Introduction
**Initial greeting and discussion about sponsorship.**

### Key Points:
- Sponsor mention
- Affiliate link
- Introduction of guest
- Gratitude for transparency

Today's video is sponsored by you if you like what I do here on the channel and you are planning to try Voice Notes, please use my affiliate link vadcampus.com/slvn voice notes. Did you get it? And of course, you can also buy me a coffee at vladcampus.com/coffee.

Hello, D! Thank you for being here with us today.

Hey Vlad, thank you for having me. Look, I love that you guys are being so transparent about the voice notes' numbers and statistics.

In this episode, we dive into the fascinating journey and future plans of Voicenotes, founded by Jijo Sunny. We'll explore his background, the genesis of Voicenotes, its unique features, the role of AI, plans for future enhancements, and also crucial aspects concerning privacy and security. The

In [50]:
newest_task = f"""

This is the primary source : {transcript}

---

This is the revised draft of detailed and structured notes extracted from the primary source: {new_response}

---
Task:

Preserve the markdown structure and contents of the revised draft.
Extend the content by selectively incorporating one or more mermaid diagrams:
    - Only insert mermaid diagrams where appropriate so that the reader can better understand the note content
    - Extract or infer details for the diagrams from primary source or the revised draft
    - Insert the diagram in code blocks at the appropriate section, subsection or subsubsection of the revised draft
    - The diagrams should be accessible and easy to grasp. They should not be overly complex or complicated.
Return a revised set of notes in markdown with the appropriately inserted mermaid diagram code

---
ONLY RETURN THE OUTPUT FROM THE TASK. DO NOT ENCLOSE THE WHOLE OUTPUT IN CODE BLOCKS. ONLY INSERT CODE BLOCKS AS APPROPRIATE FOR MERMAID DIAGRAMS.
"""

In [51]:
print(f"Input tokens")
print("---")
_ = count_tokens(newest_task)
newest_response = wrapper(
    user_prompt=newest_task
)
print(f"Output tokens")
print("---")
_ = count_tokens(newest_response, token_type='output')

Input tokens
---
Token count: 38497
Cost for input tokens: $0.192485
Output tokens
---
Token count: 4175
Cost for output tokens: $0.062625


In [52]:
print(newest_response)

# VCP 6 - Voicenotes: The Inspiring Story & Future Plans (with: Jijo Sunny)

## Introduction
**Initial greeting and discussion about sponsorship.**

### Key Points:
- Sponsor mention
- Affiliate link
- Introduction of guest
- Gratitude for transparency

Today's video is sponsored by you if you like what I do here on the channel and you are planning to try Voice Notes, please use my affiliate link vadcampus.com/slvn voice notes. Did you get it? And of course, you can also buy me a coffee at vladcampus.com/coffee.

Hello, D! Thank you for being here with us today.

Hey Vlad, thank you for having me. Look, I love that you guys are being so transparent about the voice notes' numbers and statistics.

In this episode, we dive into the fascinating journey and future plans of Voicenotes, founded by Jijo Sunny. We'll explore his background, the genesis of Voicenotes, its unique features, the role of AI, plans for future enhancements, and also crucial aspects concerning privacy and security. The