## Flow

In [1]:
# Goal: the outcome this notebook is meant to deliver.

goal = "Quickly list all tasks after a conversation"

# Idea: the approach taken to reach the goal.

idea = "Extract tasks, actions, and information from voice notes into a table."

# Flow
# NOTE(review): the commented-out steps below describe a YouTube/Reddit
# book-recommendation pipeline, not the voice-note task extraction stated in
# `goal` and `idea` above - presumably carried over from another notebook;
# confirm and rewrite them for the voice-note flow.
# task_1 = "Given a URL, determine if it is a YouTube video or Reddit thread."
# task_2 = "If it is a YouTube video, get the transcript from the video."
# task_3 = "If it is a Reddit thread, fetch the content of the thread."
# task_4 = "Extract book titles and authors mentioned in the transcript or thread content."
# task_5 = "Filter the extracted books based on predefined criteria (e.g., genre, popularity)."
# task_6 = "Store the filtered book recommendations in a database."
# task_7 = "View the database in a dataframe"



## Setup

### Imports

In [2]:
# imports

import enum
import instructor
import json
import os
import re
import uuid
from abc import ABC, abstractmethod
from anthropic import Anthropic
from bs4 import BeautifulSoup
from collections import namedtuple
from datetime import datetime
from dotenv import load_dotenv
from exa_py import Exa
from functools import wraps
from googleapiclient.discovery import build
from IPython.display import display
from openai import OpenAI
import pandas as pd
from pathlib import Path
from pprint import pprint as pp
from pydantic import BaseModel, Field, StringConstraints, UUID4, conlist, constr, field_validator, ConfigDict
import requests
import tiktoken
import time
from typing import Any, Callable, ClassVar, Dict, Iterable, List, Optional, Type, Union
from typing_extensions import Annotated, Literal
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter, TextFormatter

### API Keys

In [3]:
# load dotenv path

# The default is a machine-specific location; set the DOTENV_PATH environment
# variable to point at a different .env file without editing the notebook.
dotenv_path = Path(os.getenv("DOTENV_PATH", r"C:\Storage\python_projects\ashvin\.env"))
if not dotenv_path.is_file():
    # Without this check a missing file goes unnoticed and the keys below are
    # only populated if they already exist in the process environment.
    print(f"Warning: .env file not found at {dotenv_path}; relying on existing environment variables")
load_dotenv(dotenv_path=dotenv_path)

# load API keys (each is None when the variable is not set)
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
EXA_API_KEY = os.getenv("EXA_API_KEY")

### Constants

In [4]:
# Anthropic model identifiers
ANTHROPIC_HAIKU = "claude-3-haiku-20240307"
ANTHROPIC_SONNET = "claude-3-5-sonnet-20240620"
ANTHROPIC_OPUS = "claude-3-opus-20240229"

# Anthropic pricing in USD per single token.
# Each tuple below is (model, $ per 1M input tokens, $ per 1M output tokens).
ANTHROPIC_PRICING = {
    model_id: {'input': usd_in / 1_000_000, 'output': usd_out / 1_000_000}
    for model_id, usd_in, usd_out in (
        (ANTHROPIC_SONNET, 3.00, 15.00),
        (ANTHROPIC_OPUS, 15.00, 75.00),
        (ANTHROPIC_HAIKU, 0.25, 1.25),
    )
}

# OpenAI model identifiers
OPENAI_GPT_4O = "gpt-4o-2024-05-13"
OPENAI_GPT_4O_MINI = "gpt-4o-mini-2024-07-18"
OPENAI_GPT_4_TURBO = "gpt-4-turbo-2024-04-09"
OPENAI_GPT_35_TURBO = "gpt-3.5-turbo-0125"

# OpenAI pricing in USD per single token.
# Each tuple below is (model, $ per 1M input tokens, $ per 1M output tokens).
OPENAI_PRICING = {
    model_id: {'input': usd_in / 1_000_000, 'output': usd_out / 1_000_000}
    for model_id, usd_in, usd_out in (
        (OPENAI_GPT_35_TURBO, 0.50, 1.50),
        (OPENAI_GPT_4O, 5.00, 15.00),
        (OPENAI_GPT_4O_MINI, 0.15, 0.60),
        (OPENAI_GPT_4_TURBO, 10.00, 30.00),
    )
}

### Config

In [5]:
# Voice-note share link; assigned to `URL` below and passed to the web text extractor test.
URL_1 = "https://voicenotes.com/s/Lh34hb"

In [6]:
# Active input URL and default LLM for this run.
# `MODEL` is captured as the default `model` argument of `wrapper` and `Books.run`
# when those are defined, so re-run their cells after changing it.
URL = URL_1
MODEL = OPENAI_GPT_4O_MINI

### Clients

In [7]:
# Instantiate clients.
# The two instructor-wrapped clients are the ones `wrapper` selects between by model-name prefix.
client_openai = instructor.from_openai(OpenAI())
client_anthropic = instructor.from_anthropic(Anthropic())
# Plain (unwrapped) OpenAI client; not referenced in the cells visible here -
# presumably intended for audio endpoints, given its name (TODO confirm).
audio_client = OpenAI()

## Utilities

### Cost Decorator

In [8]:
# cost decorator
def cost(func: Callable) -> Callable:
    """
    Decorate an LLM-calling function so each call also prints what it cost.

    The decorated function must return an object with a `raw` attribute that
    exposes `model` and `usage`. Token counts are read from `usage` using
    `input_tokens`/`output_tokens` when present, otherwise
    `prompt_tokens`/`completion_tokens`.

    Args:
        func (Callable): The function whose result carries the raw completion.

    Returns:
        Callable: A function with the same signature that prints a
        "Cost Details" line and then returns the original result unchanged.

    Raises:
        ValueError: If the model name contains neither "claude" nor "gpt".
        KeyError: If the model has no entry in the selected pricing table.
    """

    def _as_dollars(amount: float) -> str:
        # Fixed six-decimal dollar formatting used for every printed figure.
        return f"${amount:.6f}"

    @wraps(func)
    def decorated(*args, **kwargs) -> Any:
        result = func(*args, **kwargs)
        completion = result.raw
        model_used = completion.model

        # The provider is inferred from the model name; "claude" is checked first.
        for marker, table in (("claude", ANTHROPIC_PRICING), ("gpt", OPENAI_PRICING)):
            if marker in model_used:
                price_table = table
                break
        else:
            raise ValueError("Model not recognized")

        # The two providers name their token counters differently.
        usage = completion.usage
        if hasattr(usage, 'input_tokens'):
            tokens_in = usage.input_tokens
        else:
            tokens_in = usage.prompt_tokens
        if hasattr(usage, 'output_tokens'):
            tokens_out = usage.output_tokens
        else:
            tokens_out = usage.completion_tokens

        # Pricing tables hold per-token rates, so cost is rate * token count.
        rates = price_table[model_used]
        input_cost = rates['input'] * tokens_in
        output_cost = rates['output'] * tokens_out
        total_cost = input_cost + output_cost

        print(f"Cost Details: Input: {_as_dollars(input_cost)}, Output: {_as_dollars(output_cost)}, Total: {_as_dollars(total_cost)}")

        return result

    return decorated


### Wrapper

In [9]:
# wrapper

# Result type returned by `wrapper`: `response` is the parsed model (or plain text),
# `raw` is the provider's completion object. Defined once at module level so every
# call returns the same class instead of a freshly created one.
WrapperOutput = namedtuple('WrapperOutput', ['response', 'raw'])


@cost
def wrapper(
    system_prompt: Optional[str] = None,
    user_prompt: Optional[Union[str, List[str]]] = None,
    model: str = MODEL,
    response_model: Optional[Type[BaseModel]] = None,
    max_retries: int = 3,
    max_tokens: int = 4096
    ) -> WrapperOutput:

    """
    Send a chat completion request to the client matching `model` and return the
    answer either parsed into `response_model` or as plain text.

    Parameters:
        system_prompt (Optional[str]): System-level instruction; added as a "system" message when non-empty.
        user_prompt (Optional[Union[str, List[str]]]): One user message, or a list of strings
            that each become a separate "user" message.
        model (str): Model identifier. Names starting with "claude" use `client_anthropic`,
            names starting with "gpt" use `client_openai`.
        response_model (Optional[Type[BaseModel]]): Pydantic model class used to structure the
            response. When omitted, the text of the completion is returned instead.
        max_retries (int): Retry budget passed to the client; only used for structured requests.
        max_tokens (int): Maximum number of tokens the model may generate.

    Returns:
        WrapperOutput: namedtuple with `response` (a `response_model` instance, or the
        completion text when no `response_model` is given) and `raw` (the raw completion).

    Raises:
        ValueError: If `model` starts with neither "claude" nor "gpt". Raised before any request is sent.
        Client/API errors raised by the underlying LLM client are propagated unchanged.

    Note:
        Relies on the module-level clients `client_anthropic` and `client_openai`.
        The function is decorated with `cost`, so each successful call also prints its cost.
    """

    messages = []

    # Construct the messages list based on provided inputs
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    if user_prompt:
        # Handle both single and multiple user prompts
        if isinstance(user_prompt, list):
            messages.extend({"role": "user", "content": message} for message in user_prompt)
        elif isinstance(user_prompt, str):
            messages.append({"role": "user", "content": user_prompt})

    # Select the client from the model name; fail fast on anything else instead of
    # hitting an unbound `client` further down.
    if model.startswith("claude"):
        client = client_anthropic
    elif model.startswith("gpt"):
        client = client_openai
    else:
        raise ValueError(
            f"Unsupported model {model!r}: expected a name starting with 'claude' or 'gpt'"
        )

    # Structured path: the client parses the completion into `response_model`
    # and also hands back the raw completion.
    if response_model:
        structured_response, raw_completion = client.chat.completions.create_with_completion(
            model=model,
            max_tokens=max_tokens,
            max_retries=max_retries,
            response_model=response_model,
            messages=messages
        )
        return WrapperOutput(structured_response, raw_completion)

    # Unstructured path: with no response model the client returns the raw completion,
    # from which the text is pulled using the provider-specific layout.
    raw_completion = client.chat.completions.create(
        model=model,
        max_tokens=max_tokens,
        response_model=None,
        messages=messages
    )
    if client is client_anthropic:
        text_only = raw_completion.content[0].text
    else:
        text_only = raw_completion.choices[0].message.content

    return WrapperOutput(text_only, raw_completion)

### Unit Test

In [10]:
# UserInfo: small structured-output schema used by the smoke test below to check `wrapper`.
# Documented with `#` comments only, so the model definition itself stays exactly as written.
class UserInfo(BaseModel):
    name: str
    age: int
    # Restricted to exactly these three values.
    gender: Literal["male", "female", "indeterminate"]
    # Required field (`...` means no default); the description is attached to the field's metadata.
    story: str = Field(..., description="a one sentence story based on the user prompt")


# Smoke test: ask the configured model for a structured `UserInfo`.
# `wrapper` is decorated with `cost`, so a "Cost Details" line is printed before the parsed result.
resp = wrapper(
    user_prompt="Austin is 25 years old. Tell me a one sentence story with a darkly funny twist in the tale",
    model = MODEL,
    response_model=UserInfo
)

# `resp.response` is the parsed `UserInfo`; `resp.raw` holds the raw completion.
print(resp.response)

Cost Details: Input: $0.000019, Output: $0.000025, Total: $0.000044
name='Austin' age=25 gender='male' story='Austin thought he was going for a fun night out, but the only party he ended up at was his own surprise funeral.'


## Tools

In [11]:
# web text extraction tool

class WebTextExtractor(BaseModel):
    """
    This tool extracts the text content from a given URL using the requests module.
    
    The extracted text can include the raw HTML content or just the text content of the web page.
    """

    def run(self, url: str, return_html: bool = False, timeout: float = 10.0) -> Optional[str]:
        """
        Extract the text content from a given URL.

        Parameters:
            url (str): The URL from which to extract the text content.
            return_html (bool): If True, return the full HTML content. If False, return plain text. Default is False.
            timeout (float): Seconds to wait for the server before giving up. Default is 10.0.
                Without a timeout `requests` can wait indefinitely on an unresponsive host.

        Returns:
            Optional[str]: The HTML or stripped plain text of the web page if the request is successful,
                           otherwise None (the error is printed, not raised). A timeout is reported
                           the same way as any other request failure.
        """
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)

            if return_html:
                return response.text

            # Parse the HTML and keep only the text nodes.
            soup = BeautifulSoup(response.text, 'html.parser')
            return soup.get_text().strip()

        except requests.RequestException as e:
            # Best-effort tool: report the failure and signal it with None.
            print(f"Error fetching the web page: {e}")
            return None

### Web Text Extractor Unit Test

In [13]:
# Fetch the shared voice note. The positional `True` is `return_html`, so the raw HTML is requested.
# `run` prints the error and returns None when the request fails, so check the result before using it.
web_text_tool = WebTextExtractor()
voice_notes_test = web_text_tool.run(URL_1, True)


Error fetching the web page: 404 Client Error: Not Found for url: https://voicenotes.com/s/Lh34hb


### Prompts

#### Prompt 1 : Tasks



# IDENTITY and PURPOSE

You extract practical information from text content. You are interested in tasks and action items identified.

You create 15-word bullet points that capture the most important tasks from the input.

Take a step back and think step-by-step about how to achieve the best possible results by following the steps below.

# STEPS

- Extract 20 to 50 of the most important tasks and action items from the input in a section called Tasks:, using 15-word bullets. If there are fewer than 50, collect all of them. Make sure you extract at least 20.

# OUTPUT INSTRUCTIONS

- Extract at least 20 TASKS from the content.

- Only extract tasks, not recommendations. These should be phrased as tasks.

- Each bullet should be 15 words in length.

- Do not give warnings or notes; only output the requested sections.

- You use bulleted lists for output, not numbered lists.

- Do not repeat ideas, quotes, facts, or resources.

- Do not start items with the same opening words.

- Ensure you follow ALL these instructions when creating your output.


# INPUT

INPUT:

#### Prompt 2 : Table from Notes

# IDENTITY and PURPOSE

You extract practical information from text content, focusing on tasks, opportunities, information, and action items.

You create a structured table that captures these key elements from the input.

Take a step back and think methodically about how to achieve the best possible results by following the steps below.

# STEPS

- Extract the most important tasks, opportunities, information, and action items from the input and organize them into a table.

# OUTPUT INSTRUCTIONS

- Only output Markdown.

- Create a table with the following columns: Item, Type (task, information, product opportunity, platform opportunity), Description.

- Ensure each entry is clearly categorized and concisely described.

- Do not give warnings or notes; only output the requested sections.

- Ensure you follow ALL these instructions when creating your output.


# INPUT

INPUT:

In [None]:
# books

class Book(BaseModel):
    """
    Represents a book with its details, including title, author, rating, and summaries.

    This model captures essential information about a book, providing both brief and 
    detailed summaries, as well as a nuanced rating system.

    Attributes:
        title (str): The full title of the book. Required.
        author (Optional[str]): The full name of the book's author, if known.
        rating (Optional[Literal]): A qualitative rating of the book on a four-point scale, if known.
        brief_summary (Optional[str]): A concise, one-sentence summary of the book (at most 200 characters).
        detailed_summary (Optional[str]): A more comprehensive paragraph-length summary of the book.
    """
    title: str = Field(..., description="The complete title of the book, including any subtitle")
    # Every field below defaults to None, so it is declared Optional: with a bare
    # `str`/`Literal` annotation an explicit null in the input fails validation
    # even though None is the declared default.
    author: Optional[str] = Field(None, description="The full name of the book's author or authors")
    rating: Optional[Literal["Excellent", "Good", "Fair", "Poor"]] = Field(
        None, 
        description="A qualitative rating of the book on a four-point scale, ranging from 'Excellent' to 'Poor'"
    )
    brief_summary: Optional[str] = Field(
        None, 
        description="A concise, one-sentence summary capturing the essence of the book",
        max_length=200
    )
    detailed_summary: Optional[str] = Field(
        None, 
        description="A short paragraph summary of the book, including key themes and overall impression"
        )

class Books(BaseModel):
    """
    Represents a collection of books extracted from a transcript.

    This model is designed to store and manage multiple Book instances,
    providing functionality to extract book information from textual input.

    Attributes:
        books (List[Book]): A list of Book objects representing the extracted books.
    """
    books: List[Book] = Field(
        default_factory=list,
        description="A list of Book objects, each representing a book extracted from the transcript"
    )

    def run(self, text: str, model: str = MODEL) -> 'WrapperOutput':
        """
        Extract books from the input transcript text.

        Sends `text` as the user prompt to `wrapper` together with an extraction
        system prompt and `Books` as the response model. The instance itself is
        not read or modified; the result comes back from `wrapper`.

        Args:
            text (str): The input transcript text to analyze and extract books from.
            model (str): Identifier for the LLM model to be used.

        Returns:
            WrapperOutput: The value returned by `wrapper`. Its `response` attribute is
            the extracted `Books` instance and its `raw` attribute is the raw completion,
            so use `result.response.books` to reach the list of books.
        """
        # NOTE(review): this returns the whole wrapper output rather than the Books
        # instance; callers such as `books_to_dataframe` need `.response`.
        result = wrapper(
            system_prompt="""
            Analyze the provided source transcript or source text and extract information about books mentioned.
            Ensure that all the books identified in the source extracted.
            For each identified book if the relevant field is available:
            1. Extract the full title, including any subtitle.
            2. Identify the author's full name.
            3. Assign a rating ("Excellent", "Good", "Fair", or "Poor") based on the context or sentiment
               expressed in the transcript. If the sentiment is unclear, default to "Fair".
            4. Create a brief, one-sentence summary capturing the essence of the book.
            5. Compose a detailed summary (200-500 characters) including key themes and overall impression.
            
            Return the extracted information as a list of Book objects, ensuring all required
            fields (title) are populated for each book. Ensure that all the books mentioned are extracted.
            """,
            user_prompt=text,
            model=model,
            response_model=Books,
        )
        return result

In [None]:
# Books to dataframe


def books_to_dataframe(books_response: Any) -> pd.DataFrame:
    """
    Convert extracted books into a display-ready DataFrame ordered from best to worst rating.

    Args:
        books_response: Object exposing a `books` iterable whose items have `title`,
            `author`, `rating`, `brief_summary` and `detailed_summary` attributes
            (for example a `Books` instance).

    Returns:
        pd.DataFrame: Columns Title, Author, Rating, Brief, Detailed. Rows are ordered
        Excellent, Good, Fair, Poor, with missing or unrecognised ratings last; books
        with the same rating keep their input order. The index starts at 1 and is
        named 'No.'. An empty input yields an empty frame with the same columns.
    """
    columns = ['Title', 'Author', 'Rating', 'Brief', 'Detailed']
    # Quality order of the rating labels; a plain string sort would be
    # alphabetical and put "Fair" ahead of "Good".
    rating_rank = {"Excellent": 0, "Good": 1, "Fair": 2, "Poor": 3}

    # One record per book, keyed by the output column names
    records = [
        {
            "Title": book.title,
            "Author": book.author,
            "Rating": book.rating,
            "Brief": book.brief_summary,
            "Detailed": book.detailed_summary
        }
        for book in books_response.books
    ]

    # Passing `columns` fixes the column order and keeps the columns present
    # even when there are no books at all.
    df = pd.DataFrame(records, columns=columns)

    # Sort on the rank of each rating; labels not in the mapping become NaN and go last.
    df = df.sort_values(
        by='Rating',
        key=lambda ratings: ratings.map(rating_rank),
        kind='stable',
        na_position='last'
    )

    # Number the rows from 1 for display
    df.index = pd.RangeIndex(start=1, stop=len(df) + 1, name='No.')

    return df

In [None]:
# LLM as Judge


class ShortlistedBook(Book):
    """
    Represents a book with additional properties for shortlisting.

    This model extends the Book model with shortlist status and reason.

    Attributes:
        shortlisted (bool): Indicates whether the book is shortlisted. Defaults to False.
        reason (Optional[str]): Explanation for the shortlisting decision, if one was given.
    """
    shortlisted: bool = Field(
        False,
        description="Indicates whether the book should be included in the shortlist"
    )
    # Declared Optional because the default is None: with a bare `str` annotation
    # an explicit null in the input fails validation.
    reason: Optional[str] = Field(
        None,
        description="Explanation for why the book was shortlisted or not"
    )

class ShortlistedBooks(BaseModel):
    """
    Represents a collection of books with shortlisting information.

    This model manages ShortlistedBook instances and provides
    functionality to process and shortlist books based on preferences.

    Attributes:
        books (List[ShortlistedBook]): A list of ShortlistedBook objects.
    """
    # NOTE(review): the docstring above is left verbatim — pydantic typically
    # copies a model's docstring into its JSON schema description, so rewording
    # it could change what the LLM is shown. Confirm before editing.

    # Defaults to an empty list, so ShortlistedBooks() can be created with no
    # arguments and used purely as a handle for calling run().
    books: List[ShortlistedBook] = Field(
        default_factory=list,
        description="A list of ShortlistedBook objects, each representing a book with shortlisting information"
    )

    # Hand-labelled examples attached to the model's JSON schema through
    # json_schema_extra. The system prompt in run() says preferences are
    # "inferred from the provided examples"; presumably these are the examples
    # meant — confirm that `wrapper` forwards the schema to the LLM.
    #
    # NOTE(review): three titles appear twice in the list below:
    #   - "Perdido Street Station" (rated "Good" in one entry, "Excellent" in the other)
    #   - "The Blade Itself" (two entries with different summaries)
    #   - "The Ocean at the End of the Lane" (two identical entries)
    # One line also holds two entries ("Iron Prince" followed by the second
    # "Perdido Street Station"). Decide whether the duplicates are intentional.
    model_config = ConfigDict(
    json_schema_extra={
        "examples": [
            {
                "books": [
                    {"title": "The Scar", "author": "China Mieville", "rating": "Excellent", "brief_summary": "A complex tale of a floating city in perpetual flight.", "detailed_summary": "The Scar follows a journey through a terror- and wonder-filled world that is ultimately ruled by a horrifying secret.", "shortlisted": True, "reason": "The sheer inventiveness of the characters and setting is outstanding."},
                    {"title": "Kiln People", "author": "David Brin", "rating": "Good", "brief_summary": "A novel about temporary, disposable duplicate selves.", "detailed_summary": "The story explores a future where 'dittos' are created daily for various tasks, raising philosophical and ethical questions.", "shortlisted": True, "reason": "I liked the detective noir nature of the story."},
                    {"title": "Hyperion", "author": "Dan Simmons", "rating": "Fair", "brief_summary": "Interstellar travel meets ancient mysteries.", "detailed_summary": "A pilgrimage to the distant world of Hyperion involves multiple tales, with a blend of science and literature.", "shortlisted": False, "reason": "Not enough character development; too hard sci-fi."},
                    {"title": "Rendezvous with Rama", "author": "Arthur C Clarke", "rating": "Good", "brief_summary": "Exploration of a mysterious alien starship.", "detailed_summary": "An enigmatic spacecraft, Rama, passes through the solar system, prompting a thorough investigation by human explorers.", "shortlisted": False, "reason": "Too hard sci fi. Unrelatable and for me unreadable."},
                    {"title": "Algebraist", "author": "Iain M. Banks", "rating": "Excellent", "brief_summary": "Deep space and deep time meet in an expansive sci-fi saga.", "detailed_summary": "Delves into a civilization's quest across galaxies to unlock the secrets of the universe.", "shortlisted": True, "reason": "Masterful handling of scope and scale in storytelling. Love the space opera feel and character development and utopia"},
                    {"title": "Legends & Lattes", "author": "Travis Baldtree", "rating": "Good", "brief_summary": "A high fantasy novel with a cozy twist.", "detailed_summary": "A novel approach to fantasy, focusing on a retired orc barbarian who opens a coffee shop.", "shortlisted": True, "reason": "Unique blend of genres, appealing to a broad audience."},
                    {"title": "The Prefect", "author": "Alastair Reynolds", "rating": "Excellent", "brief_summary": "A high-tech police officer battles threats in a utopian society.", "detailed_summary": "Set in the Glitter Band, a society of ten thousand city-state habitats orbiting the planet Yellowstone, the novel mixes mystery and techno-thriller elements.", "shortlisted": True, "reason": "Engaging mix of detective story and sci-fi."},
                    {"title": "Three Body Problem", "author": "Cixin Liu", "rating": "Good", "brief_summary": "A complex sci-fi thriller involving alien contact.", "detailed_summary": "The narrative spans multiple timelines and characters, exploring humanity's reaction to the impending arrival of alien life.", "shortlisted": False, "reason": "Found it boring and dry, no real character development. Inaccessible plot."},
                    {"title": "Assassin's Apprentice", "author": "Robin Hobb", "rating": "Excellent", "brief_summary": "A young bastard learns the deadly arts of an assassin.", "detailed_summary": "In a kingdom fraught with intrigue and danger, Fitz, a royal bastard, is trained as the king's secret weapon.", "shortlisted": True, "reason": "One of my top 5 books. Compelling first person narrative with deep emotional content and a great story. Needs investment to read but is worth it."},
                    {"title": "The Long Way to a Small, Angry Planet", "author": "Becky Chambers", "rating": "Excellent", "brief_summary": "A heartfelt space opera exploring diverse alien cultures.", "detailed_summary": "Follows a motley crew on a tunnelling ship as they journey through space, offering a warm exploration of interpersonal relationships.", "shortlisted": True, "reason": "Excellently captures the human (and non-human) experience in space. Lovely pastoral feel blending CLifford Simak and Scalzi."},
                    {"title": "Dune", "author": "Frank Herbert", "rating": "Excellent", "brief_summary": "A monumental sci-fi epic of politics and survival.", "detailed_summary": "Set on the desert planet Arrakis, Dune is the story of the boy Paul Atreides, who would become the mysterious man known as Muad'Dib, destined to avenge the treacherous plot against his noble family.", "shortlisted": True, "reason": "Profound thematic depth and world-building. Just epic in scope and cadence"},
                    {"title": "Sufficiently Advanced Magic", "author": "Andrew Rowe", "rating": "Good", "brief_summary": "Magic, monsters, and mystery in a school setting.", "detailed_summary": "Follows the adventures of Corin Cadence at the Serpent Spire, a school of magic and challenges, as he seeks to find his missing brother.", "shortlisted": True, "reason": "Engaging progression fantasy with intricate magic systems."},
                    {"title": "The Name of the Wind", "author": "Patrick Rothfuss", "rating": "Excellent", "brief_summary": "The tale of a gifted young man becoming the most notorious wizard.", "detailed_summary": "A deeply personal story told by Kvothe, from his childhood in a troupe of traveling players to years spent as a near-feral orphan in a crime-riddled city.", "shortlisted": True, "reason": "Exceptional storytelling and character development. Unparallelled beauty in the prose, almost poetic"},
                    {"title": "The Way of Kings", "author": "Brandon Sanderson", "rating": "Excellent", "brief_summary": "Epic fantasy with ancient mysteries and heroic struggles.", "detailed_summary": "Introduces readers to the incredible world of Roshar, a land of storms and warfare, as multiple characters fight battles both physical and psychological.", "shortlisted": True, "reason": "Expansive world-building and compelling narrative arcs."},
                    {"title": "Neuromancer", "author": "William Gibson", "rating": "Good", "brief_summary": "Cyberspace and AI in a groundbreaking cyberpunk narrative.", "detailed_summary": "Case, a washed-up computer hacker, is hired for one last job: to pull off the ultimate hack.", "shortlisted": False, "reason": "Pioneering cyberpunk aesthetics but challenging for some readers.A bit dated for my current taste."},
                    {"title": "Awaken Online", "author": "Travis Bagwell", "rating": "Fair", "brief_summary": "Virtual reality becomes a darkly addictive experience.", "detailed_summary": "A disaffected teenager finds power and purpose in a new virtual reality game where he becomes a powerful necromancer.", "shortlisted": True, "reason": "Captivating dive into LitRPG, resonating with the gaming generation."},
                    {"title": "Perdido Street Station", "author": "China Mieville", "rating": "Good", "brief_summary": "A gritty, fantastical metropolis filled with bizarre creatures.", "detailed_summary": "The city of New Crobuzon is a steampunk melting pot where a mysterious creature threatens the lives of all citizens.", "shortlisted": True, "reason": "Highly original, weird, just an awesome melange of ideas."},
                    {"title": "Red Rising", "author": "Pierce Brown", "rating": "Excellent", "brief_summary": "A thrilling blend of dystopia and deep space rebellion.", "detailed_summary": "Darrow, a miner on Mars, transforms into a revolutionary leader to challenge the oppressive caste system.", "shortlisted": True, "reason": "Gripping narrative with strong thematic messages."},
                    {"title": "The Goblin Emperor", "author": "Katherine Addison", "rating": "Good", "brief_summary": "An unwanted half-goblin becomes the emperor in a perilous court.", "detailed_summary": "Maia, thrust unexpectedly onto the throne, must navigate deadly court politics and his own inexperience.", "shortlisted": True, "reason": "Intriguing political fantasy with a focus on personal growth."},
                    {"title": "The Paper Menagerie", "author": "Ken Liu", "rating": "Fair", "brief_summary": "A collection of thought-provoking speculative fiction stories.", "detailed_summary": "Explores various aspects of the human experience through the lens of speculative fiction, blending Eastern and Western storytelling.", "shortlisted": False, "reason": "Eloquent prose and unique concepts, but varying appeal across stories."},
                    {"title": "The Lies of Locke Lamora", "author": "Scott Lynch", "rating": "Excellent", "brief_summary": "A tale of a clever con artist in a fantastical Venetian city.", "detailed_summary": "Locke Lamora, an orphan turned thief, executes elaborate scams but finds himself pitted against a powerful enemy.", "shortlisted": True, "reason": "Brilliantly crafted narrative full of wit and dark twists. I like heists."},
                    {"title": "The Blade Itself", "author": "Joe Abercrombie", "rating": "Good", "brief_summary": "Gritty fantasy with morally ambiguous characters and intense action.", "detailed_summary": "Introduces an ensemble of characters each struggling with their own demons and ambitions in a brutal, war-torn world.", "shortlisted": True, "reason": "Engrossing yet starkly brutal, I like well written grimdark."},
                    {"title": "Mistborn", "author": "Brandon Sanderson", "rating": "Good", "brief_summary": "A band of rebels attempt to overthrow a dark lord ruling for centuries.", "detailed_summary": "Vin, a street urchin, discovers her magical powers and joins a rebel plot that challenges the immortal Lord Ruler's regime.", "shortlisted": False, "reason": "Innovative magic system and strong, driven plot. Love the magic system but the writing was clunky and the end plot reveal was weak"},
                    {"title": "The Fifth Season", "author": "N.K. Jemisin", "rating": "Fair", "brief_summary": "A post-apocalyptic world where some can control seismic activity.", "detailed_summary": "Essun, a woman with incredible powers, seeks her daughter across a continent on the brink of destruction.", "shortlisted": True, "reason": "Powerful use of narrative and deep, intricate world-building but I couldn't get into it at all."},
                    {"title": "Leviathan Wakes", "author": "James S.A. Corey", "rating": "Good", "brief_summary": "Space opera involving interstellar conflict and a mysterious alien virus.", "detailed_summary": "As humanity has colonized the solar system, a detective and a ship's officer uncover a conspiracy that threatens peace and human existence.", "shortlisted": True, "reason": "Explosive action and compelling characters in a well-constructed universe."},
                    {"title": "Ready Player One", "author": "Ernest Cline", "rating": "Good", "brief_summary": "A treasure hunt through a vast virtual world.", "detailed_summary": "In a dystopian future, a teenager embarks on a quest inside a VR game to inherit a vast fortune.", "shortlisted": True, "reason": "Though popular, it lacks the depth and challenge of other narratives. I like the campiness and excitement like this in small doses"},
                    {"title": "The Ocean at the End of the Lane", "author": "Neil Gaiman", "rating": "Excellent", "brief_summary": "A man returns to his childhood home and recalls a forgotten friend and her otherworldly pond.", "detailed_summary": "A blend of autobiography and fantasy, exploring themes of memory and survival through the eyes of a child and his mysterious neighbor.", "shortlisted": True, "reason": "Beautifully melds myth with painful reality."},
                    {"title": "Flowers for Algernon", "author": "Daniel Keyes", "rating": "Fair", "brief_summary": "The story of a mentally disabled man whose intelligence is surgically increased.", "detailed_summary": "As Charlie Gordon's intelligence increases, he faces the complex realities of human emotion and intellectual growth.", "shortlisted": False, "reason": "Profound emotional depth and poignant commentary on human nature. Not my cup of tea"},
                    {"title": "Snow Crash", "author": "Neal Stephenson", "rating": "Good", "brief_summary": "A hacker and a skater attempt to stop a virtual drug in a hyper-commercialized future.", "detailed_summary": "Hiro Protagonist, a hacker and swordsman, and Y.T., a skater courier, navigate a corporatized America to prevent the spread of a mind-altering computer virus.", "shortlisted": False, "reason": "Inventive and energetic, but sometimes overshadowed by its own style."},
                    {"title": "Ender's Game", "author": "Orson Scott Card", "rating": "Excellent", "brief_summary": "A young prodigy trains in advanced warfare to protect Earth from alien forces.", "detailed_summary": "Ender Wiggin, recruited by the military, undergoes rigorous training in a space academy to prepare for an impending alien invasion.", "shortlisted": True, "reason": "Masterful blend of military sci-fi and ethical quandaries."},
                    {"title": "The City We Became", "author": "N.K. Jemisin", "rating": "Excellent", "brief_summary": "New York City's soul fights for survival through its five avatars.", "detailed_summary": "In a modern-day setting, the city's five boroughs come to life to battle a mysterious otherworldly force.", "shortlisted": True, "reason": "Vivid urban fantasy with a fresh take on living cities."},
                    {"title": "House of Leaves", "author": "Mark Z. Danielewski", "rating": "Fair", "brief_summary": "A labyrinthine horror story of a house that is bigger on the inside.", "detailed_summary": "This complex narrative blends footnotes, parallel narratives, and typographic experimentation to tell a chilling story.", "shortlisted": False, "reason": "Intriguing but overly complex and hard to follow."},
                    {"title": "Cradle", "author": "Will Wight", "rating": "Excellent", "brief_summary": "A young man from a lowly clan seeks power in a world governed by martial prowess.", "detailed_summary": "Lindon, an unsouled, embarks on a journey to prove his worth and protect his world from a looming threat.", "shortlisted": True, "reason": "Exceptional progression fantasy with captivating character growth."},
                    {"title": "Warcross", "author": "Marie Lu", "rating": "Good", "brief_summary": "A bounty hunter hacks into an international game and becomes involved in an espionage plot.", "detailed_summary": "Emika Chen, a hacker, enters the Warcross Championships only to uncover a conspiracy.", "shortlisted": False, "reason": "Fast-paced and thrilling, but lacks depth in character development."},
                    {"title": "Gideon the Ninth", "author": "Tamsyn Muir", "rating": "Excellent", "brief_summary": "Necromancers and their sword-wielding guardians compete for power in a decaying space empire.", "detailed_summary": "Gideon and her necromancer navigate through a series of deadly trials on a haunted gothic palace.", "shortlisted": True, "reason": "Unique blend of science fiction and fantasy with a compelling queer narrative."},
                    {"title": "Reamde", "author": "Neal Stephenson", "rating": "Good", "brief_summary": "A techno-thriller that spans the globe involving a virtual game world and real-world terrorism.", "detailed_summary": "The narrative intertwines the lives of a game developer and his family with Russian mobsters and jihadists.", "shortlisted": False, "reason": "Engaging and detailed, but sometimes bogged down by its own intricacy."},
                    {"title": "The Priory of the Orange Tree", "author": "Samantha Shannon", "rating": "Excellent", "brief_summary": "A world divided by fear of dragons faces an existential threat.", "detailed_summary": "Ead Duryan serves in secret as a protector to a queen while across the sea, the dragon rider Tané rises.", "shortlisted": True, "reason": "Epic fantasy with rich world-building and strong feminist themes."},
                    {"title": "Children of Time", "author": "Adrian Tchaikovsky", "rating": "Excellent", "brief_summary": "Evolution and survival clash on a terraformed planet.", "detailed_summary": "A last bastion of humans finds a new home, but the planet is already inhabited by highly evolved spiders.", "shortlisted": True, "reason": "Brilliant exploration of evolution, civilization, and what it means to be human."},
                    {"title": "All Systems Red", "author": "Martha Wells", "rating": "Excellent", "brief_summary": "A self-aware security robot struggles with its identity while protecting a group of humans on a distant planet.", "detailed_summary": "Murderbot, as it calls itself, seeks freedom from its own programming while facing various threats.", "shortlisted": True, "reason": "Engaging narrative with a relatable AI protagonist."},
                    {"title": "The Poppy War", "author": "R.F. Kuang", "rating": "Excellent", "brief_summary": "A war orphan masters the arts of magic and war only to discover the gods are real.", "detailed_summary": "Rin's discovery of her shamanic powers leads her into the heart of a brutal military conflict based on historical events.", "shortlisted": True, "reason": "Dark and intense, with profound commentary on power and war."},
                    {"title": "Arcane Ascension", "author": "Andrew Rowe", "rating": "Good", "brief_summary": "A student in a magical academy seeks to uncover his brother's fate while mastering arcane challenges.", "detailed_summary": "Corin Cadence navigates dangerous towers and political intrigue in a school where students earn power through success or die trying.", "shortlisted": True, "reason": "Well-executed magic system with compelling mysteries and adventure."},
                    {"title": "The Unspoken Name", "author": "A.K. Larkwood", "rating": "Good", "brief_summary": "A priestess turns her back on being a sacrifice to become a powerful wizard's assassin and bodyguard.", "detailed_summary": "Csorwe navigates her complex destiny, shifting alliances, and forbidden magic in a quest for power and identity.", "shortlisted": False, "reason": "Richly imaginative but struggles with pacing."},
                    {"title": "Vita Nostra", "author": "Marina and Sergey Dyachenko", "rating": "Fair", "brief_summary": "A surreal fantasy about a school where failure means disaster.", "detailed_summary": "Sasha Samokhina is forced into attending a school where students must learn to manipulate reality, or face dire consequences.", "shortlisted": False, "reason": "Deeply philosophical and challenging, not for every reader."},
                    {"title": "The Bone Ships", "author": "R.J. Barker", "rating": "Good", "brief_summary": "Two nations at war fight for control over dragon-like sea creatures in a nautical fantasy setting.", "detailed_summary": "The crew of a condemned ship must navigate treacherous waters and their own dark pasts in a race against time to capture a sea dragon.", "shortlisted": False, "reason": "Exciting sea-bound adventure, though sometimes slow."},
                    {"title": "Salvation", "author": "Peter F. Hamilton", "rating": "Good", "brief_summary": "A sci-fi epic about humanity's response to the discovery of alien technology that could change the course of history.", "detailed_summary": "Multiple timelines and characters converge to unravel the mystery of alien artifacts that promise salvation or destruction.", "shortlisted": False, "reason": "Complex and ambitious, but requires commitment to multiple, sprawling narratives."},
                    {"title": "Network Effect", "author": "Martha Wells", "rating": "Excellent", "brief_summary": "A full-length Murderbot novel involving an interstellar crisis and AI identity.", "detailed_summary": "Murderbot must protect its human friends from an unknown alien threat while grappling with its evolving self-awareness.", "shortlisted": True, "reason": "Expands wonderfully on the series, combining action with deep character studies."},
                    {"title": "The Luminous Dead", "author": "Caitlin Starling", "rating": "Good", "brief_summary": "A psychological thriller about a caver on an alien planet who becomes trapped with a potentially malicious guide.", "detailed_summary": "Gyre's solo mission under the surface becomes a fight for survival against psychological horrors and her unreliable handler.", "shortlisted": False, "reason": "Tense and claustrophobic, but the slow pace may not be for everyone."},
                    {"title": "Empress of Forever", "author": "Max Gladstone", "rating": "Good", "brief_summary": "A modern woman is flung into a far-future space opera and must navigate god-like beings to find her way home.", "detailed_summary": "Vivian Liao battles through a universe ruled by a powerful Empress, encountering strange allies and enemies.", "shortlisted": True, "reason": "A vibrant mix of myth and science fiction with a powerful female protagonist."},
                    {"title": "A Memory Called Empire", "author": "Arkady Martine", "rating": "Excellent", "brief_summary": "A young ambassador investigates her predecessor's mysterious death in an alien empire.", "detailed_summary": "Mahit Dzmare arrives in the capital of the Teixcalaanli Empire only to find herself embroiled in political intrigue and a deep-seated cultural conflict.", "shortlisted": False, "reason": "Brilliant political intrigue with a richly detailed setting and complex characters. Couldn't connect with the characters. I often struggle with female protagonists but not always."},
                    {"title": "Iron Prince", "author": "Bryce O'Connor & Luke Chmilenko", "rating": "Excellent", "brief_summary": "A young man fights against societal expectations in a school designed to produce the best warriors.", "detailed_summary": "Reidon Ward must prove himself in a brutal military academy that trains students to lead armies and master advanced technology and magic.", "shortlisted": True, "reason": "Innovative blending of sci-fi and fantasy elements with intense, well-crafted battle scenes."}, {"title": "Perdido Street Station", "author": "China Mieville", "rating": "Excellent", "brief_summary": "A dark and richly imagined world where a scientist and an artist become embroiled in a city's political intrigue.", "detailed_summary": "In the sprawling city of New Crobuzon, a strange creature is accidentally unleashed, threatening the lives of all citizens.", "shortlisted": True, "reason": "Masterful world-building and complex narrative that redefines modern fantasy."},
                    {"title": "American Gods", "author": "Neil Gaiman", "rating": "Excellent", "brief_summary": "An ex-convict becomes embroiled in a war between old gods and new.", "detailed_summary": "Shadow Moon is released from prison and meets the mysterious Mr. Wednesday, who drags him into a battle among deities.", "shortlisted": True, "reason": "Innovative blend of mythology and modern life, with deep cultural and societal commentary."},
                    {"title": "Altered Carbon", "author": "Richard K. Morgan", "rating": "Good", "brief_summary": "A hard-boiled detective story set in a future where consciousness can be transferred to different bodies.", "detailed_summary": "Takeshi Kovacs is hired to solve a wealthy man's murder, in a world where death is nearly obsolete.", "shortlisted": True, "reason": "Gritty narrative with thought-provoking themes on technology and society, but occasionally overly graphic. One of my favourite books."},
                    {"title": "Kushiel's Dart", "author": "Jacqueline Carey", "rating": "Excellent", "brief_summary": "A woman born with a mark that destines her for pain and pleasure becomes a spy in her homeland.", "detailed_summary": "Phèdre nó Delaunay is trained in the arts of seduction and espionage, using her unique skills to navigate political intrigue.", "shortlisted": True, "reason": "Erotic and intricate, with lush prose and a complex lead character."},
                    {"title": "The Blade Itself", "author": "Joe Abercrombie", "rating": "Good", "brief_summary": "In a gritty world, several people find their fates intertwined as a war looms.", "detailed_summary": "Logen Ninefingers, Inquisitor Glokta, and Jezal dan Luthar navigate their violent world, each seeking their own form of redemption or glory.", "shortlisted": True, "reason": "Dark and humorous, a seminal work in the grimdark subgenre."},
                    {"title": "Old Man's War", "author": "John Scalzi", "rating": "Excellent", "brief_summary": "The elderly are recruited to fight in an interstellar war to earn a new, youthful body.", "detailed_summary": "John Perry joins the Colonial Defense Forces on his 75th birthday and confronts the realities of war across the universe.", "shortlisted": True, "reason": "Combines humor, action, and poignant moments in a fresh take on military sci-fi."},
                    {"title": "The Hundred Thousand Kingdoms", "author": "N.K. Jemisin", "rating": "Excellent", "brief_summary": "A woman is summoned to the ruling city and thrust into a deadly competition for the throne.", "detailed_summary": "Yeine Darr is entangled in the complex politics of an empire where gods are enslaved, and royal heirs must prove their worth through treachery.", "shortlisted": True, "reason": "Eloquent and revolutionary, redefining the boundaries of epic fantasy."},
                    {"title": "The City & The City", "author": "China Mieville", "rating": "Good", "brief_summary": "A murder investigation in a city that shares the same space as another city, invisible to each other.", "detailed_summary": "Inspector Tyador Borlú must solve a murder that may involve breaching the unseen borders between two overlapping cities.", "shortlisted": False, "reason": "Intellectually challenging with a unique setting, but pacing may deter some readers."},
                    {"title": "The Night Circus", "author": "Erin Morgenstern", "rating": "Excellent", "brief_summary": "Two young illusionists compete in a magical duel within a mysterious, traveling circus.", "detailed_summary": "Celia and Marco are bound to a lifelong challenge that plays out in the fantastic environment of Le Cirque des Rêves, unaware that only one can survive.", "shortlisted": False, "reason": "Magical and mesmerizing, a true fantasy masterpiece with a love story at its heart. I don't like the romance genre although sex is fine."},
                    {"title": "Anathem", "author": "Neal Stephenson", "rating": "Good", "brief_summary": "A monastic community is rocked by a cosmic event that forces them to reevaluate their role in society.", "detailed_summary": "Erasmas and his fellow scholars must confront an existential threat from another world, questioning their beliefs and sciences.", "shortlisted": False, "reason": "Dense and philosophical, a challenging read that's not for everyone."},
                    {"title": "The Way of Shadows", "author": "Brent Weeks", "rating": "Good", "brief_summary": "An apprentice assassin struggles to learn his deadly trade in a corrupt city.", "detailed_summary": "Azoth must navigate a world of dangerous politics and dark magic to become the master assassin, Kylar Stern.", "shortlisted": True, "reason": "Gritty and engaging, though it sometimes leans heavily on genre tropes. I like assassin stories."},
                    {"title": "The Golden Compass", "author": "Philip Pullman", "rating": "Excellent", "brief_summary": "A young girl embarks on a journey to the Arctic to save her kidnapped friend and uncovers a plot involving stolen children.", "detailed_summary": "Lyra Belacqua and her animal daemon travel to the north, discovering a world of witches, armored bears, and a sinister plot.", "shortlisted": False, "reason": "Richly imagined and critically acclaimed for its originality and philosophical depth but for some reason I don't connect."},
                    {"title": "Wool", "author": "Hugh Howey", "rating": "Excellent", "brief_summary": "In a post-apocalyptic future, the remnants of humanity live in a giant underground silo, but not everything is as it seems.", "detailed_summary": "Juliette, a mechanic, uncovers truths that challenge the fabric of her society and its strict rules.", "shortlisted": True, "reason": "Compelling dystopian narrative with deep social and political commentary."},
                    {"title": "Shadow of the Wind", "author": "Carlos Ruiz Zafón", "rating": "Excellent", "brief_summary": "A young boy in post-war Barcelona finds a mysterious book that leads him into a labyrinth of secrets and intrigue.", "detailed_summary": "Daniel Sempere discovers a novel by Julian Carax and is drawn into a dangerous mystery surrounding the author's fate.", "shortlisted": True, "reason": "Gothic and enchanting, a mesmerizing blend of mystery and love for literature."},
                    {"title": "Daemon", "author": "Daniel Suarez", "rating": "Good", "brief_summary": "A gaming magnate's death triggers a program that begins to manipulate the real world, creating a dystopian reality.", "detailed_summary": "The world faces a new reality as a computer program unleashes a series of events that threaten global stability.", "shortlisted": False, "reason": "Technologically fascinating but sometimes overwhelming with its complexity."},
                    {"title": "The Magicians", "author": "Lev Grossman", "rating": "Good", "brief_summary": "A college student is recruited into a secretive magical school, discovering that the magical world of his dreams is not what it seems.", "detailed_summary": "Quentin Coldwater enters Brakebills Academy for Magical Pedagogy to find that the fantasy world he idolized is darker and more dangerous than he imagined.", "shortlisted": True, "reason": "A dark take on the fantasy of magical schools; I almost always like magical or military academy stories."},
                    {"title": "The Ocean at the End of the Lane", "author": "Neil Gaiman", "rating": "Excellent", "brief_summary": "A man returns to his childhood home and recalls a forgotten friend and her otherworldly pond.", "detailed_summary": "A blend of autobiography and fantasy, exploring themes of memory and survival through the eyes of a child and his mysterious neighbor.", "shortlisted": True, "reason": "Beautifully melds myth with painful reality."},
                    {"title": "A Darker Shade of Magic", "author": "V.E. Schwab", "rating": "Good", "brief_summary": "A magician with the ability to travel between parallel Londons faces a deadly threat.", "detailed_summary": "Kell is one of the last Travelers—magicians with a rare, coveted ability to travel between parallel universes, linked by one magical city.", "shortlisted": False, "reason": "Intriguing premise and magical adventure, but some elements feel underdeveloped."}
                ]
            }
        ]
    }
)

    def run(self, books: Books,model: str = MODEL) -> 'ShortlistedBooks':
        """
        Ask the LLM to decide, for each given book, whether it belongs on the shortlist.

        The input books are serialised to JSON and sent as the user prompt together
        with a fixed reviewer system prompt; the call is delegated to `wrapper`
        (defined elsewhere in this notebook) with ShortlistedBooks as the response
        model. This method does not read any state from `self` — the input comes
        entirely from `books`.

        Args:
            books (Books): An instance of the Books class containing the books to be shortlisted.
            model (str): Identifier for the LLM model to be used. Defaults to the
                notebook-level MODEL constant.

        Returns:
            ShortlistedBooks: Whatever `wrapper` returns for
            response_model=ShortlistedBooks — presumably a populated
            ShortlistedBooks instance (confirm against `wrapper`).
        """
        # Serialise the whole Books model so it can be embedded in the prompt text.
        books_json = books.model_dump_json()
        
        # Fixed instructions for the judge. This text is sent to the LLM as-is
        # (including its leading indentation), so any edit changes model behaviour.
        system_prompt = """
        You are a science fiction and fantasy reviewer tasked with creating a shortlist of books.
        Preferences are inferred from the provided examples.
        The shortlist is meant for an advanced science fiction and fantasy book enthusiast who reads over 100 books in this genre each year. 
        You will receive a list of books.
        For each book in the provided list:
        1. Evaluate the book against the user's inferred preferences.
        2. Decide whether to include the book in the shortlist.
        3. Provide a brief reason for your decision, highlighting how it aligns with or diverges from the preferences.
        4. Add 'shortlisted' (boolean) and 'reason' (string) fields to each book object.

        Ensure your decisions are consistent and well-reasoned, taking into account the book's content, themes, 
        style, and relevance to the user's preferences. The aim is to select good books for reading. 
        Be adventurous and experimental and unafraid to explore new themes, styles, content and book choices.

        Return the updated list of books, maintaining the original structure and content but with the 
        added 'shortlisted' and 'reason' fields for each book.
        """

        # The user message carries only the serialised input books.
        user_prompt = f"Books: {books_json}"

        # Delegate the LLM call; the result is returned unchanged.
        shortlisted_books = wrapper(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            model=model,
            response_model=ShortlistedBooks,
        )

        return shortlisted_books


In [None]:
# ShortlistedBooks to dataframe


def shortlisted_books_to_dataframe(books_response):
    """
    Convert a shortlisting response into a display-ready pandas DataFrame.

    Args:
        books_response: Object exposing a `books` iterable. Each book must provide the
            attributes `title`, `author`, `rating`, `brief_summary`, `detailed_summary`,
            `shortlisted` and `reason`.

    Returns:
        pd.DataFrame: One row per book with the columns Title, Author, Rating, Brief,
        Detailed, Shortlisted and Reason. Rows are sorted by 'Shortlisted' in ascending
        order (False before True), keeping the incoming order within each group, and the
        index starts at 1 under the name 'No.'. An empty `books` iterable yields an
        empty frame that still carries all seven columns.
    """
    columns = ['Title', 'Author', 'Rating', 'Brief', 'Detailed', 'Shortlisted', 'Reason']

    # One record per book, keyed by the output column names
    rows = [
        {
            "Title": book.title,
            "Author": book.author,
            "Rating": book.rating,
            "Brief": book.brief_summary,
            "Detailed": book.detailed_summary,
            "Shortlisted": book.shortlisted,
            "Reason": book.reason
        }
        for book in books_response.books
    ]

    # Passing `columns` fixes the column order and keeps the schema when there are no
    # books; a bare pd.DataFrame([]) has no columns, so selecting or sorting by
    # 'Shortlisted' would raise KeyError.
    df = pd.DataFrame(rows, columns=columns)

    # Sort by the 'Shortlisted' flag, ascending (False rows first, True rows last).
    # mergesort is stable, so ties keep their original relative order.
    df = df.sort_values(by='Shortlisted', ascending=True, kind='mergesort')

    # Renumber rows from 1 for display
    df = df.reset_index(drop=True)
    df.index = pd.RangeIndex(start=1, stop=len(df) + 1, name='No.')

    return df

## Run

In [None]:
# constants

# Input URL for this run; the cells below pass it to the transcript and web-text tools.
# Exactly one assignment should be active; the others are kept as alternative test inputs.
# URL = "https://www.youtube.com/watch?v=N5i0yGJ10_I" # 100 books: edge case, to be worked through in loops
# URL = "https://www.youtube.com/watch?v=-z6KShYsqYw" # 10-11 books
# URL = "https://www.youtube.com/watch?v=dCmcThQjkpc" # 10 biggest books by Petrik
URL = "https://www.youtube.com/watch?v=5vukYYt3c44&t=245s" # 15 books, Petrik

In [None]:
# Run the Transcript tool on URL and print len() of the result as a quick sanity check
transcript_tool = Transcript()
transcript = transcript_tool.run(URL)
print(f"Transcript Length: {len(transcript)}")

In [None]:
# Split the transcript with chunk_text; the chunks are only consumed by the
# commented-out per-chunk loop in the longlist cell.
# NOTE(review): unclear from here whether 6 is a chunk count or a chunk size — confirm against chunk_text
transcript_chunks = chunk_text(transcript, 6)

In [None]:
# Run the WebTextExtractor tool on the same URL.
# NOTE(review): `web_text` is not referenced by any of the following cells in this section — confirm it is still needed
web_text_tool = WebTextExtractor()
web_text = web_text_tool.run(URL)

In [None]:
# Build the longlist with a single Books.run call over the whole transcript.
# The result is used below through its `.raw` and `.response` attributes.
books_tool = Books()
longlist = books_tool.run(transcript, ANTHROPIC_HAIKU)
# Disabled alternative: run once per transcript chunk with a 2-second pause between calls.
# In that mode `longlist` becomes a list of results instead of a single result.
# longlist = []
# for chunk in transcript_chunks:
#     selection = books_tool.run(chunk, ANTHROPIC_HAIKU)
#     longlist.append(selection)
#     time.sleep(2)

# Can I just do a pattern

- GPT_4O = 13 books, 14.6 cents
- GPT_4O_MINI = 15 books, 0.5 cents, some spelling errors
- SONNET_3.5 = (not yet recorded)

In [None]:
# Inspect the `raw` attribute of the longlist result
longlist.raw

In [None]:
# Convert the longlist response with books_to_dataframe and display the result
# with pandas' row, column, width and cell-width limits all lifted (None = no limit).
df = books_to_dataframe(longlist.response)

with pd.option_context('display.max_rows', None, 
                       'display.max_columns', None,
                       'display.width', None,
                       'display.max_colwidth', None):
    display(df)


In [None]:
# Dump the longlist response as JSON for inspection.
# `longlist` is a single result here (the per-chunk loop that made it a list is
# commented out), so read `.response` directly instead of indexing into a list.
longlist.response.model_dump_json()

In [None]:
# Shortlist the longlist response with a single ShortlistedBooks.run call.
shortlist_tool = ShortlistedBooks()
shortlist = shortlist_tool.run(longlist.response, OPENAI_GPT_4_TURBO)
# Disabled alternative: shortlist each per-chunk longlist result separately with a
# 2-second pause between calls. In that mode `shortlist` becomes a list of results.
# shortlist = []
# for chunk in longlist:
#     selection = shortlist_tool.run(chunk.response, OPENAI_GPT_4O)
#     shortlist.append(selection)
#     time.sleep(2)



In [None]:
# NOTE(review): the [0] index matches the commented-out per-chunk loop, where `shortlist`
# is a list of results. With the single `run` call, `shortlist` is accessed through
# `.response` in the next cell — confirm that indexing is still valid here.
shortlist[0]

In [None]:
# Convert the shortlist response to a DataFrame and display it with pandas'
# row, column, width and cell-width limits all lifted (None = no limit).
df_shortlist = shortlisted_books_to_dataframe(shortlist.response)

with pd.option_context('display.max_rows', None, 
                       'display.max_columns', None,
                       'display.width', None,
                       'display.max_colwidth', None):
    display(df_shortlist)

## longlist

1. The Dagger and Coin - Daniel Abraham
2. Blood Over Bright Haven - M. L. Wang
3. Mother of Learning: ARC 4 - Domagoj Kurmaić
4. Yumi and the Nightmare Painter - Brandon Sanderson
5. Eleventh Cycle - Kian N. Ardalan
6. The Silver Blood Promise - James Logan
7. The Vanished Birds - Simon Jimenez
