# Setup

In [5]:
import os
from pprint import pprint
from typing import Dict
from typing import Iterable
from typing import List

from dotenv import find_dotenv
from dotenv import load_dotenv
import instructor
from openai import AzureOpenAI
from pydantic import BaseModel
from pydantic import Field


# Load whatever .env file find_dotenv() locates before reading any settings.
load_dotenv(find_dotenv())

# Deployment name sent as `model` by get_completion.
DEPLOYMENT = os.environ.get("AZURE_OPENAI_DEPLOYMENT")

# Build the Azure OpenAI client and pass it through instructor.patch in one
# step; the patched object is what the rest of the notebook uses as `client`.
client = instructor.patch(
    AzureOpenAI(
        api_version="2023-05-15",
        api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    )
)

In [66]:
def get_completion(
    messages: List[Dict[str, str]],
    response_model: BaseModel | Iterable[BaseModel],
    stream: bool = False,
    client: AzureOpenAI = client,
) -> BaseModel | Iterable[BaseModel]:
    """Send one chat completion request and return the client's result.

    Args:
        messages: Chat messages, forwarded unchanged to the request.
        response_model: Forwarded as ``response_model``. Callers in this
            notebook pass a pydantic model class or ``Iterable[Model]``.
        stream: Forwarded as the request's ``stream`` flag.
        client: Object whose ``chat.completions.create`` is invoked.
            The default is the module-level ``client`` as it was bound
            when this function was defined.

    Returns:
        Whatever ``client.chat.completions.create`` returns for the request.
    """
    # The deployment name and a temperature of 0 are fixed for every call;
    # everything else comes from the arguments.
    request = {
        "model": DEPLOYMENT,
        "messages": messages,
        "response_model": response_model,
        "temperature": 0,
        "stream": stream,
    }
    return client.chat.completions.create(**request)

# Improving Extractions

In [3]:
# Structured record requested from the model by the extraction prompt below
# (passed to get_completion as Iterable[Extraction]).
# NOTE(review): documented with comments rather than a class docstring on
# purpose -- a docstring may end up in the schema sent to the model; confirm
# before adding one.
class Extraction(BaseModel):
    # Topic label for the extracted chunk.
    topic: str
    # Summary text for the extracted chunk.
    summary: str
    # Optional; an empty list is used when the field is not supplied.
    hypothetical_questions: List[str] = Field(
        default_factory=list,
        description="Hypothetical questions that this document could answer",
    )
    # Optional; an empty list is used when the field is not supplied.
    keywords: List[str] = Field(
        default_factory=list, description="Keywords that this document is about"
    )

In [46]:
# Chapter 1 submitted by itself gets censored, so chapters 1 and 2 are
# combined into a single text.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale.
# NOTE(review): assumes the resource files are UTF-8 -- confirm.
with open("../resources/romans1.txt", encoding="utf-8") as f:
    text1 = f.read()

with open("../resources/romans2.txt", encoding="utf-8") as f:
    text2 = f.read()

# Both chapter headings use the same "Chapter N" form.
text = "Chapter 1\n\n" + text1 + "\nChapter 2\n\n" + text2

In [47]:
# Chat messages for the extraction request: a system prompt describing the
# task, followed by the combined chapter text as the user message.
# The leading backslash after the opening quotes suppresses the first newline;
# the indentation inside the literal is part of the prompt text.
# NOTE(review): "You don't have any opinions and a capability to interpret"
# reads ambiguously -- confirm the intended wording before changing the prompt.
messages = [
    {
        "role": "system",
        "content": """\
            You are a text processing system.
            You don't have any opinions and a capability to interpret.
            You need to extract chunks from the following text and create a set of topics.
            """,
    },
    {"role": "user", "content": text},
]

In [48]:
# Request a collection of Extraction records for the combined text.
extractions = get_completion(messages=messages, response_model=Iterable[Extraction])

In [60]:
# Pretty-print every extraction except the first three as a plain dict.
for e in extractions[3:]:
    pprint(e.model_dump())

{'hypothetical_questions': ["What triggers God's wrath according to Paul?",
                            "How do people suppress the truth in Paul's view?",
                            'What are the consequences of not honoring God as '
                            'described by Paul?'],
 'keywords': ["God's wrath",
              'sinful humanity',
              'suppress the truth',
              'godlessness',
              'wickedness',
              'sinful desires',
              'depraved mind'],
 'summary': "Paul speaks of God's wrath being revealed against all godlessness "
            'and wickedness of people who suppress the truth. He describes how '
            'they knew God but did not honor Him, leading to their foolish '
            'hearts being darkened and God giving them over to sinful desires '
            'and a depraved mind.',
 'topic': "God's Wrath Against Sinful Humanity"}
{'hypothetical_questions': ['What does Paul say about judging others?',
                  

# Adding temporal window context

## Plain query

In [63]:
from datetime import date


# A pair of calendar dates delimiting a range.
# NOTE(review): nothing here enforces start <= end.
class DateRange(BaseModel):
    # Start of the range.
    start: date
    # End of the range.
    end: date


# Response model for the query-rewriting prompt: the rewritten query text
# plus a date range.
class Query(BaseModel):
    # Query text as rewritten by the model.
    rewritten_query: str
    # Date range attached to the query; presumably the publication window to
    # search in -- confirm against the consumer of this model.
    published_daterange: DateRange

In [80]:
# The AzureOpenAI models do not seem to be able to search the web on their
# own, so the system prompt states today's date explicitly ("Today is ...").
query = "What are the Biblical events that happened on dates not too far from today?"

# Chat messages for the query-rewriting request. The system prompt is an
# f-string, so date.today() is evaluated when this cell runs.
messages = [
    {
        "role": "system",
        "content": f"""\
            You are a system that updates queries with additional context for a search engine.
            Today is {date.today()}
            Update the following query.
            """,
    },
    {"role": "user", "content": query},
]

In [81]:
# Rewrite the query using the plain Query model and show the result as a dict.
rewritten_query = get_completion(messages=messages, response_model=Query)
pprint(rewritten_query.model_dump())

{'published_daterange': {'end': datetime.date(2024, 3, 31),
                         'start': datetime.date(2024, 3, 1)},
 'rewritten_query': 'Biblical events that happened around March'}


## Chain of Thought

In [92]:
# DateRange extended with a free-text reasoning field that the model is asked
# to fill in alongside the dates.
# The description opens with a backslash line continuation, matching the other
# prompt strings in this notebook, so it no longer begins with a stray "/"
# followed by a newline.
# NOTE(review): chain_of_thought is declared after the inherited start/end
# fields; if the model fills fields in declaration order, the reasoning would
# come after the dates it is meant to inform -- confirm the intended order.
class DateRangeWithCOT(DateRange):
    chain_of_thought: str = Field(
        description="""\
        Write down 5 bullet points that would help you to provide a better date range.
        Use them to think step by step.
        """
    )


# Same shape as the plain query model, but each field carries a description
# and the date range is the variant with a chain_of_thought field.
class QueryWithCOT(BaseModel):
    # Query text as rewritten by the model.
    rewritten_query: str = Field(
        description="Rewrite the query to make it more specific",
    )
    # Date range together with the reasoning that led to it.
    published_daterange: DateRangeWithCOT = Field(
        description="Effective date range to search in",
    )

In [93]:
# Re-run the same messages with the chain-of-thought model and show the result
# as a dict.
rewritten_query = get_completion(messages=messages, response_model=QueryWithCOT)
pprint(rewritten_query.model_dump())

{'published_daterange': {'chain_of_thought': 'Since the user is asking for '
                                             'events not too far from today, '
                                             'which is 2024-03-13, a '
                                             'reasonable date range to '
                                             'consider would be the entire '
                                             'month of March 2024. This will '
                                             'capture any Biblical events that '
                                             'are traditionally observed or '
                                             'commemorated during this time '
                                             'period. The Bible itself does '
                                             'not provide specific Gregorian '
                                             'calendar dates for events, but '
                                             'some events are tied to J