In [10]:
from ollama import Client, chat

import json

from src.database.base import init_db
from src.database.companies import get_company_by_ticker
from src.database.filings import get_filings_by_company
from src.database.documents import get_documents_by_filing, DocumentType

from src.database.prompts import get_prompt_by_name
from src.database.completions import create_completion

from src.llm.prompts import format_document_messages, format_aggregate_messages, PromptRole
from src.llm.client import init_client, get_chat_response
from src.llm.models import MODEL_CONFIG

from src.utils.config import settings
from src.utils.logging import configure_logging, get_logger

# Configure logging first so the initialization steps below are logged.
configure_logging()
logger = get_logger(name="notebook")

# init_db returns a pair; neither element is needed in this notebook.
_, _ = init_db(settings.database.url)

# Build the chat client from the configured endpoint and call ps() once as a
# connectivity check; its result is discarded.
client = init_client(settings.openai_api.url)
_ = client.ps()
# Event name spelled correctly so it can be searched alongside "database_initialized".
logger.info("ollama_initialized", status="success")



2025-06-28T15:41:10.779471Z [info     ] database_initialized           status=success


HTTP Request: GET http://10.0.0.4:11434/api/ps "HTTP/1.1 200 OK"
2025-06-28T15:41:10.792919Z [info     ] ollama_initialzied             status=success


In [13]:
# Ticker of the company to analyze; used for the company lookup and for the
# outputs/<TICKER>/ file paths in later cells.
TICKER = "AVGO"
# Model key passed to get_chat_response and used to index MODEL_CONFIG.
MODEL = "qwen3"

In [14]:
# Resolve the company for TICKER, list its filings, and fetch every filing's
# documents. Nothing is processed in this cell: the inner loop body is `pass`.
# `filings` is reused by the summarization loop in a later cell.
company = get_company_by_ticker(TICKER)
filings = get_filings_by_company(company_id=company.id)
for filing in filings:
    documents = get_documents_by_filing(filing.id)
    for document in documents:
        pass



2025-06-28T15:41:45.574197Z [info     ] retrieved_company_by_ticker    company_id=0685e292-0152-7513-8000-6a7b30f86651 ticker=AVGO
2025-06-28T15:41:45.576228Z [info     ] retrieved_filings_by_company   company_id=0685e292-0152-7513-8000-6a7b30f86651 count=4
2025-06-28T15:41:45.582081Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e292-5d8a-7ba8-8000-be8914f39986
2025-06-28T15:41:45.584111Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e292-c438-718e-8000-c675a5db1c78
2025-06-28T15:41:45.585763Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e293-30db-7362-8000-744846e7c1f0
2025-06-28T15:41:45.587225Z [info     ] retrieved_documents_by_filing  count=2 filing_id=0685e293-97be-75c2-8000-1f4bb304a5e9


In [20]:
# def format_messages(document):
#     messages = [{
#         'role': PromptRole.SYSTEM.value,
#         'content': DOCUMENT_PROMPTS[document.document_type]
#     },
#     {
#         'role': PromptRole.USER.value,
#         'content': document.content
#     }]
#     return messages

# models = {
#     "gemma3": {
#         "name": "gemma3:12b",
#         "ctx": 12288,
#         # "ctx": 24567,
#         # "ctx": 32768,
#         # "ctx": 65536,
#         # "ctx": 131072,
#     },
#     "qwen3": {

#         "name": "qwen3:14b",
#         "ctx": 1288,
#         # "ctx": 24567,
#         # "ctx": 32768,
#         # "ctx": 65536,
#         # "ctx": 131072,
#     }
# }

# model = models["gemma3"]

# def get_chat_response(model, messages):
#     model = models[model]
#     response = client.chat(model=model["name"],
#         options= {
#             "num_ctx": model["ctx"],
#         },
#         messages = messages,
#     )
#     return response


# def check_ctx(model, messages):
#     model = models[model]
#     response = client.chat(model=model["name"],
#         options= {
#             "num_ctx": 1024,
#         },
#         messages = messages,
#     )
#     return response



In [None]:
# Summarize every document of every filing with the chat model, persist each
# reply as a completion record, and collect the replies per document type.
# Each dict maps the filing date (as a string) to the model's reply text and is
# consumed by the export and aggregation cells further down.
descriptions = {}
risk_factors = {}
mdas = {}

# Route each known document type to the dict that collects its summaries.
summaries_by_type = {
    DocumentType.DESCRIPTION: descriptions,
    DocumentType.RISK_FACTORS: risk_factors,
    DocumentType.MDA: mdas,
}

for filing in filings:
    documents = get_documents_by_filing(filing.id)
    for document in documents:
        # Resolve the stored prompt (and its id) before the slow chat call so
        # a missing prompt fails here instead of after the model has run.
        prompt = get_prompt_by_name(document.document_type.value)
        system_prompt_id = prompt.id

        messages = format_document_messages(document)

        logger.info(f"Starting Chat for {TICKER} {filing.filing_date} {document.document_type.value}")
        response = get_chat_response(client, MODEL, messages)

        # Computed once; this value is both logged and stored (reported as seconds).
        total_duration = response.total_duration / 1e9
        content = response.message.content
        logger.info("Done", time=f"{total_duration}s")

        # Persist the reply together with the model, prompt and source document.
        completion_data = {
            'model': MODEL,
            'document_ids': [document.id],  # Associate with the current document
            'system_prompt_id': system_prompt_id,  # Link to the prompt used
            'total_duration': total_duration,
            'created_at': response.created_at,
            'num_ctx': MODEL_CONFIG[MODEL]["ctx"],
            'content': content,  # The actual response text
        }
        completion = create_completion(completion_data)
        print(f"Created completion record with ID: {completion.id}")
        print(f"Associated with document: {document.document_name}")
        print(f"Model: {completion.model}, Duration: {completion.total_duration:.2f} seconds")
        print(f"Content length: {len(completion.content) if hasattr(completion, 'content') else 'N/A'} characters")

        # Compare against None explicitly: the target dicts start out empty and
        # would be falsy in a plain truthiness test.
        target = summaries_by_type.get(document.document_type)
        if target is None:
            # The completion is already stored; the reply is just not collected
            # for aggregation. Log which type was skipped so it can be diagnosed.
            logger.warning(
                "unhandled_document_type",
                document_type=document.document_type.value,
            )
        else:
            target[f"{filing.filing_date}"] = content


logger.info("done")


2025-06-28T15:46:49.863792Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e292-5d8a-7ba8-8000-be8914f39986
2025-06-28T15:46:49.864134Z [info     ] Starting Chat for AVGO 2021-12-17 business_description
HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:47:19.492554Z [info     ] Done                           time=29.625348936s
2025-06-28T15:47:19.493780Z [info     ] retrieved_prompt_by_name       name=business_description prompt_id=06860055-5fa7-7c96-8000-8afefd533328
2025-06-28T15:47:19.498798Z [info     ] created_completion             completion_id=068600e8-77ec-763c-8000-dc07b0a8cbae model=qwen3
2025-06-28T15:47:19.500486Z [info     ] Starting Chat for AVGO 2021-12-17 risk_factors


Created completion record with ID: 068600e8-77ec-763c-8000-dc07b0a8cbae
Associated with document: Broadcom Inc. 10-K 2021-12-17 - Business Description
Model: qwen3, Duration: 30.00 seconds
Content length: 2093 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:48:24.340721Z [info     ] Done                           time=64.837606118s
2025-06-28T15:48:24.342055Z [info     ] retrieved_prompt_by_name       name=risk_factors prompt_id=06860055-5f8d-73c6-8000-acf740a64053
2025-06-28T15:48:24.352016Z [info     ] created_completion             completion_id=068600ec-8580-7ce3-8000-bde6708068c9 model=qwen3
2025-06-28T15:48:24.354294Z [info     ] Starting Chat for AVGO 2021-12-17 management_discussion


Created completion record with ID: 068600ec-8580-7ce3-8000-bde6708068c9
Associated with document: Broadcom Inc. 10-K 2021-12-17 - Risk Factors
Model: qwen3, Duration: 65.00 seconds
Content length: 5629 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:49:52.631748Z [info     ] Done                           time=88.275014799s
2025-06-28T15:49:52.632931Z [info     ] retrieved_prompt_by_name       name=management_discussion prompt_id=06860055-5f9e-750c-8000-e57ce7b47741
2025-06-28T15:49:52.637386Z [info     ] created_completion             completion_id=068600f2-0a26-7955-8000-fd76b51c6270 model=qwen3
2025-06-28T15:49:52.642406Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e292-c438-718e-8000-c675a5db1c78
2025-06-28T15:49:52.642893Z [info     ] Starting Chat for AVGO 2022-12-16 business_description


Created completion record with ID: 068600f2-0a26-7955-8000-fd76b51c6270
Associated with document: Broadcom Inc. 10-K 2021-12-17 - Management Discussion
Model: qwen3, Duration: 88.00 seconds
Content length: 7917 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:51:13.188342Z [info     ] Done                           time=80.542321794s
2025-06-28T15:51:13.189573Z [info     ] retrieved_prompt_by_name       name=business_description prompt_id=06860055-5fa7-7c96-8000-8afefd533328
2025-06-28T15:51:13.197963Z [info     ] created_completion             completion_id=068600f7-130d-7e52-8000-800277dcead0 model=qwen3
2025-06-28T15:51:13.200113Z [info     ] Starting Chat for AVGO 2022-12-16 risk_factors


Created completion record with ID: 068600f7-130d-7e52-8000-800277dcead0
Associated with document: Broadcom Inc. 10-K 2022-12-16 - Business Description
Model: qwen3, Duration: 81.00 seconds
Content length: 7392 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:52:15.000535Z [info     ] Done                           time=61.797787829s
2025-06-28T15:52:15.001747Z [info     ] retrieved_prompt_by_name       name=risk_factors prompt_id=06860055-5f8d-73c6-8000-acf740a64053
2025-06-28T15:52:15.005419Z [info     ] created_completion             completion_id=068600fa-f00d-77d8-8000-f0c43d5b01b3 model=qwen3
2025-06-28T15:52:15.007146Z [info     ] Starting Chat for AVGO 2022-12-16 management_discussion


Created completion record with ID: 068600fa-f00d-77d8-8000-f0c43d5b01b3
Associated with document: Broadcom Inc. 10-K 2022-12-16 - Risk Factors
Model: qwen3, Duration: 62.00 seconds
Content length: 5556 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:53:33.352357Z [info     ] Done                           time=78.341487465s
2025-06-28T15:53:33.354621Z [info     ] retrieved_prompt_by_name       name=management_discussion prompt_id=06860055-5f9e-750c-8000-e57ce7b47741
2025-06-28T15:53:33.359936Z [info     ] created_completion             completion_id=068600ff-d5b6-7c97-8000-0cf2fab98304 model=qwen3
2025-06-28T15:53:33.362550Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e293-30db-7362-8000-744846e7c1f0
2025-06-28T15:53:33.362907Z [info     ] Starting Chat for AVGO 2023-12-14 business_description


Created completion record with ID: 068600ff-d5b6-7c97-8000-0cf2fab98304
Associated with document: Broadcom Inc. 10-K 2022-12-16 - Management Discussion
Model: qwen3, Duration: 78.00 seconds
Content length: 6507 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:54:49.254989Z [info     ] Done                           time=75.8892805s
2025-06-28T15:54:49.256485Z [info     ] retrieved_prompt_by_name       name=business_description prompt_id=06860055-5fa7-7c96-8000-8afefd533328
2025-06-28T15:54:49.265101Z [info     ] created_completion             completion_id=06860104-9420-72b0-8000-c53e950c5cac model=qwen3
2025-06-28T15:54:49.267058Z [info     ] Starting Chat for AVGO 2023-12-14 risk_factors


Created completion record with ID: 06860104-9420-72b0-8000-c53e950c5cac
Associated with document: Broadcom Inc. 10-K 2023-12-14 - Business Description
Model: qwen3, Duration: 76.00 seconds
Content length: 6914 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:56:16.636787Z [info     ] Done                           time=87.367246643s
2025-06-28T15:56:16.638773Z [info     ] retrieved_prompt_by_name       name=risk_factors prompt_id=06860055-5f8d-73c6-8000-acf740a64053
2025-06-28T15:56:16.647709Z [info     ] created_completion             completion_id=0686010a-0a3f-741c-8000-3fef799a8055 model=qwen3
2025-06-28T15:56:16.649688Z [info     ] Starting Chat for AVGO 2023-12-14 management_discussion


Created completion record with ID: 0686010a-0a3f-741c-8000-3fef799a8055
Associated with document: Broadcom Inc. 10-K 2023-12-14 - Risk Factors
Model: qwen3, Duration: 87.00 seconds
Content length: 8560 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:57:56.908250Z [info     ] Done                           time=100.256373179s
2025-06-28T15:57:56.909589Z [info     ] retrieved_prompt_by_name       name=management_discussion prompt_id=06860055-5f9e-750c-8000-e57ce7b47741
2025-06-28T15:57:56.918951Z [info     ] created_completion             completion_id=06860110-4e93-7e3b-8000-e65c79c5cc66 model=qwen3
2025-06-28T15:57:56.921296Z [info     ] retrieved_documents_by_filing  count=2 filing_id=0685e293-97be-75c2-8000-1f4bb304a5e9
2025-06-28T15:57:56.921557Z [info     ] Starting Chat for AVGO 2024-12-20 business_description


Created completion record with ID: 06860110-4e93-7e3b-8000-e65c79c5cc66
Associated with document: Broadcom Inc. 10-K 2023-12-14 - Management Discussion
Model: qwen3, Duration: 100.00 seconds
Content length: 8229 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T15:59:42.168449Z [info     ] Done                           time=105.243775445s
2025-06-28T15:59:42.169704Z [info     ] retrieved_prompt_by_name       name=business_description prompt_id=06860055-5fa7-7c96-8000-8afefd533328
2025-06-28T15:59:42.173892Z [info     ] created_completion             completion_id=06860116-e2bd-73da-8000-4273854575e8 model=qwen3
2025-06-28T15:59:42.176266Z [info     ] Starting Chat for AVGO 2024-12-20 risk_factors


Created completion record with ID: 06860116-e2bd-73da-8000-4273854575e8
Associated with document: Broadcom Inc. 10-K 2024-12-20 - Business Description
Model: qwen3, Duration: 105.00 seconds
Content length: 9951 characters


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T16:00:54.928316Z [info     ] Done                           time=72.749723765s
2025-06-28T16:00:54.929491Z [info     ] retrieved_prompt_by_name       name=risk_factors prompt_id=06860055-5f8d-73c6-8000-acf740a64053
2025-06-28T16:00:54.938301Z [info     ] created_completion             completion_id=0686011b-6ee5-7533-8000-2c1621dab394 model=qwen3


Created completion record with ID: 0686011b-6ee5-7533-8000-2c1621dab394
Associated with document: Broadcom Inc. 10-K 2024-12-20 - Risk Factors
Model: qwen3, Duration: 73.00 seconds
Content length: 7075 characters


In [None]:
import os

# Write the per-filing summaries collected above to outputs/<TICKER>/<section>.json.
output_dir = f'outputs/{TICKER}'
os.makedirs(output_dir, exist_ok=True)

# Pair every file name with its own dict. The previous version wrote
# `risk_factors` into descriptions.json, so the descriptions were never saved.
for section_name, summaries in (
    ("mdas", mdas),
    ("risk_factors", risk_factors),
    ("descriptions", descriptions),
):
    with open(f'{output_dir}/{section_name}.json', 'w') as file:
        json.dump(summaries, file, indent=4)

In [None]:
# Build an aggregate prompt from each section's per-filing summaries, send it
# to the chat model, and write each reply to outputs/<TICKER>/<section>.md.
# The output directory is expected to exist already (the JSON export cell creates it).

# --- MD&A: write the reply to disk and persist it as a completion record ---
messages = format_aggregate_messages(mdas)
response = get_chat_response(client, MODEL, messages)
total_duration = response.total_duration / 1e9  # reported as seconds
content = response.message.content
logger.info("Done", time=f"{total_duration}s")
with open(f'outputs/{TICKER}/mda.md', 'w') as file:
    file.write(content)

prompt = get_prompt_by_name("aggregate_prompt")

# NOTE(review): no 'document_ids' are attached to the aggregate completion,
# unlike the per-document completions — confirm create_completion accepts this.
completion_data = {
    'model': MODEL,
    'system_prompt_id': prompt.id,  # Link to the prompt used
    'total_duration': total_duration,
    'created_at': response.created_at,
    'num_ctx': MODEL_CONFIG[MODEL]["ctx"],
    'content': content,  # The actual response text
}
completion = create_completion(completion_data)
print(f"Created completion record with ID: {completion.id}")
# The aggregate is built from all collected MD&A summaries, so report how many
# went into it. The previous line printed `document.document_name`, a loop
# variable left over from an earlier cell that named one unrelated document.
print(f"Aggregated from {len(mdas)} MD&A summaries")
print(f"Model: {completion.model}, Duration: {completion.total_duration:.2f} seconds")
print(f"Content length: {len(completion.content) if hasattr(completion, 'content') else 'N/A'} characters")

# --- Risk factors and descriptions: print the reply and write it to disk ---
# NOTE(review): these two aggregates are not persisted as completions, only
# the MD&A one is — confirm whether that is intended.
for summaries, output_name in (
    (risk_factors, "risk_factors"),
    (descriptions, "descriptions"),
):
    messages = format_aggregate_messages(summaries)
    response = get_chat_response(client, MODEL, messages)
    logger.info("Done", time=f"{response.total_duration / 1e9}s")
    print(response.message.content)
    with open(f'outputs/{TICKER}/{output_name}.md', 'w') as file:
        file.write(response.message.content)

HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T11:02:27.051990Z [info     ] Done                           time=81.804331533s
HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T11:03:32.631413Z [info     ] Done                           time=65.576536244s


<think>
Okay, let's see. The user wants a summary of the historical changes in the provided documents. Each document is from a different year: 2021, 2022, 2023, and 2024. They all seem to be risk factors from a company's annual report or similar filing, likely a tech company given the context.

First, I need to compare each document to identify how the risk factors have evolved over time. The user probably wants to know what's new, what's been emphasized more, or if certain risks have become more prominent.

Looking at the 2021 document, the main categories are Business and Operational Risks, Financial Risks, Legal and Regulatory Risks, and Human Capital Risks. They mention customer concentration, competition, economic conditions, natural disasters, cybersecurity, regulatory uncertainty, IP, third-party vendors, supply chain, currency fluctuations, interest rates, taxation, credit risk, litigation, compliance, and key personnel.

In 2022, there's a more detailed breakdown with impact l

HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T11:04:41.689493Z [info     ] Done                           time=69.055010147s


<think>
Okay, I need to summarize the historical changes in these documents. Let me look through each of the four filings from 2021 to 2024.

Starting with the 2021 document. The key points here are Cisco's focus on digital agility, ESG principles, hybrid work, leadership changes like Maria Martinez becoming COO, and a strong emphasis on diversity and inclusion. The financial performance isn't detailed, but there's mention of ESG initiatives as a long-term commitment.

In 2022, the document expands on the ESG commitment, mentioning a specific Purpose Report and the link to their ESG hub. There's more detail on the leadership team, including Charles H. Robbins, R. Scott Herren, Maria Martinez, and Deborah Stahlkopf. The company's purpose is stated as "Power an Inclusive Future for All," and they emphasize conscious culture and employee well-being. The regulatory and legal considerations are also highlighted here, like IP protection and legal proceedings.

Moving to 2023, there's a focus

In [12]:
# Debug probe: show the Python type of the reply content from the most recent chat call.
type(response.message.content)

str

In [None]:
# Rebuild the aggregate prompt messages from the collected business descriptions
# (no chat call is made in this cell).
messages = format_aggregate_messages(descriptions)

In [None]:
# Notebook-local model table: short alias -> model tag ("name") and "ctx"
# (presumably the context window size; the commented lines are alternative values).
# NOTE(review): no live cell in this view reads `models`; the completion records
# take num_ctx from the imported MODEL_CONFIG — confirm whether this cell is still needed.
models = {
    "gemma3": {
        "name": "gemma3:12b",
        "ctx": 12288,
        # "ctx": 24567,
        # "ctx": 32768,
        # "ctx": 65536,
        # "ctx": 131072,
    },
    "qwen3": {

        "name": "qwen3:14b",
        "ctx": 8000,
        # "ctx": 24567,
        # "ctx": 32768,
        # "ctx": 65536,
        # "ctx": 131072,
    }
}


HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T09:45:46.268790Z [info     ] Done                           time=51.980350933s


<think>
Okay, I need to summarize the historical changes in the provided documents from 2021 to 2025. Let me look through each of the years and note the key points.

Starting with the 2021 document. The summary mentions Alphabet's core business model, revenue streams like advertising, Google Services, Google Cloud, and Other Bets. Competitive landscape includes companies like Microsoft, Amazon, etc. Growth strategies focus on AI, cloud expansion, and sustainability. The segments are Google Services, Google Cloud, and Other Bets.

In 2022, the structure is similar, but there's more emphasis on AI initiatives like MUM. The "Other Bets" are still mentioned as early-stage with potential but uncertainty. Hardware sales include Fitbit and Nest. The competitive landscape lists more companies, like ByteDance and TikTok.

2023 adds more details on AI integration and mentions specific products like Gemini. The growth strategy emphasizes AI across all products. The segments are the same, but ther

In [None]:
# Re-run the MD&A aggregation on its own and print the raw reply.
messages = format_aggregate_messages(mdas)
# Pass the client and the configured MODEL, matching the
# get_chat_response(client, MODEL, messages) calls used by the other cells;
# the previous two-argument call omitted the client.
response = get_chat_response(client, MODEL, messages)
logger.info("Done", time=f"{response.total_duration / 1e9}s")
print(response.message.content)

HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T09:51:00.215778Z [info     ] Done                           time=51.980350933s


<think>
Okay, so the user has provided a series of documents from Alphabet Inc. spanning from 2021 to 2025, each detailing different aspects of the company's financial performance, accounting policies, legal issues, and strategic initiatives. My task is to summarize the historical changes in these documents. Let me start by going through each document one by one to identify the key points and track how they've evolved over time.

Starting with the 2021 document. The main points here are the company's substantial cash reserves, share repurchases, and the impact of the pandemic. They also mention the issuance of Sustainability Bonds. The key areas covered are financial strength, capital allocation, and the effect of the pandemic on operations.

Moving to 2022, the focus shifts to the leadership team's assessment. The MD&A section discusses transparency, strategic thinking, execution capabilities, and risk awareness. They acknowledge challenges like the pandemic's impact on ad revenue and

In [None]:
# Re-run the risk-factor aggregation on its own and print the raw reply.
messages = format_aggregate_messages(risk_factors)
# Pass the client and the configured MODEL, matching the
# get_chat_response(client, MODEL, messages) calls used by the other cells;
# the previous two-argument call omitted the client.
response = get_chat_response(client, MODEL, messages)
logger.info("Done", time=f"{response.total_duration / 1e9}s")
print(response.message.content)

HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"
2025-06-28T09:53:21.852202Z [info     ] Done                           time=51.980350933s


<think>
Okay, let's see. The user provided several documents from different years (2021 to 2025) that outline the risk factors for Alphabet (Google). They want a summary of the historical changes in these documents.

First, I need to compare the different years to identify trends and changes in the risk factors mentioned. Let me go through each document one by one.

Starting with 2021: The main categories are Macroeconomic & Regulatory Risks, Business & Operational Risks, Financial Risks, and Company-Specific Risks. Key points include the impact of COVID-19, tax changes, regulatory scrutiny, reliance on key personnel, and dependence on advertising revenue.

In 2022, the structure is similar but more detailed. They mention antitrust investigations, data privacy regulations, and the implications of these risks. The emphasis on legal challenges and the complexity of Alphabet's operations is more pronounced. Also, the mention of "Safe Harbor" dependencies and content liability on YouTube i

In [None]:
# Display the full response object from the most recent chat call.
response