In [1]:
import os

import json

from ollama import Client, chat
from src.database.base import init_db
from src.database.companies import get_company_by_ticker
from src.database.filings import get_filings_by_company
from src.database.documents import get_documents_by_filing, DocumentType

from src.llm.prompts import format_document_messages, format_aggregate_messages, PromptRole
from src.llm.client import init_client, get_chat_response

from src.utils.config import settings
from src.utils.logging import configure_logging, get_logger

TICKER = "ACHR"
MODEL = "qwen3"

os.makedirs(f'outputs/{TICKER}', exist_ok=True)

configure_logging()
logger = get_logger(name="notebook")
_, _ = init_db(settings.database.url)

client = init_client(settings.openai_api.url)
_ = client.ps()
logger.info("ollama_initialzied", status="success")

company = get_company_by_ticker(TICKER)
filings = get_filings_by_company(company_id=company.id)
for filing in filings:
    documents = get_documents_by_filing(filing.id)
    for document in documents:
        pass



2025-06-28T15:26:33.767170Z [info     ] database_initialized           status=success
HTTP Request: GET http://10.0.0.4:11434/api/ps "HTTP/1.1 200 OK"
2025-06-28T15:26:33.780990Z [info     ] ollama_initialzied             status=success
HTTP Request: GET http://10.0.0.4:11434/api/ps "HTTP/1.1 200 OK"
2025-06-28T15:26:33.780990Z [info     ] ollama_initialzied             status=success
2025-06-28T15:26:33.791324Z [info     ] retrieved_company_by_ticker    company_id=0685e3e7-c4c5-7522-8000-7417d23b7385 ticker=ACHR
2025-06-28T15:26:33.792832Z [info     ] retrieved_filings_by_company   company_id=0685e3e7-c4c5-7522-8000-7417d23b7385 count=5
2025-06-28T15:26:33.795309Z [info     ] retrieved_documents_by_filing  count=2 filing_id=0685e3e8-1f56-74e4-8000-f041f0dee00e
2025-06-28T15:26:33.796561Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e3e8-898a-702a-8000-dc02dacf7deb
2025-06-28T15:26:33.797629Z [info     ] retrieved_documents_by_filing  count=3 filing_id=0685e3e8-ef7e

In [2]:
messages = format_document_messages(document)
response = get_chat_response(client, MODEL, messages)
response

HTTP Request: POST http://10.0.0.4:11434/api/chat "HTTP/1.1 200 OK"


ChatResponse(model='qwen3:14b', created_at='2025-06-28T15:27:56.825229052Z', done=True, done_reason='stop', total_duration=83016538119, load_duration=1894251035, prompt_eval_count=7503, prompt_eval_duration=10890900288, eval_count=1669, eval_duration=70215564546, message=Message(role='assistant', content='<think>\nOkay, let me try to work through this analysis step by step. The user wants me to evaluate the leadership team of Archer based on their MD&A section of the 10-K filing. The four aspects to assess are transparency and honesty in discussing challenges, strategic thinking and forward planning, execution capabilities based on past performance, and risk awareness and mitigation strategies.\n\nFirst, I need to go through the MD&A section carefully. Let me start by reading through the overview and the sections on revenue, operating expenses, liquidity, etc. The company is in the advanced aviation sector, developing eVTOL aircraft like Midnight and a defense variant with Anduril. The

In [3]:
response.created_at
response.done_reason
response.total_duration
document.id

UUID('0685e3e9-b9e6-71a5-8000-843872aa046c')

In [4]:
from src.llm.prompts import DOCUMENT_PROMPTS, AGGREGATE_PROMPT



## Database Migration

The error `UndefinedColumn: column prompts.content does not exist` indicates that we need to add the missing `content` column to the `prompts` table. Let's run the SQL command to add this column:

In [5]:
from sqlalchemy import text
from src.database.base import get_db_session

# Get a session
session = get_db_session()

# Add the missing content column to the prompts table
sql = text("ALTER TABLE prompts ADD COLUMN content TEXT;")

try:
    session.execute(sql)
    session.commit()
    print("Successfully added 'content' column to the prompts table.")
except Exception as e:
    session.rollback()
    print(f"Error executing SQL: {e}")

Error executing SQL: (psycopg2.errors.DuplicateColumn) column "content" of relation "prompts" already exists

[SQL: ALTER TABLE prompts ADD COLUMN content TEXT;]
(Background on this error at: https://sqlalche.me/e/20/f405)


## Additional Database Migration

Let's check what columns currently exist in the prompts table, and then remove any that are no longer needed (like template_vars and default_vars):

In [6]:
from sqlalchemy import text, inspect
from src.database.base import get_db_session, engine

# First, let's check the existing columns in the prompts table
inspector = inspect(engine)
columns = inspector.get_columns('prompts')
print("Current columns in prompts table:")
for col in columns:
    print(f" - {col['name']}: {col['type']}")

Current columns in prompts table:
 - id: UUID
 - name: VARCHAR(255)
 - description: TEXT
 - role: VARCHAR(9)
 - content: TEXT


In [7]:
from sqlalchemy import text, inspect
from src.database.base import get_db_session, engine

# Get a session
session = get_db_session()

# Remove template_vars and default_vars columns if they exist
try:
    # Check if columns exist before dropping them
    inspector = inspect(engine)
    columns = [col['name'] for col in inspector.get_columns('prompts')]

    if 'template_vars' in columns:
        session.execute(text("ALTER TABLE prompts DROP COLUMN template_vars;"))
        print("Dropped template_vars column")
    else:
        print("template_vars column doesn't exist")

    if 'default_vars' in columns:
        session.execute(text("ALTER TABLE prompts DROP COLUMN default_vars;"))
        print("Dropped default_vars column")
    else:
        print("default_vars column doesn't exist")

    if 'template' in columns:
        session.execute(text("ALTER TABLE prompts DROP COLUMN template;"))
        print("Dropped template column")
    else:
        print("template column doesn't exist")

    session.commit()
    print("Database schema update completed successfully.")
except Exception as e:
    session.rollback()
    print(f"Error executing SQL: {e}")

template_vars column doesn't exist
default_vars column doesn't exist
template column doesn't exist
Database schema update completed successfully.


Now let's try creating a prompt again with our updated schema:

In [8]:
# from src.database.prompts import create_prompt, PromptRole

# name = "aggregate_prompt"
# content = AGGREGATE_PROMPT

# # Create a new prompt in the database
# prompt_data = {
#     'name': name,
#     'description': 'Prompt for aggregating document analysis',
#     'role': PromptRole.SYSTEM,
#     'content': content
# }

# try:
#     prompt = create_prompt(prompt_data)
#     print(f"Created prompt with ID: {prompt.id}")
# except ValueError as e:
#     print(f"Error: {e}")
# for doc_type in DOCUMENT_PROMPTS:
#     name = doc_type.value
#     content = DOCUMENT_PROMPTS[doc_type]
#     prompt_data = {
#         'name': name,
#         'role': PromptRole.SYSTEM,
#         'content': content
#     }

#     try:
#         prompt = create_prompt(prompt_data)
#         print(f"Created prompt with ID: {prompt.id}")
#     except ValueError as e:
#         print(f"Error: {e}")

In [9]:
# available objects:
# company (Company)
# filing (Filing)
# document (Document)
# response (raw llm response. Has creation timestamp and duration fields)
# prompt (name matches document.document_type)

## Completions Table Migration

The completions table is missing a crucial field to store the actual text content of the LLM response. Let's add a `content` column to store this data:

In [22]:
from sqlalchemy import text
from src.database.base import get_db_session, engine

# Get a session
session = get_db_session()

# First, let's check if the column already exists
inspector = inspect(engine)
columns = [col['name'] for col in inspector.get_columns('completions')]
print("Current columns in completions table:", columns)

# Add the content column to the completions table if it doesn't exist
if 'content' not in columns:
    try:
        sql = text("ALTER TABLE completions ADD COLUMN content TEXT;")
        session.execute(sql)
        session.commit()
        print("Successfully added 'content' column to the completions table.")
    except Exception as e:
        session.rollback()
        print(f"Error executing SQL: {e}")
else:
    print("The 'content' column already exists in the completions table.")

# Add the created_at column to the completions table if it doesn't exist
if 'created_at' not in columns:
    try:
        sql = text("ALTER TABLE completions ADD COLUMN created_at TIMESTAMP;")
        session.execute(sql)
        session.commit()
        print("Successfully added 'created_at' column to the completions table.")
    except Exception as e:
        session.rollback()
        print(f"Error executing SQL: {e}")
else:
    print("The 'created_at' column already exists in the completions table.")

# and total_duration (int)
if 'total_duration' not in columns:
    try:
        sql = text("ALTER TABLE completions ADD COLUMN total_duration INT;")
        session.execute(sql)
        session.commit()
        print("Successfully added 'total_duration' column to the completions table.")
    except Exception as e:
        session.rollback()
        print(f"Error executing SQL: {e}")
else:
    print("The 'total_duration' column already exists in the completions table.")

# and num_ctx (int)
if 'num_ctx' not in columns:
    try:
        sql = text("ALTER TABLE completions ADD COLUMN num_ctx INT;")
        session.execute(sql)
        session.commit()
        print("Successfully added 'num_ctx' column to the completions table.")
    except Exception as e:
        session.rollback()
        print(f"Error executing SQL: {e}")
else:
    print("The 'num_ctx' column already exists in the completions table.")

# and remove context_texts
if 'context_text' in columns:
    try:
        sql = text("ALTER TABLE completions DROP COLUMN context_text;")
        session.execute(sql)
        session.commit()
        print("Successfully removed 'context_text' column from the completions table.")
    except Exception as e:
        session.rollback()
        print(f"Error executing SQL: {e}")
else:
    print("The 'context_text' column does not exist in the completions table.")

Current columns in completions table: ['id', 'system_prompt_id', 'user_prompt_id', 'context_text', 'model', 'temperature', 'top_p', 'content', 'created_at', 'total_duration', 'num_ctx']
The 'content' column already exists in the completions table.
The 'created_at' column already exists in the completions table.
The 'total_duration' column already exists in the completions table.
The 'num_ctx' column already exists in the completions table.
Successfully removed 'context_text' column from the completions table.


In [None]:
# Now let's update our completion creation code to include the content
from src.database.completions import create_completion

# Extract necessary details from the response
created_at = response.created_at
total_duration = response.total_duration / 1e9  # Convert from nanoseconds to seconds
content = response.message.content  # Extract the actual content of the response

from src.database.prompts import get_prompt_by_name
from src.llm.models import MODEL_CONFIG

prompt = get_prompt_by_name(document.document_type.value)

# Prepare completion data with content
completion_data = {
    'model': MODEL,
    'document_ids': [document.id],  # Associate with the current document
    'system_prompt_id': prompt.id,  # Link to the prompt used
    'total_duration': total_duration,
    'created_at': created_at,
    'temperature': 0.7,  # Default or you can use specific values if available
    'num_ctx': MODEL_CONFIG[MODEL]["ctx"],
    'content': content,  # Add the actual response content
}

# Create the completion record
try:
    completion = create_completion(completion_data)
    print(f"Created completion record with ID: {completion.id}")
    print(f"Associated with document: {document.document_name}")
    print(f"Model: {completion.model}, Duration: {completion.total_duration:.2f} seconds")
    print(f"Content length: {len(completion.content) if hasattr(completion, 'content') else 'N/A'} characters")
except Exception as e:
    print(f"Error creating completion: {e}")

2025-06-28T15:32:18.247557Z [info     ] retrieved_prompt_by_name       name=management_discussion prompt_id=06860055-5f9e-750c-8000-e57ce7b47741
2025-06-28T15:32:18.253794Z [info     ] created_completion             completion_id=068600b0-23fc-70f9-8000-2e276628b0cb model=qwen3
2025-06-28T15:32:18.253794Z [info     ] created_completion             completion_id=068600b0-23fc-70f9-8000-2e276628b0cb model=qwen3


Created completion record with ID: 068600b0-23fc-70f9-8000-2e276628b0cb
Associated with document: Archer Aviation Inc. 10-K 2025-02-28 - Management Discussion
Model: qwen3, Duration: 83.00 seconds
Content length: 8549 characters
