# **Install required packages**

In [None]:
!pip install sqlite-utils chromadb torch openai



# **Mount Google Drive folder for file access**

In [None]:
# Colab-only helper used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Drive at /content/drive; the SQLite and ChromaDB paths used later in this notebook live under it.
drive.mount('/content/drive')

Mounted at /content/drive


# **Build a Financial Data Q&A System with SQLite, ChromaDB, and OpenAI**

1. Set up environment and GPU support
2. Initialize ChromaDB
3. Load and insert data into ChromaDB
4. Query ChromaDB using natural language
5. Use OpenAI to generate natural language responses

In [None]:
import getpass
import json
import os
import re
import sqlite3

import chromadb
import openai
import pandas as pd
import torch

**1. Set up environment and GPU support**

In [None]:
# Pick the CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device  # Bare last expression so the notebook echoes the selected device

device(type='cuda')

In [None]:
# Sanity check: list every table in the SQLite database to confirm `financials` exists.
# Note: sqlite3.connect() creates an empty database when the file is missing,
# so an empty list can also mean the path is wrong.
db_path = "/content/drive/My Drive/FinScope3D/Structured_Data/nasdaq100.db"
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print(cursor.fetchall())  # If result is [], the table is not loaded
finally:
    # Release the file handle; the loader function below opens its own connection.
    conn.close()

[('financials',)]


**2. Initialize ChromaDB**

In [None]:
CHROMA_PATH = "/content/drive/My Drive/FinScope3D/Structured_Data/chroma_db"  # ChromaDB storage path
# Client whose storage is rooted at CHROMA_PATH (a folder on the mounted Drive).
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)

# Create or load a ChromaDB collection for financial data
# No embedding function is passed, so ChromaDB's default is used — TODO confirm it suits this data.
target_collection_name = "financial_data"
collection = chroma_client.get_or_create_collection(name=target_collection_name)

**3. Load and insert data into ChromaDB**

In [None]:
# Path of the SQLite database read by fetch_financials_from_db().
# NOTE(review): same literal as `db_path` in the table-check cell; keep the two in sync.
DB_PATH = "/content/drive/My Drive/FinScope3D/Structured_Data/nasdaq100.db"

In [None]:
# Read financial data from SQLite
def fetch_financials_from_db(db_path=None):
    """Read every row of the ``financials`` table from SQLite.

    Args:
        db_path: Path of the SQLite file to read. When omitted, the
            notebook-level ``DB_PATH`` is used, so existing zero-argument
            calls behave exactly as before.

    Returns:
        list[tuple]: One ``(ticker, year, data_type, metric_name, value)``
        tuple per row of the table.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails
            (for example when the ``financials`` table does not exist).
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT ticker, year, data_type, metric_name, value FROM financials")
        return cursor.fetchall()  # Return as list of tuples
    finally:
        # Close even when the query raises, so a failed cell does not leak the handle.
        conn.close()

In [None]:
# Batch insert into ChromaDB
def insert_into_chromadb(financials_data=None, target_collection=None, batch_size=1000):
    """Load financial rows into a ChromaDB collection in fixed-size batches.

    Each row becomes one document of the form
    ``"{ticker} {year} {data_type} {metric_name}: {value}"`` with the ID
    ``"{ticker}_{year}_{data_type}_{metric_name}"`` and the four key fields
    stored as metadata.

    Rows are written with ``upsert`` instead of ``add``: the collection is
    persisted on disk, so re-running this cell meets IDs that already exist.
    ``upsert`` overwrites them, which keeps the load idempotent and refreshes
    values that changed in SQLite.

    Args:
        financials_data: Iterable of ``(ticker, year, data_type, metric_name,
            value)`` tuples. Defaults to ``fetch_financials_from_db()``.
        target_collection: Collection-like object exposing
            ``upsert(ids=..., documents=..., metadatas=...)``. Defaults to the
            notebook-level ``collection``.
        batch_size: Maximum number of rows sent per ``upsert`` call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if financials_data is None:
        financials_data = fetch_financials_from_db()
    if target_collection is None:
        target_collection = collection

    # Materialize once so any iterable can be sliced and counted.
    financials_data = list(financials_data)

    # Insert in batches to avoid memory issues
    for start in range(0, len(financials_data), batch_size):
        chunk = financials_data[start:start + batch_size]
        target_collection.upsert(
            ids=[
                f"{ticker}_{year}_{data_type}_{metric_name}"
                for ticker, year, data_type, metric_name, _ in chunk
            ],
            documents=[
                f"{ticker} {year} {data_type} {metric_name}: {value}"
                for ticker, year, data_type, metric_name, value in chunk
            ],
            metadatas=[
                {
                    "ticker": ticker,
                    "year": year,
                    "data_type": data_type,
                    "metric_name": metric_name,
                }
                for ticker, year, data_type, metric_name, _ in chunk
            ],
        )

    print(f"✅ Successfully inserted {len(financials_data)} entries into ChromaDB.")

# Execute the insertion
# Runs the full SQLite -> ChromaDB load every time this cell is executed.
insert_into_chromadb()

✅ Successfully inserted 69851 entries into ChromaDB.


**4. Query ChromaDB using natural language**

In [None]:
def query_chromadb(question, n_results=5, where=None):
    """Query financial data from ChromaDB using a natural language question.

    Retrieval is by embedding similarity alone unless ``where`` is given, so a
    question about one company can return documents about others. Pass a
    metadata filter to restrict the candidates.

    Args:
        question: Free-text query sent to the collection.
        n_results: Maximum number of documents to return.
        where: Optional ChromaDB metadata filter over the stored fields
            (``ticker``, ``year``, ``data_type``, ``metric_name``), e.g.
            ``{"ticker": "TSLA"}``. Values must match the stored type.
            ``None`` applies no filter, which is the previous behavior.

    Returns:
        list[str]: Document strings matched for ``question``; an empty list
        when the query returns no documents.
    """
    results = collection.query(
        query_texts=[question],
        n_results=n_results,
        where=where,
    )
    # One query text was sent, so the matches are in the first inner list.
    documents = results["documents"]
    return documents[0] if documents else []

# Test a sample query
# Smoke test of retrieval: the printed list should contain AAPL revenue documents.
query_result = query_chromadb("AAPL 2023 financials Revenue")
print(query_result)

/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:06<00:00, 12.4MiB/s]


['AAPL 2023 financials Total Revenue: 383285000000.0', 'AAPL 2022 financials Total Revenue: 394328000000.0', 'AAPL 2024 financials Total Revenue: 391035000000.0', 'AAPL 2023 financials Operating Revenue: 383285000000.0', 'AAPL 2022 financials Operating Revenue: 394328000000.0']


**5. Use OpenAI to generate natural language responses**

In [None]:
# Set your OpenAI API Key
# Do not paste the key into the notebook: it would be saved and shared with the file.
# Read it from the OPENAI_API_KEY environment variable, or prompt for it (input hidden) when unset.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

In [None]:
def generate_answer_with_openai(question, n_results=5):
    """Use OpenAI to analyze financial data from ChromaDB and generate a response.

    The question is first used to retrieve documents with ``query_chromadb``;
    those documents are then embedded in a prompt sent to ``gpt-4o-mini``.

    Args:
        question: The user's natural language question.
        n_results: Number of documents to retrieve as context for the model.

    Returns:
        str: The model's answer with surrounding whitespace removed, a fixed
        apology message when nothing was retrieved, or an empty string when
        the model returns no text content.
    """
    retrieved_data = query_chromadb(question, n_results=n_results)

    # Return before building a client: with no data there is nothing to send.
    if not retrieved_data:
        return "Sorry, I couldn't find any relevant financial data."

    formatted_data = "\n".join(retrieved_data)

    # Construct the prompt with retrieved data and user question
    prompt = f"""
    You are an AI financial analyst. Based on the following financial data, answer the question:
    ---
    {formatted_data}
    ---
    Question: {question}
    Please provide a professional yet easy-to-understand response.
    """

    client = openai.OpenAI(api_key=openai.api_key)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=200
    )

    # message.content can be None; guard so .strip() does not raise AttributeError.
    content = response.choices[0].message.content
    return content.strip() if content else ""

In [None]:
# Example query
# The same string serves as the retrieval query and as the question placed in the LLM prompt.
user_question = "May I ask what Tesla's accounts payable was in 2021?"
answer = generate_answer_with_openai(user_question)
print(answer)

COST 2021 financials Total Revenue: 195929000000.0
DASH 2021 financials Total Revenue: 4888000000.0
ON 2021 financials Other Gand A: 304800000.0
CEG 2021 financials Total Expenses: 19287000000.0
DASH 2023 financials Total Revenue: 8635000000.0
Based on the financial data provided, there is no specific information regarding Tesla's accounts payable for 2021. The data includes total revenues and expenses for various companies, but does not mention Tesla or provide details about its accounts payable. If you have access to Tesla's financial statements or additional data, you may be able to find the accounts payable figure there. If you need help interpreting financial statements, feel free to ask!
