In [2]:
# Load a user query
from pathlib import Path
from dbchat import ROOT_DIR

# Shared data directory, located two levels above ROOT_DIR
DATA_DIR = ROOT_DIR.parent.parent / "data"

# Artefacts stored under DATA_DIR
db_path = str(DATA_DIR / "chinook.db")
table_metadata_dir = DATA_DIR / "metadata"
table_meta_descriptions_file = DATA_DIR / "table_descriptions.csv"

# Example user queries (end-to-end test inputs)
test_data_path = ROOT_DIR.parent / "tests/data/inputs/end-to-end.csv"

# test_data_path = "/mnt/c/Users/ssch7/repos/db-chat-assistant/src/tests/data/inputs/end-to-end.csv"

def load_example_queries(test_data_path):
    """Read the pipe-delimited example-query file into a list of tuples.

    The first line of the file is treated as a header and skipped. Every
    following non-blank line must contain exactly four ``|``-separated
    fields.

    Args:
        test_data_path: Path (``str`` or ``os.PathLike``) of the file to read.

    Returns:
        A list of ``(id, user_query, tables, comment)`` string tuples, one
        per data row, in file order. Empty if the file has no data rows.

    Raises:
        ValueError: If a data row does not split into exactly four fields.
    """
    test_data = []
    with open(test_data_path) as f:
        next(f, None)  # Skip the header row (tolerates an empty file)
        for row in f:
            row = row.rstrip("\r\n")  # Keep the line terminator out of `comment`
            if not row.strip():
                continue  # Ignore blank lines, e.g. a trailing newline at EOF
            # `query_id` rather than `id`, to avoid shadowing the builtin
            query_id, user_query, tables, comment = row.split('|')
            test_data.append((query_id, user_query, tables, comment))
    # Without this return the function implicitly returned None to callers.
    return test_data
# Read the example queries from test_data_path when this cell runs.
test_data = load_example_queries(test_data_path)

## Retrieve documents

In [24]:
# Retrieve a document, based on the query.
from typing import List
from llama_index import VectorStoreIndex, SimpleDirectoryReader

def load_raw_yaml():
    """Load the raw YAML metadata files from ``table_metadata_dir``.

    The reader is restricted to the ``.yaml`` extension and recursion into
    sub-directories is disabled. Typical usage::

        docs = load_raw_yaml()
        index = VectorStoreIndex.from_documents(docs)

    Returns:
        The documents produced by ``SimpleDirectoryReader.load_data()``.
    """
    yaml_reader = SimpleDirectoryReader(
        input_dir=table_metadata_dir,
        recursive=False,
        required_exts=[".yaml"],
    )
    return yaml_reader.load_data()

import csv
def load_table_meta_descriptions(path=None) -> List[dict]:
    """Load the table-description CSV as a list of row dictionaries.

    Args:
        path: Optional CSV file to read. When omitted, the module-level
            ``table_meta_descriptions_file`` is used, which preserves the
            original zero-argument call.

    Returns:
        One ``dict`` per data row, keyed by the column names in the CSV
        header, in file order.
    """
    if path is None:
        path = table_meta_descriptions_file
    # newline="" leaves line-ending handling to the csv module, so line
    # breaks embedded in quoted fields are read back unchanged.
    with open(path, "r", newline="") as csvfile:
        return [dict(row) for row in csv.DictReader(csvfile)]

from llama_index import download_loader
from sqlalchemy import create_engine
def load_metadata_from_sqllite():
    """Read the ``table_descriptions`` table of the SQLite database at ``db_path``.

    A ``DatabaseReader`` loader is obtained through ``download_loader`` and
    run against a SQLAlchemy engine created for the database file.

    Returns:
        The documents returned by ``DatabaseReader.load_data`` for the
        ``SELECT * FROM table_descriptions`` query.
    """
    loader_cls = download_loader("DatabaseReader")
    # The reader is given an engine rather than a connection URI.
    db_reader = loader_cls(engine=create_engine(f"sqlite:///{db_path}"))
    return db_reader.load_data(query="SELECT * FROM table_descriptions")

# Use the database-backed loader as the document source for the index built below.
documents = load_metadata_from_sqllite()

## Build an index of the documents

In [25]:
# Build the index
# Single definition of the persist location, kept as a str so the exact same
# value is handed to both persist() and StorageContext.from_defaults().
index_persist_dir = str(table_metadata_dir / "indices/table_descriptions")

index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(index_persist_dir)

# Load index from disk
from llama_index import StorageContext, load_index_from_storage

# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir=index_persist_dir)
# load index
index = load_index_from_storage(storage_context)

Ollama - Llama 2 7B Vector store

In [33]:
# Make sure the model is running (`ollama serve` in terminal)
from llama_index.llms import Ollama
llm = Ollama(model="llama2")

In [34]:
# Smoke test: send a single prompt to the LLM and print the completion.
resp = llm.complete("Who is Paul Graham?")
print(resp)

In [None]:
from llama_index import ServiceContext, set_global_service_context
# Set a global service context built from the current `llm`
ctx = ServiceContext.from_defaults(llm=llm)
set_global_service_context(ctx)

# Now you can use this service context when creating your VectorStoreIndex
from llama_index import VectorStoreIndex, SimpleDirectoryReader

# NOTE(review): "data" is a relative path, so it resolves against the current
# working directory rather than DATA_DIR — confirm this is the intended folder.
# This cell also rebinds `documents` and `index` from the earlier cells.
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=ctx)

OpenAI vector store

In [26]:
from llama_index.llms import OpenAI
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# Instantiate the LLM with the desired model and parameters
# (this rebinds `llm`, replacing any LLM assigned in an earlier cell)
llm = OpenAI(temperature=0.1, model="gpt-4")

# Create a ServiceContext with the LLM
service_context = ServiceContext.from_defaults(llm=llm)

# Load your documents
# NOTE(review): "data" is a relative path, so it resolves against the current
# working directory rather than DATA_DIR — confirm this is the intended folder.
documents = SimpleDirectoryReader("data").load_data()

# Build the index with the ServiceContext
# (this rebinds `index`, replacing the index assigned in earlier cells)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

<llama_index.indices.vector_store.base.VectorStoreIndex at 0x7f1621a2ab90>