# Vector Search and RAG Application Built on SuperDuperDB

In [None]:
import os
import click
from tqdm import tqdm

import sentence_transformers
from dotenv import load_dotenv
from superduper import (
    Document,
    Listener,
    model,ObjectModel,
    Schema,
    VectorIndex,
    superduper,
    vector
)
# from superduper.backends.mongodb import
import superduper_mongodb
load_dotenv()

## Connect to mongodb database

In [2]:
# Connection settings are read from the environment, falling back to the
# literal defaults below when the variables are unset.
mongodb_uri = os.getenv("MONGODB_URI", "superduperdb-demo")
artifact_store = os.getenv("ARTIFACT_STORE", "data/artifact_store")

# Build the data layer; artifacts go to a filesystem-backed store.
db = superduper(mongodb_uri, artifact_store=f"filesystem://{artifact_store}")

2024-Sep-08 23:40:02.64| INFO     | localhost.localdomain| superduper.base.build:56   | Data Client is ready. MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, serverselectiontimeoutms=5000)
2024-Sep-08 23:40:02.64| INFO     | localhost.localdomain| superduper.base.build:35   | Connecting to Metadata Client with engine:  MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, serverselectiontimeoutms=5000)
2024-Sep-08 23:40:02.65| INFO     | localhost.localdomain| superduper.base.build:141  | Connecting to compute client: Compute(uri=None, compute_kwargs={}, _path='superduper.backends.local.compute.LocalComputeBackend')
2024-Sep-08 23:40:02.65| INFO     | localhost.localdomain| superduper.base.datalayer:106  | Building Data Layer
2024-Sep-08 23:40:02.65| INFO     | localhost.localdomain| superduper.base.build:208  | Configuration: 
 +----------------+---------------------------------------+
| Configuration  |      

## Parse pdf files and store them in the database

In [3]:
from superduper.ext.unstructured.encoder import unstructured_encoder

# Apply the encoder to the database before inserting documents that use it.
db.apply(unstructured_encoder)

pdf_folder = 'pdf-folders'

# Every directory entry is used as an input file.
# NOTE(review): os.listdir does not filter by extension — confirm the folder
# only contains PDFs.
pdf_paths = [os.path.join(pdf_folder, pdf) for pdf in os.listdir(pdf_folder)]
# collection = superduper_mongodb("source")
# One document per file, with the encoded file stored under "elements".
to_insert = [
    Document({"elements": unstructured_encoder(pdf_path)}) for pdf_path in pdf_paths
]
# db.execute(collection.insert_many(to_insert))
# _ = db['source'].insert_many(to_insert).execute()
# Insert into the `source` table.
db['source'].insert_many(to_insert).execute()

2024-Sep-08 23:40:10.06| INFO     | localhost.localdomain| superduper.backends.mongodb.data_backend:226  | Table source does not exist, auto creating...


([ObjectId('66de7c49c356594b294e9e19'), ObjectId('66de7c49c356594b294e9e1a')],
 None)

In [None]:
db.show()

In [None]:
db['source'].find_one().execute().unpack()

## Create a chunking model to split the parsed PDF elements into chunks

In [4]:
def merge_metadatas(metadatas, return_center=False):
    """Merge the layout metadata of several elements into one region.

    Args:
        metadatas: Sequence of dicts. Each must provide
            ``["coordinates"]["points"]`` with exactly four ``(x, y)``
            corner points and a ``"page_number"`` entry.
        return_center: When true, ``"points"`` is the centre of the merged
            region as ``{"x": ..., "y": ...}`` instead of its four corners.

    Returns:
        ``{}`` for empty input. Otherwise a dict with ``"points"`` (the four
        merged corners, or the centre when ``return_center`` is set) and
        ``"page_number"`` taken from the last entry of ``metadatas``.
    """
    if not metadatas:
        return {}

    # Infinite sentinels instead of 0 / 999999999 so the fold is also correct
    # for negative or very large coordinates.
    lo, hi = float("inf"), float("-inf")
    p1, p2, p3, p4 = (lo, lo), (lo, hi), (hi, hi), (hi, lo)
    for metadata in metadatas:
        p1_, p2_, p3_, p4_ = metadata["coordinates"]["points"]
        p1 = (min(p1[0], p1_[0]), min(p1[1], p1_[1]))
        p2 = (min(p2[0], p2_[0]), max(p2[1], p2_[1]))
        p3 = (max(p3[0], p3_[0]), max(p3[1], p3_[1]))
        p4 = (max(p4[0], p4_[0]), min(p4[1], p4_[1]))

    if return_center:
        points = {"x": (p1[0] + p3[0]) / 2, "y": (p1[1] + p3[1]) / 2}
    else:
        points = (p1, p2, p3, p4)

    # Read the page number on both paths; previously it was only assigned
    # inside the `return_center` branch, so the default call raised
    # UnboundLocalError.
    page_number = metadatas[-1]["page_number"]
    return {"points": points, "page_number": page_number}


def create_chunk_and_metadatas(page_elements, stride=3, window=10):
    """Slide a window over one page's elements and build a chunk per step.

    Windows start every ``stride`` elements and span up to ``window``
    elements. Each result is ``{"txt": ..., "metadata": ...}``: the window's
    element texts joined by newlines, and the merged layout metadata
    reported as a centre point.
    """

    def _chunk_at(start):
        group = page_elements[start : start + window]
        group_metadatas = [element.metadata.to_dict() for element in group]
        text = "\n".join([element.text for element in group])
        merged = merge_metadatas(group_metadatas, return_center=True)
        return {"txt": text, "metadata": merged}

    return [_chunk_at(start) for start in range(0, len(page_elements), stride)]


@model(flatten=True, model_update_kwargs={'document_embedded': False})
def get_chunks(elements):
    """Group parsed elements by page and split every page into chunks.

    Args:
        elements: Iterable of parsed elements exposing
            ``metadata.page_number``.

    Returns:
        A flat list with the chunk dicts of all pages, in the order the
        pages were first encountered.
    """
    from collections import defaultdict

    pages_elements = defaultdict(list)
    for element in elements:
        pages_elements[element.metadata.page_number].append(element)

    # Flatten with a single comprehension: `sum(lists, [])` re-copies the
    # accumulated list on every addition, which is quadratic in chunk count.
    return [
        chunk
        for page_elements in pages_elements.values()
        for chunk in create_chunk_and_metadatas(page_elements)
    ]


In [5]:
# Used as the listener's uuid; the listener's outputs key is then
# '_outputs.chunker'.
MODEL_IDENTIFIER_CHUNK = "chunker"
# Listener that applies `get_chunks` to the "elements" key of the documents
# selected from the `source` table.
upstream_listener= Listener(
        model=get_chunks,
        select=db['source'].select(),
        key="elements",
       uuid=MODEL_IDENTIFIER_CHUNK
)
db.apply(upstream_listener)

2024-Sep-08 23:40:49.38| INFO     | localhost.localdomain| superduper.jobs.queue:87   | Running jobs for listener.chunker with ids: [ObjectId('66de7c49c356594b294e9e19'), ObjectId('66de7c49c356594b294e9e1a')]
2024-Sep-08 23:40:49.43| INFO     | localhost.localdomain| superduper.backends.local.compute:58   | Submitting job. function:<function method_job at 0x7fa853e204a0>
2024-Sep-08 23:40:49.43| INFO     | localhost.localdomain| superduper.components.model:672  | Requesting prediction in db - [get_chunks] with predict_id chunker
Using select {'_base': '?source-find', '_builds': {'source-find': {'_path': 'superduper.backends.mongodb.query.parse_query', 'documents': [], 'query': 'source.find()'}}, '_blobs': {}, '_files': {}} and ids [ObjectId('66de7c49c356594b294e9e19'), ObjectId('66de7c49c356594b294e9e1a')]
2024-Sep-08 23:40:50.64| INFO     | localhost.localdomain| superduper.components.model:797  | Adding 2 model outputs to `db`
2024-Sep-08 23:40:51.89| SUCCESS  | localhost.localdomain

(['listener.chunker'],
 Listener(identifier='chunker', uuid='chunker', upstream=None, plugins=None, key='elements', model=ObjectModel(identifier='get_chunks', uuid='42f530c0-a350-467b-b6f1-a85a5d099444', upstream=None, plugins=None, signature='*args,**kwargs', datatype=None, output_schema=None, flatten=True, model_update_kwargs={'document_embedded': False}, predict_kwargs={}, compute_kwargs={}, validation=None, metric_values={}, num_workers=0, object=<function get_chunks at 0x7fa805048fe0>), select=source.find(), active=True, predict_kwargs={}))

In [6]:
db.show()

[{'type_id': 'datatype', 'identifier': 'unstructured'},
 {'type_id': 'listener', 'identifier': 'chunker'},
 {'type_id': 'model', 'identifier': 'get_chunks'}]

In [None]:
db.databackend.db.list_collection_names() 


In [7]:
upstream_listener.outputs_key
# '_outputs.chunker'

'_outputs.chunker'

In [None]:
# MODEL_IDENTIFIER_CHUNK = "chunk"
# from superduper import ObjectModel
# chunk_model = ObjectModel(
#     identifier=MODEL_IDENTIFIER_CHUNK,
#     object=get_chunks,
#     flatten=True,
#     model_update_kwargs={"document_embedded": False},
#     output_schema=Schema(identifier="myschema", fields={"txt": "string"}),
# )

# db.add(
#     Listener(
#         model=chunk_model,
#         select=select,
#         key="elements",
#     )
# )
# upstream_listener= Listener(
#         model=get_chunks,
#         select=db['source'].find(),
#         key="elements",
#        uuid=MODEL_IDENTIFIER_CHUNK
# )
# db.apply(upstream_listener)

## Embedding all text blocks and building vector indexes

In [None]:
# SOURCE_KEY = "elements"
MODEL_IDENTIFIER_EMBEDDING = "embedding"
VECTOR_INDEX_IDENTIFIER = "vector-index"
# COLLECTION_NAME_CHUNK = f"_outputs.{SOURCE_KEY}.{MODEL_IDENTIFIER_CHUNK}"
# Table that holds the chunker listener's outputs.
COLLECTION_NAME_CHUNK = f"_outputs.{MODEL_IDENTIFIER_CHUNK}" # _outputs.chunker
# CHUNK_OUTPUT_KEY = f"_outputs.{SOURCE_KEY}.{MODEL_IDENTIFIER_CHUNK}"
# Path of the chunk text inside each output document: _outputs.chunker.txt
CHUNK_OUTPUT_KEY = f"_outputs.{MODEL_IDENTIFIER_CHUNK}.txt"
indexing_key = upstream_listener.outputs_key # '_outputs.chunker', i.e. CHUNK_OUTPUT_KEY without the ".txt" suffix
chunk_collection = db[COLLECTION_NAME_CHUNK]

def preprocess(x):
    """Extract the chunk text from a dict input; pass anything else through.

    For a dict, the value under the greatest key (in sort order) is taken
    and its ``"txt"`` entry returned.
    """
    if not isinstance(x, dict):
        return x
    # For model chains, the logic of this key needs to be optimized.
    _, last_value = sorted(x.items())[-1]
    return last_value["txt"]
from superduper_sentence_transformers import SentenceTransformer
# Local sentence-transformers embedding model producing 1024-dimensional
# vectors. Both the wrapped object and the wrapper are pinned to "cuda".
# NOTE(review): presumably this cell requires a CUDA-capable GPU — confirm.
embedding_model = SentenceTransformer(
    identifier=MODEL_IDENTIFIER_EMBEDDING,
    object=sentence_transformers.SentenceTransformer("BAAI/bge-large-en-v1.5", device="cuda"),
    datatype=vector(shape=(1024,)),
    device="cuda",
    # predict_method="encode",
    # preprocess=preprocess,
    postprocess=lambda x: x.tolist(),
    # batch_predict=True,
    predict_kwargs={"show_progress_bar": True},
    # device='cuda'

)
# uuid given to the indexing listener that uses this embedding model.
vector_index_uuid="embedding-bge-large"

## OpenAI Embedding Model

In [9]:
# SOURCE_KEY = "elements"
MODEL_IDENTIFIER_EMBEDDING = "embedding"
VECTOR_INDEX_IDENTIFIER = "vector-index"
# COLLECTION_NAME_CHUNK = f"_outputs.{SOURCE_KEY}.{MODEL_IDENTIFIER_CHUNK}"
# Table that holds the chunker listener's outputs.
COLLECTION_NAME_CHUNK = f"_outputs.{MODEL_IDENTIFIER_CHUNK}" # _outputs.chunker
# CHUNK_OUTPUT_KEY = f"_outputs.{SOURCE_KEY}.{MODEL_IDENTIFIER_CHUNK}"
# Path of the chunk text inside each output document: _outputs.chunker.txt
CHUNK_OUTPUT_KEY = f"_outputs.{MODEL_IDENTIFIER_CHUNK}.txt"
indexing_key = upstream_listener.outputs_key # '_outputs.chunker', i.e. CHUNK_OUTPUT_KEY without the ".txt" suffix
chunk_collection = db[COLLECTION_NAME_CHUNK]

# import os
# os.environ["OPENAI_API_KEY"] = "sk-proj-e"
from superduper_openai import OpenAIEmbedding

# OpenAI-hosted embedding model; rebinds `embedding_model`, replacing any
# value assigned by an earlier cell.
embedding_model = OpenAIEmbedding(
    identifier=MODEL_IDENTIFIER_EMBEDDING,
     model="text-embedding-ada-002",
)
# uuid given to the indexing listener that uses this embedding model.
vector_index_uuid="embedding-ada-002"

## Anthropic Embedding Model

In [None]:
# Not supported

## Apply Index

In [10]:
# Create vector-index
# The indexing listener runs `embedding_model` on the CHUNK_OUTPUT_KEY field
# of the rows selected from the chunk table.
vector_index = \
    VectorIndex(
        VECTOR_INDEX_IDENTIFIER,
        indexing_listener=Listener(
            select=chunk_collection.select(),
            key=CHUNK_OUTPUT_KEY,  # Key for the documents
            # key=indexing_key,  # Key for the documents
            model=embedding_model,  # Specify the model for processing
            # predict_kwargs={"max_chunk_size": 64},
            uuid=vector_index_uuid,
            identifier="embedding-listener"
        )
    )
# db.apply()

## Start Indexing Embeddings

In [12]:
db.apply(vector_index)

2024-Sep-08 23:42:38.64| INFO     | localhost.localdomain| superduper.jobs.queue:87   | Running jobs for listener.embedding-listener with ids: [ObjectId('66de7c53c356594b294e9e1f'), ObjectId('66de7c53c356594b294e9e20'), ObjectId('66de7c53c356594b294e9e21'), ObjectId('66de7c53c356594b294e9e22'), ObjectId('66de7c53c356594b294e9e23'), ObjectId('66de7c53c356594b294e9e24'), ObjectId('66de7c53c356594b294e9e25'), ObjectId('66de7c53c356594b294e9e26'), ObjectId('66de7c53c356594b294e9e27'), ObjectId('66de7c53c356594b294e9e28'), ObjectId('66de7c53c356594b294e9e29'), ObjectId('66de7c53c356594b294e9e2a'), ObjectId('66de7c53c356594b294e9e2b'), ObjectId('66de7c53c356594b294e9e2c'), ObjectId('66de7c53c356594b294e9e2d'), ObjectId('66de7c53c356594b294e9e2e'), ObjectId('66de7c53c356594b294e9e2f'), ObjectId('66de7c53c356594b294e9e30'), ObjectId('66de7c53c356594b294e9e31'), ObjectId('66de7c53c356594b294e9e32'), ObjectId('66de7c53c356594b294e9e33'), ObjectId('66de7c53c356594b294e9e34'), ObjectId('66de7c53c3

100%|██████████| 37/37 [00:20<00:00,  1.83it/s]


2024-Sep-08 23:42:59.41| INFO     | localhost.localdomain| superduper.components.model:797  | Adding 3699 model outputs to `db`
2024-Sep-08 23:43:21.73| INFO     | localhost.localdomain| superduper.jobs.queue:87   | Running jobs for listener.embedding-listener with ids: [ObjectId('66de7c53c356594b294e9e1f'), ObjectId('66de7c53c356594b294e9e20'), ObjectId('66de7c53c356594b294e9e21'), ObjectId('66de7c53c356594b294e9e22'), ObjectId('66de7c53c356594b294e9e23'), ObjectId('66de7c53c356594b294e9e24'), ObjectId('66de7c53c356594b294e9e25'), ObjectId('66de7c53c356594b294e9e26'), ObjectId('66de7c53c356594b294e9e27'), ObjectId('66de7c53c356594b294e9e28'), ObjectId('66de7c53c356594b294e9e29'), ObjectId('66de7c53c356594b294e9e2a'), ObjectId('66de7c53c356594b294e9e2b'), ObjectId('66de7c53c356594b294e9e2c'), ObjectId('66de7c53c356594b294e9e2d'), ObjectId('66de7c53c356594b294e9e2e'), ObjectId('66de7c53c356594b294e9e2f'), ObjectId('66de7c53c356594b294e9e30'), ObjectId('66de7c53c356594b294e9e31'), Object

0it [00:00, ?it/s]

2024-Sep-08 23:43:21.81| SUCCESS  | localhost.localdomain| superduper.backends.local.compute:64   | Job submitted on <superduper.backends.local.compute.LocalComputeBackend object at 0x7fa836ddcad0>.  function:<function method_job at 0x7fa853e204a0> future:18d1bc73-9278-4dca-82ab-80bef056ced8
2024-Sep-08 23:43:21.89| SUCCESS  | localhost.localdomain| superduper.backends.local.compute:64   | Job submitted on <superduper.backends.local.compute.LocalComputeBackend object at 0x7fa836ddcad0>.  function:<function method_job at 0x7fa853e204a0> future:37c51828-2573-4002-b402-4543c5c4507f





2024-Sep-08 23:43:26.50| INFO     | localhost.localdomain| superduper.jobs.queue:87   | Running jobs for vector_index.vector-index with ids: [ObjectId('66de7c53c356594b294e9e1f'), ObjectId('66de7c53c356594b294e9e20'), ObjectId('66de7c53c356594b294e9e21'), ObjectId('66de7c53c356594b294e9e22'), ObjectId('66de7c53c356594b294e9e23'), ObjectId('66de7c53c356594b294e9e24'), ObjectId('66de7c53c356594b294e9e25'), ObjectId('66de7c53c356594b294e9e26'), ObjectId('66de7c53c356594b294e9e27'), ObjectId('66de7c53c356594b294e9e28'), ObjectId('66de7c53c356594b294e9e29'), ObjectId('66de7c53c356594b294e9e2a'), ObjectId('66de7c53c356594b294e9e2b'), ObjectId('66de7c53c356594b294e9e2c'), ObjectId('66de7c53c356594b294e9e2d'), ObjectId('66de7c53c356594b294e9e2e'), ObjectId('66de7c53c356594b294e9e2f'), ObjectId('66de7c53c356594b294e9e30'), ObjectId('66de7c53c356594b294e9e31'), ObjectId('66de7c53c356594b294e9e32'), ObjectId('66de7c53c356594b294e9e33'), ObjectId('66de7c53c356594b294e9e34'), ObjectId('66de7c53c356

Loading vectors into vector-table...: 3699it [00:04, 870.94it/s]

2024-Sep-08 23:43:36.81| SUCCESS  | localhost.localdomain| superduper.backends.local.compute:64   | Job submitted on <superduper.backends.local.compute.LocalComputeBackend object at 0x7fa836ddcad0>.  function:<function callable_job at 0x7fa853e20540> future:e14c6a5e-5933-4388-b630-0bce30cc9b70





(['listener.embedding-listener', 'vector_index.vector-index'],
 VectorIndex(identifier='vector-index', uuid='8d666dca-afda-4163-885a-60d777f51210', upstream=None, plugins=None, indexing_listener=Listener(identifier='embedding-listener', uuid='embedding-ada-002', upstream=None, plugins=None, key='_outputs.chunker.txt', model=OpenAIEmbedding(identifier='embedding', uuid='22214f2d-0a97-476d-88ee-c361077fa0f0', upstream=None, plugins=None, signature='singleton', datatype=DataType(identifier='vector[1536]', uuid='e536e6a3-3ed8-48a6-a5b9-e1b4683e42a9', upstream=None, plugins=None, encoder=None, decoder=None, info=None, shape=(1536,), directory=None, encodable='native', bytes_encoding=<BytesEncoding.BYTES: 'Bytes'>, intermediate_type='bytes', media_type=None), output_schema=None, flatten=False, model_update_kwargs={}, predict_kwargs={}, compute_kwargs={}, validation=None, metric_values={}, num_workers=0, model='text-embedding-ada-002', max_batch_size=8, openai_api_key=None, openai_api_base=No

In [11]:
print(len(embedding_model.predict("What is superduper")))

1536


In [None]:
db.show()

In [None]:
db[COLLECTION_NAME_CHUNK].find_one().execute().unpack()


## Define a vector search function

In [13]:
from pprint import pprint
def vector_search(query, top_k=5):
    """Run a vector search for ``query`` and print the matching chunks.

    Up to ``top_k`` results are requested from the chunk table through the
    vector index and printed from highest to lowest score: the chunk text,
    a blank line, a dict with the score and chunk metadata, then a
    separator line.
    """
    search = db[COLLECTION_NAME_CHUNK].like(
        Document({CHUNK_OUTPUT_KEY: query}),
        vector_index=VECTOR_INDEX_IDENTIFIER,
        n=top_k,
    )
    hits = db.execute(search.select({}))

    # Only sort a truthy result; a falsy one is iterated as returned.
    ranked = sorted(hits, key=lambda hit: hit['score'], reverse=True) if hits else hits
    for hit in ranked:
        score = hit["score"]
        chunk = hit[upstream_listener.outputs_key]
        summary = {"score": score, "metadata": chunk["metadata"]}
        print(chunk["txt"])
        print()
        print(summary)
        print("\n\n", '-' * 20)

In [14]:
vector_search("What is the function of keys 10 to 12 on the left steering wheel keypad?")

2024-Sep-08 23:45:31.72| INFO     | localhost.localdomain| superduper.base.datalayer:905  | {}
NOTE
For the sake of road safety, it is advised that you primarily use voice control (if available) or the steering wheel keypads when driving.
Steering wheel keypads
Keys 10 and 11 are used for phone calls. The others are used for navigating in the displays and controlling the infotainment system. The function of each key is the following:
1 Navigate left.
AA 338361
2 Navigate up.
3 Navigate right.
4 Navigate down.
5 Select.

{'score': 0.9044369247730382, 'metadata': {'points': {'x': 169.35586999999998, 'y': 225.92667}, 'page_number': 267}}


 --------------------
Keys 10 and 11 are used for phone calls. The others are used for navigating in the displays and controlling the infotainment system. The function of each key is the following:
1 Navigate left.
AA 338361
2 Navigate up.
3 Navigate right.
4 Navigate down.
5 Select.
6 Return to the home screen.
7 Open a menu.
8 Back.

{'score': 0.89933

## Define an LLM model (Anthropic)

In [None]:
from superduper_anthropic import AnthropicCompletions
MODEL_IDENTIFIER_LLM = "llm"
# import os
# os.environ["ANTHROPIC_API_KEY"] = "sk-ant-api03xxx"
# Generation settings handed to the model as `predict_kwargs`.
predict_kwargs = {
    "max_tokens": 1024,
    "temperature": 0.8,
}

llm = AnthropicCompletions(
    identifier=MODEL_IDENTIFIER_LLM,
    model='claude-2.1',
    predict_kwargs=predict_kwargs
)
# Smoke-test the model with a direct call.
llm.predict("Tell me a joke")

In [None]:
db.drop(llm)

In [None]:
db.show()

## Define an LLM model (OpenAI)

In [6]:
from superduper_openai import OpenAIChatCompletion
MODEL_IDENTIFIER_LLM = "llm"
import os

# Only fall back to the placeholder when no key is configured. An
# unconditional assignment would overwrite a real key already present in the
# environment (e.g. one loaded from .env by load_dotenv()).
os.environ.setdefault('OPENAI_API_KEY', 'sk-proj-exxx')

# Chat-completion model registered under the shared "llm" identifier.
llm = OpenAIChatCompletion(
    identifier=MODEL_IDENTIFIER_LLM,
    model="gpt-3.5-turbo"
)
# Smoke-test the model with a direct call.
llm.predict("Tell me a joke")


"Why couldn't the leopard play hide and seek?\n\nBecause he was always spotted!"

In [12]:
from superduper_openai import OpenAIChatCompletion
MODEL_IDENTIFIER_LLM = "llm"
# Only fall back to the placeholder when no key is configured, so a key
# already present in the environment is not overwritten.
os.environ.setdefault('OPENAI_API_KEY', 'sk-proj-exx')

# Same client class, pointed at a local endpoint through `base_url`.
llm = OpenAIChatCompletion(
    identifier=MODEL_IDENTIFIER_LLM,
    uuid="llama-uuid",
    model='meta-llama/Meta-Llama-3.1-8B-Instruct',
    client_kwargs={
        "base_url": "http://127.0.0.1:8080/v1"
        # "default_headers": {"Content-Type": "application/json"}
    }
)
# Smoke-test the model with a direct call.
llm.predict("Tell me a joke")

'A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?" \n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."'

In [17]:
 db.apply(llm)

([],
 OpenAIChatCompletion(identifier='llm', uuid='54897657-b48b-44d7-90a2-50a563328047', upstream=None, plugins=None, signature='singleton', datatype='str', output_schema=None, flatten=False, model_update_kwargs={}, predict_kwargs={}, compute_kwargs={}, validation=None, metric_values={}, num_workers=0, model='gpt-3.5-turbo', max_batch_size=8, openai_api_key=None, openai_api_base=None, client_kwargs={}, batch_size=1, prompt=''))

In [None]:
db.load("model","llm")

In [18]:
print(db.load("model","llm").predict("Tell me a joke"))

Why couldn't the bicycle stand up by itself? Because it was two-tired!


## Generate Questions 

In [19]:
generate_template = """
Based on the information provided, please formulate one question related to the document excerpt. Answer in JSON format.

**Context**:
{%s}

Using the information above, generate your questions. Your question can be one of the following types: What, Why, When, Where, Who, How. Please respond in the following format:

 
{
  \"question_type\": \"Type of question, e.g., 'What'\",
  \"question\": \"Your question \",
}

 
"""

text ="""
The automatic activation of I-Roll that takes places when cruise control is active cannot be switched off. You can however disengage I-Roll so that it is not activated automatically when cruise control is not active.
To temporarily disengage I-Roll, press and hold the minus (-) button on the gear selector.
To engage I-Roll again, gently depress the accelerator pedal.
I-See
I-See is a set of functions that use information about the road topography ahead of the truck to optimise the gear selection and, as a result, save fuel. It lowers the fuel consumption and
Gearbox
improves the driveability when cruise control is active.
When you drive with cruise control active on a road, a sensor records the road topography. The recorded information is combined with geographical coordinates from the truck's GPS system. The data are saved, either in the system's memory or in an external topography database (via mobile network).
I-See uses these data to save fuel. When you drive with cruise control active on a road, for which data are available, I-See receives the data and can predict when hills and crests will appear. I-See automatically adapts throttle application, gear strategies and truck speed for more fuel efficient driving.
Activating I-See
"""
# llm_qna=db.load("model","llm")
# Fill the single %s placeholder of `generate_template` with the excerpt.
prompt = lambda x: generate_template % x
# Ask the LLM for one question about `text`; `res` holds the reply.
res=llm.predict(prompt(text))
# print(prompt(text))

In [20]:
import ast
import json

# `res` is free-form LLM output, so it is parsed as data. It must not go
# through eval(), which would execute whatever expression the model returned.
try:
    out = json.loads(res)
except json.JSONDecodeError:
    try:
        # Accept Python-literal style replies (single quotes, trailing
        # commas) that strict JSON rejects.
        out = ast.literal_eval(res.strip())
    except (ValueError, SyntaxError, TypeError):
        # Last resort: drop the first two lines and keep the remaining raw
        # text, or the whole reply when it has fewer than three lines.
        parts = res.split("\n", 2)
        out = parts[2] if len(parts) > 2 else res
# Display the parsed result. The original re-ran eval(res) here, outside the
# try, which re-raised the very error handled above.
out

{'question_type': 'How',
 'question': 'How does I-See use road topography information to optimize gear selection and save fuel?'}

In [21]:
print(res)

{
  "question_type": "How",
  "question": "How does I-See use road topography information to optimize gear selection and save fuel?"
}


In [22]:
MODEL_IDENTIFIER_LLM = "llm"
# RAG prompt with two placeholders: {context} and {input}.
prompt_template = (
    "The following is a document and question about the volvo user manual\n"
    "Only provide a very concise answer\n"
    "{context}\n\n"
    "Here's the question:{input}\n"
    "answer:"
)

# from superduper.ext.vllm import VllmModel
# NOTE(review): VllmModel is only referenced by commented-out code in this
# cell, and this OpenAIChatCompletion import path differs from the
# `superduper_openai` one used in earlier cells — confirm which is intended.
from superduper_vllm import VllmModel
from superduper.ext.openai import OpenAIChatCompletion

# llm = VllmModel(
#     identifier=MODEL_IDENTIFIER_LLM,
#     model_name="TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
#     prompt_func=prompt_template,
#     vllm_kwargs={ 
#         "gpu_memory_utilization": 0.50,
#         "max_model_len": 2048,
#         "quantization": "awq"
#                    },
#     predict_kwargs={"max_tokens": 1024, "temperature": 0.8},
# )
# Add the llm instance

# db.apply(llm)

## Prompt Template for LLM

In [23]:
# RAG prompt: instructions, the retrieved context, then the user's question.
prompt_template = "\n".join(
    [
        "The following is a document and question about the volvo user manual",
        "Only provide a very concise answer",
        "{context}",
        "",
        "Here's the question:{input}",
        "answer:",
    ]
)


# @model
def build_prompt(query, docs):
    """Render ``prompt_template`` for ``query`` from retrieved documents.

    Each item of ``docs`` must provide its chunk text at ``["text"]["txt"]``;
    the texts are joined with blank lines to form the context.
    """
    context = "\n\n".join(doc["text"]["txt"] for doc in docs)
    return prompt_template.format(context=context, input=query)

## Test Prompt with documents from vector search output

In [24]:
from superduper.components.model import QueryModel
# '<var:query>' is a placeholder; the query is supplied at predict time via
# predict(query=...).
item = {'_outputs.chunker.txt': '<var:query>'}
top_k = 3
# Query model that wraps the vector search over the chunk table.
vector_search_model = QueryModel(
    identifier="VectorSearch",
    select=chunk_collection.like(
        item, 
        vector_index=VECTOR_INDEX_IDENTIFIER, 
        n=top_k
    ).select(),
    # The _source is the identifier of the upstream data, which can be used to locate the data from upstream sources using `_source`.
    # Each hit is reduced to its chunk payload ("text"), its `_source` and its score.
    postprocess=lambda docs: [{"text": doc['_outputs.chunker'], "_source": doc["_source"],"score": doc["score"]} for doc in docs],
    db=db
)

## Test Vector Search Model

In [25]:
query="What is the function of keys 10 to 12 on the left steering wheel keypad?"
pprint(vector_search_model.predict(query=query))

2024-Sep-08 23:47:53.13| INFO     | localhost.localdomain| superduper.base.datalayer:905  | {}
[{'_source': ObjectId('66de7c49c356594b294e9e19'),
  'score': 0.9044369247730382,
  'text': {'metadata': {'page_number': 267,
                        'points': {'x': 169.35586999999998, 'y': 225.92667}},
           'txt': 'NOTE\n'
                  'For the sake of road safety, it is advised that you '
                  'primarily use voice control (if available) or the steering '
                  'wheel keypads when driving.\n'
                  'Steering wheel keypads\n'
                  'Keys 10 and 11 are used for phone calls. The others are '
                  'used for navigating in the displays and controlling the '
                  'infotainment system. The function of each key is the '
                  'following:\n'
                  '1 Navigate left.\n'
                  'AA 338361\n'
                  '2 Navigate up.\n'
                  '3 Navigate right.\n'
                 

In [26]:
query="What is the function of keys 10 to 12 on the left steering wheel keypad?"
docs=vector_search_model.predict(query=query)
# NOTE(review): this bare expression is not the cell's last statement, so its
# value is never displayed.
type(docs[0])
print(len(docs))
# Rebinds `prompt` (earlier bound to the template-filling lambda) to the
# final prompt string.
prompt=build_prompt(query,docs)

2024-Sep-08 23:47:56.66| INFO     | localhost.localdomain| superduper.base.datalayer:905  | {}
3


In [27]:
print(db.load("model","llm").predict(prompt))

Keys 10 to 12 on the left steering wheel keypad control the audio in the truck.


## Define a QA function

In [28]:
from IPython.display import display
import pandas as pd
from superduper.components.model import QueryModel

def qa(query, vector_search_top_k=5):
    """Answer ``query`` from the indexed chunks and display the sources.

    Runs a vector search over the chunk table, builds a prompt from the hits
    (highest score first), asks the stored "llm" model, then displays the
    answer followed by a table of page number, centre point and score for
    every hit.

    Args:
        query: The question to answer.
        vector_search_top_k: Number of chunks to retrieve as context.

    Returns:
        None. Results are shown with ``display``.
    """
    item = {'_outputs.chunker.txt': '<var:query>'}
    vector_search_model = QueryModel(
        identifier="VectorSearch",
        select=chunk_collection.like(
            item,
            vector_index=VECTOR_INDEX_IDENTIFIER,
            n=vector_search_top_k
        ).select(),
        postprocess=lambda docs: [{"text": doc['_outputs.chunker'], "_source": doc["_source"],"score": doc["score"]} for doc in docs],
        db=db
    )
    out = vector_search_model.predict(query=query)
    if not out:
        # Without hits there is no context to prompt with. The original fell
        # through to display(output) with `output` undefined (NameError).
        display("No matching documents found.")
        return

    out = sorted(out, key=lambda x: x["score"], reverse=True)
    prompt = build_prompt(query, out)
    output = db.load("model", "llm").predict(prompt)

    page_messages = []
    for source in out:
        metadata = source['text']["metadata"]
        page_messages.append(
            {
                "page_number": metadata["page_number"],
                "points": metadata["points"],
                "score": source["score"],
            }
        )
    df = pd.DataFrame(page_messages)
    display(output)
    display(df)
    

In [29]:
query="What is the function of keys 10 to 12 on the left steering wheel keypad?"
qa(query, vector_search_top_k=5)

2024-Sep-08 23:48:14.40| INFO     | localhost.localdomain| superduper.base.datalayer:905  | {}


'Keys 10 to 12 on the left steering wheel keypad control the audio in the truck.'

Unnamed: 0,page_number,points,score
0,267,"{'x': 169.35586999999998, 'y': 225.92667}",0.904437
1,267,"{'x': 191.64962, 'y': 225.92667}",0.899331
2,1,"{'x': 399.51374, 'y': 349.85872500000005}",0.889845
3,89,"{'x': 210.554055, 'y': 225.92667}",0.889673
4,88,"{'x': 388.781385, 'y': 218.15015499999998}",0.884241


## Define a QA function (Legacy)

In [None]:
from IPython.display import Markdown
from IPython.display import display
import pandas as pd
def qa(query, vector_search_top_k=5):
    """Legacy QA helper: answer ``query`` in a single ``db.execute`` call.

    Defining this function rebinds the name ``qa``, replacing the
    QueryModel-based version defined earlier in the notebook.

    NOTE(review): this relies on ``db.execute(model_name=..., context_select=...,
    context_key=...)`` and ``source.outputs("elements", "chunk")``, neither of
    which is used anywhere else in this notebook — presumably an older API;
    confirm it still works before running.

    Args:
        query: The question to answer.
        vector_search_top_k: Number of chunks to retrieve as context.
    """
    collection = db[COLLECTION_NAME_CHUNK]
    # One call is expected to return both the model output and the context hits.
    output, out = db.execute(
        model_name=MODEL_IDENTIFIER_LLM,
        query=query,
        context_select=collection.like(
            Document({CHUNK_OUTPUT_KEY: query}),
            vector_index=VECTOR_INDEX_IDENTIFIER,
            n=vector_search_top_k,
        ).select({}),
        context_key=f"{CHUNK_OUTPUT_KEY}.0.txt",
    )
    if out:
        # Highest score first.
        out = sorted(out, key=lambda x: x["score"], reverse=True)
    # One table row per hit: page number, points and score.
    page_messages = []
    for source in out:
        chunk_data = source.outputs("elements", "chunk")
        metadata = chunk_data["metadata"]
        page_number = metadata["page_number"]
        points = metadata["points"]
        score = source["score"]
        page_messages.append(
            {"page_number": page_number, "points": points, "score": score}
        )
    df = pd.DataFrame(page_messages)
    display(output.content)
    display(df)

In [None]:
qa("What is the function of keys 10 to 12 on the left steering wheel keypad?")