In [1]:
table_name = 'faqs'
topK = 3

sql = f"""
select payload, vector_distance(vector, :vector, COSINE) as score
from {table_name}
order by score
fetch approx first {topK} rows only
"""


In [2]:
# Define the query
question = 'What is Always Free?'

In [3]:
# Connect to the Oracle Database 23ai
un = "vector"
pw = "vector"
cs = "localhost/FREEPDB1"

import oracledb

connection = oracledb.connect(user=un, password=pw, dsn=cs)


In [4]:
from sentence_transformers import SentenceTransformer
encoder = SentenceTransformer('all-MiniLM-L12-v2')


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Retrieval Code
import array
import json

with connection.cursor() as cursor:
    embedding = list(encoder.encode(question))
    vector = array.array("f", embedding)

    results = []

    for (info, score, ) in cursor.execute(sql, vector=vector):
        text_content = info.read()
        results.append((score, json.loads(text_content)))


  return forward_call(*args, **kwargs)


In [6]:
# Check results
import pprint
pprint.pp(results)


[(0.3420591558253262,
  {'text': 'faq | What are Always Free services?\n'
           '\n'
           'Always Free services are part of Oracle Cloud Free Tier. Always '
           'Free services are available for an unlimited time. Some '
           'limitations apply. As new Always Free services become available, '
           'you will automatically be able to use those as well.\n'
           '\n'
           'The following services are available as Always Free:\n'
           '\n'
           'AMD-based Compute\n'
           'Arm-based Ampere A1 Compute\n'
           'Block Volume\n'
           'Object Storage\n'
           'Archive Storage\n'
           'Flexible Load Balancer\n'
           'Flexible Network Load Balancer\n'
           'VPN Connect\n'
           'Autonomous Data Warehouse\n'
           'Autonomous Transaction Processing\n'
           'Autonomous JSON Database\n'
           'NoSQL Database (Phoenix Region only)\n'
           'APEX Application Development\n'
           'R

In [7]:
from transformers import LlamaTokenizerFast
import sys

tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
tokenizer.model_max_length = sys.maxsize

def truncate_string(string, max_tokens):
    # Tokenize the text and count the tokens
    tokens = tokenizer.encode(string, add_special_tokens=True)
    
    # Truncate the tokens to a maximum length
    truncated_tokens = tokens[:max_tokens]
    
    # Transform the tokens back to text
    truncated_text = tokenizer.decode(truncated_tokens)
    
    return truncated_text


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.


In [8]:
import os

def loadFAQs(directory_path):
    faqs = {}

    for filename in os.listdir(directory_path):
        if filename.endswith(".txt"):  # assuming FAQs are in .txt files
            file_path = os.path.join(directory_path, filename)

            with open(file_path) as f:
                raw_faq = f.read()

            filename_without_ext = os.path.splitext(filename)[0]  # remove .txt extension
            faqs[filename_without_ext] = [text.strip() for text in raw_faq.split('=====')]

    return faqs

faqs = loadFAQs('./txt-docs')

docs = [{'text': filename + ' | ' + section, 'path': filename}
        for filename, sections in faqs.items() for section in sections]


In [9]:
# Transform docs into a string array using the "payload" key
docs_as_one_string = "\n=======\n".join([doc["text"] for doc in docs])

docs_truncated = truncate_string(docs_as_one_string, 1000)


In [10]:
# Create the LLM Prompt
prompt = f"""\

<s>[INST] <<SYS>>
You are a helpful assistant named Oracle chatbot.
USE ONLY the sources below and ABSOLUTELY IGNORE any previous knowledge.
Use Markdown if appropriate.
Assume the customer is highly technical.
<</SYS>> [/INST]

[INST]
Respond to PRECISELY to this question: "{question}.", USING ONLY the following information and IGNORING ANY PREVIOUS KNOWLEDGE.

Include code snippets and commands where necessary.

NEVER mention the sources, always respond as if you have that knowledge yourself. Do NOT provide warnings or disclaimers.

=====
Sources: {docs_truncated}
=====

Answer (Three paragraphs, maximum 50 words each, 90% spartan):
[/INST]
"""


In [11]:
import oci
from LoadProperties import LoadProperties

# Setup basic variables
properties = LoadProperties()

# Use Instance Principals for Authentication
signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()

generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(
    config={},
    signer=signer,
    service_endpoint=properties.getEndpoint(),
    retry_strategy=oci.retry.NoneRetryStrategy(),
    timeout=(10, 240)
)

chat_detail = oci.generative_ai_inference.models.ChatDetails()
chat_request = oci.generative_ai_inference.models.CohereChatRequest()
chat_request.message = prompt
chat_request.max_tokens = 1000
chat_request.temperature = 0.0
chat_request.frequency_penalty = 0
chat_request.top_p = 0.75
chat_request.top_k = 0

chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(
    model_id=properties.getModelName()
)
chat_detail.chat_request = chat_request
chat_detail.compartment_id = properties.getCompartment()

chat_response = generative_ai_inference_client.chat(chat_detail)


In [12]:
pprint.pp(
    chat_response.data.chat_response.chat_history[1].message
)


('Always Free is a program within Oracle Cloud Free Tier, offering a range of '
 'services with no time limit. These services are accessible to all, from '
 'developers to students, and include AMD-based Compute, Arm-based Ampere A1 '
 'Compute, Block Volume, Object Storage, and Archive. \n'
 '\n'
 'The Always Free program is designed to provide an opportunity to explore and '
 "build in the cloud without any cost. It's a great way to learn and test "
 'applications, with the added benefit of being able to use new Always Free '
 'services as they become available. \n'
 '\n'
 'One key advantage is the unlimited time frame, allowing users to utilize '
 'these services without the pressure of a time limit, unlike the Free Trial '
 'which has a 30-day cap.')
