In [None]:
%pip install -q sentence-transformers

In [None]:
# Presumably the keyword-search baseline this demo contrasts with (confirm):
#   select * from table where text like '%internet%'
# Example question used later in the notebook: "What's the internet usage policy?"
# Step 1: Define Sample Documents
# Each record is one policy snippet with a "section" title and its "content"
# text. Only the first record carries a "doc_id".
documents = [
    dict(doc_id="1", section="Pay Policies", content="Employees are paid bi-weekly via direct deposit."),
    dict(section="Leave of Absence", content="Employees must submit a leave request for approval."),
    dict(section="Internet Use", content="Company internet must be used for work-related tasks only."),
    dict(section="Internet Use", content="Company internet is a broadband internet."),
    dict(section="Break at Work", content="Employees can take an hour break."),
    dict(section="Harassment", content="Interact with each employee with Respect"),
]

# Step 2: Get Content Texts
# Plain list of the content strings, in the same order as `documents`.
content_corpus = [record["content"] for record in documents]

content_corpus

In [None]:
from sentence_transformers import SentenceTransformer

# Load the "all-MiniLM-L6-v2" sentence-embedding model by name.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Embed every document's content text in a single call. Row order is assumed
# to follow content_corpus so indices map back to `documents` — TODO confirm.
doc_vectors = model.encode(content_corpus)

# Display the document embeddings as the cell output.
doc_vectors

In [None]:
# Step 3: User Query and Semantic Matching
# The natural-language question to answer from the document corpus.

query = "What's the internet usage policy?"
# Encode the query with the same model used for the documents. It is passed
# as a one-element list, so [0] picks out the single resulting embedding.
query_vec = model.encode([query])[0]
# Display the query embedding as the cell output.
query_vec

In [None]:
import numpy as np

# Score the query embedding against every document embedding
# (sentence-transformers uses cosine similarity unless configured otherwise).
similarities = model.similarity(query_vec, doc_vectors)

# Flatten to a 1-D numpy array with one score per document.
# reshape(-1) is used instead of squeeze(): with a single-document corpus,
# squeeze() would collapse the result to a 0-d scalar array, and the later
# fancy-indexing of `similarities` by the top-k indices would raise IndexError.
similarities = np.asarray(similarities).reshape(-1)
similarities

In [None]:
# Rank document indices by similarity, highest score first, and keep the best three.
ranked_indices = np.argsort(similarities)[::-1]
top_3_indices = ranked_indices[:3]
print(top_3_indices)
# Scores of the selected documents, in the same best-first order.
top_scores = similarities[top_3_indices]
top_scores

In [None]:
# Look up the content text of each top-ranked document, best match first.
top_docs = [documents[doc_index]["content"] for doc_index in top_3_indices]

print(top_docs)
# Merge the retrieved passages into one comma-separated string; this is the
# grounding text handed to the language model.
context = ", ".join(top_docs)
context

In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Load settings from the local env file. override=True asks python-dotenv to
# let the file's values replace variables already present in the environment.
load_dotenv(dotenv_path="../.env.local", override=True)
my_api_key = os.environ.get("OPENAI_API_KEY")

# OpenAI client shared by the question-answering helper in this notebook.
my_client = OpenAI(api_key=my_api_key)

def ask_question_open_ai(prompt, context_text=None, client=None):
    """Answer ``prompt`` with the chat model, grounded in retrieved context.

    Args:
        prompt: The user's question. It is sent as the "User Question" part
            of the user message.
        context_text: Retrieved passages the answer must be based on. When
            omitted, the notebook-level ``context`` string is used.
        client: Object exposing ``chat.completions.create`` (normally an
            ``OpenAI`` client). When omitted, the notebook-level
            ``my_client`` is used.

    Returns:
        The ``message.content`` of the first choice in the model response.
    """
    # Fall back to the notebook globals so the existing one-argument call
    # ask_question_open_ai(query) keeps working unchanged.
    if context_text is None:
        context_text = context
    if client is None:
        client = my_client

    llm_response = client.chat.completions.create(
        model="gpt-5-nano",
        messages=[
            {"role": "system", "content": '''
             You are an assistant who answers only based on the given context.
             '''},
            # Build the message from the `prompt` argument. Previously the
            # global `query` was interpolated here, so the parameter was
            # silently ignored.
            {"role": "user", "content": f"Context: {context_text}\n\n User Question: {prompt}"},
        ],
    )
    return llm_response.choices[0].message.content

In [None]:
# Echo the question, then send it to the model and keep the answer text.
print(query)
response = ask_question_open_ai(prompt=query)

In [None]:
# Final report: the question, the retrieved context, and the model's answer.
report_lines = (
    f"User query: {query}",
    f"Context: {context}",
    f"\n\nOpen AI Response: {response}",
)
print(*report_lines, sep="\n")