In [None]:
import pandas as pd


In [None]:
# Load the requirements table and turn it into a list of per-row dicts.
# NaN cells are replaced with the placeholder 'blank' because NaN values
# blow up serialization of the records later on.
# NOTE(review): the path is absolute and machine-specific; this cell only
# runs where that exact file exists.
df = pd.read_csv(
    'c:\\Users\\Ken\\Documents\\GitHub\\coursera_rag\\requirements_from_acts.csv'
).fillna('blank')
data = df.to_dict('records')
len(data)

In [None]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [None]:
# Embedding model: turns text into the vectors stored and searched below
embedding_model_name = 'all-MiniLM-L6-v2'
encoder = SentenceTransformer(embedding_model_name)

In [None]:
# Vector database client backed by an in-memory Qdrant instance
qdrant = QdrantClient(location=":memory:")

In [None]:
# Create (or reset) the collection that stores the requirement records.
# `recreate_collection` is deprecated in qdrant-client; the explicit
# exists / delete / create sequence is its documented replacement and keeps
# this cell safe to re-run against the same client.
if qdrant.collection_exists(collection_name="legal_reqts"):
    qdrant.delete_collection(collection_name="legal_reqts")
qdrant.create_collection(
    collection_name="legal_reqts",
    vectors_config=models.VectorParams(
        # Vector size is dictated by the embedding model in use
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    ),
)

In [None]:
# Vectorize every record's "Essential Requirement" text and store it in the
# collection, keeping the full record as the point payload.
# All texts are encoded in a single batched call rather than one encoder call
# per record, which lets the model process them in batches.
requirement_texts = [doc["Essential Requirement"] for doc in data]
requirement_vectors = encoder.encode(requirement_texts)

qdrant.upload_points(
    collection_name="legal_reqts",
    points=[
        # The point id is the record's position in `data`
        models.PointStruct(id=idx, vector=vector.tolist(), payload=doc)
        for idx, (doc, vector) in enumerate(zip(data, requirement_vectors))
    ],
)

In [None]:
# Natural-language question that drives the vector database search
user_prompt: str = "Identify all requirements for risk analysis"

In [None]:
# Search the collection: embed the question with the encoder and fetch the
# 3 closest records. `query_points` replaces the deprecated `search` method;
# its `.points` attribute holds the scored hits (payload + score).
query_vector = encoder.encode(user_prompt).tolist()
hits = qdrant.query_points(
    collection_name="legal_reqts",
    query=query_vector,
    limit=3,
).points
for hit in hits:
    print(hit.payload, "score:", hit.score)

In [None]:
# Collect just the payload of every hit; this list is what gets handed to the LLM
search_results = []
for hit in hits:
    search_results.append(hit.payload)

In [45]:
# Ask the LLM to answer the question using the retrieved requirements.
# OpenAI() takes its key from the OPENAI_API_KEY environment variable and
# raises OpenAIError when it is not set.
# To target a local OpenAI-compatible server instead, build the client as
# OpenAI(base_url="http://127.0.0.1:8080/v1", api_key="sk-no-key-required").
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a chatbot, skilled in explaining regulatory requirements for machinery."},
        {
            "role": "user",
            # The retrieved records are passed as context inside the user
            # turn; the assistant role is meant for the model's own replies,
            # not for input data.
            "content": (
                "Identify requirements for risk analysis as part of CE marking.\n\n"
                "Answer using these retrieved requirements:\n"
                + str(search_results)
            ),
        },
    ],
)
# Show only the reply text instead of the whole message object
print(completion.choices[0].message.content)

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [None]:
import os

# Report whether OPENAI_API_KEY is visible to this kernel, without echoing the key itself
api_key = os.getenv('OPENAI_API_KEY')
print('API key not found' if api_key is None else 'API key is recognized')