In [30]:
import json
import pysolr
from tqdm import tqdm
import requests
from dotenv import load_dotenv
load_dotenv()

True

In [17]:
def add_vectors():
        embedding_host = '35.230.69.135'
        embedding_url = f"http://{embedding_host}:5052/getDiagEmbeddings"
        url = embedding_url
        headers = {
            'Content-Type': 'application/json'
        }
        #query = f"type:OBS AND {SolrConfig.CREATED_DATE}:[{self.extraction_start_time} TO *] AND {SolrConfig.CREATED_BY}:system AND -merged:true AND -active:false" if self.extraction_start_time else "type:OBS AND -merged:true AND -active:false"
        query = '*:*'
        pred_handle = pysolr.Solr("http://localhost:8890/solr/predict")
        all_obs = pred_handle.search(
            q=query, 
            rows=10000
        ).docs

        to_add = []
        progress_desc = f"Adding vectors to {query} observations"

        for i in tqdm(range(0, len(all_obs), 5), total=(len(all_obs) // 5) + 1, desc=progress_desc):
            batch = all_obs[i:i+5]
            payload = json.dumps({
                "sentences": [doc.get('case_description', '') for doc in batch]

            })
            resp = requests.post(url, headers=headers, data=payload).json()
            if "embeddings" not in resp:
                print("Error from embedding API:", resp)
                continue
            response = resp["embeddings"]
                        

            to_add.extend({"id": doc["id"], "vector": vector} for doc, vector in zip(batch, response))

        for i in range(0,len(to_add),5):
            pred_handle.add(to_add[i:i+5],fieldUpdates={"vector":"set"}, commit=True) 

add_vectors()

Adding vectors to *:* observations:  50%|█████     | 1/2 [00:01<00:01,  1.14s/it]


In [26]:
import json
import requests
import pysolr

def fetch_relevant_docs(user_query: str, top_k: int = 5):
    """
    Takes a user query, generates its embedding, and retrieves top-K relevant documents from Solr.
    """
    # Step 1: Generate embedding for query
    embedding_host = '35.230.69.135'
    embedding_url = f"http://{embedding_host}:5052/getDiagEmbeddings"
    url = embedding_url
    headers = {"Content-Type": "application/json"}
    payload = json.dumps({"sentences": [user_query]})
    response = requests.post(url, headers=headers, data=payload).json()
    query_embedding = response["embeddings"][0]

    # Step 2: Format Solr KNN query
    # Solr expects the embedding vector as a JSON array string
    vector_str = "[" + ",".join(map(str, query_embedding)) + "]"
    solr_query = "{!knn f=vector topK=" + str(top_k) + "}" + vector_str

    pred_handle = pysolr.Solr("http://localhost:8890/solr/predict")

    # Step 3: Execute search in Solr
    results = pred_handle.search(
        q=solr_query,
        fl="id,case_description,score"  # return id, text, and similarity score
    ).docs

    return results


In [27]:
query = "How do I run the payments service?"
docs = fetch_relevant_docs(query, top_k=5)

for d in docs:
    print(f"ID: {d['id']}, Score: {d['score']}")
    print(f"Description: {d['case_description']}\n")


ID: OBS-1001, Score: 0.9091029
Description: To run the payments service, clone the repo, set the PAYMENT_API_KEY in your environment, and run `docker-compose up`.

ID: OBS-1004, Score: 0.8629731
Description: The analytics service can be run locally with `python main.py`. Make sure you have installed requirements from `requirements.txt`.

ID: OBS-1003, Score: 0.85638607
Description: For the notification service, ensure you have an SMTP server configured. Update `config.yaml` with SMTP_HOST and SMTP_PORT.

ID: OBS-1002, Score: 0.8495734
Description: The authentication service requires a PostgreSQL database. Configure DATABASE_URL in `.env` before starting the service.

ID: OBS-1005, Score: 0.8140217
Description: When deploying to production, use Kubernetes manifests in the `k8s/` folder. Update secrets before applying configs.



In [31]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

def generate_solution(user_query: str, relevant_docs: list, model_name: str = "gpt-4o"):
    """
    
    Takes user query + relevant documents, and generates a solution using the model.
    """
    # Extract just the text content from docs
    docs_text = "\n\n".join(
        [doc.get("case_description", "") for doc in relevant_docs]
    )

    # Prompt template
    template = """You are an onboarding assistant.
Use the relevant documents below to answer the user's query.

User Query:
{query}

Relevant Documents:
{docs}

Answer clearly and step by step, using the documents when possible.
"""

    prompt = PromptTemplate.from_template(template)
    final_prompt = prompt.format(query=user_query, docs=docs_text)

    # Call model
    llm = ChatOpenAI(model=model_name, temperature=0)  # deterministic
    answer = llm.predict(final_prompt)

    return answer


In [32]:
query = "How do I run the payments service?"
docs = fetch_relevant_docs(query, top_k=5)
answer = generate_solution(query, docs)

print("Answer:", answer, "docs:", docs)


  answer = llm.predict(final_prompt)


Answer: To run the payments service, follow these steps:

1. **Clone the Repository**: Start by cloning the repository that contains the payments service code to your local machine.

2. **Set the Environment Variable**: You need to set the `PAYMENT_API_KEY` in your environment. This is crucial for the service to authenticate and function properly. You can set this in your terminal session or in an environment file, depending on your setup.

3. **Run the Service**: Once the repository is cloned and the environment variable is set, navigate to the directory containing the `docker-compose.yml` file. Then, execute the following command to start the payments service:
   ```
   docker-compose up
   ```

This will build and run the payments service using Docker, ensuring all necessary dependencies and configurations are in place. docs: [{'id': 'OBS-1001', 'case_description': 'To run the payments service, clone the repo, set the PAYMENT_API_KEY in your environment, and run `docker-compose up`.

In [5]:
import pysolr
solr = pysolr.Solr("http://localhost:8890/solr/predict")
docs = [
  {
    "id": "OBS-1001",
    "case_description": "To run the payments service, clone the repo, set the PAYMENT_API_KEY in your environment, and run `docker-compose up`."
  },
  {
    "id": "OBS-1002",
    "case_description": "The authentication service requires a PostgreSQL database. Configure DATABASE_URL in `.env` before starting the service."
  },
  {
    "id": "OBS-1003",
    "case_description": "For the notification service, ensure you have an SMTP server configured. Update `config.yaml` with SMTP_HOST and SMTP_PORT."
  },
  {
    "id": "OBS-1004",
    "case_description": "The analytics service can be run locally with `python main.py`. Make sure you have installed requirements from `requirements.txt`."
  },
  {
    "id": "OBS-1005",
    "case_description": "When deploying to production, use Kubernetes manifests in the `k8s/` folder. Update secrets before applying configs."
  }
]
solr.add(docs)
solr.commit()


'<?xml version="1.0" encoding="UTF-8"?>\n<response>\n\n<lst name="responseHeader">\n  <int name="status">0</int>\n  <int name="QTime">55</int>\n</lst>\n</response>\n'