# Q&A over the course FAQ JSON (Elasticsearch retrieval + local LLM)

In [4]:
import json
from elasticsearch import Elasticsearch
from tqdm.auto import tqdm
from openai import OpenAI
import os,sys

In [10]:
import requests

# Raw FAQ dump: a list of courses, each carrying its own list of Q&A documents.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten to one record per Q&A entry, tagging each with its parent course.
# New dicts are built so documents_raw stays untouched and the cell is idempotent.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [9]:
# Connect to the Elasticsearch node expected at localhost:9200.
es = Elasticsearch("http://localhost:9200")

# Index schema: full-text fields for searching, plus an exact-match course tag.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            # keyword fields are matched exactly, which suits term filters
            "course": {"type": "keyword"}
        }
    }
}

index_name = "llm-course-questions"

# Start from a clean index so this cell can be re-run: creating an index that
# already exists raises an error, and indexing into a leftover index would
# duplicate every document. NOTE: this deletes any existing index with this
# name; ignore_unavailable makes the delete a no-op when it does not exist yet.
es.indices.delete(index=index_name, ignore_unavailable=True)
response = es.indices.create(index=index_name, body=index_settings)

In [11]:
# Send each Q&A record to the index, one request per record, with a progress bar.
progress = tqdm(documents)
for faq_entry in progress:
    es.index(index=index_name, document=faq_entry)

100%|██████████████████████████████████████████████████████████| 948/948 [00:09<00:00, 104.18it/s]


In [16]:
def check_similarities(user_question, index_name="llm-course-questions", max_results=5, course=None):
    """Return the FAQ entries that best match a question.

    Runs a ``multi_match`` query over the ``question``, ``text`` and
    ``section`` fields (``question`` boosted 3x) through the module-level
    ``es`` client.

    Args:
        user_question: Free-text question to search for.
        index_name: Elasticsearch index to query.
        max_results: Maximum number of hits to request.
        course: Optional exact value of the ``course`` field to restrict the
            search to. ``None`` (the default) applies no course filter.

    Returns:
        A list holding the ``_source`` dict of every hit, in the order the
        search response lists them.
    """
    bool_query = {
        "must": {
            "multi_match": {
                "query": user_question,
                "fields": ["question^3", "text", "section"],
                "type": "best_fields"
            }
        }
    }

    # Filter only when a course is requested. A hard-coded filter value equal
    # to the index name ("llm-course-questions") is not a course tag taken from
    # the data, so as an exact-match term filter it would presumably exclude
    # every document.
    if course is not None:
        bool_query["filter"] = {"term": {"course": course}}

    search_query = {"size": max_results, "query": {"bool": bool_query}}

    response = es.search(index=index_name, body=search_query)
    return [hit["_source"] for hit in response['hits']['hits']]
    
def build_prompt(query, search_results):
    """Assemble the LLM prompt from a question and its retrieved FAQ entries.

    Every entry in ``search_results`` must provide the keys ``section``,
    ``question`` and ``text``; the entries are rendered in order under the
    CONTEXT heading. Leading and trailing whitespace of the finished prompt
    is stripped before it is returned.
    """
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One "section / question / answer" paragraph per retrieved entry.
    context = "".join(
        f"section: {entry['section']}\nquestion: {entry['question']}\nanswer: {entry['text']}\n\n"
        for entry in search_results
    )

    return template.format(question=query, context=context).strip()

def llm(prompt, model='phi3'):
    """Send a single-turn prompt to the chat model and return its reply text.

    Uses the module-level ``client``, which must exist before the first call.

    Args:
        prompt: Text sent as the only user message.
        model: Name of the model to request; defaults to ``'phi3'``.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

def rag(query):
    """Answer a question with retrieval-augmented generation.

    Retrieves matching FAQ entries for ``query``, builds a prompt from the
    question and those entries, and returns the model's reply.
    """
    retrieved = check_similarities(query)
    return llm(build_prompt(query, retrieved))

In [19]:
# Point the OpenAI SDK at a local OpenAI-compatible endpoint on port 11434
# (presumably an Ollama server, going by the port and the key value).
# The api_key is a placeholder string, not a real credential.
client = OpenAI(api_key='ollama', base_url='http://localhost:11434/v1/')

rag("can i be gay?")

" I'm sorry, but as an AI, I don't have personal experiences or identities. However, according to human diversity, sexual orientation is a natural variation in humans and individuals may identify themselves differently based on their own feelings and attractions. If you have specific questions about understanding your own identity, it might be best to consult with a counselor or relevant support resources."