In [None]:
from dotenv import load_dotenv
import os
from huggingface_hub import login

In [None]:
# Load variables from a local .env file (if any) into the process environment
# so the token does not have to be hard-coded in the notebook.
load_dotenv()

# Authenticate against the Hugging Face Hub with the token stored in
# HUGGINGFACE_TOKEN; `.get` yields None when the variable is not set.
login(token=os.environ.get('HUGGINGFACE_TOKEN'))

In [None]:
# Shell escape: report free/used disk space per filesystem in human-readable
# units (presumably to check there is room for the model download — confirm).
!df -h

In [None]:
# Point the Hugging Face cache root at ~/transformers_cache.
# NOTE(review): `huggingface_hub` is already imported at this point and may
# resolve HF_HOME at import time — confirm downloads really land in this
# directory, or move this assignment above the first Hugging Face import.
os.environ['HF_HOME'] = os.path.expanduser('~/transformers_cache')

# The label names the variable that is actually being set and printed.
print("HF_HOME:", os.environ['HF_HOME'])

## Using an LLM from Hugging Face in our RAG flow

In [None]:
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
from transformers import pipeline
import torch


In [None]:
# Hub id of the checkpoint used for both the tokenizer and the model.
MODEL_ID = "mistralai/Mistral-7B-v0.1"

# NOTE(review): torch is already imported when this flag is set; PyTorch may
# only read it at import time — confirm, or set it before `import torch`.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Inference is pinned to the CPU. To try Apple-silicon acceleration instead:
#   device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
device = torch.device("cpu")

# `truncation` is an argument of the tokenizer *call*, not of loading, so it
# is not passed here; left padding is kept for decoder-only generation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, padding_side="left")

# Load the weights and move them to the selected device.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(device)

# Hand the pipeline the same torch.device the model lives on. The previous
# `0 if mps else -1` mapping passed integer index 0 for MPS rather than the
# MPS device itself, so the pipeline and the model could disagree.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

In [None]:
import json

# Read the raw FAQ dump. The encoding is stated explicitly because JSON files
# are UTF-8, while open() otherwise falls back to the platform's locale
# encoding and can fail or mis-decode non-ASCII text on some systems.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the course it belongs to so it can be filtered on later.
documents = []

for course_dict in docs_raw:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)

In [None]:
from tqdm.auto import tqdm
from elasticsearch import Elasticsearch
# Client for the Elasticsearch node expected at localhost:9200.
es_client = Elasticsearch('http://localhost:9200')
# Last expression of the cell: shows the node's info response, which doubles
# as a connectivity check before any index is created.
es_client.info() 

In [None]:
# Single-shard, no-replica index. The free-text fields are analysed as
# `text`; `course` is a `keyword` so it can be matched exactly in a filter.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}

index_name = "course-questions"

# Start from a clean index so this cell survives "Restart & Run All":
# creating an index that already exists raises an error, and keeping the old
# one would make the indexing step add every document a second time.
# WARNING: this drops any existing "course-questions" index on the node.
es_client.indices.delete(index=index_name, ignore_unavailable=True)

es_client.indices.create(index=index_name, body=index_settings)

In [None]:
# Index the FAQ entries one request at a time; tqdm renders a progress bar.
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

In [None]:
def elastic_search(query, course="data-engineering-zoomcamp", size=5):
    """Retrieve the FAQ entries that best match ``query`` for one course.

    Sends a ``multi_match`` query of type ``best_fields`` over the
    ``question``, ``text`` and ``section`` fields (``question`` is boosted
    3x) and restricts the hits to a single course with a ``term`` filter.
    Uses the notebook-level ``es_client`` and ``index_name``.

    Args:
        query: Free-text user question.
        course: Exact value the ``course`` field must have. Defaults to
            "data-engineering-zoomcamp", the value that used to be
            hard-coded, so existing calls behave as before.
        size: Maximum number of hits to return. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        A list with the ``_source`` dict of every hit, in the order
        Elasticsearch returned them.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    # Only the stored documents are needed downstream, not the hit metadata.
    return [hit['_source'] for hit in response['hits']['hits']]

In [None]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user question and retrieved FAQ entries.

    Each entry in ``search_results`` must provide the keys ``section``,
    ``question`` and ``text``; they are rendered as a three-line block and the
    blocks are concatenated, in order, into the CONTEXT part of the template.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of FAQ entry dicts.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    # One block per entry, each terminated by a blank line.
    context_blocks = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]

    filled = template.format(question=query, context="".join(context_blocks))
    # The final strip drops the blank line left after the last block.
    return filled.strip()

In [None]:
def llm(prompt, max_new_tokens=500):
    """Generate a completion for ``prompt`` with the notebook-level ``generator``.

    Args:
        prompt: Full prompt text to complete.
        max_new_tokens: Upper bound on the number of *generated* tokens.
            The earlier ``max_length=500`` limit also counted the prompt's
            tokens, so a long RAG prompt could leave little or no room for
            an answer.

    Returns:
        The generated continuation only (prompt excluded), stripped of
        leading and trailing whitespace.
    """
    outputs = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        num_return_sequences=1,
        # Ask the pipeline for the continuation alone instead of slicing the
        # prompt off the returned text by character count.
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()

In [None]:
# Example user question used to exercise the pipeline end to end.
query = 'I just disovered the course. Can I still join it?'


def rag(query):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves FAQ entries with ``elastic_search``, turns them into a prompt
    with ``build_prompt`` and returns what ``llm`` generates for that prompt.
    """
    retrieved = elastic_search(query)
    return llm(build_prompt(query, retrieved))


rag(query)