# RAG with DIY-Search and MISTRAL-7B

In [1]:
import os
# Point the Hugging Face cache at /run/cache/ before any Hugging Face library
# is imported; otherwise the default disk cache can run out of space when a
# very large model is downloaded.
os.environ['HF_HOME'] = '/run/cache/'

import json
import requests
import minsearch

In [2]:
# Download the course FAQ documents (a JSON list with one entry per course).
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
docs = docs_response.json()

In [3]:
# Flatten the per-course structure into a single list of FAQ records,
# tagging every record (in place) with the course it belongs to.
documents = []

for course_dict in docs:
    for doc in course_dict['documents']:
        doc.update(course=course_dict['course'])
    documents += course_dict['documents']

In [4]:
# Build the search index: free-text matching on the question/text/section
# fields, exact-match filtering on the course field.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"],
)

index.fit(documents)

<minsearch.Index at 0x7ff8c41e2730>

In [5]:
def search(q, course='data-engineering-zoomcamp', num_results=5):
    """Retrieve the FAQ records that best match a query.

    Args:
        q: The user's question, used as the search query.
        course: Value the ``course`` keyword field must equal. Defaults to
            the data engineering course; pass ``None`` to search without a
            course filter.
        num_results: Maximum number of records to return.

    Returns:
        Whatever ``index.search`` returns for the query (the matching
        records from the fitted index).
    """
    # Matches on the question field count more than matches on the section.
    boost = {'question': 3.0, 'section': 0.5}
    filter_dict = {} if course is None else {'course': course}

    return index.search(
        query=q,
        filter_dict=filter_dict,
        boost_dict=boost,
        num_results=num_results,
    )

# Mistral-7B

In [7]:
import os
from getpass import getpass

# Never hardcode the token, and do not rely on a variable left over from another
# kernel session: reuse HF_TOKEN if it is already set, otherwise prompt for it
# without echoing it into the notebook.
if not os.environ.get('HF_TOKEN'):
    os.environ['HF_TOKEN'] = getpass('Hugging Face token: ')

In [8]:
# Authenticate with the Hugging Face Hub using the token stored in the
# HF_TOKEN environment variable.
from huggingface_hub import login
login(token=os.environ['HF_TOKEN'])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /run/cache/token
Login successful


In [None]:
# Load the tokenizer and model weights once, then build the high-level
# text-generation pipeline on top of those same objects.
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

MODEL_ID = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
# Passing the loaded objects (rather than the model id string) avoids loading
# a second copy of the 7B checkpoint into memory.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Direct generation smoke test, bypassing the pipeline helper.
# The prompt is defined here so the cell runs on a fresh kernel.
sample_prompt = "Do i need to know python to pass the course?"
# Put the inputs on whatever device the model actually lives on, instead of
# assuming CUDA, so inputs and weights never end up on different devices.
model_inputs = tokenizer([sample_prompt], return_tensors="pt").to(model.device)
generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]

In [9]:
def _render_context(context):
    """Turn retrieved context into plain text suitable for a prompt."""
    if isinstance(context, str):
        return context
    if isinstance(context, dict):
        context = [context]
    try:
        entries = list(context)
    except TypeError:
        # Not iterable: fall back to its plain string form.
        return str(context)

    blocks = []
    for entry in entries:
        if isinstance(entry, dict):
            # One "field: value" line per field of the record.
            blocks.append("\n".join(f"{key}: {value}" for key, value in entry.items()))
        else:
            blocks.append(str(entry))
    # A blank line separates consecutive records.
    return "\n\n".join(blocks)


def build_prompt(question, context):
    """Build the LLM prompt for answering a question from retrieved context.

    Args:
        question: The user's question.
        context: Either ready-made context text, or the retrieved records
            (a list of dicts). Records are rendered as ``field: value``
            lines separated by blank lines, rather than being interpolated
            as a raw Python ``repr``.

    Returns:
        The prompt string with the question and rendered context filled in.
    """
    prompt = """You are a chatbot answering frequently asked questions for an online course. 
    Provide ANSWER to the QUESTION based on the CONTEXT given below. 
    Only state the facts from the CONTEXT else respond that information is not available.
    
    QUESTION = {question}
    CONTEXT = {context}
    ANSWER
    """
    prompt_output = prompt.format(question=question, context=_render_context(context))
    return prompt_output

In [14]:
def llm(prompt, max_new_tokens=100):
    """Generate a completion for ``prompt`` with the notebook's text-generation pipeline.

    Args:
        prompt: The full prompt text to send to the model.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated continuation only (the prompt itself is not echoed
        back), with surrounding whitespace stripped.
    """
    # return_full_text=False makes the pipeline return just the newly
    # generated text instead of prompt + continuation.
    outputs = pipe(prompt, max_new_tokens=max_new_tokens, return_full_text=False)
    return outputs[0]["generated_text"].strip()

In [15]:
def rag_response(q):
    """Answer a question with retrieval-augmented generation.

    Retrieves matching records with ``search``, wraps them together with the
    question via ``build_prompt``, and returns what ``llm`` produces for that
    prompt.
    """
    retrieved = search(q)
    return llm(build_prompt(q, retrieved))

In [16]:
# End-to-end check: send one FAQ-style question through retrieval, prompt
# building and generation.
query = "Do i need to know python to pass the course?"
rag_response(query)

'You do not need to know Python to pass the course. The course materials and instructions are provided in a way that does not require prior knowledge of Python.'