If you're not running in Saturn Cloud, you need to install these libraries:

Make sure you use the latest versions:

```
pip install -U transformers accelerate bitsandbytes
```

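Run the following command in your terminal to save your Hugging Face token to a file (replace `your_hugging_face_token` with your actual token):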

```
echo 'your_hugging_face_token' > ~/.huggingface_token
```

In [1]:
pip install --upgrade transformers

Note: you may need to restart the kernel to use updated packages.


In [2]:
rm -rf ~/.cache/huggingface/transformers

In [3]:
import os

# Point HF_HOME at /run/cache/ (done before the huggingface_hub import below).
os.environ['HF_HOME'] = '/run/cache/'

from huggingface_hub import login

# Load the access token stored in ~/.huggingface_token
token_path = os.path.expanduser('~/.huggingface_token')
with open(token_path, 'r') as token_file:
    TOKEN = token_file.read().strip()

# Authenticate with the Hugging Face Hub
login(token=TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /run/cache/token
Login successful


In [4]:
# Remove any existing local copy before downloading a fresh one.
!rm -f minsearch.py
# Fetch minsearch.py from the main branch of the minsearch GitHub repository.
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

--2024-07-24 06:10:47--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3832 (3.7K) [text/plain]
Saving to: ‘minsearch.py’


2024-07-24 06:10:47 (90.9 MB/s) - ‘minsearch.py’ saved [3832/3832]



In [5]:
import requests
import minsearch

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever, and raise_for_status() surfaces
# HTTP errors here instead of as a confusing JSON decoding failure below.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into a single list of documents,
# tagging every document with the course it belongs to.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

# "course" is indexed as a keyword field so it can be used as a filter in searches.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.Index at 0x7fa82941aeb0>

In [6]:
def search(query):
    """Search the index for `query`, restricted to the data-engineering-zoomcamp course.

    The "question" field is boosted by 3.0 and the "section" field by 0.5;
    at most 5 results are requested from the index.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

In [7]:
# Check how much free disk space each mounted filesystem has, including the home directory.
!df -h

Filesystem      Size  Used Avail Use% Mounted on
overlay         100G   77G   24G  77% /
tmpfs            64M     0   64M   0% /dev
tmpfs           7.7G     0  7.7G   0% /sys/fs/cgroup
/dev/nvme0n1p1  100G   77G   24G  77% /run
tmpfs            14G     0   14G   0% /dev/shm
/dev/nvme2n1    2.0G  474M  1.5G  25% /home/jovyan
tmpfs            14G  120K   14G   1% /home/jovyan/.saturn
tmpfs            14G   12K   14G   1% /run/secrets/kubernetes.io/serviceaccount
tmpfs           7.7G   12K  7.7G   1% /proc/driver/nvidia
tmpfs           7.7G  9.2M  7.7G   1% /run/nvidia-persistenced/socket
tmpfs           7.7G     0  7.7G   0% /proc/acpi
tmpfs           7.7G     0  7.7G   0% /sys/firmware


In [8]:
# HF_HOME was set to /run/cache/ above so that Hugging Face stores its files there instead of in the small home directory.

In [9]:
from transformers import AutoTokenizer, MistralForCausalLM

# Tokenizer for the mistralai/mathstral-7B-v0.1 checkpoint
tokenizer = AutoTokenizer.from_pretrained('mistralai/mathstral-7B-v0.1')

In [10]:
# Sample question used to try out the tokenizer and the model.
prompt = "What are the roots of unity?"
# return_tensors="pt" requests PyTorch tensors (see the printed input_ids / attention_mask).
tokenized_prompts = tokenizer(prompt, return_tensors="pt") 
print(tokenized_prompts)

{'input_ids': tensor([[    1,  3963,  1228,  1040, 15534,  1070, 24480, 29572]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}


In [None]:
# Load the weights in the dtype stored in the checkpoint and let accelerate place them
# on the available device(s). Without these arguments the 7B model is loaded with the
# library defaults, which is much heavier on memory (NOTE: device_map requires `accelerate`).
model = MistralForCausalLM.from_pretrained(
    'mistralai/mathstral-7B-v0.1',
    torch_dtype="auto",
    device_map="auto",
)

# The input tensors have to be on the same device as the model before generating.
model_inputs = {name: tensor.to(model.device) for name, tensor in tokenized_prompts.items()}
generation = model.generate(**model_inputs, max_new_tokens=512)
print(tokenizer.decode(generation[0]))

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Reuse the model object that is already loaded: passing the model name instead
# would load a second copy of the 7B weights into memory.
pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer)

In [None]:
def build_prompt(query, search_results):
    """Build the prompt sent to the LLM from a question and retrieved documents.

    Args:
        query: The user's question, inserted after "QUESTION:".
        search_results: Iterable of dicts that each have "question" and "text" keys;
            they are concatenated to form the "CONTEXT:" section.

    Returns:
        The formatted prompt string, with surrounding whitespace stripped.
    """
    # Assembled line by line so the function's own indentation cannot leak into
    # the prompt text.
    prompt_template = "\n".join([
        "QUESTION: {question}",
        "",
        "CONTEXT:",
        "{context}",
        "",
        "ANSWER:",
    ])

    # Each document contributes its question and text, followed by a blank line.
    context = "".join(
        f"{doc['question']}\n{doc['text']}\n\n" for doc in search_results
    )

    return prompt_template.format(question=query, context=context).strip()

In [None]:
def llm(prompt, max_new_tokens=500, temperature=0.7, top_p=0.95):
    """Generate an answer for `prompt` with the text-generation pipeline `pipe`.

    Args:
        prompt: The full prompt text.
        max_new_tokens: Upper bound on the number of generated tokens. This limits
            only the new tokens, so a long prompt does not use up the budget
            (unlike `max_length`, which also counts the prompt tokens).
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated continuation (without the prompt), stripped of
        surrounding whitespace.
    """
    response = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        # Ask the pipeline for the continuation only, instead of slicing the
        # prompt off the returned text by length.
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()

In [None]:
def rag(query):
    """Answer `query`: retrieve documents, build the prompt, and return the LLM's answer."""
    retrieved_docs = search(query)
    return llm(build_prompt(query, retrieved_docs))

In [None]:
# Example: run the whole retrieval + generation flow on a sample question.
query = "I just came across the course. Is it too late to join?"
rag(query)