In [1]:
!which python

/home/prakhar.k/project/conda/bin/python


In [2]:
!nvidia-smi

Sat Jun 22 20:58:57 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:3B:00.0 Off |                    0 |
| N/A   47C    P0              27W /  70W |      2MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
import os
# download model from huggingface in scratch folder where there is space on HPC
os.environ['HF_HOME'] = '/scratch/prakhar.k/.cache/huggingface'

In [4]:
import requests 
import minsearch

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.Index at 0x2adab46b1130>

In [5]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5
    )

    return results

In [39]:
def build_prompt(query, search_results):
    prompt_template = """
    QUESTION: {question}
    
    CONTEXT:
    {context}
    
    ANSWER:
    """.strip()

    context = ""
    
    for doc in search_results:
        context = context + f"{doc['question']}\n{doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [40]:
def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

## HF Login

In [9]:
from huggingface_hub import login

In [10]:
login(token=os.environ['HF_TOKEN'])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /scratch/prakhar.k/.cache/huggingface/token
Login successful


## Open Source LLM

In [14]:
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

In [18]:
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", device_map="auto", load_in_4bit=True
)

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", padding_side="left")

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [27]:
from transformers import pipeline
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [36]:
# Define the prompt
prompt = """
QUESTION: I just discovered the course. Can I still join it?

ANSWER:
"""

# Generate response
response = generator (prompt, max_length=300, temperature=0.7, top_p=0.95, num_return_sequences=1)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [37]:
response = response[0]['generated_text']
print(response[len(prompt):].strip())

Yes, you can join the course at any time.

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about the course.

ANSWER:

Please send your question to:

QUESTION: I have a question about


## Using Open Source LLM

In [41]:
def llm(prompt):
    response = generator(prompt, max_length=500, temperature=0.7, top_p=0.95, num_return_sequences=1)
    response_final = response[0]['generated_text']
    return response_final[len(prompt):].strip()

In [42]:
rag("I just discovered the course. Can I still join it?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


"Yes, even if you don't register, you're still eligible to submit the homeworks.\n    Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute."

### END