In [7]:
import torch
import os

In [8]:
pdf_path = "merged_pdf/merged_pdf.pdf"

In [9]:
!pip install PyMuPDF



In [10]:
import fitz
from tqdm.auto import tqdm

def text_formatter(text:str) -> str:
  cleaned_text = text.replace("\n", " ").strip()
  return cleaned_text

def open_and_read_pdf(pdf_path: str) -> list[dict]:
  doc = fitz.open(pdf_path)
  pages_and_texts = []
  for page_number, page in tqdm(enumerate(doc)):
    text = page.get_text()
    text = text_formatter(text=text)
    pages_and_texts.append({"page_number": page_number-5,
                            "page_char_count": len(text),
                            "page_word_count": len(text.split(" ")),
                            "page_sentence_count_raw": len(text.split(". ")),
                            "page_token_count":len(text)/4,
                            "text":text})
  return pages_and_texts

pages_and_texts = open_and_read_pdf(pdf_path=pdf_path)
pages_and_texts

0it [00:00, ?it/s]

[{'page_number': -5,
  'page_char_count': 56,
  'page_word_count': 7,
  'page_sentence_count_raw': 1,
  'page_token_count': 14.0,
  'text': 'Chronicles Sem 1 2023-2024 Placement Awareness Committee'},
 {'page_number': -4,
  'page_char_count': 1961,
  'page_word_count': 325,
  'page_sentence_count_raw': 15,
  'page_token_count': 490.25,
  'text': "02 Debaayus Swain Pitching Coordinator Facing one of the toughest job markets after the 2009  recession, our students worked hard and proved their  mettle. I hope that these challenging times have  prepared the batch for the dynamic job landscape. We  had the fortune of hosting some exciting new recruiters  apart from our legacy ones. This particular season  established the value of putting your best foot forward in  the summer internship season and developing a well- rounded profile. I hope these Chronicles serve as a rough  guideline for students to optimize their summers and  profiles towards their target summer  internships\xa0or\xa0placem

In [11]:
!pip install tqdm



In [12]:
import random 

random.sample(pages_and_texts, k=3)

[{'page_number': 82,
  'page_char_count': 1147,
  'page_word_count': 204,
  'page_sentence_count_raw': 8,
  'page_token_count': 286.75,
  'text': 'Rubrik 88 Introduction Interviewee - Aadit Nayyar (2021A7PS2687P) Job Role - Software Engineer Intern Number of offers made - 2 Selection Process Branches open to - A7 - both single and dual degree students were eligible  for the role  CG Cutoff - 7.5+  Recruitment process - \u2028 Round 1 - Online Assessment  This round was purely a coding round and had four questions. Each of these  questions could be solved by a thorough understanding of concepts taught  in CS F211 - Data Structures and Algorithms. Most of these questions relied  on Dynamic Programming and/or Graphs, as is standard for most OAs.  Round 2 - Technical Interview 1  This round was singularly based on CS F211 - Data Structures and  Algorithms, and asked a lot of in-depth questions on Graphs. These  questions definitely required a strong background in both DSA and  competitive 

In [13]:
import pandas as pd 

df = pd.DataFrame(pages_and_texts)
df.head()

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count_raw,page_token_count,text
0,-5,56,7,1,14.0,Chronicles Sem 1 2023-2024 Placement Awareness...
1,-4,1961,325,15,490.25,02 Debaayus Swain Pitching Coordinator Facing ...
2,-3,21,3,1,5.25,SUMMER INTERNSHIPS 03
3,-2,131,25,1,32.75,Table of Contents 04 Analytics 05 Consulting 1...
4,-1,588,95,3,147.0,Analytics 05 DMI Finance 06 HDFC Bank 08 InfoE...


In [14]:
df.describe().round(2)

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count_raw,page_token_count
count,226.0,226.0,226.0,226.0,226.0
mean,107.5,952.36,169.19,6.61,238.09
std,65.38,372.74,67.82,3.16,93.19
min,-5.0,0.0,1.0,1.0,0.0
25%,51.25,799.0,139.25,5.0,199.75
50%,107.5,997.0,177.5,7.0,249.25
75%,163.75,1158.75,209.5,9.0,289.69
max,220.0,1961.0,348.0,16.0,490.25


In [15]:
from spacy.lang.en import English

nlp = English()

nlp.add_pipe("sentencizer")

doc = nlp("This is a sentence. This another sentence. I like elephants.")
assert len(list(doc.sents)) == 3

list(doc.sents)

[This is a sentence., This another sentence., I like elephants.]

In [16]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [17]:
for item in tqdm(pages_and_texts):
    item["sentences"] = list(nlp(item["text"]).sents)
    item["sentences"] = [str(sentence) for sentence in item["sentences"]]
    item["page_sentence_count_spacy"] = len(item["sentences"])

  0%|          | 0/226 [00:00<?, ?it/s]

In [18]:
random.sample(pages_and_texts, k=1)

[{'page_number': 46,
  'page_char_count': 892,
  'page_word_count': 159,
  'page_sentence_count_raw': 5,
  'page_token_count': 223.0,
  'text': 'Accenture 52 Introduction Interviewee - Hemant Seshadri Nemani (2021AAPS2170P) Job Role - Advance Application Engineering Intern Number of offers made - 3 Selection Process Branches open to - All except A7 - Both single and dual degree students  were eligible  CG Cutoff - 7+  Recruitment process - \u2028 Round 1 - Resume Shortlisting  Round 2 - Online Assessment  This round had two segments:¬ ÄÑ Logical Reasoning and Quantitative Aptitud\x9a \x92Ñ Coding round - For this round, we could use any programming language  we were comfortable with. Even the questions asked were of a quite  easy/medium level, with this being one of the easiest OAs I had  appeared for.  Round 3 - Communication Assessment  This round was fully automated as well and resembled English proficiency  tests. We had to listen to a sentence and repeat whatever we had heard.  Mo

In [19]:
df = pd.DataFrame(pages_and_texts)
df.describe().round(2)

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count_raw,page_token_count,page_sentence_count_spacy
count,226.0,226.0,226.0,226.0,226.0,226.0
mean,107.5,952.36,169.19,6.61,238.09,6.76
std,65.38,372.74,67.82,3.16,93.19,3.24
min,-5.0,0.0,1.0,1.0,0.0,0.0
25%,51.25,799.0,139.25,5.0,199.75,5.0
50%,107.5,997.0,177.5,7.0,249.25,7.0
75%,163.75,1158.75,209.5,9.0,289.69,9.0
max,220.0,1961.0,348.0,16.0,490.25,17.0


In [20]:
num_sentence_chunk_size = 10

def split_list(input_list: list,
              slice_size: int=num_sentence_chunk_size) -> list[list[str]]:
    return [input_list[i:i+slice_size] for i in range(0,len(input_list),slice_size)]

test_list = list(range(25))
split_list(test_list)

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
 [20, 21, 22, 23, 24]]

In [21]:
for item in tqdm(pages_and_texts):
    item["sentence_chunks"] = split_list(input_list=item["sentences"],
                                        slice_size=num_sentence_chunk_size)
    item["num_chunks"] = len(item["sentence_chunks"])

  0%|          | 0/226 [00:00<?, ?it/s]

In [22]:
random.sample(pages_and_texts, k=1)

[{'page_number': 218,
  'page_char_count': 598,
  'page_word_count': 105,
  'page_sentence_count_raw': 4,
  'page_token_count': 149.5,
  'text': 'Samsung R&D Institute, Noida 99 Personal Experience Sources of preparation -  I mainly used Geekforgeeks and InterviewBit for understanding of  concepts, and Codeforces for practice. Words of advice -   My advice would be to prepare for DSA very well, as the interview heavily  focused on it. Also, brush up some information about the company, as that  makes the HR interview pretty easy. |\x8a CS F211 - Data Structures & Algorithmq \x87\x8a CS F372 - Operating Systemq j\x8a CS F303 - Computer Networkq b\x8a CS F213 - Object Oriented Programmin\x85 g\x8a CS F212 - Database Systems Relevant Courses or Certifications',
  'sentences': ['Samsung R&D Institute, Noida 99 Personal Experience Sources of preparation -  I mainly used Geekforgeeks and InterviewBit for understanding of  concepts, and Codeforces for practice.',
   'Words of advice -   My adv

In [23]:
df = pd.DataFrame(pages_and_texts)
df.describe().round(2)

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count_raw,page_token_count,page_sentence_count_spacy,num_chunks
count,226.0,226.0,226.0,226.0,226.0,226.0,226.0
mean,107.5,952.36,169.19,6.61,238.09,6.76,1.1
std,65.38,372.74,67.82,3.16,93.19,3.24,0.34
min,-5.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,51.25,799.0,139.25,5.0,199.75,5.0,1.0
50%,107.5,997.0,177.5,7.0,249.25,7.0,1.0
75%,163.75,1158.75,209.5,9.0,289.69,9.0,1.0
max,220.0,1961.0,348.0,16.0,490.25,17.0,2.0


In [24]:
import re

pages_and_chunks = []
for item in tqdm(pages_and_texts):
    for sentence_chunk in item["sentence_chunks"]:
        chunk_dict = {}
        chunk_dict["page_number"] = item["page_number"]
        joined_sentence_chunk = "".join(sentence_chunk).replace("  "," ").strip()
        joined_sentence_chunk = re.sub(r'\.([A-Z])',r'. \1',joined_sentence_chunk)
        chunk_dict["sentence_chunk"] = joined_sentence_chunk
        pages_and_chunks.append(chunk_dict)

len(pages_and_chunks)

  0%|          | 0/226 [00:00<?, ?it/s]

248

In [25]:
from sentence_transformers import util,SentenceTransformer
embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2",
                                      device="cpu")

In [26]:
embedding_model.to("cpu")

for item in tqdm(pages_and_chunks):
    item["embedding"] = embedding_model.encode(item["sentence_chunk"])

  0%|          | 0/248 [00:00<?, ?it/s]

In [27]:
text_chunks = [item["sentence_chunk"] for item in pages_and_chunks]
text_chunks[23]

'Hindustan Unilever Limited 22 Introduction Interviewee - Joyeeta Nath (2021A1PS2166P) Job Role - Research and Development Intern Number of offers made - 1 Selection Process Branches open to - Chemical (A1), Mechanical (A4), and Manufacturing (AB) - Both single and dual degree candidates were allowed to appear for the process. CG Cutoff - 7+ Recruitment process - \u2028 Round 1 - Resume Shortlisting In this round, around 30 people were shortlisted. Round 2 - Situational Response Test This asked us various questions aimed at testing our interest in the role, as well as our professional ethics and attitude.  Round 3 - Online Technical Interview In this round, we were asked three questions. These questions were aimed at assessing our core technical aptitude. They addressed prior work experience, as well as our relevant coursework in our major. In my case, since I am a Chemical Engineering student, one of these three questions was about a topic directly taught in class.'

In [28]:
len(text_chunks)

248

In [29]:
%%time

text_chunk_embeddings = embedding_model.encode(text_chunks,
                                               batch_size=32,
                                               convert_to_tensor=True)

text_chunk_embeddings

CPU times: user 1min 7s, sys: 52.9 s, total: 2min
Wall time: 11.8 s


tensor([[ 0.0545,  0.0610, -0.0203,  ..., -0.0104, -0.0534, -0.0647],
        [ 0.0276, -0.0117, -0.0145,  ...,  0.0193,  0.0200, -0.0579],
        [ 0.0442, -0.0304, -0.0229,  ...,  0.0296,  0.0489, -0.0471],
        ...,
        [ 0.0436, -0.0751, -0.0345,  ...,  0.0076, -0.0276, -0.0079],
        [ 0.0352, -0.0495, -0.0249,  ...,  0.0174, -0.0236, -0.0421],
        [ 0.0373,  0.0123, -0.0194,  ..., -0.0086,  0.0020, -0.0313]])

In [30]:
text_chunks_and_embeddings_df = pd.DataFrame(pages_and_chunks)
embeddings_df_save_path = "text_chunks_embeddings_df.csv"
text_chunks_and_embeddings_df.to_csv(embeddings_df_save_path,index=False)

In [31]:
text_chunks_and_embeddings_df_load = pd.read_csv(embeddings_df_save_path)
text_chunks_and_embeddings_df_load.head()

Unnamed: 0,page_number,sentence_chunk,embedding
0,-5,Chronicles Sem 1 2023-2024 Placement Awareness...,[ 5.45142889e-02 6.10405654e-02 -2.03054342e-...
1,-4,02 Debaayus Swain Pitching Coordinator Facing ...,[ 2.75518056e-02 -1.17114792e-02 -1.45208994e-...
2,-4,Vipul Singhal Dy. Manager - Placements In the ...,[ 4.42395769e-02 -3.03656831e-02 -2.29493119e-...
3,-3,SUMMER INTERNSHIPS 03,[ 2.36846693e-02 -2.61738598e-02 -8.31269193e-...
4,-2,Table of Contents 04 Analytics 05 Consulting 1...,[-2.31499579e-02 -2.16099415e-02 -5.09638898e-...


In [32]:
import numpy as np 

device = "mps" if torch.cuda.is_available() else "cpu"

text_chunks_and_embedding_df = pd.read_csv("text_chunks_embeddings_df.csv")

text_chunks_and_embedding_df["embedding"] = text_chunks_and_embedding_df["embedding"].apply(lambda x: np.fromstring(x.strip("[]"),sep=" "))

pages_and_chunks = text_chunks_and_embedding_df.to_dict(orient="records")
text_chunks_and_embedding_df

Unnamed: 0,page_number,sentence_chunk,embedding
0,-5,Chronicles Sem 1 2023-2024 Placement Awareness...,"[0.0545142889, 0.0610405654, -0.0203054342, -0..."
1,-4,02 Debaayus Swain Pitching Coordinator Facing ...,"[0.0275518056, -0.0117114792, -0.0145208994, -..."
2,-4,Vipul Singhal Dy. Manager - Placements In the ...,"[0.0442395769, -0.0303656831, -0.0229493119, -..."
3,-3,SUMMER INTERNSHIPS 03,"[0.0236846693, -0.0261738598, -0.00831269193, ..."
4,-2,Table of Contents 04 Analytics 05 Consulting 1...,"[-0.0231499579, -0.0216099415, -0.0509638898, ..."
...,...,...,...
243,216,Tata Consultancy Services 97 Personal Experien...,"[0.0228229556, -0.0844871104, -0.0250941664, -..."
244,217,"Samsung R&D Institute, Noida Introduction Inte...","[0.0622616597, -0.0709417537, -0.0126383519, -..."
245,218,"Samsung R&D Institute, Noida 99 Personal Exper...","[0.0436485335, -0.075106211, -0.0344931409, -0..."
246,219,"100 Sarthak Sharma Coordinator, Semester II, ...","[0.0351543948, -0.0495440401, -0.0249404833, -..."


In [33]:
embeddings = torch.tensor(np.stack(text_chunks_and_embedding_df["embedding"].tolist(),axis=0), dtype=torch.float32).to(device)
embeddings

tensor([[ 0.0545,  0.0610, -0.0203,  ..., -0.0104, -0.0534, -0.0647],
        [ 0.0276, -0.0117, -0.0145,  ...,  0.0193,  0.0200, -0.0579],
        [ 0.0442, -0.0304, -0.0229,  ...,  0.0296,  0.0489, -0.0471],
        ...,
        [ 0.0436, -0.0751, -0.0345,  ...,  0.0076, -0.0276, -0.0079],
        [ 0.0352, -0.0495, -0.0249,  ...,  0.0174, -0.0236, -0.0421],
        [ 0.0373,  0.0123, -0.0194,  ..., -0.0086,  0.0020, -0.0313]])

In [34]:
query = "DSA importance"
query_embedding = embedding_model.encode(query, convert_to_tensor=True)
dot_scores = util.dot_score(a=query_embedding, b=embeddings)[0]
dot_scores
top_results_dot_product = torch.topk(dot_scores, k=5)
top_results_dot_product

torch.return_types.topk(
values=tensor([0.4979, 0.4231, 0.4141, 0.4075, 0.4049]),
indices=tensor([231, 204,  75, 213, 114]))

In [35]:
pages_and_chunks[231]

{'page_number': 204,
 'sentence_chunk': 'Tata 1mg 85 Personal Experience Sources of preparation - Neetcode 150 question list is enough for DSA preparation. Moreover, Interviewbit and Leetcode are good sources as well. Course Slides for CS funamentals like OOP, OS, DSA, DBMS are enough. Words of advice -  Do Competitive Coding and DSA consistently, Add some full-stack projects in your resume, Prior Intern experience gives you an edge. Get into depth of projects. Whatever you do, connect it to basics. There’s no point in just copying code. The following CS courses are extremely helpful\x95 |t CS F211 - Data Structures & Algorithm\x80 \x89t CS F372 - Operating System\x80 nt CS F303 - Computer Network\x80 kt CS F213 - Object Oriented Programmin\x8b ht CS F212 - Database Systems If you can get a Study Oriented Project, it would reflect well on your Resume. Relevant Courses or Certifications',
 'embedding': array([ 1.33535285e-02, -1.56400371e-02, -4.37605046e-02, -1.59021039e-02,
         4

In [36]:
import textwrap

In [37]:
def print_wrapped(text, wrap_length=80):
    wrapped_text = textwrap.fill(text, wrap_length)
    print(wrapped_text)

In [38]:
for score, idx in zip(top_results_dot_product[0], top_results_dot_product[1]):
    print(f"Score: {score:.4f}")
    print("Text:")
    print(pages_and_chunks[idx]["sentence_chunk"])

Score: 0.4979
Text:
Tata 1mg 85 Personal Experience Sources of preparation - Neetcode 150 question list is enough for DSA preparation. Moreover, Interviewbit and Leetcode are good sources as well. Course Slides for CS funamentals like OOP, OS, DSA, DBMS are enough. Words of advice -  Do Competitive Coding and DSA consistently, Add some full-stack projects in your resume, Prior Intern experience gives you an edge. Get into depth of projects. Whatever you do, connect it to basics. There’s no point in just copying code. The following CS courses are extremely helpful |t CS F211 - Data Structures & Algorithm t CS F372 - Operating System nt CS F303 - Computer Network kt CS F213 - Object Oriented Programmin ht CS F212 - Database Systems If you can get a Study Oriented Project, it would reflect well on your Resume. Relevant Courses or Certifications
Score: 0.4231
Text:
Ideaforge Technology Pvt. Ltd. 59 Personal Experience Sources of preparation - I mainly relied on my notes from college 

In [39]:
def retrieve_relevant_resources(query: str,
                                embeddings: torch.tensor,
                                model: SentenceTransformer=embedding_model,
                                n_resources_to_return: int=5,
                                print_time: bool=True):

    query_embedding = model.encode(query, convert_to_tensor=True)
    dot_scores = util.dot_score(query_embedding, embeddings)[0]

    scores, indices = torch.topk(input=dot_scores,
                                 k=n_resources_to_return)
    return scores, indices

In [40]:
retrieve_relevant_resources(query="CGPA importance", embeddings=embeddings)

(tensor([0.4356, 0.3936, 0.3571, 0.3080, 0.2650]),
 tensor([188, 165,  86, 206, 164]))

In [41]:
# Loading an LLM locally
import transformers

In [42]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.utils import is_flash_attn_2_available

from transformers import BitsAndBytesConfig
quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16)

if (is_flash_attn_2_available()) and (torch.mps.get_device_capability(0)[0] >= 8):
    attn_implementation = "flash_attention_2"
else:
    attn_implementation = "sdpa"

print(attn_implementation)

sdpa


In [43]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "google/gemma-2b-it"
token = "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"

# Use quantization only if CUDA is available
use_quantization_config = False  # Disable quantization since it's not supported on MPS

# Load the model and tokenizer
try:
    print("Attempting to load tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id, use_auth_token=token)
    print("Tokenizer loaded successfully.")

    print("Attempting to load model...")
    llm_model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_id,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=False,
        use_auth_token=token,
    )
    print("Model loaded successfully.")

    # Move model to CPU
    llm_model.to("cpu")
    print("Model moved to CPU.")
except Exception as e:
    print(f"An error occurred: {e}")

Attempting to load tokenizer...




Tokenizer loaded successfully.
Attempting to load model...


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model loaded successfully.
Model moved to CPU.


In [44]:
llm_model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear(in_features=16384, out_features=2048, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
    )
    (norm): GemmaR

In [45]:
def get_model_num_params(model: torch.nn.Module):
    return sum([param.numel() for param in model.parameters()])

get_model_num_params(llm_model)

2506172416

In [46]:
def get_model_mem_size(model: torch.nn.Module):
    mem_params = sum([param.nelement()*param.element_size() for param in model.parameters()])
    mem_buffers = sum([buf.nelement()*buf.element_size() for buf in model.buffers()])
    model_mem_bytes = mem_params + mem_buffers
    model_mem_mb = model_mem_bytes / (1024**2)
    model_mem_gb = model_mem_bytes / (1024**3)
    return {"model_mem_bytes": model_mem_bytes,
            "model_mem_mb": round(model_mem_mb,2),
            "model_mem_gb": round(model_mem_gb,2)}

get_model_mem_size(llm_model)

{'model_mem_bytes': 5012354048, 'model_mem_mb': 4780.15, 'model_mem_gb': 4.67}

In [47]:
input_text = "What is DSA, and how much important is it for our placements ?"

dialogue_template = [
    {"role":"user",
     "content":input_text}
]

prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                       tokenize=False,
                                       add_generation_prompt=True)
print(f"{prompt}")

<bos><start_of_turn>user
What is DSA, and how much important is it for our placements ?<end_of_turn>
<start_of_turn>model



In [None]:
%%time

# Toeknizing the input text and send it to the device 
input_ids = tokenizer(prompt,
                      return_tensors="pt")

ouotputs = llm_model.generate(**input_ids,
                              max_new_tokens=256)
print(f"Model output (tokens):\n{outputs[0]}\n")

In [None]:
outputs_decoded = tokenizer.decode(outputs[0])
print(f"{outputs_decoded}")

In [None]:
def prompt_formatter(query: str,
                    context_items: list[dict]) -> str:
    context = "- " + "\n- ".join([item["sentence_chunk"] for item in context_items])

    base_prompt=

    base_prompt = base_prompt.format(context=context,
                                     query=query)
    dialogue_template = [{
        "role":"user",
        "content":base_prompt
    }]
    prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                           tokenize=False,
                                           add_generation_prompt=True)

    return prompt

query = random.choice(query_list)

scores, indices = retrieve_relevant_resources(query=query,
                                              embeddings=embeddings)

context_items = [pages_and_chunks[i] for i in indices]

prompt = prompt_formatter(query=query,
                          context_items=context_items)

print(prompt)

In [None]:
def ask(query:str,
       temperature: float=0.7,
       max_new_tokens: int=256,
       format_answer_text=True,
       return_answer_only=True):
    scores, indices = retrieve_relevant_resources(query=query,
                                                  embeddings=embeddings)
    context_items = [pages_and_chunks[i] for i indices]
    for i, item in enumerate(context_items):
        item["score" = scores[i],cpu()]
        prompt = prompt_formatter(query=query,
                                  context_items=context_items)
        input_ids = tokenizer(prompt,return_tensors="pt")
        outputs = llm_model.generate(**input_ids,
                                     tempreature=temperature,
                                    do_sample=True,
                                    max_new_tokens=max_new_tokens)
        output_text=tokenizer.decode(outputs[0])
        if format_answer_text:
            output_text = output_text.replace(prompt, "").replace("<bos>","").replace("<eos>","")
        if return_answer_only:
            return output_text
        return output_text, context_items