# Testing the OLLAMA API

In [None]:
%pip install requests

In [None]:
import requests
import json

In [None]:
url = "http://localhost:11434/api/generate"

headers = {
    "Content-Type": "application/json"
}

# Request body for the /api/generate endpoint. "stream": False asks for one
# complete JSON object instead of a stream of partial chunks.
payload = {
    "model": "llama3.2",
    "prompt": "Hello, My name is Daniel Adnan",
    "stream": False,
}

# `json=` lets requests serialise the body. The timeout keeps the cell from
# blocking forever if the local server stops responding.
response = requests.post(url, headers=headers, json=payload, timeout=300)

if response.status_code == 200:
    # The parsed reply gets its own name so the request payload is not
    # overwritten by the response.
    result = response.json()
    actual_response = result["response"]
    print(actual_response)
else:
    print("Error: ", response.status_code, response.text)

## Adding memory to the model

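By default, Ollama does not preserve memory between requests: every call to `/api/generate` is independent, so the model cannot recall anything from the previous cell.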

In [None]:
# Same endpoint as before, but a fresh request: nothing from the previous
# prompt is sent along with this one.
payload = {
    "model": "llama3.2",
    "prompt": "What is my name?",
    "stream": False,
}

# `json=` lets requests serialise the body. The timeout keeps the cell from
# blocking forever if the local server stops responding.
response = requests.post(url, headers=headers, json=payload, timeout=300)

if response.status_code == 200:
    # The parsed reply gets its own name so the request payload is not
    # overwritten by the response.
    result = response.json()
    actual_response = result["response"]
    print(actual_response)
else:
    print("Error: ", response.status_code, response.text)

Adding Memory

In [None]:
%pip install ollama

In [None]:

from ollama import chat as ollama_chat

# Conversation state shared by the chat helpers in this notebook.
model = "llama3.2"
messages = []

# Role labels stored with each history entry.
USER = "user"
ASSISTANT = "assistant"


def add_history(content, role):
    """Append one conversation turn to the module-level ``messages`` list.

    Args:
        content: Text of the turn.
        role: Speaker label stored under the ``'role'`` key
            (``USER`` or ``ASSISTANT`` in this notebook).
    """
    turn = {"role": role, "content": content}
    messages.append(turn)

In [None]:
def chat(message):
    """Send ``message`` to the model together with the stored history.

    The user turn is appended to the shared ``messages`` list before the
    request and the assistant's reply is appended afterwards, so later calls
    send the whole conversation.

    Args:
        message: The user's text for this turn.

    Returns:
        The assistant's reply text ('' if the reply carried no content).
    """
    add_history(message, USER)
    response = ollama_chat(model=model, messages=messages, stream=False)
    # With stream=False the client returns one complete response rather than
    # an iterator of chunks, so read the reply text directly instead of
    # looping over the response. Subscript access works both for plain-dict
    # responses and for the subscriptable response objects of newer clients.
    complete_message = response['message']['content'] or ''
    add_history(complete_message, ASSISTANT)
    return complete_message

In [None]:
# First turn: give the model a name so the next cell can check whether it is remembered.
chat_response = chat("Hello, my name is Shadab")
print(chat_response)

In [None]:
# Second turn: the earlier exchange is still in `messages`, so it is sent along with this question.
chat_response = chat("What is my name?")
print(chat_response)

In [None]:
# Start from an empty history so the question is sent without the earlier turns.
messages = []
chat_response = chat("What is my name?")
print(chat_response)
# Show the history recorded for this exchange.
print(messages)
# Clear the history again before moving on.
messages = []

# Working on RAG

Getting the necessary libraries

In [None]:
%pip install transformers datasets torch faiss-cpu matplotlib scikit-learn

Import the required libraries

In [None]:
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
import torch
import numpy as np
import random
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np

In [None]:
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts anything, does nothing."""
    return None


# Silence warnings twice over: replace the emitter itself, then also install
# a blanket "ignore" filter.
warnings.warn = warn
warnings.filterwarnings("ignore")

## Loading and preprocessing data

Load the PDF file and extract its text


In [None]:

%pip install PyMuPDF

In [None]:
import fitz  

# Path of the PDF whose text will be indexed
pdf_document = "random_story.pdf"

# Open the PDF in a `with` block so the file is closed once its text has been
# read, then concatenate the text of every page in page order.
with fitz.open(pdf_document) as document:
    all_text = "".join(page.get_text() for page in document)

print(all_text)

Process the text (splitting on newline characters and dropping blank lines)

In [None]:
def read_and_split_text(all_text):
    """Break ``all_text`` into its non-blank, whitespace-trimmed lines.

    The text is split on newline characters; each piece is stripped of
    surrounding whitespace and pieces that end up empty are discarded.

    Args:
        all_text: The full text to split.

    Returns:
        A list of the remaining pieces, in their original order.
    """
    trimmed = (piece.strip() for piece in all_text.split('\n'))
    return [piece for piece in trimmed if piece]


# Split the extracted text into paragraphs
paragraphs = read_and_split_text(all_text)

# Preview at most the first four entries; slicing avoids an IndexError when
# the document yields fewer than four.
for i, paragraph in enumerate(paragraphs[:4]):
    print(f"sample: {i} paragraph: {paragraph} \n" )


## Embedding

Tokenize the text

In [None]:
# Load the pretrained tokenizer for the DPR context encoder and display it
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
context_tokenizer

In [None]:
# Use the first paragraph as a sample input
text = paragraphs[0]
print (text)

# Tokenize the sample into PyTorch tensors ('pt'), truncating at 256 tokens, and display the result
tokens_result=context_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
tokens_result

Encoding into vector embeddings

In [None]:
# Load the pretrained DPR context encoder (same checkpoint name as the context tokenizer)
context_encoder = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

In [None]:
# Run the sample tokens through the context encoder and display the raw output
outputs=context_encoder(**tokens_result)
outputs

Function to tokenize and embed the input text from PDF

In [None]:
def encode_contexts(text_list):
    """Embed each text with the DPR context encoder.

    Every text is tokenized on its own (truncated to 256 tokens) and passed
    through ``context_encoder``; the pooled outputs are concatenated row by
    row in input order.

    Args:
        text_list: Iterable of strings to embed.

    Returns:
        A NumPy array with one row per input text.

    Raises:
        ValueError: If ``text_list`` contains no texts.
    """
    embeddings = []
    # Inference only: disabling autograd avoids keeping a computation graph
    # alive for every encoded text.
    with torch.no_grad():
        for text in text_list:
            inputs = context_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
            outputs = context_encoder(**inputs)
            embeddings.append(outputs.pooler_output)
    if not embeddings:
        # torch.cat rejects an empty list with a far less helpful message.
        raise ValueError("text_list is empty: there is nothing to encode")
    return torch.cat(embeddings).detach().numpy()

# Shuffle in place with a fixed seed so the paragraph order, and therefore
# the row order of the embeddings computed from it, is the same on every
# fresh run. A private Random instance leaves the global RNG state untouched.
random.Random(42).shuffle(paragraphs)

context_embeddings = encode_contexts(paragraphs)

# store the dimension of the vector embeddings
paragraphs_column = context_embeddings.shape[1]
print(paragraphs_column)

## Indexing (with FAISS)

In [None]:
import faiss

# `context_embeddings` is already a single 2-D array; take a float32 copy of it for the index.
context_embeddings_np = np.array(context_embeddings, dtype='float32')
embedding_dim = paragraphs_column

# Build an IndexFlatL2 sized to the embedding width and add every context embedding to it.
index = faiss.IndexFlatL2(embedding_dim)
index.add(context_embeddings_np)

## Question Encoder & Tokenizer

Load DPR question encoder and tokenizer

In [None]:
# The question encoder and its tokenizer are loaded from the same pretrained checkpoint name.
dpr_question_checkpoint = 'facebook/dpr-question_encoder-single-nq-base'
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(dpr_question_checkpoint)
question_encoder = DPRQuestionEncoder.from_pretrained(dpr_question_checkpoint)

Encoding and tokenizing sample question

In [None]:
# Sample question to embed
question = 'Who is Arif?'
# Tokenize into PyTorch tensors, then take the encoder's pooled output as a NumPy array
question_inputs = question_tokenizer(question, return_tensors='pt')
question_embedding = question_encoder(**question_inputs).pooler_output.detach().numpy()

Search context from input PDF file

In [None]:
# Search the index
D, I = index.search(question_embedding, k=5)  # Retrieve top 5 relevant contexts
print("D:",D)
print("I:",I)

print("Top 5 relevant contexts:")
for i, idx in enumerate(I[0]):
    if idx < 0:
        # FAISS fills unused result slots with -1 when the index holds fewer
        # than k vectors; paragraphs[-1] would silently show the wrong text.
        continue
    print(f"{i+1}: {paragraphs[idx]}")
    print(f"distance {D[0][i]}\n")

Function to search context from question

In [None]:
def search_relevant_contexts(question, question_tokenizer, question_encoder, index, k=20):
    """Embed ``question`` and look up its nearest stored contexts.

    Args:
        question: The question text.
        question_tokenizer: Callable that turns the question into encoder
            inputs (called with ``return_tensors='pt'``).
        question_encoder: Callable whose result exposes ``pooler_output``.
        index: Search index providing ``search(embedding, k)``.
        k: Maximum number of contexts to retrieve (default 20). When the
            index reports a smaller, non-zero ``ntotal``, ``k`` is reduced to
            that count so the result contains no ``-1`` padding entries.

    Returns:
        ``(D, I)`` exactly as returned by ``index.search``: distances and the
        corresponding indices.
    """
    # Tokenize the question
    question_inputs = question_tokenizer(question, return_tensors='pt')

    # Encode the question to get the embedding. Inference only, so autograd
    # is switched off.
    with torch.no_grad():
        question_embedding = question_encoder(**question_inputs).pooler_output.detach().numpy()

    # Never ask for more neighbours than the index holds: FAISS would pad the
    # result with -1, which callers indexing a list would read as "last item".
    stored = getattr(index, 'ntotal', 0)
    if stored:
        k = min(k, stored)

    # Search the index to retrieve the top k relevant contexts
    D, I = index.search(question_embedding, k)

    return D, I


# Test the function
question = "What is the name of father of Arif?"
# k=5 overrides the function's default of 20
D, I = search_relevant_contexts(question, question_tokenizer, question_encoder, index, k=5)

print("Distances:", D)
print("Indices:", I)

## Integrating OLLAMA

Function to generate an answer using OLLAMA

In [None]:
def chat(message):
    """Send ``message`` to the model together with the stored history.

    The user turn is appended to the shared ``messages`` list before the
    request and the assistant's reply is appended afterwards, so later calls
    send the whole conversation.

    Args:
        message: The user's text for this turn.

    Returns:
        The assistant's reply text ('' if the reply carried no content).
    """
    add_history(message, USER)
    response = ollama_chat(model=model, messages=messages, stream=False)
    # With stream=False the client returns one complete response rather than
    # an iterator of chunks, so read the reply text directly instead of
    # looping over the response. Subscript access works both for plain-dict
    # responses and for the subscriptable response objects of newer clients.
    complete_message = response['message']['content'] or ''
    add_history(complete_message, ASSISTANT)
    return complete_message

def generate_answer_with_ollama(question, relevant_contexts):
    """Ask the chat model ``question`` with the retrieved contexts as background.

    The contexts are joined with single spaces and placed ahead of the
    question in one prompt, which is then sent through ``chat``.

    Args:
        question: The question to answer.
        relevant_contexts: Iterable of context strings.

    Returns:
        Whatever ``chat`` returns for the assembled prompt.
    """
    joined_context = " ".join(relevant_contexts)
    return chat(f"Context: {joined_context}\n\nQuestion: {question}\nAnswer:")

Test the function

In [None]:
question = "Can you summarize the story?"
D, I = search_relevant_contexts(question, question_tokenizer, question_encoder, index, k=20)

# FAISS fills unused result slots with -1 when the index holds fewer than k
# vectors; drop those so paragraphs[-1] is not picked up by mistake.
relevant_contexts = [paragraphs[i] for i in I[0] if i >= 0]

# print the relevant contexts
for i, context in enumerate(relevant_contexts):
    print(f"{i+1}: {context}\n")

answer = generate_answer_with_ollama(question, relevant_contexts)

if answer:
    print(answer)

else:
    print("No answer found")