# Ollama RAG Example

## Import/Setup


In [None]:
import logging
from datetime import datetime
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma

logging.basicConfig(level=logging.INFO)


## Loading PDF file(s)


In [None]:
logging.info("Loading PDF")
loader = PyPDFLoader("./ISTQB_CTFL_Syllabus_v4.0.1.pdf")
pages = loader.load()
logging.info(f"Loaded {len(pages)} pages")


## Splitting the pages into chunks


In [None]:
logging.info("Splitting text")
text_splitter = CharacterTextSplitter(
    separator=".",
    is_separator_regex=False,
    # chunk_size=1000,
    # chunk_overlap=10,
)
docs = text_splitter.split_documents(pages)
logging.info(f"Split into {len(docs)} documents")


INFO:root:Splitting text
INFO:root:Split into 80 documents


## Loading the chunks into the vector DB


In [84]:
COLLECTION_NAME = f"istqb_ctfl_{datetime.now().strftime('%Y%m%d%H%M%S')}"

logging.info("Embedding text")
embedding_model = OllamaEmbeddings(model="nomic-embed-text")
vector_db = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embedding_model,
)
vector_db.add_documents(docs)
logging.info(f"Embedded {len(vector_db)} documents")


INFO:root:Embedding text
INFO:root:Embedded 80 documents


## Index search using the vector DB


In [85]:
docs = vector_db.similarity_search_with_score("boundary value analysis", k=10)

# sort by score descending
docs = sorted(docs, key=lambda x: x[1], reverse=True)

print(f"TOP result: {docs[0][0].page_content}\n\n")

for i, (doc, score) in enumerate(docs):
    print(f"""
# {i} #######################
Document {doc.metadata}:
Score: {score}
Length: {len(doc.page_content)}
Content:
--------------------------
{doc.page_content[:100]}...
##########################
""")


TOP result: Certified Tester 
Foundation Level 
 
  
 
v4.0.1 Page 36 of 78  2024-09-15 
© International Software Testing Qualifications Board 
• Fixing and reporting. For every defect, a defect report should be created so that corrective 
actions can be followed up. Once the exit criteria are reached, the work product can be accepted. 
The review results are reported. 
3.2.3. Roles and Responsibilities in Reviews 
Reviews involve various stakeholders, who may take on several roles. The principal roles and their 
responsibilities are: 
• Manager – decides what is to be reviewed and provides resources, such as staff and time for the 
review 
• Author – creates and fixes the work product under review 
• Moderator (also known as the facilitator) – ensures the effective running of review meetings, 
including mediation, time management, and a safe review environment in which everyone can 
speak freely 
• Scribe (also known as recorder) – collates anomalies from reviewers and records review 