# Ollama 


## Import Libraries


In [1]:
from PyPDF2 import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_ollama import OllamaEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.schema import Document
import os
from IPython.display import display, Markdown
import warnings

# Suppress all warnings for the remainder of the session.
warnings.simplefilter("ignore")

# Set environment variable for protobuf.
# NOTE(review): this runs after the imports above; presumably it must be set
# before protobuf is first imported to have any effect — confirm.
os.environ.update({"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"})

## Loading the PDF

In [2]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        str: The extracted text of all pages joined with no separator. Pages
        that yield no text contribute an empty string.
    """
    reader = PdfReader(pdf_path)
    # ``or ""`` keeps a page whose extraction result is falsy (e.g. None) from
    # breaking the concatenation; ``join`` avoids rebuilding the string on
    # every page.
    return "".join(page.extract_text() or "" for page in reader.pages)

local_path = "fortnite.pdf"

# Fail fast when no usable file is configured: otherwise `pdf_text` is never
# assigned and the following cells break with an unrelated NameError.
if not (local_path and os.path.isfile(local_path)):
    raise FileNotFoundError(f"Upload a PDF file: {local_path!r} was not found")

pdf_text = extract_text_from_pdf(local_path)
print(f"PDF loaded successfully: {local_path}")

PDF loaded successfully: fortnite.pdf


## Splitting text into chunks

In [3]:
# Break the extracted PDF text into overlapping pieces
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_text(pdf_text)

# The vector store is built from Document objects, so wrap every piece in one
documents = [Document(page_content=piece) for piece in chunks]
print(f"Text split into {len(documents)} chunks")

Text split into 77 chunks


## Creating vectorstore


In [4]:
# Build the Chroma collection from the chunk documents, embedding them with
# the "nomic-embed-text" model through OllamaEmbeddings.
embedding_model = OllamaEmbeddings(model="nomic-embed-text")
vector_db = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    collection_name="local-rag",
)
print("Vector database created successfully")

Vector database created successfully


## LLM and Retrieval Setup

In [5]:
# Set up the chat model. The same `llm` instance is used below both to
# generate query variants for the retriever and to answer in the final chain.
local_model = "llama3.2" 
llm = ChatOllama(model=local_model)

In [6]:
# Instruction text used to have the LLM rephrase the user's question.
# The continuation lines keep their leading spaces: they are part of the prompt.
_MULTI_QUERY_TEMPLATE = """You are an AI language model assistant. Your task is to generate 4
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""

QUERY_PROMPT = PromptTemplate(
    template=_MULTI_QUERY_TEMPLATE,
    input_variables=["question"],
)

# Multi-query retriever: wraps the vector store's retriever and uses `llm`
# with QUERY_PROMPT to produce the alternative questions.
base_retriever = vector_db.as_retriever()
retriever = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
    prompt=QUERY_PROMPT,
)

## Creating chain

In [7]:
# Answering prompt: the model is told to rely only on the supplied context.
# Built from adjacent literals; the resulting text ends with a newline.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

In [8]:
# Create chain
def _format_docs(docs):
    """Join the page_content of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever yields Document objects. Without the formatting step the
# prompt's {context} slot would receive the list's repr (metadata, escaped
# newlines) instead of the plain chunk text.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


## Chat with PDF

In [9]:

def chat_with_pdf(question):
    """
    Ask the RAG chain a question about the PDF and render the reply.

    The question is passed to the module-level `chain`; the text it returns
    is wrapped in `Markdown` and handed to `display`, whose return value is
    passed back to the caller.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)


In [10]:
# Example 1
example_question = "What are the various terms and conditions?"
chat_with_pdf(example_question)

Here are some of the various terms and conditions mentioned in the provided text:

1. **Arbitration**: The agreement includes a provision for arbitration, which requires disputes to be resolved through private arbitration rather than in court.
2. **Confidential Information**: Confidential information is defined as non-public information related to the Software, including gameplay, content, Game Currency, and Services.
3. **Cheat Detection and Cheats**: Cheat detection refers to functionality intended to identify and prevent cheating, while cheats refer to programs or methods that provide an unfair competitive advantage in the Software.
4. **Defenses against Unenforceable Provisions**: If any provision of this Agreement is held by a court or tribunal of competent jurisdiction to be unenforceable, that provision shall be enforced only to the furthest extent possible under applicable law.
5. **Dispute Resolution**: The agreement specifies which court has jurisdiction over disputes between Epic and the user (Superior Court of Wake County, North Carolina).
6. **Exclusive Remedy**: This Agreement does not confer any rights or remedies on any person other than the parties to this Agreement, except as expressly stated.
7. **Governing Law**: The agreement is governed by the laws of the State of North Carolina.
8. **Opt-out Notice**: Users have 30 days to provide an opt-out notice if they do not wish to be bound by arbitration provisions.
9. **Platform-Specific Terms**: The agreement includes platform-specific terms, such as those applicable to Sony PlayStation users (Section 18.1).
10. **Right of Assignment**: Epic may at any time assign, transfer, charge, or subcontract all or any of its rights or obligations under this Agreement.
11. **Termination**: Sections 2, 5-13, and 15-17 will survive any termination of this Agreement.
12. **Transfer Restrictions**: If restrictions on the transfer of the Software in this Agreement are not enforceable under the law of your country, then this Agreement will be binding on any recipient of the Software.

These terms and conditions cover various aspects of the relationship between Epic and its users, including arbitration, confidentiality, cheat detection, dispute resolution, governing law, platform-specific terms, and termination.

## Cleaning up the vector database


In [11]:
# Optional: clean up when done.
# Deletes the collection behind `vector_db`; `retriever` and `chain` were
# built on top of it, so re-run the vector store cell before querying again.
vector_db.delete_collection()
print("Vector database deleted successfully")

Vector database deleted successfully
