# Using `ollama` models with RAG

### Setup

In [None]:
import openai
import requests
import json
from openai import OpenAI
from dotenv import load_dotenv

from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import Ollama
from langchain_ollama import OllamaLLM

import pandas as pd

__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')


In [None]:
# Load variables from a local .env file into the process environment.
# The return value is intentionally discarded (bound to `_`).
# NOTE(review): presumably this supplies the OpenAI API key used by
# OpenAIEmbeddings later in the notebook — confirm against the .env file.
_ = load_dotenv()

* Connect to Ollama server

In [None]:
# The stock OpenAI client is pointed at the Ollama server's `/v1` endpoint.
# The client insists on an API key, but the value itself is not used.
client = OpenAI(base_url="http://10.30.16.100:11434/v1", api_key="ollama")

### Set up a query on a base model with no recent DP knowledge

In [None]:
# Question about a March 2024 event. It is sent to the model first without
# context and later with the relevant article, and is also reused as the
# retrieval query. The literal keeps its original indentation and newlines.
query =  '''
        Who came second in the NCAA Division I Championships 
        men's breaststroke in Indianapolis in March 2024 and 
        which university do they represent?
        '''

In [None]:
# Baseline conversation: a system instruction followed by the bare question,
# with no supporting document.
messages = [
    dict(
        role="system",
        content="You are a helpful assistant. Answer questions ONLY if you know the answer",
    ),
    dict(role="user", content=query),
]

In [None]:
# Send the current `messages` to the `llama2` model through the chat
# completions endpoint, capping the reply at 300 tokens.
response = client.chat.completions.create(
    model="llama2",
    messages=messages,
    max_tokens=300,
)

# Show only the text of the first returned choice.
print(response.choices[0].message.content)

### Manually include relevant document as context

In [None]:
# Read the whole article into memory so it can be supplied to the model as
# context. The context manager closes the file handle as soon as the read
# finishes; the original `open(...).read()` left closing to the garbage
# collector.
# NOTE(review): the explicit encoding assumes the article files are UTF-8 —
# confirm against the documents directory.
with open(
    'documents/dp_sports/penn-mens-swim-ncaa-championships-matt-fallon-recap.txt',
    encoding='utf-8',
) as article_file:
    dp_doc = article_file.read()

In [None]:
# Same conversation as the baseline, but the article text is sent as its own
# user message ahead of the question so the model can draw on it.
messages = [
    dict(
        role="system",
        content="You are a helpful assistant. Answer questions ONLY if you know the answer",
    ),
    dict(role="user", content=dp_doc),
    dict(role="user", content=query),
]

In [None]:
# Re-run the chat completion against `llama2` with the current `messages`,
# again capping the reply at 300 tokens.
response = client.chat.completions.create(
    model="llama2",
    messages=messages,
    max_tokens=300,
)

# Show only the text of the first returned choice.
print(response.choices[0].message.content)

## `langchain` RAG example

### 1. Load documents

In [None]:
# Loader rooted at ./documents, restricted by the glob to the .txt files
# directly inside the dp_sports/ sub-directory.
loader = DirectoryLoader('./documents', glob="dp_sports/*.txt")

In [None]:
# Read every file matched by the loader and report how many were loaded.
docs = loader.load()
print(f'Loaded {len(docs)} documents')

### 2. Split into chunks

In [None]:
# Chunking configuration:
#   - the only separator supplied is a single space, so splits fall on spaces;
#   - chunk_size is 1000, with a chunk_overlap of 200 between neighbours;
#   - add_start_index=True asks the splitter to record each chunk's start
#     offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    separators=[" "],
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

# Apply the splitter to every loaded document.
all_splits = text_splitter.split_documents(docs)

In [None]:
# Number of chunks produced by the splitter.
len(all_splits)

* Looking at chunks

In [None]:
# Inspect the first chunk.
all_splits[0]

In [None]:
# Inspect the second chunk (compare with the first to see the overlap).
all_splits[1]

### 3. Load chunks into vector store

In [None]:
# Embed every chunk and store the result in a Chroma collection that is
# persisted under ./chroma_db.
# NOTE(review): embeddings come from OpenAIEmbeddings even though
# OllamaEmbeddings is imported at the top of the notebook — confirm that
# this is intended.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(),
    persist_directory='./chroma_db',
)

In [None]:
# Wrap the vector store as a retriever: similarity search, with k set to 6.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)



#### Example query on vector database

In [None]:
# Reminder of the question that is about to be run against the vector store.
print(query)

In [None]:
# Run the retriever on the question and show how many chunks came back.
retrieved_docs = retriever.invoke(query)
len(retrieved_docs)

In [None]:
# Query the store directly with k=6 so each returned chunk comes paired
# with its relevance score.
relevance_docs_and_scores = vectorstore.similarity_search_with_relevance_scores(
    query, k=6
)

In [None]:
# Flatten each (chunk, score) pair into a plain record holding the chunk
# text, its score and the source recorded in the chunk's metadata.
chunks = [
    {
        'content': doc.page_content,
        'similarity_score': relevance,
        'document': doc.metadata['source'],
    }
    for doc, relevance in relevance_docs_and_scores
]

In [None]:
# Tabulate the records: one row per chunk, one column per record key.
pd.DataFrame(chunks)

## `langchain` RAG chain

In [None]:
# Pull the `rlm/rag-prompt` template via `langchain.hub`.
# The chains in this notebook feed it "context" and "question" inputs.
prompt = hub.pull("rlm/rag-prompt")

In [None]:
# Display the pulled prompt template.
prompt

* Set up the LLM component

In [None]:
# LLM component for the chains: the `llama2` model on the same Ollama server
# the OpenAI-compatible client talks to, addressed at its base URL (no `/v1`
# suffix).
# This uses `OllamaLLM` from `langchain_ollama`, which the notebook already
# imports, in place of the deprecated `langchain_community.llms.Ollama`.
ollama = OllamaLLM(
    base_url="http://10.30.16.100:11434",
    model="llama2",
)

### Set up chains

In [None]:


def format_docs(docs):
    """Return the `page_content` of every item in *docs*, separated by blank lines.

    An empty *docs* yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Retrieval only: fetch the chunks for a question and join them into a
# single string. Handy for inspecting the context the model will see.
rag_result = retriever | format_docs

# No retrieval: the caller supplies both "question" and "context" directly.
norag_chain = prompt | ollama | StrOutputParser()

# Full RAG: the incoming question is passed through unchanged as "question"
# and also sent to the retriever to build "context"; the filled prompt goes
# to the LLM and the output is parsed to a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | ollama
    | StrOutputParser()
)

In [None]:
# Alternative question for trying the chains.
# NOTE(review): the invocation cells in this notebook pass `query`, not
# `query2` — confirm which one is intended.
query2 = "Which team ended Penn's baseball win streak?"

#### with RAG

In [None]:
# Answer the question with retrieved chunks supplied as context.
rag_chain.invoke(query)

#### No RAG

In [None]:
# Same prompt and model, but with an empty context string, so the model
# gets no retrieved text to draw on.
norag_chain.invoke({"question": query, 'context': ''})

#### Looking at the context

In [None]:
# Print the joined chunk text that retrieval produces for the question.
print(rag_result.invoke(query))