<a href="https://colab.research.google.com/github/mrodgers/ollama_rag_colab/blob/main/Testing_Ollama_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# After running the next cell (which opens a terminal via %xterm), type these commands into that terminal:
# curl -fsSL https://ollama.com/install.sh | sh
# ollama serve &
# ollama pull llama3.1 &
# ollama pull nomic-embed-text &

In [None]:
# Install colab-xterm, the package whose extension is loaded on the next line.
!pip install colab-xterm
# Load the extension so that the %xterm magic becomes available in this kernel.
%load_ext colabxterm
# Open a terminal in the cell output; the Ollama install/serve/pull commands
# listed in the first cell are meant to be typed there.
%xterm

In [None]:
!pip -q install langchain langchain-core langchain-community ollama beautifulsoup4 chromadb gradio

In [None]:
from langchain_community.llms.ollama import Ollama

In [None]:
# Name of the Ollama model used for generation. The setup commands pull
# "llama3.1" on the Ollama server, so this value should match that pull.
MODEL = "llama3.1"
# LangChain LLM wrapper bound to MODEL.
# NOTE(review): the Gradio cell further down calls `ollama.chat` directly and
# does not appear to use `llm`; it is only referenced by the smoke test below.
llm = Ollama(model=MODEL)

# Optional smoke test: uncomment the two lines below to check that the model responds.
# response = llm.invoke("What is the meaning of life?")
# print(response)

In [None]:
import gradio as gr
import ollama
from bs4 import BeautifulSoup as bs
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Define a simple Document class to wrap the content
class Document:
    """Minimal container pairing a piece of text with optional metadata.

    Attributes:
        page_content: The text held by this document.
        metadata: The mapping supplied by the caller, or a fresh empty dict
            when none was given.
    """

    def __init__(self, page_content, metadata=None):
        # Create the default per instance so instances never share one dict.
        if metadata is None:
            metadata = {}
        self.metadata = metadata
        self.page_content = page_content

# Function to load data from an uploaded file
def load_file(file_path):
    """Read a text file and return its full contents as a string.

    The file is decoded as UTF-8 first. If that raises UnicodeDecodeError,
    the file is read again from the start as Latin-1.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded text of the file.
    """
    def read_as(codec):
        # Open in text mode with the given codec and slurp everything.
        with open(file_path, 'r', encoding=codec) as handle:
            return handle.read()

    try:
        text = read_as('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to Latin-1.
        text = read_as('latin-1')
    return text

# Function to process the uploaded file and create a vector store
def process_file(file_path):
    """Build a Chroma vector store from the text of a single file.

    The file is read with `load_file`, wrapped in one `Document`, split into
    overlapping chunks (1000 characters, 200 overlap), embedded with the
    "nomic-embed-text" Ollama embedding model, and indexed in Chroma.

    Args:
        file_path: Path of the text file to index.

    Returns:
        The Chroma vector store holding the file's chunks.
    """
    # Local import: only needed to name the per-call collection.
    import uuid

    content = load_file(file_path)
    docs = [Document(page_content=content)]

    # Split the loaded documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Create Ollama embeddings and vector store
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    # Use a fresh collection for every call. Without an explicit name every
    # call targets Chroma's default collection, and the in-process Chroma
    # client can reuse that collection, so chunks from earlier uploads would
    # be retrieved alongside the current file's chunks.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        collection_name=f"upload-{uuid.uuid4().hex}",
    )

    return vectorstore

# Define the function to call the Ollama Llama3 model
def ollama_llm(question, context, model='llama3.1'):
    """Send a question plus supporting context to an Ollama chat model.

    The question and context are combined into a single user message and
    sent through `ollama.chat`.

    Args:
        question: The user's question.
        context: Text the model should see alongside the question.
        model: Name of the Ollama model to query. Defaults to 'llama3.1',
            the model this function previously hard-coded.

    Returns:
        The text content of the model's reply message.
    """
    formatted_prompt = f"Question: {question}\n\nContext: {context}"
    response = ollama.chat(model=model, messages=[{'role': 'user', 'content': formatted_prompt}])
    return response['message']['content']

# Define the RAG setup
def rag_chain(question, vectorstore):
    """Answer a question using passages retrieved from a vector store.

    Args:
        question: The user's question; also used as the retrieval query.
        vectorstore: Object exposing `as_retriever()`, whose retriever returns
            documents with a `page_content` attribute.

    Returns:
        Whatever `ollama_llm` returns for the question and the joined passages.
    """
    hits = vectorstore.as_retriever().invoke(question)
    # Retrieved passages are separated by a blank line in the prompt context.
    passages = [hit.page_content for hit in hits]
    return ollama_llm(question, "\n\n".join(passages))

# Define the Gradio interface
def get_important_facts(file, question):
    """Gradio callback: answer `question` using the uploaded file as context.

    Args:
        file: The value of the file input. May be None when nothing was
            uploaded, a plain path string, or an object that exposes the
            path as `.name`.
        question: The text typed into the question box.

    Returns:
        The model's answer, or a short instruction when the file or the
        question is missing.
    """
    # Submitting with no upload passes None; answer with guidance instead of
    # failing on `None.name`.
    if file is None:
        return "Please upload a file before asking a question."
    # Skip the embedding and LLM round trip when there is nothing to ask.
    if question is None or not question.strip():
        return "Please enter a question."

    # Accept both a bare path string and a file-like object carrying `.name`.
    file_path = getattr(file, "name", file)
    vectorstore = process_file(file_path)
    return rag_chain(question, vectorstore)

# Create a Gradio app interface
# Build the UI: one file upload and one question box feed get_important_facts,
# whose return value is shown as text.
iface = gr.Interface(
  # Callback invoked with the input values; its (file, question) parameters
  # line up with the order of `inputs` below.
  fn=get_important_facts,
  # A single-file upload requested as a file path, plus a two-line question box.
  inputs=[gr.File(type="filepath", file_count="single", label="Upload a file"), gr.Textbox(lines=2, placeholder="Enter your question here...")],
  outputs="text",
  title="RAG with Llama3.1",
  description="Upload a file and ask questions about the provided context",
  # Flagging is set to "never".
  # NOTE(review): newer Gradio releases may prefer a different keyword for this — confirm against the installed version.
  allow_flagging="never",
)

# Launch the Gradio app
# NOTE(review): debug=True presumably keeps the cell running and surfaces
# errors in the cell output — confirm against the Gradio docs.
iface.launch(debug=True)


Thanks to https://medium.com/@tharindumadhusanka99/llama3-rag-on-google-colab-73c43aa53281 for some of the code!

In [None]:
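# NOTE: Earlier variant of the app that indexes a fixed web page instead of an uploaded file.
# It is kept commented out for reference only and is not executed.
# import gradio as gr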
# import ollama
# from bs4 import BeautifulSoup as bs
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_community.document_loaders import WebBaseLoader
# from langchain_community.vectorstores import Chroma
# from langchain_community.embeddings import OllamaEmbeddings

# # Load the data from the web URL
# url = 'https://en.wikipedia.org/wiki/Ohiya'
# loader = WebBaseLoader(url)
# docs = loader.load()

# # Split the loaded documents into chunks
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# splits = text_splitter.split_documents(docs)

# # Create Ollama embeddings and vector store
# embeddings = OllamaEmbeddings(model="nomic-embed-text")
# vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# # Define the function to call the Ollama Llama3 model
# def ollama_llm(question, context):
#     formatted_prompt = f"Question: {question}\n\nContext: {context}"
#     response = ollama.chat(model='llama3.1', messages=[{'role': 'user', 'content': formatted_prompt}])
#     return response['message']['content']

# # Define the RAG setup
# retriever = vectorstore.as_retriever()

# def rag_chain(question):
#     retrieved_docs = retriever.invoke(question)
#     formatted_context = "\n\n".join(doc.page_content for doc in retrieved_docs)
#     return ollama_llm(question, formatted_context)

# # Define the Gradio interface
# def get_important_facts(question):
#     return rag_chain(question)

# # Create a Gradio app interface
# iface = gr.Interface(
#   fn=get_important_facts,
#   inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
#   outputs="text",
#   title="RAG with Llama3.1",
#   description="Ask questions about the provided context",
#   allow_flagging="never",
# )

# # Launch the Gradio app
# iface.launch()
