In [None]:
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
# Local LLM served by Ollama. Assumes Ollama is installed and running, and that
# the model named below has been pulled with `ollama pull gemma2`.
llm = Ollama(model="gemma2")

# Parser appended to chains so that their final output is a plain string.
parser = StrOutputParser()

# Smoke test of the model. Kept as the last expression of the cell so the
# notebook actually displays the reply instead of discarding it.
llm.invoke("Tell me a joke")

In [None]:
from langchain.prompts import PromptTemplate

# Prompt skeleton: the model is told to answer only from the supplied context
# and to reply "I don't know" otherwise. {context} and {question} are the two
# placeholders filled in when the prompt is formatted.
# NOTE: the first sentence line ends with a space before its newline; it is
# spelled out explicitly here so the prompt text stays exactly as written.
template = (
    "\n"
    "Answer the question based on the context below. If you can't \n"
    "answer the question, reply \"I don't know\".\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = PromptTemplate.from_template(template)

# Render the prompt once with dummy values to check the final layout.
sample_inputs = {"context": "Here is some context", "question": "Here is a question"}
print(prompt.format(**sample_inputs))

In [None]:
# Compose the pipeline: fill the prompt, call the model, parse the reply.
chain = prompt | llm | parser

# Show which input keys the chain expects. Printed explicitly because a
# notebook only displays the last expression of a cell automatically.
print(chain.input_schema.schema())

# Quick check that the model answers from the supplied context.
chain.invoke({"context": "My parents named me Santiago", "question": "What's your name?"})

In [None]:
from langchain_community.document_loaders import TextLoader

# Load the book files books/01.txt ... books/18.txt as Document objects.
# `load()` is used instead of `load_and_split()`: the latter already cuts each
# file with a default splitter, so the dedicated splitting step further down
# would chunk the text a second time and record `start_index` relative to the
# intermediate piece rather than to the original file.
pages = []
for i in range(1, 19):
    file = f"books/{i:02d}.txt"
    print(file)  # progress indicator
    loader = TextLoader(file)
    pages.extend(loader.load())

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter # Importing text splitter from Langchain

def split_text(documents, chunk_size=300, chunk_overlap=100):
  """
  Split the text content of the given Document objects into smaller chunks.

  Prints how many chunks were produced and, when there is at least one,
  the content and metadata of the first chunk as a preview.

  Args:
    documents (list[Document]): Document objects containing text content to split.
    chunk_size (int): Maximum size of each chunk, measured with `len`
      (i.e. in characters). Defaults to 300.
    chunk_overlap (int): Number of characters shared by consecutive chunks.
      Defaults to 100.
  Returns:
    list[Document]: Document objects representing the split text chunks
    (empty when there was nothing to split).
  """
  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    length_function=len, # Measure chunk length in characters
    add_start_index=True, # Ask the splitter to add a start index to each chunk
  )

  chunks = text_splitter.split_documents(documents)
  print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

  # Preview the first chunk. Guarded so that empty input returns an empty
  # list instead of raising IndexError.
  if chunks:
    document = chunks[0]
    print(document.page_content)
    print(document.metadata)

  return chunks

In [None]:
# Cut every loaded document into retrieval-sized chunks.
chunks = split_text(documents=pages)

In [None]:
import os

# Directory in which the Chroma vector store is persisted. The default is the
# original author's local path; set the CHROMA_PATH environment variable to
# point the notebook at a different location without editing this cell.
CHROMA_PATH = os.environ.get(
    "CHROMA_PATH", "/Users/nk/Documents/devpunya/AI_Teacher/chroma"
)

# Number of chunks produced by the split. Kept as the last expression of the
# cell so the notebook displays it.
len(chunks)

In [None]:
import os, shutil
def save_to_chroma(chunks):
  """
  Rebuild the Chroma database at CHROMA_PATH from the given chunks.

  Any existing directory at CHROMA_PATH is deleted first, so the resulting
  database contains only `chunks`. The function relies on the notebook-level
  names `CHROMA_PATH`, `Chroma` and `embeddings`, which must be defined
  before it is called.

  Args:
    chunks (list[Document]): Document objects representing text chunks to save.
  Returns:
    None
  Raises:
    ValueError: If `chunks` is empty. Raised before anything is deleted, so
      an existing database is never wiped when there is nothing to store.
  """
  if not chunks:
    raise ValueError("No chunks to save; existing Chroma database left untouched.")

  # Clear out the existing database directory if it exists
  if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

  # Create a new Chroma database from the documents, embedding them with the
  # notebook-level `embeddings` object
  db = Chroma.from_documents(
    chunks,
    embeddings,
    persist_directory=CHROMA_PATH
  )

  # Persist the database to disk
  db.persist()
  print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_community.vectorstores import Chroma

import os

# Embedding model served by Ollama (requires `ollama pull gemma2`).
embeddings = OllamaEmbeddings(model="gemma2")

# Reuse the persisted index when one already exists. Calling
# Chroma.from_documents against an existing persist directory adds the same
# chunks again on every run, which fills the store with duplicates.
# If the books or the split settings change, delete CHROMA_PATH (for example
# with save_to_chroma) so the index is rebuilt from scratch.
if os.path.isdir(CHROMA_PATH) and os.listdir(CHROMA_PATH):
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
else:
    db = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)

In [None]:
# Create a retriever that returns the 5 most similar chunks for a query.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Sanity check: show the two best matches. The number of retrieved results is
# controlled by search_kwargs["k"] above, so the ranked result list is sliced
# here rather than passing an extra keyword argument to invoke().
retriever.invoke("Kisari Mohan Ganguli")[:2]

In [None]:
from operator import itemgetter

# Both prompt inputs are derived from the incoming {"question": ...} dict:
# the question is piped into the retriever to produce the context, and is
# also forwarded unchanged as the question itself.
question_of = itemgetter("question")
rag_inputs = {
    "context": question_of | retriever,
    "question": question_of,
}

# Retrieval step, then prompt, then model, then output parser.
chain = rag_inputs | prompt | llm | parser

In [None]:
# Questions to put to the retrieval-augmented chain.
questions = ["Who is Kisari Mohan Ganguli ?"]

# Print each question followed by the chain's answer and a blank separator line.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}")
    print()