# In [None]:
import re
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain.llms import Ollama

# Base URL of the Ollama server that both models below are pointed at
OLLAMA_URL = "http://127.0.0.1:12345"

# Embedding model (used for indexing and querying) and the LLM (used for answers)
embedding_model = OllamaEmbeddings(
    model="nomic-embed-text:latest",
    base_url=OLLAMA_URL,
)
ollama_llm = Ollama(
    base_url=OLLAMA_URL,
    model="llama3:latest",
)

# Read the source PDF into a list of documents
file_path = "machine learning.pdf"
loader = UnstructuredPDFLoader(file_path)
documents = loader.load()

# Replace every run of newlines with one space; one cleaned string per document
text_without_newlines = []
for document in documents:
    text = document.page_content
    cleaned_text = re.sub(pattern=r'\n+', repl=' ', string=text)
    text_without_newlines += [cleaned_text]

# Join the cleaned documents with spaces and split the result into
# overlapping chunks (chunk_size=500, chunk_overlap=100)
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)
chunks = text_splitter.split_text(" ".join(text_without_newlines))

# Embed the chunks with the embedding model and store them in a Chroma DB
# kept under the "chroma_db265" directory.
# NOTE(review): presumably each run adds the chunks again to the same
# directory, so duplicates may accumulate across runs -- confirm.
chroma_db = Chroma.from_texts(
    texts=chunks,
    embedding=embedding_model,
    persist_directory="chroma_db265",
)
chroma_db.persist()

print("Chunks have been embedded with Nomic and saved to Chroma DB!")

# Query Chroma DB
def query_chroma(query_text: str, k: int = 3):
    """Return the stored chunks most similar to ``query_text``.

    The query is embedded with the module-level ``embedding_model`` and the
    resulting vector is searched for in the module-level ``chroma_db``.

    Args:
        query_text: Text to search for.
        k: Number of results to request from the vector store. Defaults to
            3, the value that was previously hard-coded, so existing
            one-argument calls behave as before.

    Returns:
        The result of ``chroma_db.similarity_search_by_vector``; callers in
        this file read ``page_content`` from each returned item.
    """
    query_embedding = embedding_model.embed_query(query_text)
    return chroma_db.similarity_search_by_vector(query_embedding, k=k)

# Text Generation Function using Llama 3
def generate_response_with_llama3(retrieved_text, query):
    """Ask the module-level ``ollama_llm`` to answer ``query`` from context.

    Args:
        retrieved_text: Context text inserted into the prompt.
        query: The user's query inserted into the prompt.

    Returns:
        The model's response, or ``None`` if the call raised an exception
        (the error message is printed in that case).
    """
    # The 200-token limit is only requested in the prompt wording; nothing
    # in this function enforces it.
    prompt = f"""
    You are a helpful assistant. Using the following retrieved context and user query, generate an informative response based on the query.

    Context:
    {retrieved_text}

    Query:
    {query}

    Please keep the response concise and relevant. Limit the output to 200 tokens.
    """
    try:
        # ``invoke`` replaces the deprecated direct call ``ollama_llm(prompt)``.
        return ollama_llm.invoke(prompt)
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # carry on with ``None`` instead of crashing the script.
        print(f"An error occurred: {e}")
        return None

# Example query run against the Chroma index
query = "courses"
retrieved_results = query_chroma(query)

# Merge the text of the retrieved chunks into one space-separated context string
retrieved_text = " ".join(result.page_content for result in retrieved_results)

# Only call the LLM when retrieval produced some text
if not retrieved_text:
    print("No relevant results found in the database.")
else:
    generated_response = generate_response_with_llama3(retrieved_text, query)
    # One print with a newline separator emits the same two lines as before
    print("\nGenerated Response:", generated_response, sep="\n")
