In [None]:
import os
import ollama
import chromadb
import markdown

In [None]:
# Initialize Ollama client (default constructor arguments)
ollama_client = ollama.Client()

# Initialize ChromaDB client
# NOTE(review): chromadb.Client() with no settings is presumably the in-memory
# client, so stored vectors would not survive a kernel restart -- confirm.
chroma_client = chromadb.Client()

# Set the path to your folder containing the files
my_folder = "../my_folder"

# Name of the database
collection_name = "md_embeddings"

# Fetch the collection, creating it on first use.
# get_or_create_collection replaces the former get_collection + `except ValueError`
# pair: the exception raised for a missing collection is not a ValueError in every
# chromadb release, in which case the fallback never ran and the cell crashed.
# list_collections() yields Collection objects in some releases and plain names in
# others; getattr(..., "name", ...) accepts both shapes.
existing_collection_names = {
    getattr(entry, "name", entry) for entry in chroma_client.list_collections()
}
collection_already_existed = collection_name in existing_collection_names
collection = chroma_client.get_or_create_collection(name=collection_name)
if collection_already_existed:
    print(f"Collection '{collection_name}' already exists.")
else:
    print(f"Created new collection '{collection_name}'.")

In [None]:
# Uncomment the line below to delete the collection (e.g. to rebuild the index from scratch)
#chroma_client.delete_collection(name=collection_name)

In [None]:
def read_markdown_files(directory):
    """Read every Markdown / Quarto file located directly inside ``directory``.

    Only regular files whose name ends in ``.md`` or ``.qmd`` are read; the
    scan is not recursive.

    Args:
        directory: Path of the folder to scan.

    Returns:
        A list of ``(filename, content)`` tuples sorted by filename, where
        ``filename`` is the bare file name (no directory part) and ``content``
        is the decoded text of the file. Empty when nothing matches.

    Raises:
        OSError: If ``directory`` cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a matching file is not valid UTF-8.
    """
    content_list = []
    # Sorted so the result does not depend on the arbitrary os.listdir order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith((".md", ".qmd")):
            continue
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            # A sub-directory named like "notes.md" cannot be opened as a file.
            continue
        # Explicit UTF-8: the default encoding of open() depends on the locale,
        # which corrupts or rejects non-ASCII text on some platforms.
        with open(path, 'r', encoding="utf-8") as file:
            content_list.append((filename, file.read()))
    return content_list

def generate_embedding(text, model="llama3"):
    """Return the embedding of ``text`` computed by the Ollama client.

    Args:
        text: The text to embed; passed to Ollama as the ``prompt``.
        model: Name of the Ollama model used to compute the embedding.
            Defaults to ``"llama3"``, the value that used to be hard-coded.
            Vectors are only comparable when produced by the same model, so
            use one model for both the stored documents and the queries.

    Returns:
        The value stored under the ``'embedding'`` key of the Ollama response
        (presumably a list of floats -- confirm against the client version).
    """
    # Uses the module-level `ollama_client` created in the setup cell.
    response = ollama_client.embeddings(model=model, prompt=text)
    return response['embedding']

def store_vectors_in_chromadb(vectors, metadata, chroma_collection):
    """Add one record to ``chroma_collection`` per (vector, metadata) pair.

    Note: a later cell of this notebook defines a function with the same
    name; once that cell has run, it replaces this definition.

    Args:
        vectors: Iterable of embeddings, aligned with ``metadata``.
        metadata: Iterable of dicts providing ``'filename'`` (used as the
            record id) and ``'content'`` (stored as the document).
        chroma_collection: Object exposing ``add(embeddings=, documents=, ids=)``.

    Pairs are formed with ``zip``, so surplus items in the longer input are
    ignored. Each pair is written with its own ``add`` call.
    """
    for embedding, entry in zip(vectors, metadata):
        record = {
            "embeddings": [embedding],
            "documents": [entry['content']],
            "ids": [entry['filename']],
        }
        chroma_collection.add(**record)

def find_closest_files(query_text, chroma_collection, top_n=5):
    """Query ``chroma_collection`` for the records nearest to ``query_text``.

    Args:
        query_text: Text to search for; it is embedded with ``generate_embedding``.
        chroma_collection: Object exposing ``query(query_embeddings=, n_results=)``.
        top_n: Number of results requested from the collection.

    Returns:
        Whatever ``chroma_collection.query`` returns, unmodified.
    """
    return chroma_collection.query(
        query_embeddings=[generate_embedding(query_text)],
        n_results=top_n,
    )

In [None]:
def store_vectors_in_chromadb(vectors, metadata, chroma_collection):
    """Insert or refresh one record in ``chroma_collection`` per input pair.

    Each record uses ``meta['filename']`` as its id and ``meta['content']`` as
    its document. An id that is already stored is overwritten; an unknown id
    is created.

    The previous implementation decided between ``update`` and ``add`` with
    ``if chroma_collection.get(ids=[...])``. ``get`` returns a non-empty
    mapping even when no id matches, so that test was always true and new
    files were sent to ``update`` instead of ``add``. ``upsert`` performs
    the intended "update or add" in a single call.

    Args:
        vectors: Iterable of embeddings, aligned with ``metadata``.
        metadata: Iterable of dicts with ``'filename'`` and ``'content'`` keys.
        chroma_collection: Collection exposing
            ``upsert(embeddings=, documents=, ids=)``.

    Pairs are formed with ``zip``, so surplus items in the longer input are
    ignored. A failure while writing one record is printed and the loop moves
    on to the next record (best effort, as before).

    Raises:
        KeyError: If a metadata dict has no ``'filename'`` key.
    """
    for vector, meta in zip(vectors, metadata):
        vector_id = meta['filename']

        try:
            chroma_collection.upsert(
                embeddings=[vector],
                documents=[meta['content']],
                ids=[vector_id]
            )
        except Exception as e:
            # Deliberately best-effort: report and keep going with the other files.
            print(f"Error storing vector: {e}")

def retrieve_context_from_chromadb(query_text, chroma_collection, top_n=5):
    """Build a single context string from the records closest to ``query_text``.

    Args:
        query_text: Text to search for; it is embedded with ``generate_embedding``.
        chroma_collection: Object exposing ``query(query_embeddings=, n_results=)``
            whose result offers ``.get`` access to ``'ids'`` and ``'documents'``.
        top_n: Number of results requested from the collection.

    Returns:
        The matches of the (single) query formatted as ``"[id] document"`` and
        joined with one space; an empty string when there are no matches.
    """
    hits = chroma_collection.query(
        query_embeddings=[generate_embedding(query_text)],
        n_results=top_n,
    )

    # Results are nested per query; only one query was sent, so take entry 0.
    ids_for_query = hits.get('ids', [[]])[0]
    docs_for_query = hits.get('documents', [[]])[0]

    labelled = []
    for doc_id, doc in zip(ids_for_query, docs_for_query):
        labelled.append(f"[{doc_id}] {doc}")
    return " ".join(labelled)

def get_system_message_rag(content):
    """Build the system prompt for the RAG chat, embedding ``content`` as context.

    Args:
        content: Retrieved context text; inserted verbatim after the
            ``CONTENT:`` marker at the end of the prompt.

    Returns:
        The complete system prompt as a string.

    The instruction list previously contained two steps numbered "6."; the
    last one is now "7." so that every step has a unique number.
    """
    return f"""You are an expert consultant helping executive advisors to get relevant information from internal documents.

    Generate your response by following the steps below:
    1. Recursively break down the question into smaller questions.
    2. For each question/directive:
        2a. Select the most relevant information from the context in light of the conversation history.
    3. Generate a draft response using selected information.
    4. Remove duplicate content from draft response.
    5. Generate your final response after adjusting it to increase accuracy and relevance.
    6. Do not try to summarise the answers, explain it properly.
    7. Only show your final response! 
    
    Constraints:
    1. DO NOT PROVIDE ANY EXPLANATION OR DETAILS OR MENTION THAT YOU WERE GIVEN CONTEXT.
    2. Don't mention that you are not able to find the answer in the provided context.
    3. Don't make up the answers by yourself.
    4. Try your best to provide answer from the given context.

    CONTENT:
    {content}
    """

def get_ques_response_prompt(question):
    """Build the user prompt that asks ``question`` against the supplied context.

    Args:
        question: The question text; formatted into the last content line.

    Returns:
        A string made of an empty first line, a separator line, the
        instruction line, the question, and a closing line of four spaces.
        Every line after the first is indented by four spaces.
    """
    separator = "=============================================================="
    prompt_lines = [
        "",
        f"    {separator}",
        "    Based on the above context, please provide the answer to the following question:",
        f"    {question}",
        "    ",
    ]
    return "\n".join(prompt_lines)

def generate_rag_response(content, question, model="llama3"):
    """Stream a chat answer to ``question`` grounded in ``content``.

    The system and user prompts are printed, followed by a progress marker;
    each streamed piece of the answer is then echoed to stdout as it arrives.

    Args:
        content: Context text handed to ``get_system_message_rag``.
        question: Question text handed to ``get_ques_response_prompt``.
        model: Name of the Ollama chat model.

    Returns:
        The full answer: all streamed pieces concatenated in arrival order.
    """
    system_prompt = get_system_message_rag(content)
    user_prompt = get_ques_response_prompt(question)

    # Uses the module-level `ollama_client` created in the setup cell.
    stream = ollama_client.chat(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        stream=True,
    )

    print(system_prompt)
    print(user_prompt)
    print("####### THINKING OF ANSWER............ ")

    pieces = []
    for chunk in stream:
        piece = chunk['message']['content']
        print(piece, end='', flush=True)
        pieces.append(piece)

    return ''.join(pieces)

def main():
    """Index the markdown folder, then answer one fixed demo question.

    Reads the files under ``my_folder``, embeds each one, stores the vectors
    in ``collection``, retrieves context for a hard-coded query and prints the
    generated response.
    """
    # Step 1: load the QMD/MD files as (filename, content) pairs
    documents = read_markdown_files(my_folder)

    # Step 2: one metadata record and one embedding per file, in the same order
    metadata = [{'filename': name, 'content': text} for name, text in documents]
    vectors = [generate_embedding(entry['content']) for entry in metadata]

    # Step 3: persist the vectors in ChromaDB
    store_vectors_in_chromadb(vectors, metadata, collection)

    # Step 4: retrieve context for the demo question and generate the answer
    query_text = "What is the commonly referenced measure of central tendency?"
    context = retrieve_context_from_chromadb(query_text, collection)
    response = generate_rag_response(context, query_text)

    print("Generated Response:")
    print(response)

# Run the one-shot demo when this code executes as the top-level module.
if __name__ == "__main__":
    main()

In [None]:
def main():
    """Index the markdown folder, then answer queries typed by the user.

    Reads the files under ``my_folder``, embeds each one and stores the
    vectors in ``collection``. It then prompts for queries in a loop until
    the user types ``exit`` (case-insensitive, surrounding whitespace
    ignored) or standard input is closed.

    Compared with the earlier version of this loop:
      * the query is stripped, so ``" exit "`` also terminates the loop;
      * blank input is skipped instead of being sent to the embedding model;
      * ``EOFError`` from ``input`` ends the loop instead of propagating.
    """
    # Step 1: Read and parse QMD/MD files
    markdown_files_content = read_markdown_files(my_folder)

    # Step 2: Vectorize content using Ollama
    vectors = []
    metadata = []
    for filename, content in markdown_files_content:
        vectors.append(generate_embedding(content))
        metadata.append({'filename': filename, 'content': content})

    # Step 3: Store vectors in ChromaDB
    store_vectors_in_chromadb(vectors, metadata, collection)

    # Step 4: Interactive question/answer loop
    while True:
        try:
            query_text = input("Enter your query (or 'exit' to quit): ").strip()
        except EOFError:
            # stdin was closed (e.g. piped input ran out): stop cleanly.
            print("Exiting...")
            break

        if query_text.lower() == 'exit':
            print("Exiting...")
            break

        if not query_text:
            # Nothing to embed or answer; ask again.
            continue

        # Retrieve context from ChromaDB
        context = retrieve_context_from_chromadb(query_text, collection)

        # Generate response with context
        response = generate_rag_response(context, query_text)

        # Print response
        print("\nGenerated Response:")
        print(response)

# Start the interactive session when this code executes as the top-level module.
if __name__ == "__main__":
    main()