In [21]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Document
from typing import List
import re
from pathlib import Path
from llama_index.llms.ollama import Ollama
import os
import qdrant_client
from llama_index.core import (
    VectorStoreIndex,
    StorageContext
)
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import Settings

from llama_index.readers.obsidian import ObsidianReader
from llama_index.core.readers.base import BaseReader
from llama_index.readers.file.markdown import MarkdownReader

from llama_index.embeddings.huggingface import HuggingFaceEmbedding



In [22]:


class ObsidianProcessor:
    def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50):
        self.node_parser = SentenceSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap
        )

    def clean_text(self, text: str) -> str:
        """Clean Obsidian-specific markdown and formatting"""
        # Remove Obsidian internal links [[...]]
        text = re.sub(r'\[\[([^\]]+)\]\]', r'\1', text)
        # Remove URLs
        text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
        # Remove empty lines
        text = '\n'.join(line for line in text.split('\n') if line.strip())
        return text

    def process_documents(self, documents: List[Document]) -> List[Document]:
        """Process and chunk documents"""
        cleaned_docs = []
        for doc in documents:
            if doc.text.strip():  # Skip empty documents
                cleaned_text = self.clean_text(doc.text)
                if cleaned_text:
                    doc.text = cleaned_text
                    cleaned_docs.append(doc)

        # Chunk documents
        nodes = self.node_parser.get_nodes_from_documents(cleaned_docs)
        return nodes

In [23]:
from llama_index.core import ServiceContext
from datetime import datetime

def create_enhanced_index(documents: List[Document], embed_model, llm):
    # Create processor and process documents
    processor = ObsidianProcessor(chunk_size=512, chunk_overlap=50)
    processed_docs = processor.process_documents(documents)

    # Create service context
    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        chunk_size=512,
        chunk_overlap=50
    )

    # Create index with metadata
    index = VectorStoreIndex.from_documents(
        processed_docs,
        service_context=service_context,
        show_progress=True
    )

    return index

In [24]:
from llama_index.core.chat_engine import CondenseQuestionChatEngine
from llama_index.core.prompts import PromptTemplate

class PersonalObsidianChat:
    def __init__(self, index, llm):
        self.index = index
        self.llm = llm

        # Custom prompt for context retrieval
        self.custom_prompt = PromptTemplate(
            "You are a helpful AI assistant with access to my personal notes. "
            "Based on the context provided, please give a thoughtful and accurate response. "
            "If you're not sure about something, please say so.\n\n"
            "Context: {context}\n"
            "Question: {query}\n\n"
            "Response: "
        )

        # Create simple chat engine without unsupported parameters
        self.chat_engine = self.index.as_chat_engine(
            verbose=True
        )

    def chat(self, query: str) -> str:
        try:
            response = self.chat_engine.chat(query)
            return response.response
        except Exception as e:
            return f"Error processing query: {str(e)}"

    def search_notes(self, query: str, top_k: int = 3):
        """Search through notes and return most relevant passages"""
        retriever = self.index.as_retriever(similarity_top_k=top_k)
        nodes = retriever.retrieve(query)
        return nodes

In [25]:
class MyObsidianReader(BaseReader):
    """Utilities for loading data from an Obsidian Vault.

    Args:
        input_dir (str): Path to the vault.

    """

    def __init__(self, input_dir: str):
        """Init params."""
        self.input_dir = Path(input_dir)

    def my_load_data(self, *args, **load_kwargs):
        """Load data from the input directory."""
        docs = []
        for dirpath, dirnames, filenames in os.walk(self.input_dir):
            # Exclude 'image_media' from directory traversal
            if "Images_Media" in dirnames:
                dirnames.remove("Images_Media")
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
            for filename in filenames:
                if filename.endswith(".md"):
                    filepath = os.path.join(dirpath, filename)
                    content = MarkdownReader().load_data(Path(filepath))
                    docs.extend(content)
        return docs

    def load_langchain_documents(self, **load_kwargs):
        """Load data in LangChain document format."""
        docs = self.load_data(**load_kwargs)
        return [d.to_langchain_format() for d in docs]

In [26]:
# Initialize components
llm = Ollama(model="Tinydolphin", request_timeout=120.0)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Set up settings
Settings.llm = llm
Settings.embed_model = embed_model

# Create index
reader = MyObsidianReader(input_dir="/path/to/obsidian/vault")
documents = reader.my_load_data()
index = create_enhanced_index(documents, embed_model, llm)

# Create chat interface
obsidian_chat = PersonalObsidianChat(index, llm)

# Example usage
query = "What are my notes about social perception?"
response = obsidian_chat.chat(query)
print(response)

# Search specific notes
relevant_notes = obsidian_chat.search_notes("Nice Guys")
for note in relevant_notes:
    print(f"Relevance: {note.score:.2f}")
    print(note.text[:200] + "...\n")

  service_context = ServiceContext.from_defaults(


Parsing nodes: 0it [00:00, ?it/s]

[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Answer:  Sure, I can help with that. Can you please provide me with your username or any relevant information to identify you in a conversation?
[0m Sure, I can help with that. Can you please provide me with your username or any relevant information to identify you in a conversation?
