# CasaOS Spotlight Indexer

[test](http://localhost:8888)

In [4]:
# imports

from dotenv import load_dotenv
from IPython.display import display
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import MarkdownTextSplitter
from langchain.vectorstores import Pinecone
import ipywidgets as widgets
import os
import pinecone

# Load variables from a .env file into the environment before they are read
# below.
load_dotenv()

# Both variables are looked up with os.environ[...], so a missing one raises
# KeyError right here rather than failing later.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ["PINECONE_ENV"],
)

# Embedding object handed to the Pinecone vector store inside index().
embeddings = OpenAIEmbeddings()

# Output widget that index() prints its per-file progress into.
output_text = widgets.Output()

def index(index_name="langchain-demo", content_dir="./content"):
    """Rebuild a Pinecone index from the text files under *content_dir*.

    Walks *content_dir* recursively, reads every ``.txt`` and ``.md`` file,
    splits it with ``MarkdownTextSplitter(chunk_size=1000, chunk_overlap=100)``
    and tags each chunk with ``{"source": <file path>}``. Once all files have
    been read, every vector in the index is deleted and the new chunks are
    uploaded. Progress and the final summary are printed into the module-level
    ``output_text`` widget.

    Args:
        index_name: Name of the existing Pinecone index to repopulate.
        content_dir: Root directory searched for documents.

    Raises:
        OSError: If a matching file cannot be opened or read.
        UnicodeDecodeError: If a matching file is not valid UTF-8.
    """
    text_splitter = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=100)

    # Read and split everything *before* touching the remote index, so that a
    # failure while reading a file does not leave the index wiped and empty.
    docs = []
    count = 0
    for root, _, files in os.walk(content_dir):
        for filename in files:
            # Every visited file is listed, including ones that are skipped.
            with output_text:
                print(f"{root}/{filename}")

            if not filename.endswith((".txt", ".md")):
                continue

            filepath = os.path.join(root, filename)
            # Explicit encoding: the default depends on the platform locale.
            with open(filepath, "r", encoding="utf-8") as f:
                text = f.read()

            doc = Document(page_content=text, metadata={"source": filepath})
            docs.extend(text_splitter.split_documents([doc]))
            count += 1

    # Named pinecone_index so it does not shadow this function's own name.
    pinecone_index = pinecone.Index(index_name=index_name)
    pinecone_index.delete(delete_all=True)

    # Nothing to upload when no chunks were produced; skip the vector-store
    # round-trip instead of calling add_documents with an empty list.
    if docs:
        docsearch = Pinecone.from_existing_index(
            index_name=index_name,
            embedding=embeddings,
        )
        docsearch.add_documents(docs)

    # Route the summary through the same widget as the per-file progress so it
    # is shown next to the button when triggered from the click callback.
    with output_text:
        print(f"{count} documents indexed")

def _run_index(_button):
    """Click handler: ignore the argument passed by on_click and call index()."""
    index()


button = widgets.Button(description="Index Documents", icon="check")
button.on_click(_run_index)

# Show the button followed by the output area that index() prints into.
display(button, output_text)

Button(description='Index Documents', icon='check', style=ButtonStyle())

Output()