In [2]:
# imports

from dotenv import load_dotenv
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import MarkdownTextSplitter
from langchain.vectorstores import Pinecone
import os
import pinecone


  from tqdm.autonotebook import tqdm


In [3]:
# vectorization

# Load environment variables before anything below reads os.environ.
# NOTE(review): presumably this picks up a local .env file holding the
# Pinecone (and OpenAI) credentials — confirm where the file is expected.
load_dotenv()

# Both variables are mandatory: a missing one raises KeyError right here.
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"])

# Embedding model later handed to Pinecone.from_documents.
embeddings = OpenAIEmbeddings()

# Name of the Pinecone index shared by the wipe below and the upload further down.
index_name = "langchain-demo"

# WARNING: destructive. Every run of this cell asks the index to delete all of
# its contents, so the index is rebuilt from ./content on each execution.
index = pinecone.Index(index_name=index_name)
index.delete(delete_all=True)

# Splitter used to cut each file into chunks (chunk_size=1000, chunk_overlap=100).
text_splitter = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=100)

# Walk ./content recursively and load every plain-text (.txt / .md) file.
docs = []   # chunked Documents that will be embedded and uploaded
count = 0   # number of source files read (files, not chunks)
for root, dirs, files in os.walk("./content"):
    # Sorting in place makes os.walk descend in a stable order, so repeated
    # runs produce the chunks in the same sequence.
    dirs.sort()
    for filename in sorted(files):
        if not filename.endswith((".txt", ".md")):
            continue
        filepath = os.path.join(root, filename)
        # Decode explicitly as UTF-8 rather than with the platform default,
        # which differs between operating systems and can garble Markdown.
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read()
        # The handle is already closed here; splitting does not need the file.
        count += 1
        doc = Document(page_content=text, metadata={'source': filepath})
        docs.extend(text_splitter.split_documents([doc]))

# Embed all chunks and upload them to the index; the returned store is what
# the question-answering cell builds its retriever from.
docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)

# Reports the number of files read, not the number of chunks uploaded.
print(f"{count} documents indexed")

.....................

# CasaOS Spotlight

In [9]:
from IPython.display import display
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
import ipywidgets as widgets

# Build the pieces first, then assemble the chain from them.
answer_llm = OpenAI()
doc_retriever = docsearch.as_retriever()

# "stuff" chain over the retriever; return_source_documents=True so the
# result also carries the retrieved documents under "source_documents".
qa = RetrievalQA.from_chain_type(
    llm=answer_llm,
    chain_type="stuff",
    retriever=doc_retriever,
    return_source_documents=True,
)

# Area used for the transient "thinking..." indicator.
status_text = widgets.Output()

# Area the answers are printed into.
# NOTE(review): confirm that widgets.Output actually honours these style keys.
answer_style = {'description_width': 'initial', 'font_size': '20px'}
output_text = widgets.Output(style=answer_style)

def demo(query):
    """Answer the question typed into the text box and print it to the output area.

    Registered as an observer on ``input_text``'s ``value`` trait, so ``query``
    is the change dictionary delivered by the widget; the newly entered text is
    read from ``query['new']``.

    Behaviour:
      * Blank input (e.g. the box was cleared) is ignored without calling the chain.
      * "thinking..." is shown in ``status_text`` while the chain runs and is
        always removed afterwards, even if the chain raises.
      * A non-empty answer is printed followed by the metadata of the first
        source document, when the chain returned any; otherwise an apology
        message is printed.

    Args:
        query: change dictionary with the new text under the ``'new'`` key.

    Raises:
        Whatever ``qa`` raises; the exception propagates after the status
        indicator has been cleared.
    """
    question = query['new']

    # Clearing the box also fires the observer; don't spend an LLM call on it.
    if not question or not question.strip():
        return

    with status_text:
        print("thinking...")

    try:
        result = qa({"query": question})

        with output_text:
            if result["result"]:
                print(result["result"], end=' ')
                # The retriever may return nothing, so guard before indexing [0].
                sources = result.get("source_documents") or []
                if sources:
                    print(sources[0].metadata)
                else:
                    # Still terminate the answer line started with end=' '.
                    print()
            else:
                print("I'm sorry I don't have any idea about this ask. Try a different question?")
    finally:
        # Runs on success and on failure so the indicator never gets stuck.
        status_text.clear_output(wait=False)


# Question box taking 62% of the available width. continuous_update is off,
# so the value is not meant to change on every keystroke.
question_layout = widgets.Layout(width='62%')
input_text = widgets.Text(
    placeholder='What do you want to know?',
    continuous_update=False,
    layout=question_layout,
    style={'description_width': 'initial', 'font_size': '24px'},
)

# Render the three widgets first, then start listening: every change of the
# box's value invokes demo with the change dictionary.
display(input_text, status_text, output_text)
input_text.observe(demo, names='value')

Text(value='', continuous_update=False, layout=Layout(width='62%'), placeholder='What do you want to know?', s…

Output()

Output()