# requirements

## Pinecone

Create a Pinecone index named `langchain-demo` with a dimension of `1536` if you are using OpenAI embeddings (the index dimension must match the size of the embedding vectors).

See https://platform.openai.com/docs/guides/embeddings/second-generation-models 

In [3]:
%%sh
# Install all dependencies in one pip invocation so the resolver sees every
# requirement at once instead of upgrading packages one at a time.
#
# The upper bounds keep the install on the library generation this notebook
# is written against:
#   - pinecone-client 3.x removed the module-level pinecone.init() used below
#   - openai 1.x replaced the 0.x client interface
#   - langchain 0.1+ deprecates the langchain.vectorstores / langchain.llms
#     import paths used below
pip install --upgrade \
    ipywidgets \
    "langchain<0.1" \
    "openai<1" \
    "pinecone-client<3" \
    python-dotenv \
    requests \
    tiktoken


Collecting ipywidgets
  Using cached ipywidgets-8.0.6-py3-none-any.whl (138 kB)
  Using cached ipywidgets-8.0.5-py3-none-any.whl (138 kB)
Collecting langchain
  Using cached langchain-0.0.188-py3-none-any.whl (969 kB)
  Using cached langchain-0.0.187-py3-none-any.whl (960 kB)
Collecting openai
  Using cached openai-0.27.7-py3-none-any.whl (71 kB)
  Using cached openai-0.27.6-py3-none-any.whl (71 kB)
Collecting pinecone-client
  Using cached pinecone_client-2.2.1-py3-none-any.whl (177 kB)
  Using cached pinecone_client-2.1.0-py3-none-any.whl (170 kB)
Collecting python-dotenv
  Using cached python_dotenv-1.0.0-py3-none-any.whl (19 kB)
  Using cached python_dotenv-0.21.1-py3-none-any.whl (19 kB)
Collecting requests
  Using cached requests-2.31.0-py3-none-any.whl (62 kB)
  Using cached requests-2.30.0-py3-none-any.whl (62 kB)
Collecting tiktoken
  Using cached tiktoken-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
  Using cached tiktoken-0.3.3-cp39-cp39-manylinux_

# imports

In [1]:
from dotenv import load_dotenv
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import MarkdownTextSplitter
from langchain.vectorstores import Pinecone
import os
import pinecone


  from tqdm.autonotebook import tqdm


# vectorization

In [2]:
# Load environment variables via python-dotenv before anything reads os.environ.
load_dotenv()

# Connect the Pinecone client; a missing variable fails fast with a KeyError.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ["PINECONE_ENV"],
)

# Name of the Pinecone index the document chunks are written to.
index_name = "langchain-demo"

# Embedding model used to vectorize the document chunks.
embeddings = OpenAIEmbeddings()

# Splitter that breaks each Markdown document into overlapping chunks.
text_splitter = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=100)
# Walk ./content recursively, load every plain-text file (.txt / .md) and
# split it into chunks; each chunk keeps the path of its file as 'source'.
docs = []
for root, _, files in os.walk("./content"):
    for file in files:
        if not file.endswith((".txt", ".md")):
            continue
        filepath = os.path.join(root, file)
        print('.', end='')  # one progress dot per file loaded
        # Read as UTF-8 explicitly: the platform default encoding differs
        # between systems and can fail or mis-decode non-ASCII Markdown.
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read()
        # The file is already closed here; splitting does not need the handle.
        doc = Document(page_content=text, metadata={'source': filepath})
        docs.extend(text_splitter.split_documents([doc]))

# Embed every chunk and upload it to the Pinecone index named by index_name.
docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)


.....................

# QA bot

In [9]:
from IPython.display import display
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
import ipywidgets as widgets

# Question-answering chain: documents come from the Pinecone-backed retriever
# and are passed to the OpenAI LLM using the "stuff" chain type.
# return_source_documents=True makes the chain result include a
# "source_documents" entry alongside "result".
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    retriever=docsearch.as_retriever(),
    chain_type="stuff",
    return_source_documents=True,
)

def demo(query):
    """Answer the question carried by a widget change event and print the reply.

    Registered as an ``observe`` callback on the text widget's ``value``
    trait, so it receives a change notification rather than a plain string.

    Args:
        query: Change notification mapping; only its ``'new'`` entry (the
            text just entered) is read.

    Prints the answer followed by the metadata of the first source document,
    or an apology when the chain returns an empty answer.
    """
    question = query['new']

    # Clearing the text box also counts as a value change; do not send a
    # blank prompt to the LLM.
    if not question or not question.strip():
        return

    result = qa({"query": question})

    if result["result"]:
        print(result["result"], end=' ')
        sources = result["source_documents"]
        if sources:
            print(sources[0].metadata)
        else:
            # No document was retrieved: finish the line instead of raising
            # IndexError on sources[0].
            print()
    else:
        print("I'm sorry I don't have any idea about this ask. Try a different question?")


# Full-width, single-line text box in which the user types a question.
input_text = widgets.Text(
    placeholder='What do you want to know?',
    layout=widgets.Layout(width='100%'),
    continuous_update=False,
)

# Render the text box below this cell.
display(input_text)

# Call demo() whenever the widget's `value` trait changes.
input_text.observe(demo, names='value')

Text(value='', continuous_update=False, layout=Layout(width='100%'), placeholder='What do you want to know?')

 You can install apps on CasaOS using the command line or through the CasaOS interface. {'source': './content/guides.md'}
