# requirements

In [1]:
%%sh
# Install the Python packages this notebook depends on.
#
# Abort on the first failing command: without this, only the exit status of the
# last `pip install` reaches the notebook, so an earlier failed install would
# pass silently and surface later as an import error.
set -e
# Pinned to the release the recorded run of this cell resolved to; the cells
# below import from that release's module layout (e.g. langchain.vectorstores.redis).
pip install langchain==0.0.161
# NOTE(review): the remaining packages still float to their latest release.
# openai in particular may need a pin that matches the langchain release above — confirm.
pip install --upgrade openai
pip install --upgrade python-dotenv
pip install --upgrade redis
pip install --upgrade requests
pip install --upgrade tiktoken

Collecting langchain
  Downloading langchain-0.0.161-py3-none-any.whl (758 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 759.0/759.0 KB 352.2 kB/s eta 0:00:00
Collecting PyYAML>=5.4.1
  Downloading PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 682.2/682.2 KB 2.2 MB/s eta 0:00:00
Collecting SQLAlchemy<3,>=1.4
  Downloading SQLAlchemy-2.0.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.7/2.7 MB 1.2 MB/s eta 0:00:00
Collecting numexpr<3.0.0,>=2.8.4
  Downloading numexpr-2.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (381 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 381.4/381.4 KB 1.5 MB/s eta 0:00:00
Collecting pydantic<2,>=1
  Downloading pydantic-1.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 762

# environment

In [2]:
%%sh
# Make sure a container named "redis" is available: probe it with a trivial
# command and start a new redis-stack container only when the probe fails.
if ! docker exec redis uname; then
    # Detached (-d) and removed automatically on stop (--rm).
    # Port mapping: host 13333 -> container 8001, host 10001 -> container 6379.
    docker run --rm --name redis -d \
        -p 13333:8001 \
        -p 10001:6379 \
        redis/redis-stack:latest
fi

Error response from daemon: No such container: redis
Unable to find image 'redis/redis-stack:latest' locally
latest: Pulling from redis/redis-stack
ca1778b69356: Pulling fs layer
518777ca318a: Pulling fs layer
4f4fb700ef54: Pulling fs layer
f3214da40ad6: Pulling fs layer
4cf419baf0fd: Pulling fs layer
f3214da40ad6: Waiting
6f8031a5cee9: Pulling fs layer
4cf419baf0fd: Waiting
33f7730dff5d: Pulling fs layer
6f8031a5cee9: Waiting
33f7730dff5d: Waiting
ae439f3bd24e: Pulling fs layer
0051d2b6e334: Pulling fs layer
1bb975664fd4: Pulling fs layer
6b38923421ac: Pulling fs layer
6dece0c7252b: Pulling fs layer
ae439f3bd24e: Waiting
e3de16b18118: Pulling fs layer
0051d2b6e334: Waiting
6dece0c7252b: Waiting
8222ec87331e: Pulling fs layer
8f6dbd1e47fa: Pulling fs layer
24fdcd115c41: Pulling fs layer
8222ec87331e: Waiting
1bb975664fd4: Waiting
e3de16b18118: Waiting
6b38923421ac: Waiting
8f6dbd1e47fa: Waiting
24fdcd115c41: Waiting
4f4fb700ef54: Verifying Checksum
4f4fb700ef54: Download complete
f3214

4e0cdde306c6f021fe29dd1e1ce8e8dd3a2e879d886ce56f495163b50ec46ace


# imports

In [3]:
from dotenv import load_dotenv
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import MarkdownTextSplitter
from langchain.vectorstores.redis import Redis
import os


# vectorization

In [4]:
# Build the "demo" vector index from the files under ./content.
load_dotenv()  # read variables from a local .env file into the process environment
# NOTE(review): presumably .env supplies the OpenAI key and the Redis URL, since
# neither is passed explicitly to the calls below — confirm.
embeddings = OpenAIEmbeddings()

index_name = "demo"
# Drop the index together with its stored documents before rebuilding it.
Redis.drop_index(index_name=index_name, delete_documents=True)

text_splitter = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=100)

# Walk ./content recursively and load every Markdown, HTML and JSON file.
content_suffixes = (".md", ".html", ".json")
docs = []
for root, _, files in os.walk("./content"):
    for file in files:
        if not file.endswith(content_suffixes):
            continue
        filepath = os.path.join(root, file)
        print(filepath)
        # Decode explicitly as UTF-8 instead of relying on the platform default
        # encoding; the tree has a zh/ subtree that presumably holds non-ASCII text.
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read()
        # Record the originating path so search hits can be traced back to a file.
        doc = Document(page_content=text, metadata={'source': filepath})
        docs.extend(text_splitter.split_documents([doc]))

# Embed all chunks and store them in Redis under `index_name`.
rds = Redis.from_documents(docs, embeddings, index_name=index_name)


./content/troubleshooting.md
./content/home.md
./content/user-pages.md
./content/apps.html
./content/guides.md
./content/get-started.md
./content/contribute.md
./content/contribute/alpha.md
./content/contribute/wiki-editing-guidelines.md
./content/contribute/development.md
./content/troubleshooting/appstore-does-not-load.md
./content/guides/running-casaos-on-windows-with-wsl2.md
./content/guides/move-docker-images-and-volumes-to-a-diffferent-storage.md
./content/user-pages/tigerinus.md
./content/zh/troubleshooting.md
./content/zh/apps.md
./content/zh/home.md
./content/zh/user-pages.md
./content/zh/guides.md
./content/zh/get-started.md
./content/zh/contribute.md
./content/zh/contribute/development.md
./content/apps/jellyfin.html
./content/appfile/jellyfin.json


# recommendation

In [6]:
# Run a free-text similarity search against the vector store and show which
# source file each returned chunk came from.
query = "move docker image to another host"
hits = rds.similarity_search(query)
for hit in hits:
    print(hit.metadata)
    # print(hit.page_content)  # uncomment to inspect the matched chunk text

{'source': './content/guides/move-docker-images-and-volumes-to-a-diffferent-storage.md'}
{'source': './content/guides/move-docker-images-and-volumes-to-a-diffferent-storage.md'}
{'source': './content/guides/move-docker-images-and-volumes-to-a-diffferent-storage.md'}
{'source': './content/guides/move-docker-images-and-volumes-to-a-diffferent-storage.md'}


# QA bot

In [7]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Wire the Redis-backed retriever and an OpenAI LLM into a retrieval QA chain.
llm = OpenAI()
retriever = rds.as_retriever()

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    # Also return the retrieved documents so the answer can be traced to a source.
    return_source_documents=True,
)

In [9]:
# Ask the QA chain one question, then print the answer followed by the
# metadata of the first source document it returned.
question = "I want to contribute to CasaOS as a developer. How should I get started?"
result = qa({"query": question})

print(result["result"])
print()
first_source = result["source_documents"][0]
print(first_source.metadata)

 You should read the prerequisites for coding on the 'Development' page before submitting your contribution. You should be familiar with Golang and shell scripting for backend development, or Vue.js for frontend development. You should also be familiar with Git and the whole pull request (PR) process on GitHub.

{'source': './content/contribute.md'}
