<a href="https://colab.research.google.com/github/sugarforever/WTFAcademyChatBot/blob/main/WTFAcademyChatBotChroma.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install openai
%pip install chromadb
%pip install langchain
%pip install unstructured

In [None]:
!git clone https://github.com/AmazingAng/WTF-Solidity.git

In [None]:
!ls -alt WTF-Solidity

In [None]:
from langchain.document_loaders import DirectoryLoader

In [None]:
def load_all_courses(solidity_root):
  """Load the course readme files found under ``solidity_root``.

  Builds a ``DirectoryLoader`` rooted at ``solidity_root`` with the glob
  ``**/readme.md`` and returns whatever its ``load()`` call produces.

  Args:
    solidity_root: Path of the directory handed to ``DirectoryLoader``.

  Returns:
    The documents returned by ``DirectoryLoader.load()``.
  """
  return DirectoryLoader(solidity_root, glob="**/readme.md").load()

In [None]:
# Load the course readmes from the repository cloned into the working directory.
docs = load_all_courses(solidity_root="./WTF-Solidity/")

In [None]:
# Report how many documents were loaded, then the character count of the first one.
doc_count = len(docs)
print(f'You have {doc_count} document(s) in your data')
first_doc_text = docs[0].page_content
print(f'There are {len(first_doc_text)} characters in your document')

You have 149 document(s) in your data
There are 3335 characters in your document


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into smaller pieces before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
split_docs = text_splitter.split_documents(docs)

In [None]:
# Show how many chunks the splitter produced.
chunk_count = len(split_docs)
print(f'Now you have {chunk_count} documents')

Now you have 729 documents


In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import os

# Prefer the key from the environment. When it is not set (for example on a fresh
# Colab runtime), ask for it with a non-echoing prompt instead of failing with a
# bare KeyError; the key is never written into the notebook.
from getpass import getpass

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [None]:
# Embedding client shared by the vector-store cells below.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
# Directory in which the Chroma collection is persisted.
persist_directory = "chroma_storage"

In [None]:
import os

# Embedding every chunk costs API calls, and Chroma.from_documents adds the
# documents to whatever collection is already persisted, so rebuilding on every
# run would duplicate the stored chunks. Reuse the persisted store when it exists;
# delete the `persist_directory` folder to force a rebuild.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory=persist_directory)
    vectorstore.persist()

Using embedded DuckDB with persistence: data will be stored in: chroma_storage


In [None]:
query = "如何利用Solidity实现插入排序？"

# Reopen the persisted collection from disk and search it for the query.
vectordb = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)
# NOTE: this rebinds `docs`, which until now held the loaded course documents.
docs = vectordb.similarity_search(query)

Using embedded DuckDB with persistence: data will be stored in: chroma_storage


In [None]:
# Number of documents returned by the similarity search.
hit_count = len(docs)
print(hit_count)

4


In [None]:
# Inspect the first search result.
first_hit = docs[0]
print(first_hit)

page_content='这一讲，我们介绍了solidity中控制流，并且用solidity写了插入排序。看起来很简单，但实际很难。这就是solidity，坑很多，每个月都有项目因为这些小bug损失几千万甚至上亿美元。掌握好基础，不断练习，才能写出更好的solidity代码。' metadata={'source': 'WTF-Solidity/10_InsertionSort/readme.md'}


In [None]:
import chromadb
from chromadb.config import Settings
# Open the same persisted storage directly through the chromadb client and
# list the collections it contains.
chroma_settings = Settings(
    persist_directory=persist_directory,
    chroma_db_impl="duckdb+parquet",
)
client = chromadb.Client(chroma_settings)

client.list_collections()

Using embedded DuckDB with persistence: data will be stored in: chroma_storage
No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction


[Collection(name=langchain)]

In [None]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [None]:
# temperature=0 is passed to the LLM wrapper; the QA chain is built on top of it.
llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")

query = "如何利用Solidity实现插入排序？"
# Retrieve the 3 closest chunks. `k` is passed by keyword for clarity, and the
# `include_metadata` argument is dropped: it is not a parameter of Chroma's
# similarity_search, and the returned documents already carry their metadata.
docs = vectorstore.similarity_search(query, k=3)

In [None]:
# Show how many chunks were retrieved, then the first of them.
retrieved_count = len(docs)
print(retrieved_count)
top_hit = docs[0]
print(top_hit)

3
page_content='这一讲，我们介绍了solidity中控制流，并且用solidity写了插入排序。看起来很简单，但实际很难。这就是solidity，坑很多，每个月都有项目因为这些小bug损失几千万甚至上亿美元。掌握好基础，不断练习，才能写出更好的solidity代码。' metadata={'source': 'WTF-Solidity/10_InsertionSort/readme.md'}


In [None]:
# Run the QA chain on the retrieved chunks; the answer is displayed as the cell's value.
chain.run(
    question=query,
    input_documents=docs,
)

' 可以使用以下Solidity代码实现插入排序：\n\nfunction insertionSort(uint[] memory a) public pure returns(uint[] memory) {\n    // note that uint can not take negative value\n    for (uint i = 1;i < a.length;i++){\n        uint temp = a[i];\n        uint j=i;\n        while( (j >= 1) && (temp < a[j-1])){\n            a[j] = a[j-1];\n            j--;\n        }\n        a[j] = temp;\n    }\n    return(a);\n}'