In [9]:
import hashlib
import os
from getpass import getpass

from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec



# Folder (relative to the notebook) that holds the markdown knowledge base.
DATA_PATH = "data"

# Credentials: never paste keys into the notebook. Reuse whatever is already
# exported in the environment and prompt (without echo) only for keys that are
# missing or empty. Assigning "" here would clobber keys that were already set.
for _key_name in ("OPENAI_API_KEY", "PINECONE_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"Enter {_key_name}: ")

api_key = os.environ["PINECONE_API_KEY"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]  # passed explicitly to the chat model later
pc = Pinecone(api_key=api_key)

# No key is passed here, so the embeddings client relies on OPENAI_API_KEY
# being present in the environment (ensured above).
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


# Load the markdown files that sit directly inside DATA_PATH.
# The pattern needs the dot: "*md" would also match any file whose name merely
# ends in "md" (e.g. "cmd"), not only files with the .md extension.
loader = DirectoryLoader(DATA_PATH, glob="*.md")
documents = loader.load()

# Split the loaded documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,       # measured with length_function, i.e. in characters
    chunk_overlap=500,     # consecutive chunks may share up to half their text
    length_function=len,
    add_start_index=True,  # record each chunk's offset in its metadata
)
chunks = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

# Preview one chunk as a sanity check: the second chunk when it exists,
# otherwise the only chunk. Indexing chunks[1] unconditionally raised
# IndexError for an empty or very small corpus.
if chunks:
    document = chunks[min(1, len(chunks) - 1)]
    print(document.page_content)
    print(document.metadata)


Split 1 documents into 40 chunks.
Reentrancy Description: A reentrancy attack exploits the vulnerability in smart contracts when a function makes an external call to another contract before updating its own state. This allows the external contract, possibly malicious, to reenter the original function and repeat certain actions, like withdrawals, using the same state. Through such attacks, an attacker can possibly drain all the funds from a contract. • A simple example of a reentrancy attack is a contract that allows users to deposit funds and then withdraw those funds later. Suppose the contract does not properly check for reentrancy. In that case, an attacker could call the deposit function multiple times in a row before calling the withdraw function, effectively stealing funds from the contract. • One way to prevent reentrancy attacks is to use a mutex, or mutual exclusion, lock to prevent multiple calls to the same function from occurring at the same time. Another way is to use a gu

In [10]:
index_name = "vulnhunt-gpt"

# Create the index on the first run only; later runs reuse the existing one.
existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]
if index_name not in existing_indexes:
    # dimensions are for text-embedding-ada-002
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    print("successfully initialize index")
# Bind the handle on every run, not only when the index was just created.
index = pc.Index(index_name)

# Give every chunk a stable id derived from its source, offset and text.
# Without explicit ids each run of this cell inserts a fresh copy of every
# chunk; with them a re-run overwrites the same records instead.
chunk_ids = [
    hashlib.sha256(
        "|".join(
            (
                str(chunk.metadata.get("source")),
                str(chunk.metadata.get("start_index")),
                chunk.page_content,
            )
        ).encode("utf-8")
    ).hexdigest()
    for chunk in chunks
]

# add records/vectors into index
vectorstore_from_docs = PineconeVectorStore.from_documents(
    chunks,
    index_name=index_name,
    embedding=embeddings,
    ids=chunk_ids,
)
print("successfully add records")

successfully initialize index
successfully add records


In [1]:
# Retrieval smoke test: fetch the two stored chunks closest to a sample question
# and print each hit followed by a separator line.
# Requires the vector store built in the indexing cell to exist in this kernel.
results = vectorstore_from_docs.similarity_search("what is overflow", k=2)
for res in results:
    print(res, "\n -----------\n")



NameError: name 'vectorstore_from_docs' is not defined

In [4]:
from langchain_openai import ChatOpenAI  
from langchain.chains import RetrievalQAWithSourcesChain  


In [7]:
# Prompt for the QA chain: the question immediately followed by the Solidity
# contract to audit. Adjacent literals are concatenated by the parser, so the
# resulting string is exactly one piece with explicit "\n" line breaks.
query = (
    "what is the vulnerability in this code, and give me the remediation of the vulnerability "
    "pragma solidity ^0.4.15; \n\n"
    "contract Main { \n"
    "    uint private sellerBalance=0; \n\n"
    "    function add(uint value) returns (bool){ \n"
    "        sellerBalance += value; // possible overflow \n\n"
    "        // possible auditor assert \n"
    "        // assert(sellerBalance >= value); \n"
    "    } \n\n"
    "    function safe_add(uint value) returns (bool){ \n"
    "        require(value + sellerBalance >= sellerBalance); \n"
    "        sellerBalance += value; \n"
    "    }   \n"
    "}"
)
# Chat model that writes the final answer; temperature 0 keeps sampling
# randomness to a minimum.
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0.0,
    openai_api_key=OPENAI_API_KEY,
)

# Retrieval QA chain over the Pinecone-backed store, using the "stuff" chain type
# and the store's default retriever settings.
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore_from_docs.as_retriever(),
)

# Left as the last expression so the returned dict is shown as the cell output.
qa.invoke(query)

{'question': 'what is the vulnerability in this code, and give me the remediation of the vulnerability pragma solidity ^0.4.15; \n\ncontract Main { \n    uint private sellerBalance=0; \n\n    function add(uint value) returns (bool){ \n        sellerBalance += value; // possible overflow \n\n        // possible auditor assert \n        // assert(sellerBalance >= value); \n    } \n\n    function safe_add(uint value) returns (bool){ \n        require(value + sellerBalance >= sellerBalance); \n        sellerBalance += value; \n    }   \n}',
 'answer': 'The vulnerability in the code is a possible overflow in the `add` function. The remediation for this vulnerability is to use the `safe_add` function that includes a require statement to prevent overflow.\n',
 'sources': 'data\\vulnhunt_doc.md'}