This document is a live developer environment for creating and testing the backend functionality before porting it over to a FastAPI framework.

# Setup

Handle imports.

In [1]:
import os, re
from llama_index.llms import openai
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    SimpleDirectoryReader,
    Document,
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
    Settings
)

  from .autonotebook import tqdm as notebook_tqdm


Setup directories.

In [3]:
DATA_DIR = './data'
INDEX_DIR = './storage'
for dir in [DATA_DIR, INDEX_DIR]:
  if not os.path.exists(dir):
    os.mkdir(dir)

# Model selection

Can add more models from: https://huggingface.co/spaces/mteb/leaderboard

In [4]:
models = [ # RAG LLMs are preferred
    'BAAI/bge-small-en',
    'BAAI/bge-small-en-v1.5',
    'BAAI/bge-base-en-v1.5',
    'multi-qa-MiniLM-L6-cos-v1',
    'openai'
]
print('List of supported models:')
for m in range(0, len(models)):
   print(f'type {m} for {models[m]}')
try:
    model_choice = input('Please type your model choice: ')
except ValueError:
    print("Must be an integer number!")
model = models[int(model_choice)]

if model == 'openai':
    os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')
else:
    Settings.embed_model = HuggingFaceEmbedding(
        model_name=model
    )

List of supported models:
type 0 for BAAI/bge-small-en
type 1 for BAAI/bge-small-en-v1.5
type 2 for BAAI/bge-base-en-v1.5
type 3 for multi-qa-MiniLM-L6-cos-v1
type 4 for openai


# Read documents

Read and index documents present into knowledge base. Create a document with text and metadata.

In [5]:
documents = []
reader = SimpleDirectoryReader(
    input_dir=DATA_DIR,
    recursive=True # catch subdirectories
)

<llama_index.core.readers.file.base.SimpleDirectoryReader at 0x16d3f4d10>

In [7]:
reader_data = reader.iter_data()
for docs in reader_data:
   #print((docs))

   for doc in docs:
      documents.append(Document(text=doc.text, metadata={"source": doc.metadata["file_name"]}))
documents

Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 29 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wron

[Document(id_='5672e895-c655-412d-ae12-40e6c60494d9', embedding=None, metadata={'source': 'A Letter to DeFi.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='HomeAboutSign inSubscribeA Letter to DeFi\nNext →\nSign upSibylline Labs © 2024. Powered by Ghost\nDeFiiscompletelyunusableTradFiiscompletelybrokenayyImao\nThe future of DeFi will look nothing like itcurrently does if it is to ever achieve either itsmission or absolute full potential. The sector has anincredible amount of upside but is finding itselfhampered by a lack of focus, arrogance, anduncertainty.The BackdropDecentralised finance, shortened to simply DeFi, isthe fundamental notion that access to financialservices and security should be a universal right,and that we can leverage decentralised technologiesto be able to realise this idea.Blockchain and crypto technology can be used to bringus products that allow us to swap currencies andother tokens, safely store digital value, trad

In [8]:
documents[0].doc_id

'5672e895-c655-412d-ae12-40e6c60494d9'

Clean documents and create Document objects with metadata.

In [9]:
def cleanText(text):
   cleaned_text = re.sub(r'\s+', ' ', text)
   return cleaned_text
cleaned_documents = [
    Document(text=cleanText(doc.text), metadata=doc.metadata) 
    for doc in documents
]
cleaned_documents

[Document(id_='787080fe-b560-447f-930e-0b259f438d1c', embedding=None, metadata={'source': 'A Letter to DeFi.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='HomeAboutSign inSubscribeA Letter to DeFi Next → Sign upSibylline Labs © 2024. Powered by Ghost DeFiiscompletelyunusableTradFiiscompletelybrokenayyImao The future of DeFi will look nothing like itcurrently does if it is to ever achieve either itsmission or absolute full potential. The sector has anincredible amount of upside but is finding itselfhampered by a lack of focus, arrogance, anduncertainty.The BackdropDecentralised finance, shortened to simply DeFi, isthe fundamental notion that access to financialservices and security should be a universal right,and that we can leverage decentralised technologiesto be able to realise this idea.Blockchain and crypto technology can be used to bringus products that allow us to swap currencies andother tokens, safely store digital value, trade an

# Index documents

Create a combined document object from the cleaned documents. Store the results in a vector index.

In [10]:
service_context = ServiceContext.from_defaults(
    embed_model="local:BAAI/bge-small-en-v1.5"
)
try:
    index = VectorStoreIndex.from_documents(cleaned_documents, service_context=service_context)
except NameError:
   index = load_index_from_storage()
index.storage_context.persist()

  service_context = ServiceContext.from_defaults(


# Query loop

In [15]:
chat_engine = index.as_chat_engine()
user_input = ""
print("Type 'exit' to quit")
while True:
    user_input = input("\nEnter your query: ")
    if user_input != "exit":
        response = chat_engine.query(user_input)
        print("\n" + response.response)  # This prints the main response text
        
        sources = set()
        for source in response.source_nodes:
            sources.add(source.node.metadata["source"])
        sources = list(sources) # make subscriptable

        if len(sources) > 0:
            print("\nSources: ")
            for s in range(0,len(sources)):
                print(f'[{s+1}]: {sources[s]}')
    else:
        print("Goodbye!")
        break

Type 'exit' to quit

Dan Miles writes about various aspects related to the DeFi (Decentralized Finance) sector, including the involvement of different players such as Ultimate, Meld, and Privy. He also discusses the interest of traditional financial institutions in exploring the DeFi space and the potential for a financial revolution through DeFi. Miles emphasizes the need for sensible conversations and regulatory considerations for DeFi to mature and reach its full potential.

Sources: 
[1]: A Letter to DeFi.pdf
Goodbye!


Limitations:
- This chatbot is not good for general knowledge: specific questions must be asked, or the sources returned can be unreliable.