In [1]:
# !pip install --upgrade langchain "deeplake[enterprise]" openai tiktoken sentence-transformers huggingface_hub transformers

In [2]:
git_pj_name = "nautilus_trader"
subname = 'all-MiniLM-L12-v2_splitted'
root_dir = f"./{git_pj_name}"
username = "intuitionwith"  # replace with your username from app.activeloop.ai

In [3]:
import os
import getpass

os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
activeloop_token = getpass.getpass("Activeloop Token:")
os.environ["ACTIVELOOP_TOKEN"] = activeloop_token

In [4]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import DeepLake

#embeddings = OpenAIEmbeddings(disallowed_special=())

embeddings_model_name = "sentence-transformers/all-MiniLM-L12-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name, model_kwargs=model_kwargs)

### Vectorization

In [5]:
import os
from langchain.document_loaders import TextLoader

def get_file_extension(filename):
    # Split the filename and extension using os.path.splitext()
    _, file_extension = os.path.splitext(filename)
    # Remove the leading dot (.) from the extension
    return file_extension[1:]

etc = []
python = []
rs = []

ExtPython = ['py', 'pyx', 'pxd']
ExtRust = ['rs']

for dirpath, dirnames, filenames in os.walk(root_dir):
    if '.git' in dirpath or '.git' in dirnames:
        continue
    
    for file in filenames:
        try:
            loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
            ext: str = get_file_extension(file)
            loaded = loader.load_and_split()

            if ext in ExtPython:
                python.extend(loaded)
            elif ext in ExtRust:
                rs.extend(loaded)
            else:
                etc.extend(loaded)
        except Exception as e:
            print(f'Exception occured in {f"{dirpath}/{file}"}: {e}')

Exception occured in ./nautilus_trader\docs\_images/architecture-overview.png: Error loading ./nautilus_trader\docs\_images\architecture-overview.png
Exception occured in ./nautilus_trader\docs\_images/cython-logo.png: Error loading ./nautilus_trader\docs\_images\cython-logo.png
Exception occured in ./nautilus_trader\docs\_images/favicon-32x32.png: Error loading ./nautilus_trader\docs\_images\favicon-32x32.png
Exception occured in ./nautilus_trader\docs\_images/ferris.png: Error loading ./nautilus_trader\docs\_images\ferris.png
Exception occured in ./nautilus_trader\docs\_images/nautilus-art.png: Error loading ./nautilus_trader\docs\_images\nautilus-art.png
Exception occured in ./nautilus_trader\docs\_images/nautilus-trader-logo.png: Error loading ./nautilus_trader\docs\_images\nautilus-trader-logo.png
Exception occured in ./nautilus_trader\docs\_images/ns-logo.png: Error loading ./nautilus_trader\docs\_images\ns-logo.png
Exception occured in ./nautilus_trader\docs\_images/nt-white-lar

In [6]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, Language

python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=1000, chunk_overlap=0
)

rust_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.RUST, chunk_size=1000, chunk_overlap=0
)

text_splitter = CharacterTextSplitter(chunk_size=950, chunk_overlap=50)
texts = python_splitter.split_documents(python)
texts.extend(rust_splitter.split_documents(rs))
texts.extend(text_splitter.split_documents(etc))

Created a chunk of size 1122, which is longer than the specified 950
Created a chunk of size 1771, which is longer than the specified 950
Created a chunk of size 1525, which is longer than the specified 950
Created a chunk of size 1126, which is longer than the specified 950
Created a chunk of size 990, which is longer than the specified 950
Created a chunk of size 1162, which is longer than the specified 950
Created a chunk of size 1525, which is longer than the specified 950
Created a chunk of size 1541, which is longer than the specified 950
Created a chunk of size 1884, which is longer than the specified 950
Created a chunk of size 962, which is longer than the specified 950
Created a chunk of size 1124, which is longer than the specified 950


In [7]:
texts

[Document(page_content='# Configuration file for the Sphinx documentation builder.\n#\n# This file only contains a selection of the most common options. For a full\n# list see the documentation:\n# https://www.sphinx-doc.org/en/master/usage/configuration.html\n\n# -- Path setup --------------------------------------------------------------\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\n#\n\nimport os\nimport sys\n\nimport nautilus_trader\n\n\nsys.path.insert(0, os.path.abspath(".."))\nsys.path.append(os.path.abspath("./_pygments"))\n\n# -- Project information -----------------------------------------------------\nproject = "NautilusTrader"\nauthor = "Nautech Systems Pty Ltd."\ncopyright = "2015-2023 Nautech Systems Pty Ltd"\nversion = nautilus_trader.__version__', metadata={'source': './nauti

In [8]:
# username = "intuitionwith"  # replace with your username from app.activeloop.ai
# db = DeepLake(
#     dataset_path=f"hub://{username}/twitter-algorithm",
#     embedding_function=embeddings,
# )
# db.add_documents(texts)

db = DeepLake(
    dataset_path=f"hub://{username}/{git_pj_name+'_'+subname}",
    embedding=embeddings,
    runtime={"tensor_db": True}
)
db.add_documents(texts)

Using embedding function is deprecated and will be removed in the future. Please use embedding instead.


Your Deep Lake dataset has been successfully created!


 

Batch upload: 36395 samples are being uploaded in 37 batches of batch size 1000


Evaluating ingest: 100%|██████████| 37/37 [10:41<00:00
\

Dataset(path='hub://intuitionwith/nautilus_trader_all-MiniLM-L12-v2_splitted', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape       dtype  compression
  -------    -------     -------     -------  ------- 
 embedding  embedding  (36395, 384)  float32   None   
    id        text      (36395, 1)     str     None   
 metadata     json      (36395, 1)     str     None   
   text       text      (36395, 1)     str     None   


 

['8c657ce1-3411-11ee-b60b-68545ad2e9d8',
 '8c657ce2-3411-11ee-98a7-68545ad2e9d8',
 '8c657ce3-3411-11ee-b082-68545ad2e9d8',
 '8c657ce4-3411-11ee-b8f9-68545ad2e9d8',
 '8c657ce5-3411-11ee-9aea-68545ad2e9d8',
 '8c657ce6-3411-11ee-8297-68545ad2e9d8',
 '8c657ce7-3411-11ee-9092-68545ad2e9d8',
 '8c657ce8-3411-11ee-bdca-68545ad2e9d8',
 '8c657ce9-3411-11ee-a0b8-68545ad2e9d8',
 '8c657cea-3411-11ee-9f19-68545ad2e9d8',
 '8c657ceb-3411-11ee-99ea-68545ad2e9d8',
 '8c657cec-3411-11ee-bac4-68545ad2e9d8',
 '8c657ced-3411-11ee-b229-68545ad2e9d8',
 '8c657cee-3411-11ee-b51c-68545ad2e9d8',
 '8c657cef-3411-11ee-9369-68545ad2e9d8',
 '8c657cf0-3411-11ee-bdf0-68545ad2e9d8',
 '8c657cf1-3411-11ee-9e80-68545ad2e9d8',
 '8c657cf2-3411-11ee-96ea-68545ad2e9d8',
 '8c657cf3-3411-11ee-a556-68545ad2e9d8',
 '8c657cf4-3411-11ee-b910-68545ad2e9d8',
 '8c657cf5-3411-11ee-ba1e-68545ad2e9d8',
 '8c657cf6-3411-11ee-8dfa-68545ad2e9d8',
 '8c657cf7-3411-11ee-a04c-68545ad2e9d8',
 '8c657cf8-3411-11ee-88e9-68545ad2e9d8',
 '8c657cf9-3411-

### Using Vec DB

In [15]:
from langchain.vectorstores import DeepLake

In [16]:
db = DeepLake(
    dataset_path=f"hub://{username}/{git_pj_name+'_'+subname}",
    read_only=True,
    embedding=embeddings,
    runtime={"tensor_db": True}
)

Using embedding function is deprecated and will be removed in the future. Please use embedding instead.


Deep Lake Dataset in hub://intuitionwith/nautilus_trader_all-MiniLM-L12-v2_splitted already exists, loading from the storage
Specifying runtime option when loading a Vector Store is not supported and this parameter will be ignored. If you wanted to create a new Vector Store, please specify a path to a Vector Store that does not already exist. To transfer an existing Vector Store to the Managed Tensor Database, use the steps in the link below: (https://docs.activeloop.ai/enterprise-features/managed-database/migrating-datasets-to-the-tensor-database).


In [17]:
retriever = db.as_retriever()
retriever.search_kwargs["distance_metric"] = "cos"
retriever.search_kwargs["fetch_k"] = 100
retriever.search_kwargs["maximal_marginal_relevance"] = True
retriever.search_kwargs["k"] = 10

In [18]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

model = ChatOpenAI(model_name="gpt-4")  # switch to 'gpt-4'
qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)

In [24]:
example_questions = [
    "What does trader do?",
    "How to optimize gil lock problem in python?",
    "I want to implement ML Strategy in nautilustrader. How can I do?",
    "How do you get assigned to Trader?",
    "How can I run multiple strategies?",
    "How can I use multi-threaded trading agents in Cython?",
    "How to implement UDP Feeder socket inside Nautilus Core, with Rust tokio crate, binding with pyo3?"
]
chat_history = []

for question in example_questions:
    result = qa({"question": question, "chat_history": chat_history})
    chat_history.append((question, result["answer"]))
    print(f"-> **Question**: {question} \n")
    print(f"**Answer**: {result['answer']} \n")

-> **Question**: What does trader do? 

**Answer**: The Trader class provides a trader for managing a fleet of trading strategies. It takes in various components such as a message bus, cache, portfolio, data engine, risk engine, execution engine, clock, logger, and configuration. The Trader class allows you to add strategies, start and stop trading, subscribe to message bus topics, and get the state of strategies. 

-> **Question**: How to optimize gil lock problem in python? 

**Answer**: One way to optimize the GIL (Global Interpreter Lock) problem in Python is to use multiple threads or processes. Since the GIL only allows one thread to execute Python bytecode at a time, using multiple threads can help utilize multiple CPU cores and improve overall performance.

Here are a few strategies to optimize the GIL problem:

1. Use multiprocessing: Instead of using threads, you can use the multiprocessing module to create multiple processes. Each process will have its own Python interpreter

In [20]:
from pprint import pprint
pprint(chat_history)

[('What does trader do?',
  'The Trader class provides a trader for managing a fleet of trading '
  'strategies. It is responsible for handling the execution of trading '
  'commands, managing the portfolio, and interacting with the data and risk '
  'engines. It is a component of the trading system and is initialized with '
  'various dependencies such as the message bus, cache, portfolio, data '
  'engine, risk engine, execution engine, clock, and logger. The Trader class '
  'also has methods for starting and stopping the trader, adding strategies, '
  'and subscribing to message bus topics.'),
 ('How to optimize gil lock problem in python?',
  'There are several ways to optimize the GIL (Global Interpreter Lock) '
  'problem in Python:\n'
  '\n'
  "1. Use multiprocessing: Python's multiprocessing module allows you to spawn "
  'multiple processes, each with its own interpreter and GIL. By distributing '
  'the workload across multiple processes, you can effectively utilize '
  'mul

In [21]:
# chat_history = []
# while question := input(f"Input your question about {git_pj_name}: "):
#     if question == "stop":
#         break

#     result = qa({"question": question, "chat_history": chat_history})
#     chat_history.append((question, result["answer"]))
#     print(f"-> **Question**: {question} \n")
#     print(f"**Answer**: {result['answer']} \n")

#     print()