In [1]:
from llmcoder import LLMCoder

import shutil

import time

from tqdm.auto import tqdm

import numpy as np

In [2]:
# Benchmark prompt: an unfinished Python script held as a plain string.
# Within this notebook it is never executed; it is only handed to
# `llmcoder.setup(code)` and `llmcoder.complete(code, n=1)` inside
# `measure_time`. The text stops mid-statement at `vectorstore = `, which is
# the point the completion has to continue from.
# NOTE(review): the snippet uses `np` and `get_es_connection` without
# importing/defining them — presumably deliberate input for the analyzers;
# confirm before "fixing" the prompt, since any edit changes what is measured.
code = """from langchain.document_loaders import DirectoryLoader, JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Embed and store
from langchain.vectorstores.elasticsearch import ElasticsearchStore 
from langchain.embeddings import GPT4AllEmbeddings
from langchain.embeddings import OllamaEmbeddings # We can also try Ollama embeddings

from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

import os
import json
from tqdm import tqdm
from bs4 import BeautifulSoup
import time

import torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# pip install jq
schema = {
    'jq_schema': '.text'
}

es = get_es_connection()

my_array = np.array([1, 2, 3, 4, 5])

REINDEX = False

# pip install gpt4all
# Needs Ubuntu > 22.04 because of glibc

if REINDEX:
    # Clear the index
    es.indices.delete(index="eurlex-langchain", ignore=[400, 404])

    start_time = time.time()
    vectorstore = """

In [3]:
# Drop the local ".mypy_cache" directory if present. `ignore_errors=True`
# makes this a no-op when the directory does not exist.
# Presumably done so the first benchmark run starts from a cold mypy cache.
shutil.rmtree(path=".mypy_cache", ignore_errors=True)

In [4]:
def measure_time(setup=False, n_completions=10):
    """Time an optional `LLMCoder.setup` call and the mean `complete` call.

    A fresh `LLMCoder` is constructed on every invocation and the local
    `.mypy_cache` directory is removed before any timing starts. The prompt
    is read from the module-level `code` string.

    Args:
        setup: When True, `llmcoder.setup(code)` is called and timed before
            the completions. When False the setup interval only brackets the
            skipped branch, so the reported setup time is effectively zero.
        n_completions: Number of consecutive `llmcoder.complete(code, n=1)`
            calls to time. Must be at least 1. Defaults to 10.

    Returns:
        A tuple `(setup_seconds, mean_complete_seconds)`, where the second
        value is the total time of all completions divided by
        `n_completions`.

    Raises:
        ValueError: If `n_completions` is smaller than 1 (the mean would be
            undefined).
    """
    if n_completions < 1:
        raise ValueError("n_completions must be at least 1")

    llmcoder = LLMCoder(
        analyzers=["mypy_analyzer_v1", "signature_analyzer_v1", "gpt_score_analyzer_v1"],
        max_iter=3,
        feedback_variant="coworker",
        n_procs=16,
        verbose=False)

    # Clear the mypy cache so every measurement starts without it.
    shutil.rmtree(".mypy_cache", ignore_errors=True)

    # perf_counter is monotonic, so the measured intervals cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    setup_start = time.perf_counter()
    if setup:
        llmcoder.setup(code)
    setup_time = time.perf_counter() - setup_start

    complete_start = time.perf_counter()
    for _ in range(n_completions):
        # Only the elapsed time matters here; the completion is discarded.
        llmcoder.complete(code, n=1)
    complete_time = time.perf_counter() - complete_start

    return setup_time, complete_time / n_completions

In [5]:
# Per-run timing samples (seconds), one pair of lists per configuration:
# runs without a prior setup call, and runs with one.
setup_times_no_setup, complete_times_no_setup = [], []
setup_times_with_setup, complete_times_with_setup = [], []

In [6]:
N_REPEAT = 10

# Each repetition measures both configurations back to back (first without
# setup, then with setup) and appends the results to the matching lists.
for repeat_idx in tqdm(range(N_REPEAT)):
    for use_setup, setup_samples, complete_samples in (
        (False, setup_times_no_setup, complete_times_no_setup),
        (True, setup_times_with_setup, complete_times_with_setup),
    ):
        setup_time, complete_time = measure_time(setup=use_setup)
        setup_samples.append(setup_time)
        complete_samples.append(complete_time)

  0%|          | 0/10 [00:00<?, ?it/s]

In [7]:
# Print mean ± standard deviation (np.std with its default arguments) of the
# collected timings, one section per configuration, separated by a blank line.
summary_sections = (
    ("No setup", setup_times_no_setup, complete_times_no_setup),
    ("With setup", setup_times_with_setup, complete_times_with_setup),
)

for section_idx, (heading, setup_samples, complete_samples) in enumerate(summary_sections):
    if section_idx > 0:
        print()
    print(heading)
    print(f"Setup time: {np.mean(setup_samples):.2f} ± {np.std(setup_samples):.2f}")
    print(f"Complete time: {np.mean(complete_samples):.2f} ± {np.std(complete_samples):.2f}")

No setup
Setup time: 0.00 ± 0.00
Complete time: 5.10 ± 0.56

With setup
Setup time: 12.18 ± 3.36
Complete time: 4.56 ± 1.10
