In [1]:
cd ../

/future/u/hmoazam/home/dsp


In [2]:
root_path = '.'

import os
import logging
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(root_path, 'cache')
os.environ["DSP_CACHE_SQLITE_PATH"] = "cache.db"
os.environ["DSP_LOGGING_LEVEL"] = str(logging.DEBUG)

import dsp
import openai
from dsp.utils.cache import sqlite_cache_wrapper as cache_wrapper

# openai_key = os.getenv('OPENAI_API_KEY')  # or replace with your API key
colbert_server = 'http://ec2-44-228-128-229.us-west-2.compute.amazonaws.com:8893/api/search'

lm = dsp.GPT3(model='gpt-3.5-turbo', model_type="chat")
rm = dsp.ColBERTv2(url=colbert_server)


Not loading Cohere because it is not installed.


In [3]:
from datetime import datetime, timedelta
import time
import random
import string


### SQLite cache unit tests

In [4]:
@cache_wrapper
def test_function_cached(**kwargs):
    kwargs_ = ','.join(f'{key}={value}' for key, value in kwargs.items())
    return hash(kwargs_)

def test_function(**kwargs):
    kwargs_ = ','.join(f'{key}={value}' for key, value in kwargs.items())
    return hash(kwargs_)

In [7]:
def generate_random_string(length):
    # Define the pool of characters to choose from
    characters = string.ascii_letters + string.digits  

    # Generate a random string of given length
    random_string = ''.join(random.choices(characters, k=length))
    return random_string

def generate_random_value():
    value_type = random.choice(['long_string', 'short_string', 'float', 'int', 'list'])

    if value_type == 'long_string':
        return generate_random_string(random.randint(20, 50))
    elif value_type == 'short_string':
        return generate_random_string(random.randint(5, 10))
    elif value_type == 'float':
        return random.uniform(0.0, 100.0)
    elif value_type == 'int':
        return random.randint(1, 100)
    elif value_type == 'list':
        list_length = random.randint(1, 5)
        return [generate_random_value() for _ in range(list_length)]

def generate_random_dict(num_items):
    random_dict = {}
    for _ in range(num_items):
        key = generate_random_string(random.randint(5, 10))
        value = generate_random_value()
        random_dict[key] = value
    return random_dict

kwargs = [generate_random_dict(random.randint(0, 30)) for _ in range(50)]

print(kwargs)

[{'WbW1x7NO8M': [24.811055864868116], 'L7KPgHU': 'BDHiHcbxbxgLRwxxoXyOryO79vaL1C3ec6Rv'}, {'b9Wg2KtrTm': 20.757592683077785, 'ExZPJWbeZ': 42.99945209548094, 'mKuwD3q': [7, 'y20ZuOUKwE', 'zj37gmbtV2', 54.68325739908624], 'tRdlkjp': 'ODCsAGHQKWq6WYZNLtmNJ0YjOMT', 'WOlLnswa': ['DImagGZpPRc9uuhB3ZWjRl1Sc', 'KVMSbcaYmSV4qDO5GWlKFN', 31], 'WIUaxk5': 67.06807254286554, '8FT1Jf9': 49.6285126980962, 'sE3lfq7': 25, 'yFnJbepnW': 'ZGrTbqhGRcJIHIrAUuN51hb1bZkOFEdbUnQowTidJmbMl', 'I92XO2kd1': 35.53238457748152, 'aH2Ym': ['MH4TvzYEpvM13JGEsDx7GHN', 'eDTclGCHl'], '27ffwv': 50.12083507021728, 'xJf8O7O': 'sjL6nE', 'duHMswta': 59, 'ckstfUaID': [9, 29.434682903132636], 'ZsGA10O': 42.401275772208876, 'bJ5M0apBW2': 27.97489604329092, 'yqJ4x': [['Mx5Gs', [['4Gw8khUIylnbmTSqVzwrxPWx6Q0gyiirA7', ['gMGS0aqhnrajDUe3chlOHJtf3GsKLhMbCP19f747VA1tI8', '0NBuURurHcCtkDOYlE7urj9JOjRRv1JjPY8LPCakXPj']], 37.619551057445356]], 44, 'j3q2tqtZe', 'hOLDWKkHlB2Fc7QV2ZIkgcfs6LDXllF2mGLqylFOA8pr0'], 'W997v': 'pMWwsc53u4', 'QREvq

In [14]:
# test write performance
start_time = time.time()
for kwarg in kwargs:
    test_function_cached(**kwarg)
end_time = time.time()
print(f'Cached function writes took {end_time - start_time} seconds')

# test read performance + save outputs
outputs = []
start_time2 = time.time()
for kwarg in kwargs:
    outputs.append(test_function_cached(**kwarg))
end_time2 = time.time()
print(f'Cached function reads took {end_time2 - start_time2} seconds')


# test correctness
for i, kwarg in enumerate(kwargs):
    assert test_function(**kwarg) == outputs[i]




Cached function writes took 9.680100679397583 seconds
Cached function reads took 2.423262357711792 seconds


In [43]:
# threaded tests
import threading 

@cache_wrapper
def test_function_cached2(**kwargs):
    kwargs_ = ','.join(f'{key}={value}' for key, value in kwargs.items())
    random.randint(0, 5)
    return hash(kwargs_)

def run_function_in_thread(kwargs):
    result = test_function_cached2(**kwargs)
    
start_time = time.time()
# thread_results = []
threads = []

# Create a thread for each call to test_function_cached
for kwarg in kwargs:
    thread = threading.Thread(target=run_function_in_thread, args=(kwarg,))
    threads.append(thread)
    thread.start()

# Wait for all threads to complete
for thread in threads:
    thread.join()

end_time = time.time()

print(f'Cached function writes (threaded) took {end_time - start_time} seconds')

# test read performance + save outputs
outputs = []
start_time2 = time.time()
threads = []

# Create a thread for each call to test_function_cached
for kwarg in kwargs:
    thread = threading.Thread(target=run_function_in_thread, args=(kwarg,))
    threads.append(thread)
    thread.start()

# Wait for all threads to complete
for thread in threads:
    thread.join()

end_time2 = time.time()
print(f'Cached function reads (threaded) took {end_time2 - start_time2} seconds')

Cached function writes (threaded) took 0.8201169967651367 seconds
Cached function reads (threaded) took 0.22392773628234863 seconds


In [47]:
# check correctness threaded
for kwarg in kwargs:
    ref = test_function(**kwarg)
    cached = test_function_cached(**kwarg)
    assert ref == cached

### SQLite cache integration tests


###### Case 1: 
- Example doesn't exists in the cache and the experiment end timestamp is not set to the future
- Expected:
    - Runs into an exception since it affects reproducibility. The example is missing in that timerange and should not be computed.

In [4]:
case1_cache_end_timerange = datetime.now().timestamp()

with dsp.settings.context(lm=lm, experiment_end_timestamp=case1_cache_end_timerange):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:")
    print(answer)

Exception: Oops. Cache does not exist for the given experiment timerange of between 1969-12-31T16:00:00 and 2023-08-04T14:10:52.923220.

In [5]:
openai.api_key = "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"

###### Case 2: 
- Example doesn't exists in the cache and the experiment end timestamp is set to the future (by default)
- Expected:
    - The experiment is not trying to reproduce but create new reproducible results. Therefore, it is allowed to re-compute

In [6]:

with dsp.settings.context(lm=lm):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:")
    print(answer)

2023-08-04 14:10:59,519 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1969-12-31T16:00:00 and future. Computing!


2023-08-04 14:11:00,476 - DSP - DEBUG - returning record: cdafffd6-48e9-4f0f-ab89-dd3707d8b90a
{
  "id": "chatcmpl-7jwMV2AxoATFw1QgGCXYmiVCvMEqz",
  "object": "chat.completion",
  "created": 1691183459,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Synchronized swimming was considered a valid Olympic sport in the year 1984."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 26,
    "completion_tokens": 16,
    "total_tokens": 42
  }
}


###### Case 3: 
- Example already exists in the cache within the specified timerange
- Expected:
    - Returns the results of the example without re-computing

In [7]:
case3_cache_end_timerange = (datetime.now() + timedelta(days=2)).timestamp()

with dsp.settings.context(lm=lm, experiment_end_timestamp=case3_cache_end_timerange):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:")
    print(answer)

2023-08-04 14:11:06,942 - DSP - DEBUG - Cached experiment result found between 1969-12-31T16:00:00 and 2023-08-06T14:11:06.922726. Retrieving result from cache.
2023-08-04 14:11:06,946 - DSP - DEBUG - returning record: cdafffd6-48e9-4f0f-ab89-dd3707d8b90a
{
  "id": "chatcmpl-7jwMV2AxoATFw1QgGCXYmiVCvMEqz",
  "object": "chat.completion",
  "created": 1691183459,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Synchronized swimming was considered a valid Olympic sport in the year 1984."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 26,
    "completion_tokens": 16,
    "total_tokens": 42
  }
}


###### Case 4: 
- There exists an example specified in the timerange but it failed for some reason.
- Expected:
    - Returns the exact error that occured, for the sake of reproducibility

In [8]:
# Let's intentionally make it fail
case4_cache_end_timerange = (datetime.now() + timedelta(seconds=3)).timestamp()
try:
    with dsp.settings.context(lm=lm, experiment_end_timestamp=case4_cache_end_timerange):
        answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:", dummy_kwargs="sample")
finally:
    time.sleep(3)

2023-08-04 14:11:10,804 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1969-12-31T16:00:00 and 2023-08-04T14:11:13.775158. Computing!


InvalidRequestError: Unrecognized request argument supplied: dummy_kwargs

In [9]:
# Should fail with the same exception
case4_cache_end_timerange = datetime.now().timestamp()

with dsp.settings.context(lm=lm, experiment_end_timestamp=case4_cache_end_timerange):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:", dummy_kwargs="sample")

2023-08-04 14:11:21,346 - DSP - DEBUG - Cached experiment result found between 1969-12-31T16:00:00 and 2023-08-04T14:11:21.327657. Retrieving result from cache.
2023-08-04 14:11:21,349 - DSP - DEBUG - returning record: 9647b97e-135f-46d5-bef0-1a2f71498e37
2023-08-04 14:11:21,351 - DSP - DEBUG - Failed operation found in the cache for experiment timerange between 1969-12-31T16:00:00 and 2023-08-04T14:11:21.327657. Raising the same exception.


Exception: Traceback (most recent call last):

  File "/future/u/hmoazam/home/dsp/dsp/utils/cache.py", line 395, in wrapper
    result = func(*args, **kwargs)

  File "/future/u/hmoazam/home/dsp/dsp/modules/gpt3.py", line 190, in cached_gpt3_turbo_request
    return cast(OpenAIObject, openai.ChatCompletion.create(**kwargs))

  File "/future/u/hmoazam/miniconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)

  File "/future/u/hmoazam/miniconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(

  File "/future/u/hmoazam/miniconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

  File "/future/u/hmoazam/miniconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(

  File "/future/u/hmoazam/miniconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 763, in _interpret_response_line
    raise self.handle_error_response(

openai.error.InvalidRequestError: Unrecognized request argument supplied: dummy_kwargs


# concurrency test

In [10]:
import threading
case5_cache_end_timerange = (datetime.now() + timedelta(days=2)).timestamp()

    
def run_snippet():
    with dsp.settings.context(lm=lm, experiment_end_timestamp=case5_cache_end_timerange):
        answer = dsp.settings.lm.basic_request(prompt="Q: which war did Archimedes invent war machines for?\nA:")

threads = []
for _ in range(10):
    thread = threading.Thread(target=run_snippet)
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()


2023-08-04 14:12:00,873 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1969-12-31T16:00:00 and 2023-08-06T14:11:27.312773. Computing!
2023-08-04 14:12:02,084 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1969-12-31T16:00:00 and 2023-08-06T14:11:27.312773. Computing!
2023-08-04 14:12:12,916 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1969-12-31T16:00:00 and 2023-08-06T14:11:27.312773. Computing!
2023-08-04 14:12:31,451 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1969-12-31T16:00:00 and 2023-08-06T14:11:27.312773. Computing!
2023-08-04 14:12:31,460 - DSP - DEBUG - returning record: b5580f87-1bb3-43e9-8948-cacce55abe18
2023-08-04 14:12:31,464 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1969-12-31T16:00:00 and 2023-08-06T14:11:27.312773. C