In [1]:
cd ../

/future/u/hmoazam/home/dsp


In [16]:
root_path = '.'

import os
import logging
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(root_path, 'cache')
os.environ["DSP_CACHE_SQLITE_PATH"] = "cache.db"
os.environ["DSP_LOGGING_LEVEL"] = str(logging.INFO)

import dsp
import openai
from dsp.utils.cache import cache_wrapper

# openai_key = os.getenv('OPENAI_API_KEY')  # or replace with your API key
colbert_server = 'http://ec2-44-228-128-229.us-west-2.compute.amazonaws.com:8893/api/search'

lm = dsp.GPT3(model='gpt-3.5-turbo', model_type="chat")
rm = dsp.ColBERTv2(url=colbert_server)


In [17]:
from datetime import datetime, timedelta
import time
import random
import string


### SQLite cache unit tests

In [45]:
@cache_wrapper
def test_function_cached(**kwargs):
    kwargs_ = ','.join(f'{key}={value}' for key, value in kwargs.items())
    return hash(kwargs_)

def test_function(**kwargs):
    kwargs_ = ','.join(f'{key}={value}' for key, value in kwargs.items())
    return hash(kwargs_)

In [42]:
def generate_random_string(length):
    # Define the pool of characters to choose from
    characters = string.ascii_letters + string.digits  

    # Generate a random string of given length
    random_string = ''.join(random.choices(characters, k=length))
    return random_string

def generate_random_value():
    value_type = random.choice(['long_string', 'short_string', 'float', 'int', 'list'])

    if value_type == 'long_string':
        return generate_random_string(random.randint(20, 50))
    elif value_type == 'short_string':
        return generate_random_string(random.randint(5, 10))
    elif value_type == 'float':
        return random.uniform(0.0, 100.0)
    elif value_type == 'int':
        return random.randint(1, 100)
    elif value_type == 'list':
        list_length = random.randint(1, 5)
        return [generate_random_value() for _ in range(list_length)]

def generate_random_dict(num_items):
    random_dict = {}
    for _ in range(num_items):
        key = generate_random_string(random.randint(5, 10))
        value = generate_random_value()
        random_dict[key] = value
    return random_dict

kwargs = [generate_random_dict(random.randint(0, 30)) for _ in range(50)]

print(kwargs)

[{'BAAmffcR9': 'x7wwA60UN', 'PDHAaONd': [94, 4], 'OoPUDDxy0M': 'bMEas', '6fma5qY': '6n7NiEKvAGq6l8g44UrDDuvJQ7fBFdkWmP', 'pEDRV2': 5, 'wnCBe': 87.7625883183242, 'F8HyzhT': 12, 'gjdR6K': 5.4599287859469765, 'tg2Ea0w7h': [[80], 'pYHE891ajR', 'JpUOxKc4uoK0fZJ42ZX72vMaB7TlV9VV3lYAP2VYCSz'], 'iWecC': 35, '1RqSU0bz': 75, 'oVyXoxH': 73.1900464002794, 'i1Rm0B': ['5Dama0So0oTs8EwbeAXgBt'], 'Glx44MgfH': 'Jd1EAJA3GZxyYCo3pPE7WMntAYZ', 'gPwCY1HuZ': [75.0589930909783, 'm2qfgXayB2cagGdUaPQfpsw7BGbLLxeSV39sF29cPIZOJWBAN'], 'VgEPam': 23, 'dNcyS': '3Mmdr', 'Mk0dKt3AWm': 'n3Ben01D'}, {'3vPhWxVY3O': 'rcaOBjieJYxovJUMVGrNLw', '9gl9v': 39.27176496947131, 'Ndvjsn': 'RIIEAEEMBX027JGHzeUVOps6', 'SR77cPyE': ['QK6lfRu7520M8sKn5Ym6ipDRnwYbLejhvsa1', 70.87942663473808, 27], 'M4f3pZTz00': ['b9HozL5jCG', ['7FeHnV', 'CVsznYRX0', 'HVOQmbYo2HbV17vjV4FDNr']], '7Fp5gbQK': 80, 'F78QTj': [63.386665323049826, 'HCi0IfcJjSot24bWOT9L2RiwYUAGwm', 18], 'hooZ3sC': 'seveOaB', 'kLKmTdMy': 83, '84u0N': 'PrZakb', 'qig4LJpvT': 'Qw4Q5

In [14]:
# test write performance
start_time = time.time()
for kwarg in kwargs:
    test_function_cached(**kwarg)
end_time = time.time()
print(f'Cached function writes took {end_time - start_time} seconds')

# test read performance + save outputs
outputs = []
start_time2 = time.time()
for kwarg in kwargs:
    outputs.append(test_function_cached(**kwarg))
end_time2 = time.time()
print(f'Cached function reads took {end_time2 - start_time2} seconds')


# test correctness
for i, kwarg in enumerate(kwargs):
    assert test_function(**kwarg) == outputs[i]




Cached function writes took 9.680100679397583 seconds
Cached function reads took 2.423262357711792 seconds


In [43]:
# threaded tests
import threading 

@cache_wrapper
def test_function_cached2(**kwargs):
    kwargs_ = ','.join(f'{key}={value}' for key, value in kwargs.items())
    random.randint(0, 5)
    return hash(kwargs_)

def run_function_in_thread(kwargs):
    result = test_function_cached2(**kwargs)
    
start_time = time.time()
# thread_results = []
threads = []

# Create a thread for each call to test_function_cached
for kwarg in kwargs:
    thread = threading.Thread(target=run_function_in_thread, args=(kwarg,))
    threads.append(thread)
    thread.start()

# Wait for all threads to complete
for thread in threads:
    thread.join()

end_time = time.time()

print(f'Cached function writes (threaded) took {end_time - start_time} seconds')

# test read performance + save outputs
outputs = []
start_time2 = time.time()
threads = []

# Create a thread for each call to test_function_cached
for kwarg in kwargs:
    thread = threading.Thread(target=run_function_in_thread, args=(kwarg,))
    threads.append(thread)
    thread.start()

# Wait for all threads to complete
for thread in threads:
    thread.join()

end_time2 = time.time()
print(f'Cached function reads (threaded) took {end_time2 - start_time2} seconds')

Cached function writes (threaded) took 0.8201169967651367 seconds
Cached function reads (threaded) took 0.22392773628234863 seconds


In [47]:
# check correctness threaded
for kwarg in kwargs:
    ref = test_function(**kwarg)
    cached = test_function_cached(**kwarg)
    assert ref == cached

### SQLite cache integration tests


###### Case 1: 
- Example doesn't exists in the cache and the experiment end timestamp is not set to the future
- Expected:
    - Runs into an exception since it affects reproducibility. The example is missing in that timerange and should not be computed.

In [4]:
case1_cache_end_timerange = datetime.now().timestamp()

with dsp.settings.context(lm=lm, experiment_end_timestamp=case1_cache_end_timerange):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:")
    print(answer)

Exception: Cache does not exist for the given experiment timerange of between 1970-01-01T05:30:00 and 2023-07-02T23:23:49.595134.

###### Case 2: 
- Example doesn't exists in the cache and the experiment end timestamp is set to the future (by default)
- Expected:
    - The experiment is not trying to reproduce but create new reproducible results. Therefore, it is allowed to re-compute

In [5]:

with dsp.settings.context(lm=lm):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:")
    print(answer)

2023-07-02 23:23:55,545 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1970-01-01T05:30:00 and future. Computing!
2023-07-02 23:23:57,303 - DSP - DEBUG - returning record: cfce7175-a710-4de0-8a97-b742163fbd10
{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Synchronized swimming was considered a valid Olympic sport in the year 1984.', 'role': 'assistant'}}], 'created': 1688320436, 'id': 'chatcmpl-7XvYiddyDMG9XhlW2J81IbXDRN6io', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 16, 'prompt_tokens': 26, 'total_tokens': 42}}


###### Case 3: 
- Example already exists in the cache within the specified timerange
- Expected:
    - Returns the results of the example without re-computing

In [6]:
case3_cache_end_timerange = (datetime.now() + timedelta(days=2)).timestamp()

with dsp.settings.context(lm=lm, experiment_end_timestamp=case3_cache_end_timerange):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:")
    print(answer)

2023-07-02 23:24:02,178 - DSP - DEBUG - Cached experiment result found between 1970-01-01T05:30:00 and 2023-07-04T23:24:02.177181. Retrieving result from cache.
2023-07-02 23:24:02,183 - DSP - DEBUG - returning record: cfce7175-a710-4de0-8a97-b742163fbd10
{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Synchronized swimming was considered a valid Olympic sport in the year 1984.', 'role': 'assistant'}}], 'created': 1688320436, 'id': 'chatcmpl-7XvYiddyDMG9XhlW2J81IbXDRN6io', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 16, 'prompt_tokens': 26, 'total_tokens': 42}}


###### Case 4: 
- There exists an example specified in the timerange but it failed for some reason.
- Expected:
    - Returns the exact error that occured, for the sake of reproducibility

In [7]:
# Let's intentionally make it fail
case4_cache_end_timerange = (datetime.now() + timedelta(seconds=3)).timestamp()
try:
    with dsp.settings.context(lm=lm, experiment_end_timestamp=case4_cache_end_timerange):
        answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:", dummy_kwargs="sample")
finally:
    time.sleep(3)

2023-07-02 23:24:09,363 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1970-01-01T05:30:00 and 2023-07-02T23:24:12.357561. Computing!


InvalidRequestError: Unrecognized request argument supplied: dummy_kwargs

In [8]:
# Should fail with the same exception
case4_cache_end_timerange = datetime.now().timestamp()

with dsp.settings.context(lm=lm, experiment_end_timestamp=case4_cache_end_timerange):
    answer = dsp.settings.lm.basic_request(prompt="Q: At which year was synchronized swimming considered as a valid olympic sport?\nA:", dummy_kwargs="sample")

2023-07-02 23:24:18,425 - DSP - DEBUG - returning record: a0713e07-ad74-4ac8-a1a8-58a5b94b3860
2023-07-02 23:24:18,430 - DSP - DEBUG - Failed operation found in the cache for experiment timerange between 1970-01-01T05:30:00 and 2023-07-02T23:24:18.424656. Raising the same exception.


Exception: Traceback (most recent call last):

  File "/Users/sri/CS/Research/Stanford/research_2023/dsp/dsp/utils/cache.py", line 333, in wrapper
    result = func(*args, **kwargs)

  File "/Users/sri/CS/Research/Stanford/research_2023/dsp/dsp/modules/gpt3.py", line 190, in cached_gpt3_turbo_request
    return cast(OpenAIObject, openai.ChatCompletion.create(**kwargs))

  File "/Users/sri/.pyenv/versions/dsp/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)

  File "/Users/sri/.pyenv/versions/dsp/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(

  File "/Users/sri/.pyenv/versions/dsp/lib/python3.9/site-packages/openai/api_requestor.py", line 226, in request
    resp, got_stream = self._interpret_response(result, stream)

  File "/Users/sri/.pyenv/versions/dsp/lib/python3.9/site-packages/openai/api_requestor.py", line 619, in _interpret_response
    self._interpret_response_line(

  File "/Users/sri/.pyenv/versions/dsp/lib/python3.9/site-packages/openai/api_requestor.py", line 679, in _interpret_response_line
    raise self.handle_error_response(

openai.error.InvalidRequestError: Unrecognized request argument supplied: dummy_kwargs


# concurrency test

In [9]:
import threading
case5_cache_end_timerange = (datetime.now() + timedelta(days=2)).timestamp()

    
def run_snippet():
    with dsp.settings.context(lm=lm, experiment_end_timestamp=case5_cache_end_timerange):
        answer = dsp.settings.lm.basic_request(prompt="Q: which war did Archimedes invent war machines for?\nA:")

threads = []
for _ in range(10):
    thread = threading.Thread(target=run_snippet)
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()


2023-07-02 23:24:39,663 - DSP - DEBUG - Could not find a succesful experiment result in the cache for timerange between 1970-01-01T05:30:00 and 2023-07-04T23:24:39.655138. Computing!
2023-07-02 23:24:39,666 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,667 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,670 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,676 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,680 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,680 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,682 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,683 - DSP - DEBUG - Operation is pending in the cache. Polling for result.
2023-07-02 23:24:39,684 - DSP - DEBUG - Operation is pend