# Run parallel calls to OpenAI with a rate limit

In [1]:
import pandas as pd

# Load the magazine-subscription reviews from a CSV file.
# The path is relative, so the notebook must be run from a directory that contains `data/`.
df = pd.read_csv('data/Magazine_Subscriptions_5.csv')

In [2]:
# Display the first row to see which columns are available (e.g. reviewText, unixReviewTime)
df.head(1)

Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,reviewerName,reviewText,summary,unixReviewTime,vote,style,image
0,4,True,"02 26, 2014",A5QQOOZJOVPSF,B00005N7P0,John L. Mehlmauer,"I'm old, and so is my computer. Any advice th...",Cheapskates guide,1393372800,,,


In [3]:
from phospho import lab

# Build one lab.Message per valid row of the DataFrame:
# the review text becomes the message content and the Unix review time its creation timestamp.
messages = lab.Message.from_df(
    df,
    content="reviewText",
    created_at="unixReviewTime",
)

# Preview the first four messages
messages[:4]

[Message(id='0', created_at=1393372800, role=None, content="I'm old, and so is my computer.  Any advice that can help me maximize my computer perfomance is very welcome.  MaximumPC has some good tips on computer parts, vendors, and usefull tests", previous_messages=[], metadata={}),
 Message(id='1', created_at=1078531200, role=None, content="There's nothing to say, but if you want a REAL men's magazine, this is it. They have great articles and stories, and I love the bits of knowledge that starts the Know & Tell section, and I love the last page, which has an interview with a celebrity. Just get this magazine and forget all the other mature men's mags.", previous_messages=[], metadata={}),
 Message(id='2', created_at=1058227200, role=None, content="If you're the kind of man who looks at himself in a full length mirrror every morning, this is the magazine for you.\nIf you think the car you drive defines you, the clothes you wear are the measure of your self-worth, the watch on your wris

In [4]:
from dotenv import load_dotenv
import re

# Load environment variables from a local .env file, if one is found.
# NOTE(review): presumably this provides the OpenAI API key used by the client below — confirm.
load_dotenv()



async def get_keywords(
    message: lab.Message,
    model: str = "openai:gpt-3.5-turbo",
):
    """
    Extract topic keywords from a review message with a chat-completion model.

    Args:
        message: The message to annotate; its `content` is inserted into the prompt.
        model: Model identifier, e.g. `openai:gpt-3.5-turbo`. It is split by
            `lab.get_provider_and_model` into a provider (used to select the
            async client) and a model name (sent with the completion request).

    Returns:
        A `lab.JobResult` with `result_type=lab.ResultType.list` whose value is
        the list of keywords parsed from the `- ` bullet points of the model's
        answer. The list is empty when the answer has no text or no bullets.
    """
    provider, model_name = lab.get_provider_and_model(model)
    openai_client = lab.get_async_client(provider)

    # Label every message with keywords it contains
    prompt = f"""You are an annotator reading Amazon product reviews. Your job is to label
    each review with keywords that describe the main topics covered in the review.
    The review is: 
    {message.content}

    Return a list of max 10 keywords as a bullet point list: `- keyword1\n- keyword2\n- keyword3`
    Keywords:"""

    response = await openai_client.chat.completions.create(
        # Use the model name parsed from `model` so the caller's choice is honored
        # (previously the request always used a hard-coded "gpt-3.5-turbo").
        model=model_name,
        messages=[
            {
                "role": "system",
                "content": "You are a business analyst, expert in e-commerce.",
            },
            {"role": "user", "content": prompt},
        ],
    )

    # The completion's text content may be None; treat that as an empty answer
    # instead of letting the regex call raise a TypeError.
    answer = response.choices[0].message.content or ""

    # Parse the bullet list with a regex, trimming stray whitespace (e.g. a
    # trailing "\r") and dropping bullets that carry no text.
    keywords = [
        keyword.strip()
        for keyword in re.findall(r"- (.*)", answer)
        if keyword.strip()
    ]
    return lab.JobResult(
        value=keywords,
        result_type=lab.ResultType.list,
    )


# Try the function on a single message
await get_keywords(messages[0])

JobResult(value=['computer performance', 'advice', 'MaximumPC', 'computer parts', 'vendors', 'useful tests'], result_type=<ResultType.list: 'list'>, logs=[], metadata={}, created_at=1710790553, job_id=None)

In [6]:
workload = lab.Workload(jobs=[lab.Job(job_function=get_keywords)])

# Run the workload on the messages
await workload.async_run(messages[:100], executor_type="parallel", max_parallelism=10)

No job_config provided. Running with empty config
100%|██████████| 100/100 [00:08<00:00, 11.35it/s]


{'0': {'get_keywords': JobResult(value=['computer performance', 'advice', 'MaximumPC', 'computer parts', 'vendors', 'useful tests'], result_type=<ResultType.list: 'list'>, logs=[], metadata={}, created_at=1710790564, job_id='get_keywords')},
 '1': {'get_keywords': JobResult(value=["men's magazine", 'articles', 'stories', 'Know & Tell', 'celebrity interview', "mature men's mags"], result_type=<ResultType.list: 'list'>, logs=[], metadata={}, created_at=1710790564, job_id='get_keywords')},
 '2': {'get_keywords': JobResult(value=['self-image', 'materialism', 'status symbols', 'relationships', 'magazine subscription'], result_type=<ResultType.list: 'list'>, logs=[], metadata={}, created_at=1710790564, job_id='get_keywords')},
 '3': {'get_keywords': JobResult(value=['advertisement', 'little content'], result_type=<ResultType.list: 'list'>, logs=[], metadata={}, created_at=1710790564, job_id='get_keywords')},
 '4': {'get_keywords': JobResult(value=['magazine content', 'computer technology', '

In [7]:
# Run the workload's jobs on the first 10 messages with their alternative configurations.
# NOTE: the `get_keywords` job was created without a job config, so the recorded output
# below shows every message being skipped ("No alternative configurations found").
await workload.async_run_on_alternative_configurations(messages[:10])

Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.
Job get_keywords: No alternative configurations found. Skipping.


In [8]:
# Ask the workload to optimize its jobs with an accuracy threshold of 0.0.
# NOTE: per the recorded output below, this is skipped because no alternative results
# exist — the previous cell found no alternative configurations to run.
workload.optimize_jobs(accuracy_threshold=0.0)

Can't run Workload.optimize(): No alternative results found. Make sure you called Workload.async_run_on_alternative_configurations() first. Skipping.
