In [1]:
%run "/code/source/notebooks/notebook_settings.py"
from source.library.openai import OpenAI, InstructModels, EmbeddingModels, cost
from source.service.datasets import DATA
# This is needed because openai.text_completion calls asynchronous functions but 
# Jupyter is already running its own event loop.
import nest_asyncio
nest_asyncio.apply()

# Read the API key from a file (surrounding whitespace stripped) and build the
# OpenAI client used by the cells below.
with open('/.openai_api_key', 'r') as handle:
    oai = OpenAI(api_key=handle.read().strip())

# Load the reddit dataset exposed by `DATA`; later cells index it like a
# pandas DataFrame (`.head()`, `.iloc`, `['text']`).
reddit = DATA.reddit.load()

2023-04-12 01:57:20 - INFO     | Loading data `reddit` from `data/processed/reddit.pkl`


In [2]:
# Preview the first rows to see which columns are available.
reddit.head()

Unnamed: 0,id,subreddit,title,post,text
0,6y0l40,Volvo,2012 c30 spoiler holding water,Lately my car has been a magnet for for dings and other mishaps while parked in my reserved spot and have been parking elsewhere which is unfortunately under a tree. I've noticed after a night of ...,2012 c30 spoiler holding water - Lately my car has been a magnet for for dings and other mishaps while parked in my reserved spot and have been parking elsewhere which is unfortunately under a tre...
1,7w66o5,Volvo,"About to purchase 2014 V60, anything I should be aware of?","My 2004 V50 is slowly but surely giving up, it feels like I replaced half of the replaceable parts and got the other half repaired within the last year and I think it's time to let him go, especia...","About to purchase 2014 V60, anything I should be aware of? - My 2004 V50 is slowly but surely giving up, it feels like I replaced half of the replaceable parts and got the other half repaired with..."
2,6ujf4y,Volvo,Experiencing the new XC60,"I test drove the XC60 T8 yesterday and Oh. My. God is it quick. I was obviously aware of the performance numbers (bhp, 0-60 times etc), but it's a completely different experience trying it out for...","Experiencing the new XC60 - I test drove the XC60 T8 yesterday and Oh. My. God is it quick. I was obviously aware of the performance numbers (bhp, 0-60 times etc), but it's a completely different ..."
3,70pr0e,Volvo,Pilot Assist 2 upgrade 2016 xc90,I am looking to pick up a used 2016 and am really interested in the Pilot Assist 2 retro fit upgrade that can be added.<lb><lb>What do i need to look for to make sure the 2016 model i get can be r...,Pilot Assist 2 upgrade 2016 xc90 - I am looking to pick up a used 2016 and am really interested in the Pilot Assist 2 retro fit upgrade that can be added.<lb><lb>What do i need to look for to make...
4,4uqr9q,Volvo,2013 XC60 Negotiation with Volvo dealership,I am in the middle of a negotiation on the price of a 2013 XC60. We are off by 1k. I am debating about throwing out a request that he upgrades the software in my 2006 2.5 xc90 AWD with the polesta...,2013 XC60 Negotiation with Volvo dealership - I am in the middle of a negotiation on the price of a 2013 XC60. We are off by 1k. I am debating about throwing out a request that he upgrades the sof...


In [3]:
# Show the full, untruncated `text` value of the first row.
reddit.iloc[0]['text']

"2012 c30 spoiler holding water - Lately my car has been a magnet for for dings and other mishaps while parked in my reserved spot and have been parking elsewhere which is unfortunately under a tree. I've noticed after a night of rain when I leave the next day water runs down the back window when I accelerate. Opening the back glass before driving usually means water runs down the sides but I cannot seem to find where it's pooling up. <lb><lb>Anyone know if there's drains in the back hatch that might be clogged? Or is this normal?"

In [4]:
# Ballpark cost of sending every reddit post as a prompt. The first post's cost
# stands in for all posts, so this is nonsensical as an exact figure and only
# gives an order of magnitude.
first_post_cost = cost(reddit.iloc[0]['text'], InstructModels.BABBAGE)
print(f"{first_post_cost * len(reddit)}")

0.0585


In [5]:
def generate_prompt(post: str) -> str:
    """Wrap a reddit post in a fenced "summarize this" instruction.

    Args:
        post: Text of the post to be summarized.

    Returns:
        The prompt text. Each line of the wrapper (and the first line of the
        post) is prefixed with four spaces, and the prompt starts with a
        newline; that whitespace is part of the returned string.
    """
    # Everything before the post: leading newline, instruction, opening fence.
    header = "\n    Summarize this reddit post:\n\n    ```\n    "
    # Everything after the post: closing fence and the trailing indent.
    footer = "\n    ```\n    "
    return f"{header}{post}{footer}"

# Build one summarization prompt for each of the first ten posts and send them
# to the babbage instruct model, capping each completion at 200 tokens.
posts = reddit['text'].iloc[:10]
prompts = list(map(generate_prompt, posts))
responses = oai.text_completion(
    model=InstructModels.BABBAGE,
    prompts=prompts,
    max_tokens=200,
)
responses

OpenAIResponses(responses=[OpenAIResponse(response_status=200, response_reason='OK', result=OpenAIInstructResult(result={'id': 'cmpl-74K1ZfnXahxgWRAwdsCGP7Rwzpecy', 'object': 'text_completion', 'created': 1681264641, 'model': 'text-babbage-001', 'choices': [{'text': " A c30 spoiler holding water is a common problem on a car parked in a reserved spot. The water can run down the back window if you open the back glass before driving. Sometimes the water pooled up in the back hatch and it's been a problem for people.", 'index': 0, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 149, 'completion_tokens': 54, 'total_tokens': 203}})), OpenAIResponse(response_status=200, response_reason='OK', result=OpenAIInstructResult(result={'id': 'cmpl-74K1ZQ0ZkNZoy9yXFQCTypVk2PD5W', 'object': 'text_completion', 'created': 1681264641, 'model': 'text-babbage-001', 'choices': [{'text': ' A V60 2014 model is being tested for a potential sale tomorrow. The owner is not sure if it is pos

In [6]:
# Stop before reading costs or replies if the batch is flagged as unhealthy.
# Each assert carries a message naming the flag that tripped.
assert not responses.any_errors, (
    "responses.any_errors is set; inspect the individual responses before continuing"
)
assert not responses.any_missing_data, (
    "responses.any_missing_data is set; inspect the individual responses before continuing"
)

In [7]:
# Total cost reported by the responses object, in dollars to four decimals.
print(f"${responses.total_cost:.4f}")

$0.0014


In [8]:
# Token usage of the first response: prompt tokens, then completion tokens.
first_result = responses[0].result
print(first_result.usage_prompt_tokens)
print(first_result.usage_completion_tokens)

149
54


In [9]:
# Print cost, token usage, the original post and its summary for each response.
# strict=True makes zip raise if `responses` and `posts` differ in length.
for response, post in zip(responses, posts, strict=True):
    result = response.result
    token_report = "; ".join([
        f"Total Tokens: {result.usage_total_tokens}",
        f"Prompt Tokens: {result.usage_prompt_tokens}",
        f"Completion Tokens: {result.usage_completion_tokens}",
    ])
    print(f"\n\n---------\nTotal Cost: ${result.cost_total:.5f}")
    print(token_report)
    print(f"Post: `{post}`")
    print(f"Summary: `{result.reply}`")



---------
Total Cost: $0.00010
Total Tokens: 203; Prompt Tokens: 149; Completion Tokens: 54
Post: `2012 c30 spoiler holding water - Lately my car has been a magnet for for dings and other mishaps while parked in my reserved spot and have been parking elsewhere which is unfortunately under a tree. I've noticed after a night of rain when I leave the next day water runs down the back window when I accelerate. Opening the back glass before driving usually means water runs down the sides but I cannot seem to find where it's pooling up. <lb><lb>Anyone know if there's drains in the back hatch that might be clogged? Or is this normal?`
Summary: `A c30 spoiler holding water is a common problem on a car parked in a reserved spot. The water can run down the back window if you open the back glass before driving. Sometimes the water pooled up in the back hatch and it's been a problem for people.`


---------
Total Cost: $0.00020
Total Tokens: 394; Prompt Tokens: 306; Completion Tokens: 88
Post: `

---

In [21]:
# Cost of sending every post, wrapped in the summarization prompt, to each
# instruct model. The prompts are built once and reused for both models, and
# the per-model pipeline is written once and looped over.
full_prompts = reddit['text'].apply(generate_prompt)
for model_label, instruct_model in (
    ("babbage", InstructModels.BABBAGE),
    ("davinci", InstructModels.DAVINCI),
):
    # `prompt_costs` is reassigned per model, so after the loop it holds the
    # davinci total, as it did when the two blocks were written out by hand.
    prompt_costs = full_prompts.apply(
        lambda prompt: cost(prompt, model=instruct_model)
    ).sum()
    print(f"Cost for prompts with `{model_label}`: ${prompt_costs:.4f}")

Cost for prompts with `babbage`: $0.1087
Cost for prompts with `davinci`: $4.1225


In [25]:
# Estimated cost of the generated responses (this cell is about completions,
# not embeddings), priced for each instruct model from a token count of 100 * 1000.
# NOTE(review): presumably 100 tokens per response times 1000 posts; confirm
# that 1000 matches len(reddit).
response_tokens = 100 * 1000
for model_label, instruct_model in (
    ("babbage", InstructModels.BABBAGE),
    ("davinci", InstructModels.DAVINCI),
):
    print(f"Cost for responses (100 tokens) with `{model_label}`: ${cost(response_tokens, model=instruct_model):.4f}")

Cost for responses (100 tokens) with `babbage`: $0.0500
Cost for responses (100 tokens) with `davinci`: $2.0000


---

# Embeddings

In [28]:
# Sum the per-post embedding cost (ada embedding model) over the whole dataset.
per_post_embedding_cost = reddit['text'].apply(
    lambda text: cost(text, model=EmbeddingModels.ADA)
)
total_embeddings_cost = per_post_embedding_cost.sum()
print(f"Total cost to generate embeddings: ${total_embeddings_cost:.4f}")

Total cost to generate embeddings: $0.0735


In [11]:
# Request embeddings for the first ten posts. `posts` is a plain list here
# (it was a pandas Series in the completion cell above).
posts = reddit['text'].head(10).tolist()
responses = oai.text_embeddings(
    model=EmbeddingModels.ADA,
    inputs=posts,
)
responses

OpenAIResponses(responses=[OpenAIResponse(response_status=200, response_reason='OK', result=OpenAIEmbeddingResult(result={'object': 'list', 'data': [{'object': 'embedding', 'index': 0, 'embedding': [-0.012604503, 0.00089806254, 0.017458536, -0.017578388, -0.026380898, 0.013889589, -0.0121916775, -0.022186056, -0.017591706, 0.0044312184, 0.002948042, 0.02686031, -0.007956884, 0.022891855, 0.0097946245, -0.0037620412, 0.006924819, -0.0044112434, 0.015234603, -0.034544196, 0.0029963162, 0.006561932, 0.005613099, 0.0028664758, 0.008356392, -0.008782536, 0.02054807, -0.024196917, 0.015793916, -0.008609415, 0.01275099, 0.012917452, -0.047594815, -0.011219539, -0.004254769, -0.009042216, 0.0082631735, -0.026687188, 0.017365316, -0.009281921, 0.013223742, 0.0050804205, -0.0044678403, -0.02799225, -0.0130040115, 0.017458536, -0.029590286, 0.017937945, 0.0062756175, 0.016766053, 0.023238096, -0.0034790558, -0.014222514, -0.019615883, -0.0033092645, 0.008163297, -0.008549488, 0.015074799, 0.01423

In [12]:
# Print cost, token count, the source post and the embedding length for each
# response. strict=True makes zip raise if the two sequences differ in length.
for response, post in zip(responses, posts, strict=True):
    embedding_result = response.result
    print(f"\n\n---------\nTotal Cost: ${embedding_result.cost_total:.5f}")
    print(f"Total Tokens: {embedding_result.usage_total_tokens}; ")
    print(f"Post: `{post}`")
    print(f"Embedding Len: `{len(embedding_result.embedding)}`")



---------
Total Cost: $0.00005
Total Tokens: 117; 
Post: `2012 c30 spoiler holding water - Lately my car has been a magnet for for dings and other mishaps while parked in my reserved spot and have been parking elsewhere which is unfortunately under a tree. I've noticed after a night of rain when I leave the next day water runs down the back window when I accelerate. Opening the back glass before driving usually means water runs down the sides but I cannot seem to find where it's pooling up. <lb><lb>Anyone know if there's drains in the back hatch that might be clogged? Or is this normal?`
Embedding Len: `1536`


---------
Total Cost: $0.00011
Total Tokens: 280; 
Post: `About to purchase 2014 V60, anything I should be aware of? - My 2004 V50 is slowly but surely giving up, it feels like I replaced half of the replaceable parts and got the other half repaired within the last year and I think it's time to let him go, especially after the last week's ignition failure that's going to cost 

---