In [1]:
# Switch the kernel's working directory to the project root.
# NOTE(review): /code is a hard-coded absolute path — presumably a container mount; confirm.
%cd /code

/code


In [20]:
from pydantic import BaseModel
from pydantic import ValidationError


import pytest


class AppConfig(BaseModel):
    """Application settings that pydantic validates and coerces on construction."""

    # Optional flag; bool-like strings such as 'False' are coerced to bool.
    debug: bool = False
    # Optional; falls back to 100 when the input omits it.
    max_users: int = 100
    # Required: there is no default, so it must be supplied.
    db_url: str


config_data = {
    "debug": True,
    "db_url": "postgresql://localhost/mydb",
}

app_config = AppConfig(**config_data)

# Show the parsed values; max_users was not supplied, so its default is used.
print(app_config.debug)
print(app_config.max_users)
print(app_config.db_url)

# The string 'False' must be coerced to the real boolean False (not merely
# something falsy), hence the identity check.
app_config = AppConfig(debug='False', db_url="postgresql://localhost/mydb")
assert app_config.debug is False

# 'asdf' has no boolean interpretation, so construction must be rejected,
# and the reported error must point at the `debug` field.
with pytest.raises(ValidationError) as exc_info:
    AppConfig(debug='asdf', db_url="postgresql://localhost/mydb")
assert exc_info.value.errors()[0]['loc'] == ('debug',)

True
100
postgresql://localhost/mydb


In [22]:
from dataclasses import dataclass

@dataclass
class AppConfig:
    """Plain-dataclass version of the application settings.

    The annotations are not enforced at runtime, so values are stored
    exactly as they are passed in.
    """

    # Required field (no default), so it has to precede the defaulted ones.
    db_url: str
    debug: bool = False
    max_users: int = 100


# A dataclass accepts the non-boolean string 'asdf' for `debug` without
# complaint, whereas the pydantic model rejects it with a ValidationError.
app_config = AppConfig(db_url="postgresql://localhost/mydb", debug='asdf')


---

In [6]:
from source.service.openai import OpenAI, InstructModels
import source.config.config as config
# Jupyter already runs its own event loop and openai.text_completion calls
# asynchronous functions, so nested event loops have to be enabled first.
import nest_asyncio
nest_asyncio.apply()


# Build the client from the token held in the project config module.
oai = OpenAI(config.OPENAI_TOKEN)

# Request a completion for a single prompt, with logprobs=5 so that the
# response carries per-token log-probability data.
# NOTE: the misspelled name `repsonse` is referenced by later cells, so it is
# kept as-is here.
repsonse = oai.text_completion(
    prompts=["Describe the color blue."],
    model=InstructModels.BABBAGE,
    max_tokens=400,
    logprobs=5,
)
repsonse

OpenAIResponses(responses=[OpenAIResponse(response_status=200, response_reason='OK', result=OpenAIInstructResult(result={'id': 'cmpl-7A25FXphA0JVzTZAj6mDtxY3xa1IK', 'object': 'text_completion', 'created': 1682625645, 'model': 'text-babbage-001', 'choices': [{'text': '\n\nBlue is a color that is typically seen in the sky. It is a color that is typically seen in the sky because it is the color of the ocean.', 'index': 0, 'logprobs': {'tokens': ['\n', '\n', 'Blue', ' is', ' a', ' color', ' that', ' is', ' typically', ' seen', ' in', ' the', ' sky', '.', ' It', ' is', ' a', ' color', ' that', ' is', ' typically', ' seen', ' in', ' the', ' sky', ' because', ' it', ' is', ' the', ' color', ' of', ' the', ' ocean', '.'], 'token_logprobs': [-0.007235635, -0.00019607085, -0.4514989, -0.010052814, -0.15963839, -0.093085915, -0.70885044, -0.26145902, -0.9423441, -0.770603, -0.12273231, -0.0056997957, -0.010831532, -0.75065094, -0.32411826, -0.093435384, -0.40164256, -0.9365042, -0.032318383, -0.1

In [10]:
# Raw API payload (a dict) of the first response in the batch.
repsonse[0].result.result


{'id': 'cmpl-7A25FXphA0JVzTZAj6mDtxY3xa1IK',
 'object': 'text_completion',
 'created': 1682625645,
 'model': 'text-babbage-001',
 'choices': [{'text': '\n\nBlue is a color that is typically seen in the sky. It is a color that is typically seen in the sky because it is the color of the ocean.',
   'index': 0,
   'logprobs': {'tokens': ['\n',
     '\n',
     'Blue',
     ' is',
     ' a',
     ' color',
     ' that',
     ' is',
     ' typically',
     ' seen',
     ' in',
     ' the',
     ' sky',
     '.',
     ' It',
     ' is',
     ' a',
     ' color',
     ' that',
     ' is',
     ' typically',
     ' seen',
     ' in',
     ' the',
     ' sky',
     ' because',
     ' it',
     ' is',
     ' the',
     ' color',
     ' of',
     ' the',
     ' ocean',
     '.'],
    'token_logprobs': [-0.007235635,
     -0.00019607085,
     -0.4514989,
     -0.010052814,
     -0.15963839,
     -0.093085915,
     -0.70885044,
     -0.26145902,
     -0.9423441,
     -0.770603,
     -0.12273231,
   

In [21]:
# Pull the log-probability section out of the first choice of the first response.
first_choice = repsonse[0].result.result['choices'][0]
log_probs = first_choice['logprobs']
log_probs

{'tokens': ['\n',
  '\n',
  'Blue',
  ' is',
  ' a',
  ' color',
  ' that',
  ' is',
  ' typically',
  ' seen',
  ' in',
  ' the',
  ' sky',
  '.',
  ' It',
  ' is',
  ' a',
  ' color',
  ' that',
  ' is',
  ' typically',
  ' seen',
  ' in',
  ' the',
  ' sky',
  ' because',
  ' it',
  ' is',
  ' the',
  ' color',
  ' of',
  ' the',
  ' ocean',
  '.'],
 'token_logprobs': [-0.007235635,
  -0.00019607085,
  -0.4514989,
  -0.010052814,
  -0.15963839,
  -0.093085915,
  -0.70885044,
  -0.26145902,
  -0.9423441,
  -0.770603,
  -0.12273231,
  -0.0056997957,
  -0.010831532,
  -0.75065094,
  -0.32411826,
  -0.093435384,
  -0.40164256,
  -0.9365042,
  -0.032318383,
  -0.14012349,
  -0.66834843,
  -0.34360558,
  -0.66507566,
  -0.07923324,
  -2.6275558,
  -0.83135515,
  -0.027726602,
  -0.112598486,
  -0.700335,
  -0.34151262,
  -0.019613776,
  -0.19097842,
  -1.4488124,
  -0.13821945],
 'top_logprobs': [{'\n': -0.007235635,
   'A': -11.328336,
   ' Blue': -8.82872,
   'The': -11.672349,
   '\n\n

In [24]:
# List the fields available in the logprobs payload.
log_probs.keys()

dict_keys(['tokens', 'token_logprobs', 'top_logprobs', 'text_offset'])

In [23]:
# Sanity check: concatenating the tokens should reproduce the completion text.
''.join(log_probs['tokens'])

'\n\nBlue is a color that is typically seen in the sky. It is a color that is typically seen in the sky because it is the color of the ocean.'

In [42]:
import pandas as pd
import numpy as np

# Per-token probability of the sampled completion: exp(logprob) turns each
# log-probability into a probability. The first two rows (the leading '\n'
# tokens) are dropped, and the result is rendered with in-cell bars.
(
    pd.DataFrame({
        'tokens': log_probs['tokens'],
        'log_probs': log_probs['token_logprobs'],
    })
    .assign(prob=lambda frame: np.exp(frame['log_probs']))
    .drop(columns='log_probs')
    .iloc[2:]
    .style
    .bar(color='lightgray')
    .hide(axis='index')
)

tokens,prob
Blue,0.636673
is,0.989998
a,0.852452
color,0.911115
that,0.49221
is,0.769927
typically,0.389713
seen,0.462734
in,0.8845
the,0.994316


In [52]:
# Candidate tokens considered at position 3 (the ' is' token), ordered from
# most to least likely.
sorted(
    log_probs['top_logprobs'][3].items(),
    key=lambda pair: pair[1],
    reverse=True,
)

[(' is', -0.010052814),
 (' typically', -5.672439),
 (' can', -6.2759027),
 (' usually', -6.7093377),
 (' has', -6.880922)]

In [25]:
# One offset per token.
# NOTE(review): these look like character offsets into prompt + completion
# (the first value, 24, equals the length of the prompt) — confirm against the API docs.
log_probs['text_offset']

[24,
 25,
 26,
 30,
 33,
 35,
 41,
 46,
 49,
 59,
 64,
 67,
 71,
 75,
 76,
 79,
 82,
 84,
 90,
 95,
 98,
 108,
 113,
 116,
 120,
 124,
 132,
 135,
 138,
 142,
 148,
 151,
 155,
 161]

---