In [20]:
import sys
sys.path.insert(0,'..')
from llmdh import *
import math

In [3]:
# Quick sanity check: LLM.generate_openai should return a completion for a trivial prompt.
LLM.generate_openai('Hello ')

'Hello! How can I assist you today?'

In [34]:
@classmethod
def generate_openai_logprobs(
    cls,
    user_prompt,
    *args,
    model=LLM_DEFAULT_MODEL_OPENAI,
    verbose=False,
    max_tokens=MAX_TOKENS,
    name=None,
    temp=1.0,
    top_logprobs=5,
    **kwargs,
):
    """Prompt an OpenAI chat model and return the candidate tokens at each position.

    Args:
        user_prompt: Prompt passed to ``cls.format_prompt`` together with
            ``*args`` and ``**kwargs``.
        model: Model identifier sent to the chat-completions endpoint.
        verbose: Forwarded to ``logmap`` as ``announce``.
        max_tokens: Upper bound on the number of generated tokens.
        name: Currently unused; kept for signature compatibility.
        temp: Sampling temperature.
        top_logprobs: Number of most-likely alternatives requested per
            generated token (previously hard-coded to 5).

    Returns:
        A list of dicts, one per (generated token, candidate) pair, with keys
        ``line_i``, ``token_i``, ``token_str``, ``choice_i``, ``choice_str``,
        ``logprob`` and ``prob``. Indices are 1-based. ``prob`` is
        ``exp(logprob) * 100``, i.e. a percentage. The list is empty when the
        request fails or the response carries no logprobs; rows collected
        before a mid-parse failure are still returned.

    Note:
        ``line_i`` is advanced by the number of ``'\\n'`` characters in the
        generated token *before* that token's rows are emitted, so a token
        containing newlines is attributed to the line that follows them.
    """
    res = []
    with logmap(f"prompting LLM model {model}", announce=verbose):
        prompt = cls.format_prompt(user_prompt, *args, model=model, **kwargs)
        try:
            chat_completion = cls.openai_api().chat.completions.create(
                messages=prompt,
                model=model,
                max_tokens=max_tokens,
                logprobs=True,
                top_logprobs=top_logprobs,
                temperature=temp,
            )
            # Only the first choice is inspected.
            logprobs = chat_completion.choices[0].logprobs
            word_probs = getattr(logprobs, 'content', None)
            if not word_probs:
                # Report the real problem instead of letting the loop below
                # fail with an opaque "'NoneType' object is not iterable".
                logger.error(f"no token logprobs returned by model {model}")
                return res

            line_i = 0
            for token_i, word_d in enumerate(word_probs, start=1):
                chosentoken = word_d.token
                line_i += chosentoken.count('\n')
                for choice_i, prob_d in enumerate(word_d.top_logprobs, start=1):
                    res.append({
                        'line_i': line_i + 1,
                        'token_i': token_i,
                        'token_str': chosentoken,
                        'choice_i': choice_i,
                        'choice_str': prob_d.token,
                        'logprob': prob_d.logprob,
                        'prob': math.exp(prob_d.logprob) * 100,
                    })
        except Exception as e:
            # Deliberately best-effort: log and return whatever was collected.
            logger.error(e)
        return res
        

# Monkey-patch: attach the classmethod defined in this cell to LLM so it can be
# called as LLM.generate_openai_logprobs(...).
LLM.generate_openai_logprobs = generate_openai_logprobs

In [37]:
# Example call: `res` is a list of dicts, one per candidate token at each generated position.
res = LLM.generate_openai_logprobs('Write a poem with exactly 4 lines.')

In [55]:
def generate_poem_logprobs(prompt='Write a poem.'):
    """Generate text for ``prompt`` and tabulate token log-probs with in-line positions.

    Calls ``LLM.generate_openai_logprobs(prompt)`` and loads the rows into a
    DataFrame, then adds two columns:

    * ``line_token_i``   -- 1-based position of the generated token among the
      tokens of its line that contain at least one alphabetic character.
    * ``line_token_i_r`` -- the same position counted from the end of the line
      (``-1`` is the last such token, ``-2`` the one before it, ...).

    Generated tokens with no alphabetic character (punctuation, whitespace,
    newlines) get ``0`` in both columns. All candidate rows of a token share
    that token's position values.

    Args:
        prompt: Prompt forwarded to ``LLM.generate_openai_logprobs``.

    Returns:
        pandas.DataFrame with the upstream columns plus ``line_token_i`` and
        ``line_token_i_r``. If the upstream call yields no rows, an empty
        frame with the full set of columns is returned.
    """
    res = LLM.generate_openai_logprobs(prompt)
    df = pd.DataFrame(res)
    if df.empty:
        # The upstream call returns [] when the request fails; without this
        # guard `df.choice_i` below raises AttributeError on a column-less frame.
        return pd.DataFrame(columns=[
            'line_i', 'token_i', 'token_str', 'choice_i', 'choice_str',
            'logprob', 'prob', 'line_token_i', 'line_token_i_r',
        ])

    # One row per generated token: choice_i == 1 is the top-ranked candidate row.
    chosen = df[df.choice_i == 1]
    has_letter = [any(ch.isalpha() for ch in tok) for tok in chosen.token_str]
    words = chosen.loc[has_letter]

    forward, backward = {}, {}
    for _, line_df in words.groupby('line_i'):
        n_words = len(line_df)
        for pos, tok_i in enumerate(line_df.token_i, start=1):
            forward[tok_i] = pos
            backward[tok_i] = pos - n_words - 1  # -1 for the last word of the line

    df['line_token_i'] = [forward.get(tok_i, 0) for tok_i in df.token_i]
    df['line_token_i_r'] = [backward.get(tok_i, 0) for tok_i in df.token_i]
    return df

In [56]:
# Generate a poem with the default prompt and keep the annotated log-prob table.
df=generate_poem_logprobs()

In [58]:
# Candidate tokens at the last alphabetic token of every line (line_token_i_r == -1).
df[df.line_token_i_r==-1]

Unnamed: 0,line_i,token_i,token_str,choice_i,choice_str,logprob,prob,line_token_i,line_token_i_r
30,1,7,night,1,night,-0.009195,99.084755,7,-1
31,1,7,night,2,morning,-5.287839,0.505267,7,-1
32,1,7,night,3,dawn,-6.574666,0.139527,7,-1
33,1,7,night,4,early,-6.961297,0.094787,7,-1
34,1,7,night,5,evening,-7.463861,0.057344,7,-1
...,...,...,...,...,...,...,...,...,...
795,24,160,more,1,more,-0.009937,99.011204,5,-1
796,24,160,more,2,more,-4.616478,0.988756,5,-1
797,24,160,more,3,",",-16.059160,0.000011,5,-1
798,24,160,more,4,-more,-16.320387,0.000008,5,-1


In [60]:
# All candidate rows recorded for generated token 160, as plain dicts.
df[df.token_i==160].to_dict(orient='records')

[{'line_i': 24,
  'token_i': 160,
  'token_str': 'more',
  'choice_i': 1,
  'choice_str': 'more',
  'logprob': -0.009937173,
  'prob': 99.0112037564092,
  'line_token_i': 5,
  'line_token_i_r': -1},
 {'line_i': 24,
  'token_i': 160,
  'token_str': 'more',
  'choice_i': 2,
  'choice_str': ' more',
  'logprob': -4.616478,
  'prob': 0.9887558790145988,
  'line_token_i': 5,
  'line_token_i_r': -1},
 {'line_i': 24,
  'token_i': 160,
  'token_str': 'more',
  'choice_i': 3,
  'choice_str': ',',
  'logprob': -16.05916,
  'prob': 1.0607069830661624e-05,
  'line_token_i': 5,
  'line_token_i_r': -1},
 {'line_i': 24,
  'token_i': 160,
  'token_str': 'more',
  'choice_i': 4,
  'choice_str': '-more',
  'logprob': -16.320387,
  'prob': 8.16856902793902e-06,
  'line_token_i': 5,
  'line_token_i_r': -1},
 {'line_i': 24,
  'token_i': 160,
  'token_str': 'more',
  'choice_i': 5,
  'choice_str': '.',
  'logprob': -16.553637,
  'prob': 6.469143657969858e-06,
  'line_token_i': 5,
  'line_token_i_r': -1}]