In [None]:
!python -m pip install "cappr[openai]"

## Utilities

Copied over from https://github.com/kddubey/cappr/blob/main/demos/utils.py so that this
notebook can be run anywhere.

In [2]:
from __future__ import annotations
from typing import Optional, Union

from IPython.display import display
import pandas as pd


def display_df(
    df: pd.DataFrame,
    columns: Optional[list[str]] = None,
    num_rows: Union[int, None] = 3,
):
    """
    Displays `df.head(num_rows)[columns]` without truncating columns. If
    possible, render any newlines.

    Parameters
    ----------
    df : pd.DataFrame
        frame to display
    columns : Optional[list[str]], optional
        columns to show, by default all of `df.columns`
    num_rows : Union[int, None], optional
        number of leading rows to show, or None to show every row. By default 3
    """
    if columns is None:
        columns = df.columns
    if num_rows is None:
        num_rows = len(df)
    df_head = df.head(num_rows)[columns]
    df_head_styled = df_head.style
    ## `None` means "no limit". The older spelling, -1, has been deprecated
    ## since pandas 1.0 and is rejected by pandas 2
    with pd.option_context("display.max_colwidth", None):
        ## I'm not sure why try-except doesn't work w/ display(), so instead
        ## check the necessary uniqueness condition before running it. The
        ## condition applies to the frame which is actually styled, and to both
        ## of its axes
        if df_head.index.is_unique and df_head.columns.is_unique:
            display(
                df_head_styled.set_properties(
                    **{"text-align": "left", "white-space": "pre-wrap"}
                )
            )
        else:
            ## `Styler.apply` and `.applymap` are not compatible with non-unique
            ## index or columns
            display(df_head_styled)


def remove_prefix(string: str, prefix: str) -> str:
    """
    Returns `string` without its leading `prefix`. If `string` doesn't start
    with `prefix`, it's returned unchanged.
    """
    has_prefix = string.startswith(prefix)
    return string[len(prefix) :] if has_prefix else string

## Few-shot prompt

In [3]:
import pandas as pd

from cappr import openai

In [4]:
## Load test.csv and rename its columns to the names used by the cells below
df = (
    pd.read_csv("https://s.cleanlab.ai/stanford-politeness/fine-tuning/test.csv")
    .rename(columns={'prompt': 'text', 'completion': 'class'})
)
display_df(df)

Unnamed: 0,text,class
0,Excuse me? What are you accusing me of doing?,impolite
1,I don't understand Tasc0??,impolite
2,"Well, you leaved me also curious. Why was it not appropriate?",impolite


In [5]:
## Distinct labels, in sorted order
class_labels_sorted = df['class'].sort_values()
classes = class_labels_sorted.unique().tolist()
classes

['impolite', 'neutral', 'polite']

In [6]:
## Number of rows in df
df.shape[0]

480

Use the training data to select one example per class. The cell below samples one
example per class with a fixed `random_state`; I figured that the resulting examples
seem good enough. Maybe there should be a package which auto-selects the examples via
cross-validation.

In [7]:
## train.csv is the pool from which the few-shot examples are drawn
_train = pd.read_csv(
    filepath_or_buffer="https://s.cleanlab.ai/stanford-politeness/fine-tuning/train.csv"
)

In [8]:
## Draw one row per value of `completion`. The fixed seed keeps the draw
## reproducible
train_by_class = _train.groupby('completion')
examples_df = train_by_class.sample(n=1, random_state=42)
display_df(examples_df, num_rows=None)

Unnamed: 0,prompt,completion
1144,"I didn't see your internal link, I put it back. isn't orphaned, what are you talking about?",impolite
1625,"One to go. Shouldn't ""now welcome"" be ""not welcome""?",neutral
204,"Greetings, and thank you for your cogent remarks at . Would you be interested in voting in the straw poll as well?",polite


In [9]:
## Format every example as a "Text: ..." line followed by a "Tone: ..." line,
## and separate the examples with a blank line
example_blocks = []
for record in examples_df.to_dict('records'):
    example_blocks.append(f"Text: {record['prompt']}\nTone: {record['completion']}")
examples_str = '\n\n'.join(example_blocks)
print(examples_str)

Text: I didn't see your internal link, I put it back.  <url> isn't orphaned, what are you talking about?
Tone: impolite

Text: One to go. Shouldn't "now welcome" be "not welcome"?
Tone: neutral

Text: Greetings, and thank you for your cogent remarks at <url>.  Would you be interested in voting in the straw poll <url> as well?
Tone: polite


In [10]:
def prompt_mc(text: str) -> str:
    """
    Returns a multiple-choice prompt for `text`: the few-shot examples from the
    module-level `examples_str`, then `text` in double quotes, then the three
    lettered tone options and the instruction to answer with a letter.
    """
    pieces = [
        'Here are examples of texts and their tones.\n\n',
        f'{examples_str}\n\n',
        f'The tone of this piece of text:\n',
        f'"{text}"\n',
        'is\n',
        'A. impolite\n',
        'B. neutral\n',
        'C. polite\n\n',
        'Answer A or B or C.',
    ]
    return ''.join(pieces)

In [11]:
## One multiple-choice prompt per text
df['prompt_mc'] = list(map(prompt_mc, df['text']))
display_df(df, columns=['prompt_mc', 'class'], num_rows=1)

Unnamed: 0,prompt_mc,class
0,"Here are examples of texts and their tones. Text: I didn't see your internal link, I put it back. isn't orphaned, what are you talking about? Tone: impolite Text: One to go. Shouldn't ""now welcome"" be ""not welcome""? Tone: neutral Text: Greetings, and thank you for your cogent remarks at . Would you be interested in voting in the straw poll as well? Tone: polite The tone of this piece of text: ""Excuse me? What are you accusing me of doing?"" is A. impolite B. neutral C. polite Answer A or B or C.",impolite


In [12]:
## $0.17
## The prompt asks for a single letter, so a handful of tokens is enough
choices_chat = openai.api.gpt_chat_complete(
    df['prompt_mc'],
    ask_if_ok=True,
    max_tokens=5,
)

Completing chats:   0%|          | 0/480 [00:00<?, ?it/s]

In [13]:
def process_completion(completion: str, class_chars,
                       prefix_remove: str='Answer ', strip_chars: str=' \n.',
                       default=-1) -> int:
    """
    Returns the index in `class_chars` of the choice letter which `completion`
    starts with, or `default` if `completion` doesn't start with one.

    Parameters
    ----------
    completion : str
        raw completion text
    class_chars
        single-character choice labels, e.g., `('A', 'B', 'C')`. Matching is
        case-insensitive
    prefix_remove : str, optional
        prefix which is dropped from `completion` before looking for the
        letter, by default 'Answer '
    strip_chars : str, optional
        characters stripped from both ends of `completion` (after the prefix is
        dropped), by default ' \\n.'
    default : optional
        value returned when no choice letter is found, by default -1

    Returns
    -------
    int
        index of the matched element of `class_chars`, else `default`

    Raises
    ------
    ValueError
        if an element of `class_chars` isn't exactly one character
    """
    if any(len(class_char) != 1 for class_char in class_chars):
        raise ValueError('Elements of class_chars must be a single character.')
    completion = remove_prefix(completion, prefix_remove)
    completion_stripped = completion.strip(strip_chars)
    if not completion_stripped:
        return default
    ## The letter must stand on its own, e.g., "A", "A.", "A) impolite".
    ## Otherwise the first letter of an arbitrary word would be read as a
    ## choice: "Answer: B" would be read as A, "Based on ..." as B
    if len(completion_stripped) > 1 and completion_stripped[1].isalnum():
        return default
    completion_char_lower = completion_stripped[0].lower()
    class_chars_lower = [class_char.lower() for class_char in class_chars]
    try:
        return class_chars_lower.index(completion_char_lower)
    except ValueError:
        return default

In [14]:
## Pull the reply text out of every returned choice, and index it like df
chat_contents = []
for choice in choices_chat:
    chat_contents.append(choice['message']['content'])
completions_chat = pd.Series(chat_contents, index=df.index)

In [15]:
## Parse every reply into an index into `classes`, then look up the label.
## Replies which can't be parsed fall back to index 1 of `classes`
pred_class_idxs = [
    process_completion(completion, ('A', 'B', 'C'), default=1)
    for completion in completions_chat
]
pred_classes_chat = pd.Series(
    [classes[class_idx] for class_idx in pred_class_idxs],
    index=df.index,
)

In [16]:
## Fraction of predictions which equal the label
is_correct_chat = pred_classes_chat == df['class']
is_correct_chat.mean()

0.7020833333333333

In [17]:
## rough cost per classification in cents: the dollar cost per classification,
## 0.17/len(df), times 100
0.17/len(df) * 100

0.035416666666666666

In [18]:
# def prompt(text: str) -> str:
#     return (f'{examples_str}\n\n'
#             f'Text: {text}\n'
#              'Tone:')

# df['prompt'] = [prompt(text) for text in df['text']]
# display_df(df, columns=['prompt', 'class'], num_rows=1)

# ## cost: $4.2
# preds = (openai.classify
#          .predict(prompts=df['prompt'].tolist(),
#                   completions=classes,
#                   model='text-davinci-003',
#                   ask_if_ok=True))
# (pd.Series(preds, index=df.index) == df['class']).mean()
# ## accuracy: 0.70625