In [1]:
from __future__ import annotations
import logging
import os
import sys

import datasets as nlp_datasets
import pandas as pd
from sklearn.metrics import f1_score

from cappr import openai
sys.path.insert(1, os.path.join(sys.path[0], "..", ".."))
from utils import display_df

In [2]:
## Route INFO-and-above log records to stdout so that any server errors raised
## while hitting the OpenAI endpoints show up in the cell output
logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.StreamHandler(stream=sys.stdout)],
    format='%(asctime)s :: %(name)s :: %(levelname)s :: %(message)s',
)
logger = logging.getLogger(__name__)

In [3]:
## Pull the train split of the RAFT terms_of_service task and hold it in a
## DataFrame
df = pd.DataFrame(
    nlp_datasets.load_dataset('ought/raft', 'terms_of_service', split='train')
)

Downloading and preparing dataset raft/terms_of_service (download: 9.30 MiB, generated: 949.98 KiB, post-processed: Unknown size, total: 10.23 MiB) to C:/Users/kushd/.cache/huggingface/datasets/ought___raft/terms_of_service/1.1.0/79c4de1312c1e3730043f7db07179c914f48403101f7124e2fe336f6f54d9f84...


Downloading data files:   0%|          | 0/11 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/11 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/50 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5000 [00:00<?, ? examples/s]

Dataset raft downloaded and prepared to C:/Users/kushd/.cache/huggingface/datasets/ought___raft/terms_of_service/1.1.0/79c4de1312c1e3730043f7db07179c914f48403101f7124e2fe336f6f54d9f84. Subsequent calls will reuse this data.


In [4]:
## Number of rows (examples) in the split
df.shape[0]

50

In [5]:
## Peek at the first 5 rows
df.head(n=5)

Unnamed: 0,Sentence,ID,Label
0,"Crowdtangle may change these terms of service,...",0,2
1,You acknowledge that any reliance upon any suc...,1,1
2,Because the law may or may not recognize certa...,2,1
3,These requirements can be found on the website...,3,1
4,Fitbit will not seek its attorneys' fees and c...,4,1


In [9]:
def prompt(sentence: str) -> str:
    """
    Build the classification prompt for a single Terms of Service sentence.

    The prompt states the labeling task, lists the two criteria under which a
    sentence should be marked as unfair, embeds `sentence`, and ends with a
    question which asks for a Yes or No answer.

    Parameters
    ----------
    sentence : str
        the Terms of Service sentence to classify. It's inserted verbatim.

    Returns
    -------
    str
        the full prompt text
    """
    instructions = (
        'Label the sentence from a Terms of Service based on whether it is '
        'potentially unfair. If it seems clearly unfair, mark it as potentially '
        'unfair.\n'
        'Mark it as unfair if: 1) it has not been individually negotiated; and 2) '
        'contrary to the requirement of good faith, it causes a significant '
        'imbalance in the parties rights and obligations, to the detriment of the '
        'consumer.\n\n'
    )
    # The question previously read "potentiall unfair"; the typo is fixed here so
    # that the text sent to the model is well-formed English
    question = 'Is this sentence potentially unfair? Answer Yes or No:'
    return (
        f'{instructions}'
        f'Sentence from a Terms of Service: {sentence}\n\n'
        f'{question}'
    )

In [10]:
## Format every sentence into its classification prompt
df['prompt'] = df['Sentence'].map(prompt)

In [11]:
## Show one full prompt next to its label
display_df(
    df,
    columns=['prompt', 'Label'],
    num_rows=1,
)

Unnamed: 0,prompt,Label
0,"Label the sentence from a Terms of Service based on whether it is potentially unfair. If it seems clearly unfair, mark it as potentially unfair. Mark it as unfair if: 1) it has not been individually negotiated; and 2) contrary to the requirement of good faith, it causes a significant imbalance in the parties rights and obligations, to the detriment of the consumer. Sentence from a Terms of Service: Crowdtangle may change these terms of service, as described above, notwithstanding any provision to the contrary in any agreement between you and crowdtangle. Is this sentence potentiall unfair? Answer Yes or No:",2


In [12]:
## Fraction of examples per label, ordered by ascending label value
prior = df['Label'].value_counts(normalize=True).sort_index().to_numpy()
prior

array([0.82, 0.18])

In [13]:
## Cost noted for this cell: $0.28
## ask_if_ok=True presumably asks for confirmation before the requests are
## sent (TODO confirm against the cappr docs)
pred_probs = openai.classify.predict_proba(
    df['prompt'].tolist(),
    completions=('No', 'Yes'),
    model='text-davinci-003',
    prior=prior,
    ask_if_ok=True,
)

log-probs:   0%|          | 0/100 [00:00<?, ?it/s]

In [14]:
## Labels are shifted down by 1 before being compared with the argmax column
## index of pred_probs
f1_score(
    y_true=df['Label'] - 1,
    y_pred=pred_probs.argmax(axis=1),
    average='macro',
)

0.75