In [1]:
import openai

from fp_dataset_artifacts.utils import init_openai
from fp_dataset_artifacts.anli import map_finetune, get_response
from datasets import list_datasets, load_dataset, list_metrics, load_metric, concatenate_datasets

# Project helper from fp_dataset_artifacts.utils; presumably configures the
# OpenAI client (API key etc.) — TODO confirm against the helper's source.
init_openai()

# Load the ANLI dataset; the result is a DatasetDict keyed by split/round.
data = load_dataset('anli')
data

Reusing dataset anli (/home/x/.cache/huggingface/datasets/anli/plain_text/0.1.0/aabce88453b06dff21c201855ea83283bab0390bff746deadb30b65695755c0b)


  0%|          | 0/9 [00:00<?, ?it/s]

DatasetDict({
    train_r1: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 16946
    })
    dev_r1: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    test_r1: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    train_r2: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 45460
    })
    dev_r2: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    test_r2: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    train_r3: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 100459
    })
    dev_r3: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 12

In [2]:
# Run the project's `map_finetune` over the round-1 test split; per the output
# below this adds the 'prompt' and 'completion' columns.
test_r1 = data['test_r1'].map(map_finetune)
test_r1

Loading cached processed dataset at /home/x/.cache/huggingface/datasets/anli/plain_text/0.1.0/aabce88453b06dff21c201855ea83283bab0390bff746deadb30b65695755c0b/cache-2b8a4bc9533ab61a.arrow


Dataset({
    features: ['uid', 'premise', 'hypothesis', 'label', 'reason', 'prompt', 'completion'],
    num_rows: 1000
})

In [3]:
# Identifier of the fine-tuned model passed to `get_response` for every prompt.
model = 'curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14'

def map_response(x):
    """Query the fine-tuned model for one example and return its answer text.

    Args:
        x: A dataset row (mapping) containing a 'prompt' entry.

    Returns:
        dict: ``{'response': <text of the first completion choice>}``, or
        ``{'response': None}`` if the request or the response parsing fails
        (the error is printed so the failure is still visible).
    """
    try:
        response = get_response(x['prompt'], model)
        return {
            'response': response['choices'][0]['text']
        }
    # The original `except e:` evaluated the undefined name `e`, so any failure
    # surfaced as a NameError instead of reaching this best-effort fallback.
    except Exception as e:
        print(e)
        return {
            'response': None
        }

In [4]:
# Smoke-test the helper on a single example before mapping the whole split.
map_response(test_r1[0])

{'response': 'Entailment'}

In [5]:
# Keep only the columns needed for evaluation. Because `test_r1` is rebound
# here, a second run of this cell would ask for columns that are already gone
# and fail; filtering against the columns actually present keeps it re-runnable.
unused_columns = ['uid', 'reason', 'premise', 'hypothesis', 'label']
test = test_r1 = test_r1.remove_columns(
    [name for name in unused_columns if name in test_r1.column_names]
)
test

Dataset({
    features: ['prompt', 'completion'],
    num_rows: 1000
})

In [6]:
# Query the model for every test example. `Dataset.map` returns a new dataset
# and leaves `test` unchanged, so bind the result to a name instead of leaving
# it reachable only through the notebook's output history.
responses = test.map(map_response)
responses

  0%|          | 0/1000 [00:00<?, ?ex/s]

Dataset({
    features: ['prompt', 'completion', 'response'],
    num_rows: 1000
})

In [7]:
# NOTE(review): `test` only has the 'prompt' and 'completion' columns (`map`
# returns a new dataset rather than modifying `test`), so the file written
# here does not contain any model responses.
test.to_json('../results/anli_responses.jsonl')

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

472963

In [8]:
# Inspect the evaluation split as a DataFrame (prompt/completion only).
test.to_pandas()

Unnamed: 0,prompt,completion
0,Premise: Ernest Jones is a British jeweller an...,Entailment\n
1,Premise: Old Trafford is a football stadium in...,Entailment\n
2,Premise: Magnus is a Belgian joint dance proje...,Entailment\n
3,Premise: Shadowboxer is a 2005 crime thriller ...,Neutral\n
4,"Premise: Takaaki Kajita (梶田 隆章 , Kajita Takaak...",Contradiction\n
...,...,...
995,Premise: L'Histoire du soldat (The Soldier's T...,Entailment\n
996,Premise: Gianluca Cologna (born 17 May 1990) i...,Neutral\n
997,Premise: Fido Dido is a cartoon character crea...,Entailment\n
998,Premise: Aspects of Andrew Lloyd Webber is the...,Neutral\n


In [13]:
# Recover the mapped dataset from IPython's output cache (`_oh`), keyed by the
# execution count of the cell that ran `test.map(map_response)`.
# NOTE(review): fragile — this only works while that cell's execution count
# is 6 and its output is still cached; it breaks under a different run order.
responses = vars()['_oh'][6]
responses

Dataset({
    features: ['prompt', 'completion', 'response'],
    num_rows: 1000
})

In [15]:
# Persist prompts, gold completions and model responses to disk.
responses.to_json('../results/anli_responses.jsonl')

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

496780

In [16]:
# Switch to pandas for the scoring steps below.
df = responses.to_pandas()
df

Unnamed: 0,prompt,completion,response
0,Premise: Ernest Jones is a British jeweller an...,Entailment\n,Entailment
1,Premise: Old Trafford is a football stadium in...,Entailment\n,Entailment
2,Premise: Magnus is a Belgian joint dance proje...,Entailment\n,Entailment
3,Premise: Shadowboxer is a 2005 crime thriller ...,Neutral\n,Entailment
4,"Premise: Takaaki Kajita (梶田 隆章 , Kajita Takaak...",Contradiction\n,Contradiction
...,...,...,...
995,Premise: L'Histoire du soldat (The Soldier's T...,Entailment\n,Contradiction
996,Premise: Gianluca Cologna (born 17 May 1990) i...,Neutral\n,Neutral
997,Premise: Fido Dido is a cartoon character crea...,Entailment\n,Entailment
998,Premise: Aspects of Andrew Lloyd Webber is the...,Neutral\n,Entailment


In [22]:
# Exact-match correctness per row. Gold completions carry a trailing newline,
# so they are stripped before being compared with the model's response.
df['result'] = df['completion'].str.strip().eq(df['response'])
df['result']

0       True
1       True
2       True
3      False
4       True
       ...  
995    False
996     True
997     True
998    False
999    False
Name: result, Length: 1000, dtype: bool

In [23]:
# Accuracy = mean of the boolean 'result' column. Restrict the reduction to
# numeric/bool columns explicitly: reducing the string columns is what caused
# the warning shown below and raises on newer pandas versions.
df.mean(numeric_only=True)

  df.mean()


result    0.566
dtype: float64

In [25]:
from datasets import load_metric

# Load the F1 metric implementation shipped with the `datasets` library.
metric = load_metric('f1')

In [38]:
def label2int(x):
    """Translate an NLI label name into its integer class id.

    'Entailment' -> 0, 'Neutral' -> 1, 'Contradiction' -> 2. Any other value
    (including labels with surrounding whitespace) raises KeyError.
    """
    class_ids = dict(Entailment=0, Neutral=1, Contradiction=2)
    return class_ids[x]
    
# Encode gold labels (newline-stripped) and model outputs as integer class ids
# so they can be fed to the metric.
df['references'] = df['completion'].str.strip().map(label2int)
df['predictions'] = df['response'].map(label2int)

In [39]:
# Show the frame with the newly added integer label columns.
df

Unnamed: 0,prompt,completion,response,result,references,predictions
0,Premise: Ernest Jones is a British jeweller an...,Entailment\n,Entailment,True,0,0
1,Premise: Old Trafford is a football stadium in...,Entailment\n,Entailment,True,0,0
2,Premise: Magnus is a Belgian joint dance proje...,Entailment\n,Entailment,True,0,0
3,Premise: Shadowboxer is a 2005 crime thriller ...,Neutral\n,Entailment,False,1,0
4,"Premise: Takaaki Kajita (梶田 隆章 , Kajita Takaak...",Contradiction\n,Contradiction,True,2,2
...,...,...,...,...,...,...
995,Premise: L'Histoire du soldat (The Soldier's T...,Entailment\n,Contradiction,False,0,2
996,Premise: Gianluca Cologna (born 17 May 1990) i...,Neutral\n,Neutral,True,1,1
997,Premise: Fido Dido is a cartoon character crea...,Entailment\n,Entailment,True,0,0
998,Premise: Aspects of Andrew Lloyd Webber is the...,Neutral\n,Entailment,False,1,0


In [42]:
# F1 over the three classes, averaged with per-class support as weights.
metric.compute(
    references=df['references'].tolist(),
    predictions=df['predictions'].tolist(),
    average='weighted',
)

{'f1': 0.5639837240986442}