## Scoring AUT Data with Open Creativity Scoring

<a href="https://colab.research.google.com/github/massivetexts/llm_aut_study/blob/main/notebooks/OCS AUT Scoring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/massivetexts/open-scoring
!cd open-scoring && python setup.py develop

In [None]:
import numpy as np
import random
import open_scoring as ocs
from gensim.models import KeyedVectors
import gensim.downloader as api
import os

from pathlib import Path
import json
import pandas as pd

In [None]:
#@title Download Models and Initiate Scorer
scorer = ocs.scoring.AUT_Scorer()
use_glove = True #@param {type:'boolean'}
if use_glove:
    print('Downloading GloVe')
    scorer._models['glove'] = api.load("glove-wiki-gigaword-300")

Downloading GloVe


In [23]:
#@title Params
base_dir = Path('drive/MyDrive/Grants/MOTES/') #@param { type: 'raw' }
gt_dir = base_dir / 'Data' / 'aut_ground_truth' #@param { type: 'raw' }
print("GT options", [x.name for x in gt_dir.glob('*tar.gz')])
data_subdir = "gt_main2" #@param ['gt_main2', 'gt_byprompt', 'gt_byparticipant']

!cp "{gt_dir}/{data_subdir}.tar.gz" .
!rm -rf data
!tar -xf {data_subdir}.tar.gz
data_dir = Path('data') / data_subdir
evaldir = base_dir / 'Data' / 'evaluation' / data_subdir #@param { type: 'raw' }
!mkdir -p {evaldir}
random_seed = 987 #@param {type:'number'}

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)

set_seed(random_seed)

GT options ['gt_main.tar.gz', 'gt_bypart3.tar.gz', 'gt_byprompt4.tar.gz', 'gt_byparticipant.tar.gz', 'gt_byprompt.tar.gz', 'all.tar.gz', 'gt_main2.tar.gz', 'gt_main_std.tar.gz']


In [24]:
conditions = {
    'ocs-main': dict(term_weighting=True, exclude_target=True, stopword=True),
    'ocs-stop': dict(term_weighting=False, exclude_target=False, stopword=True),
    'ocs-weight': dict(term_weighting=True, exclude_target=False, stopword=False),
    'ocs-target': dict(term_weighting=False, exclude_target=True, stopword=False),
}

condition = 'ocs-main' #@param ['ocs-main', 'ocs-stop', 'ocs-weight', 'ocs-target']

y_pred = []
all_rows = []
nans = 0

def clean(x):
    x = x.replace('paperclip', 'paper clip')
    return x

for x in (data_dir / 'test').iterdir():
    y = json.loads(x.read_text())
    score = scorer.originality(clean(y['prompt']), y['response'], **conditions[condition])
    if score:
        all_rows.append(y)
        y_pred.append(score)
    else:
        nans += 1
print("Unscorable:", nans)
data = pd.DataFrame(all_rows)
data['predicted'] = y_pred
data.sample(10)

Unscorable: 26


Unnamed: 0,src,question,prompt,response,id,target,participant,response_num,count,predicted
277,betal18,What is a surprising use for a ROPE?,rope,lawnmower,betal18_rope-074d,3.3,betal182065,,,0.877061
2592,hmsl,What is a surprising use for a PAPERCLIP?,paperclip,Chewed on by a baby,hmsl_paperclip-69c1,2.5,hmslqZwO9maC,5.0,,0.83817
545,bs12,What is a surprising use for a BRICK?,brick,anchoring a boat,bs12_brick-bbb1,1.4,bs1245,,,0.947826
1331,snb17,What is a surprising use for a BOX?,box,garbage can,snb17_box-617f,1.1,snb17107,,3.0,0.78291
2854,snbmo09,What is a surprising use for a BOX?,box,transport stuff in,snbmo09_box-3753,1.0,snbmo09172,8.0,,0.837086
360,snbmo09,What is a surprising use for a BRICK?,brick,dice,snbmo09_brick-f77c,3.5,snbmo09163,6.0,,0.958768
2726,betal18,What is a surprising use for a ROPE?,rope,tracking system in order to avoid getting lost...,betal18_rope-3140,2.4,betal182162,,,0.884128
190,snb17,What is a surprising use for a BOX?,box,hide alcohol,snb17_box-4722,1.7,snb1738,,,0.931555
847,hmsl,What is a surprising use for a PAPERCLIP?,paperclip,x-mas orniment hook,hmsl_paperclip-2daf,2.5,hmslK883nL64,4.0,,0.885338
244,betal18,What is a surprising use for a ROPE?,rope,collect shells with,betal18_rope-f572,2.4,betal182072,,,0.946163


In [25]:
data['src'] = data['id'].apply(lambda x: x.split('_')[0].split('-')[0])
data.groupby('src')[['predicted', 'target']].corr()['target']
data['model'] = condition

In [26]:
data.corr().loc['target', 'predicted']

0.2561158316602781

In [28]:
output = data[['id', 'model', 'participant', 'prompt', 'target', 'predicted', 'src']]
output.to_csv(evaldir / f'{condition}.csv')