In [2]:
# IMPORTS
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import pandas as pd

In [3]:
# Load universal sentence encoder model
# model details: https://www.kaggle.com/models/google/universal-sentence-encoder/tensorFlow2/qa

module = hub.load('https://www.kaggle.com/models/google/universal-sentence-encoder/TensorFlow2/qa/2')


I0000 00:00:1732536229.422936 1605594 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6216 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:01:00.0, compute capability: 8.6


In [4]:
# creates a USE score for each phrase pair in a df
# promptcol: the prompt phrase
# origcol: the response phrase (in our case, from the Enron set)
# score_col: name of column in the df to save the score

def use_rate(df, promptcol, origcol, score_col, use_contexts=False):
    use_scores=[]
    for idx, row in df.iterrows():
        questions=[]
        questions.append(row[promptcol])
        responses=[row[origcol]]
        if not use_contexts:
            response_contexts=responses
        else:
            response_contexts=[]
            response_contexts.append(row[promptcol.split("_")[0]+"_story"])
        try:
            question_embeddings = module.signatures['question_encoder'](
                    tf.constant(questions))
            response_embeddings = module.signatures['response_encoder'](
                    input=tf.constant(responses),
                    context=tf.constant(response_contexts))
            result = np.inner(question_embeddings['outputs'], response_embeddings['outputs'])
            #print(result[0][0], np.linalg.norm(question_embeddings['outputs'][0]), np.linalg.norm(response_embeddings['outputs'][0]))
            #print(result[0][0])
            use_scores.append(result[0][0])
        except:
            print(row[promptcol], row[origcol])
            use_scores.append(-1)
    
    df[score_col]=use_scores
    return df

In [5]:
# read human scoring data
# prompts A & B generated by Luis and Andreas
# prompt preferences provided by Ioulia and Florian
# preference tiebreakers provided by Anna

humandf = pd.read_csv('Human Context.csv')
humandf.head(2)

Unnamed: 0,ID,Prompt A,Prompt B,original enron response,Florian,Ioulia,Anna,Annas preference
0,315,When can you give me some feedback on my article?,I hope you had time to read my report.,I will take a look at this today.,A,A,,
1,192,"Sorry I missed your call, how can I help?","Hey, just to let you know, my team made good p...",We need to talk about this month.,B,A,A,Strong


In [6]:
# generate USE scores for generating persons A and B
humandf = use_rate(humandf, 'Prompt A', 'original enron response', 'Prompt_A_use_score')
humandf = use_rate(humandf, 'Prompt B', 'original enron response', 'Prompt_B_use_score')

nan Ava, LA was Socal meeting.
nan Or if he comes down I-10N then?
nan Joel Ephross on issues reacquiring economic interests/swaps from Jedi2/Whitewing.
nan Coal switching to NG and power guys stimulating demand.


In [7]:
# save USE scores for generating persons A and B
humandf.to_csv('Human Context - rated.csv')

In [8]:
# analyse results

def select_human_prompt(row):
    try:
        if pd.isnull(row['Anna']):
            if row['Ioulia'] == 'A':
                return row['Prompt A']
            else:
                return row['Prompt B']
        else:
            return row['Prompt '+row['Anna']]
    except:
        print(row)

def select_human_prompt_use(row):
    try:
        if row['Prompt_A_use_score']>=row['Prompt_B_use_score']:
            return row['Prompt A']
        else:
            return row['Prompt B']
    except:
        print(row)

def select_human_prompt_use_score(row):
    try:
        if row['Prompt_A_use_score']>=row['Prompt_B_use_score']:
            return row['Prompt_A_use_score']
        else:
            return row['Prompt_B_use_score']
    except:
        print(row)

In [9]:
#select the "winning" prompts based on tie breaker, and then select "winning" prompts based on USE score
humandf['final_prompt'] = humandf.apply(lambda x: select_human_prompt(x), axis=1)
humandf['final_prompt_use'] = humandf.apply(lambda x: select_human_prompt_use(x), axis=1)
humandf['final_prompt_use_score'] = humandf.apply(lambda x: select_human_prompt_use_score(x), axis=1)
humandf = humandf[humandf['Prompt_B_use_score']!=-1]
print(len(humandf))

#calculate USE score differences
humandf['use_diff'] = humandf.apply(lambda x: abs(x['Prompt_A_use_score'] - x['Prompt_B_use_score']), axis=1)

169


In [10]:
#### measure alignment of human / USE selected prompt

In [11]:
# find cases where human  / use selected promt is the same, or where anna's preference is slight or no preference

In [12]:
def code_anna(row):
    if not pd.isnull(row['Annas preference']):
        if 'no preference' in row['Annas preference']:
            return 0
        if 'slight' in row['Annas preference']:
            return 1
        if 'strong' in row['Annas preference']:
            return 2 
    else:
        return 4

In [13]:
humandf['Anna_pref_coded']= humandf.apply(lambda x: code_anna(x), axis=1)
humandf.head(2)

Unnamed: 0,ID,Prompt A,Prompt B,original enron response,Florian,Ioulia,Anna,Annas preference,Prompt_A_use_score,Prompt_B_use_score,final_prompt,final_prompt_use,final_prompt_use_score,use_diff,Anna_pref_coded
0,315,When can you give me some feedback on my article?,I hope you had time to read my report.,I will take a look at this today.,A,A,,,0.160827,0.064508,When can you give me some feedback on my article?,When can you give me some feedback on my article?,0.160827,0.096319,4.0
1,192,"Sorry I missed your call, how can I help?","Hey, just to let you know, my team made good p...",We need to talk about this month.,B,A,A,Strong,0.117832,0.056796,"Sorry I missed your call, how can I help?","Sorry I missed your call, how can I help?",0.117832,0.061035,


In [14]:
# find cases where human  / use selected prompt is the same, or where
# human  / use selected promt is NOT the same, anna's preference exists, 
# and anna's preference is slight or no preference
aligndf = humandf[(humandf['final_prompt']==humandf['final_prompt_use']) | 
            ((humandf['final_prompt']!=humandf['final_prompt_use']) & (humandf['Anna_pref_coded']<=1))]

In [15]:
# how "aligned" are we?
print("Alignment:", len(aligndf)/len(humandf))

Alignment: 0.6686390532544378


In [16]:
# allow some leeway to the use scoring just like Anna

In [17]:
#calculate prompt diff angle in degrees (from rads)
humandf['use_diff_rad'] = humandf.apply(lambda x: np.degrees(abs(np.arccos(x['Prompt_A_use_score'])-np.arccos(x['Prompt_B_use_score']))), axis=1)

# check min-max of angle (difference)
print(humandf['use_diff_rad'].min(), humandf['use_diff_rad'].max())

# find percentile boundaries for "no pref", "slight pref." and "strong pref."
print(np.percentile(humandf['use_diff_rad'], 33))
print(np.percentile(humandf['use_diff_rad'], 66))

# relaxed alignment measure
# human and use choice is same OR 
# human and use choice is not same, and anna pref <=1 OR
# human and use choice is not same, and use pref difference degrees <= 66th percentile (4.37) 
aligndf = humandf[
            (humandf['final_prompt']==humandf['final_prompt_use']) | 
            ((humandf['final_prompt']!=humandf['final_prompt_use']) & (humandf['Anna_pref_coded']<=1)) |
            ((humandf['final_prompt']!=humandf['final_prompt_use']) & (humandf['use_diff_rad']<=4.37))
]


print("Relaxed alignment:", len(aligndf)/len(humandf))

0.0038896562088318373 16.430786675587918
1.9973651878070782
4.370202769986573
Relaxed alignment: 0.8994082840236687
