# Appraisal vs Metaphor

## Import Packages

In [1]:
import os
import pandas as pd
import re
from ast import literal_eval
from tabulate import tabulate
import random
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
nlp = English()
tokenizer = Tokenizer(nlp.vocab)

  from .autonotebook import tqdm as notebook_tqdm


## Reading and Cleaning Data

### Metaphor Annotations

In [2]:
# borrowed from Jodie
# helper function to build a list of lists containing the start and end indices
# and the difference between these indices
# where labels is the labels associated with a specific text
def labels_to_list(labels):
  annotations = []

  labels = literal_eval(labels)

  for label in labels:
    tags = []
    tags.append(int(label['start']))
    tags.append(int(label['end']))

    annotations.append(tags)

  return annotations

In [3]:
# check for duplicate files (then manually combine them in the json)
# load in metaphor annotation json and save it to a pandas dataframe
file_path = 'MIP-at-2025-06-11.json'
met_df = pd.read_json(file_path)

# extracting labels from the annotations column
met_df['labels'] = met_df.apply(lambda row: row.annotations[0]['result'], axis=1)

# changing file names so they match the appraisal folder names
met_df['filename'] = met_df.apply(lambda row: re.sub(r"^[^_]*-", '', row.file_upload), axis=1)
met_df['filename'] = met_df.apply(lambda row: re.sub(r"_fixed", '', row.filename[:-4]), axis=1)

# checking for duplicated files
met_df.loc[met_df.duplicated(subset=['filename'])].filename

250     aboriginal_11
311         belgium_2
791           uber_27
907          watch_74
924          watch_91
1042          pope_25
Name: filename, dtype: object

In [4]:
# load in EDITED metaphor annotation json and save it to a pandas dataframe
file_path = 'MIP-at-2025-06-11-edit.json'
met_df = pd.read_json(file_path)

# extracting labels from the annotations column
met_df['labels'] = met_df.apply(lambda row: row.annotations[0]['result'], axis=1)

# changing file names so they match the appraisal folder names
met_df['filename'] = met_df.apply(lambda row: re.sub(r"^[^_]*-", '', row.file_upload), axis=1)
met_df['filename'] = met_df.apply(lambda row: re.sub(r"_fixed", '', row.filename[:-4]), axis=1)
met_df['filename'] = met_df.apply(lambda row: re.sub(r"_NEW", '', row.filename), axis=1)

# checking for duplicated files
met_df.loc[met_df.duplicated(subset=['filename'])].filename

Series([], Name: filename, dtype: object)

In [5]:
# creating a dictionary of metaphor labels, where each key is a filename
met_labels = {}
for name in met_df['filename'].unique():
    # creating a new dataframe only containing labels corresponding to one file
    new_df = met_df[met_df['filename']==name][['filename', 'labels']].reset_index()
    # creating a list to save the labels in 
    labels_list = []
    for el in new_df['labels'][0]:
        # adding labels to the label list using the helper function
        labels_list.append(labels_to_list(str([el['value']]))[0])
    # saving the list of labels to the dictionary 
    met_labels[name] = labels_list

In [6]:
len(met_labels.keys())

1045

### Appraisal Annotations

In [7]:
def extractor(dic, col, txt):
    '''
    takes dic (a dictionary of dataframes), col (a string corresponding to column name), 
    and txt (a string containing the label type to extract) as input
    returns a dictionary with the same keys as dic, where the values are lists of label lists (e.g., [[1,7],[9,15]])
    '''
    # creating a dictionary of labels, where each key is a filename and each value is a list of labels
    labels_dic = {}
    # looping through all the keys and values in the input dictionary
    for name, df in dic.items():
        # creating an empty list of labels labels
        labels_list = []
        # dropping rows with no labels 
        cleaned_df = df[df[col] != '_'][['indices', col]].dropna()
        # if there are no labels, assigns an empty list
        if len(cleaned_df[col]) == 0:
            labels_dic[name]=labels_list
        else:
            # extracting rows that contain the input txt in the label column
            dic_labels = cleaned_df[cleaned_df[col].str.contains(txt)][col].unique()
            for d in dic_labels:
                cleaned_df_v2 = cleaned_df[cleaned_df[col]==d]
                # adding labels to the list of labels
                new_dic = {}
                new_dic[d] = {}
                # setting unreasonable values
                new_dic[d]['lowest'] = 1000000000
                new_dic[d]['highest'] = -1
                for r in cleaned_df_v2.indices:
                    # splitting the index string into a list
                    ind = [int(x) for x in r.split('-')]
                    # updating lowest index
                    if ind[0] < new_dic[d]['lowest']:
                        new_dic[d]['lowest'] = ind[0]
                    # updating highest index
                    if ind[1] > new_dic[d]['highest']:
                        new_dic[d]['highest'] = ind[1]
                # adding labels to list of labels
                labels_list.append([new_dic[d]['lowest'], new_dic[d]['highest']])
            # assigning list of labels to filename in the label dictionary
            labels_dic[name]=labels_list
    return labels_dic

In [8]:
# creating a dictionary of appraisal annotation dataframes, where each key is a filename
appraisal_dict = {}

# list of file names in the metaphor annotations
filenames = list(met_df['filename'].unique())

# looping through each file name in the metaphor annotations 
for folder_id in filenames:
    path = 'SOCC/annotated/Appraisal/Appraisal_annotations/curation/' + folder_id
    try:
        # loading in appraisal annotations in folders that end with '.txt'
        filename = os.listdir(path + '.txt')[0]
        # reading and saving the annotations to a pandas dataframe
        df = pd.read_csv(path + '.txt/' + filename, sep = '\t', header = None, 
                         skiprows=6, names=['no.','indices','text','attitude','label','polarity'])
        # saving the dataframe to the appraisal dictionary 
        appraisal_dict[folder_id] = df
    except:
        try:
            # loading in appraisal annotations in folders that end with '.tsv'
            filename = os.listdir(path + '.tsv')[0]
            # reading and saving the annotations to a pandas dataframe
            df = pd.read_csv(path + '.tsv/' + filename, sep = '\t', header = None, 
            skiprows=6, names=['no.','indices','text','attitude','label','polarity'])
            # saving the dataframe to the appraisal dictionary
            appraisal_dict[folder_id] = df
        except:
            # prints the name of any file for which there is no appraisal annotation
            print('DOES NOT EXIST:', folder_id)

DOES NOT EXIST: aboriginal_17


In [9]:
del met_labels['aboriginal_17']

In [10]:
# cleaning labels in the label and attitude columns by removing the annotation before the '|'
for filename in appraisal_dict.keys():
    appraisal_dict[filename]['label_final']=appraisal_dict[filename].apply(lambda row: str(row.label).split('|')[-1], axis=1)
    appraisal_dict[filename]['attitude_final']=appraisal_dict[filename].apply(lambda row: str(row.attitude).split('|')[-1], axis=1)

In [11]:
# a dictionary where every key is a file name and every value is a list of the character indices icluded in the negative labels 
neg_labels = extractor(appraisal_dict, 'label_final', 'neg')
# a dictionary where every key is a file name and every value is a list of the character indices included in the positive labels 
pos_labels = extractor(appraisal_dict, 'label_final', 'pos')
# a dictionary where every key is a file name and every value is a list of the character indices included in the Appreciation labels 
appr_labels = extractor(appraisal_dict, 'attitude_final', 'Appreciation')
# a dictionary where every key is a file name and every value is a list of the character indices included in the Judgement labels 
judg_labels = extractor(appraisal_dict, 'attitude_final', 'Judgment') # note spelling
# a dictionary where every key is a file name and every value is a list of the character indices included in the Affect labels 
aff_labels = extractor(appraisal_dict, 'attitude_final', 'Affect')

In [12]:
len(appraisal_dict.keys())

1044

## Corpus Analysis

In [13]:
# creating a new dataframe based on the metaphor dataframe, with fewer columns
df_analysis=met_df[['filename', 'data']]
# removing file from that is not in appraisal dataframe
df_analysis=df_analysis.loc[df_analysis.filename != 'aboriginal_17']
# creating a column that measures the length of the text associated with each filename
df_analysis['length'] = df_analysis.apply(lambda row: len(tokenizer(row.data['text'])), axis=1)

In [14]:
# shortest comments
df_analysis.loc[(df_analysis.length == min(df_analysis.length))]

Unnamed: 0,filename,data,length
147,watch_30,{'text': 'Baloney.'},1
154,watch_37,{'text': 'Exactly!'},1
420,china_39,{'text': 'LOL...!'},1
676,trump_38,{'text': 'Ha-Ha-Ha!!!'},1


In [15]:
# longest
df_analysis.loc[(df_analysis.length == max(df_analysis.length))]

Unnamed: 0,filename,data,length
847,uber_94,{'text': 'Uber drivers have filed a class acti...,793


In [16]:
# statistics
print('There are {} comments in the corpus. The comments range between {} and {} in number of tokens, with an average of {}. \
The total number of tokens in the corpus is {}.'.format(len(df_analysis), min(df_analysis.length), 
                                                        max(df_analysis.length), sum(df_analysis.length)/len(df_analysis), 
                                                        sum(df_analysis.length)))

There are 1044 comments in the corpus. The comments range between 1 and 793 in number of tokens, with an average of 62.15900383141762. The total number of tokens in the corpus is 64894.


## Comparison 
### what are we measuring? how?
We are deriving ten metrics to measure the overlap between metaphor and Appraisal and, within Appraisal, between negative evaluation and metaphor. The first five take the metaphor annotations as their starting point and calculate how many of those were also labelled in the Appraisal annotations, broken down into subcategories of Appraisal: 
1. Percentage of metaphorical units that are labelled as Affect 
2. Percentage of metaphorical units that are labelled as Judgement  
3. Percentage of metaphorical units that are labelled as Appreciation  
4. Percentage of metaphorical units that are labelled as positive 
5. Percentage of metaphorical units that are labelled as negative 
The next five metrics take an Appraisal label as their starting point and calculate what percentage of those have metaphor labels:
1. Percentage of Affect units that are labelled as metaphorical  
2. Percentage of Judgement units that are labelled as metaphorical 
3. Percentage of Appreciation units that are labelled as metaphorical 
4. Percentage of positive units that are labelled as metaphorical 
5. Percentage of negative units that are labelled as negative 
For (1), we first go through each label in the metaphor annotations, and check if 30% of the characters show up in a Affect label. If they do, we say that that metaphorical unit is labelled as Affect. We then calculate the percentage of Affect metaphorical units by dividing the number of metaphors that are labelled as Affect by the number of units labelled as metaphorical overall. The other metrics follow a similar methodology. 

In [17]:
def overlap_calculator(dic1, dic2):
    '''
    finds the percentages of overlap between two dictionaries of labels and returns a dictionary where the values are percentages
    '''
    # setting counter for number of labels overall
    dic1_total_labels, dic2_total_labels = 0, 0
    
    # number of labels in each dic that overlap (e.g., if dic1 has labels [1,3] and [4,9] for a file 
    # and dic2 has a label [1,9] for the same file, then dic1 has two overlapping labels with dic2 
    # and dic2 has 1 overlapping label with dic1)
    overall_overlap_cnt_dic1, overall_overlap_cnt_dic2 = 0, 0
    
    for file in list(dic1.keys()):
        # initializing overlap count for each file in each dictionary; setting it to 0
        overlap_cnt_dic1, overlap_cnt_dic2 = 0, 0

        # loops through each label list in the list of list of labels corresponding to each key in the dictionary 
        for label_dic1 in dic1[file]:
            # creating a set of the characters contained in the indices (e.g., [1,5] -> [1,2,3,4,5])
            label_dic1_characters = set(list(range(label_dic1[0],label_dic1[1]+1)))
            # loops through the labels in the second dictionary for the same key
            for label_dic2 in dic2[file]:
                # creating a set of the characters contained in the indices (e.g., [1,5] -> [1,2,3,4,5])
                label_dic2_characters = set(list(range(label_dic2[0],label_dic2[1]+1)))
                # variable representing the characters included in both labels (intersection)
                overlap = label_dic1_characters & label_dic2_characters
                # variable representing the elements included in either label (union)
                universe = label_dic2_characters | label_dic2_characters
                
                # calculating overlap % if more than one element exists in both labels 
                # setting overlap to > 1 accounts for punctuation, etc.
                if len(overlap) > 1:
                    # the percentage of the first label that is included in the second label
                    result_dic1 = float(len(overlap)) / len(label_dic1_characters) * 100
                    # the percentage of the second label that is included in the first label
                    result_dic2 = float(len(overlap)) / len(label_dic2_characters) * 100
                    # if at least 30% of the first label is included in the second label, it counts as overlap
                    if result_dic1 >= 30:
                        # increasing counters by 1
                        overlap_cnt_dic1+=1
                        overall_overlap_cnt_dic1+=1
                    # if at least 30% of the second label is included in the first label, it counts as overlap
                    if result_dic2 >= 30:
                        # increasing counters by 1
                        overlap_cnt_dic2+=1
                        overall_overlap_cnt_dic2+=1

        # adding to counter of total labels for dic1
        dic1_total_labels = dic1_total_labels + len(dic1[file])
        # adding to counter of total labels for dic2
        dic2_total_labels = dic2_total_labels + len(dic2[file])
    return {'percentage of dic1 units that are labelled in dic2': overall_overlap_cnt_dic1/dic1_total_labels * 100,
            'percentage of dic2 units that are labelled in dic1': overall_overlap_cnt_dic2/dic2_total_labels * 100,
           'total labels for dic1': dic1_total_labels,
           'total labels for dic2': dic2_total_labels,
           'overall overlap, given dic2': overall_overlap_cnt_dic2}

In [18]:
overlap_percentage_pos = overlap_calculator(pos_labels, met_labels).values()
print('percentage of positive units that are labelled as metaphors:', list(overlap_percentage_pos)[0])
print('percentage of metaphorical units that are labelled as positive:', list(overlap_percentage_pos)[1])

overlap_percentage_neg = overlap_calculator(neg_labels, met_labels).values()
print('percentage of negative units that are labelled as metaphors:', list(overlap_percentage_neg)[0])
print('percentage of metaphorical units that are labelled as negative:', list(overlap_percentage_neg)[1])

overlap_percentage_appr = overlap_calculator(appr_labels, met_labels).values()
print('percentage of Appreciation units that are labelled as metaphors:', list(overlap_percentage_appr)[0])
print('percentage of metaphorical units that are labelled as Appreciation:', list(overlap_percentage_appr)[1])

overlap_percentage_aff = overlap_calculator(aff_labels, met_labels).values()
print('percentage of Affect units that are labelled as metaphors:', list(overlap_percentage_aff)[0])
print('percentage of metaphorical units that are labelled as Affect:', list(overlap_percentage_aff)[1])

overlap_percentage_judg = overlap_calculator(judg_labels, met_labels).values()
print('percentage of Judgement units that are labelled as metaphors:', list(overlap_percentage_judg)[0])
print('percentage of metaphorical units that are labelled as Judgement:', list(overlap_percentage_judg)[1])

percentage of positive units that are labelled as metaphors: 9.033280507131538
percentage of metaphorical units that are labelled as positive: 7.489146164978293
percentage of negative units that are labelled as metaphors: 11.93124368048534
percentage of metaphorical units that are labelled as negative: 32.09117221418234
percentage of Appreciation units that are labelled as metaphors: 11.363636363636363
percentage of metaphorical units that are labelled as Appreciation: 18.12590448625181
percentage of Affect units that are labelled as metaphors: 7.792207792207792
percentage of metaphorical units that are labelled as Affect: 0.6150506512301013
percentage of Judgement units that are labelled as metaphors: 11.233307148468185
percentage of metaphorical units that are labelled as Judgement: 21.05643994211288


In [19]:
print('percentage of X units that are labelled as metaphors')
print(tabulate([['Positive', round(list(overlap_percentage_pos)[0],2)], ['Negative', round(list(overlap_percentage_neg)[0],2)],
               ['Appreciation', round(list(overlap_percentage_appr)[0],2)], ['Affect', round(list(overlap_percentage_aff)[0],2)],
               ['Judgement', round(list(overlap_percentage_judg)[0],2)]], headers=['X', '%']))

percentage of X units that are labelled as metaphors
X                 %
------------  -----
Positive       9.03
Negative      11.93
Appreciation  11.36
Affect         7.79
Judgement     11.23


In [20]:
print('percentage of metaphorical units that are labelled as X')
print(tabulate([['Positive', round(list(overlap_percentage_pos)[1],2)], ['Negative', round(list(overlap_percentage_neg)[1],2)],
               ['Appreciation', round(list(overlap_percentage_appr)[1],2)], ['Affect', round(list(overlap_percentage_aff)[1],2)],
               ['Judgement', round(list(overlap_percentage_judg)[1],2)]], headers=['X', '%']))

percentage of metaphorical units that are labelled as X
X                 %
------------  -----
Positive       7.49
Negative      32.09
Appreciation  18.13
Affect         0.62
Judgement     21.06


In [21]:
# raw numbers
appraisal_sum = (list(overlap_percentage_aff)[2] + list(overlap_percentage_judg)[2] + list(overlap_percentage_appr)[2])
appraisal_sum_given_met = (list(overlap_percentage_aff)[4] + list(overlap_percentage_judg)[4] + list(overlap_percentage_appr)[4])

print(f'There are {list(overlap_percentage_aff)[3]} metaphor labels and {appraisal_sum} Appraisal labels, with \
{list(overlap_percentage_aff)[2]} Affect, {list(overlap_percentage_judg)[2]} Judgement, \
{list(overlap_percentage_appr)[2]} Appreciation, {list(overlap_percentage_neg)[2]} negative, \
and {list(overlap_percentage_pos)[2]} positive labels. \n {appraisal_sum_given_met} ({appraisal_sum_given_met/list(overlap_percentage_aff)[3]}%) \
of the metaphor labels have an Appraisal label: {list(overlap_percentage_aff)[4]} Affect, {list(overlap_percentage_judg)[4]} Judgement, \
{list(overlap_percentage_appr)[4]} Appreciation, {list(overlap_percentage_neg)[4]} negative, and {list(overlap_percentage_pos)[4]} positive labels.')

There are 2764 metaphor labels and 2670 Appraisal labels, with 77 Affect, 1273 Judgement, 1320 Appreciation, 1978 negative, and 631 positive labels. 
 1100 (0.3979739507959479%) of the metaphor labels have an Appraisal label: 17 Affect, 582 Judgement, 501 Appreciation, 887 negative, and 207 positive labels.


## Overlap Examples

In [22]:
# selecting 50 files at random to check for overlap between metaphor and appraisal
random.seed(10)
example_files = random.sample(filenames, 50)

In [23]:
def overlap_examples(dic1, dic2, examples):
    for file in examples:
        for label_dic1 in dic1[file]:
            # creating a set of the characters contained in the indices (e.g., [1,5] -> [1,2,3,4,5])
            label_dic1_characters = set(list(range(label_dic1[0],label_dic1[1]+1)))
            # loops through the labels in the second dictionary for the same key
            for label_dic2 in dic2[file]:
                # creating a set of the characters contained in the indices (e.g., [1,5] -> [1,2,3,4,5])
                label_dic2_characters = set(list(range(label_dic2[0],label_dic2[1]+1)))
                # variable representing the characters included in both labels (intersection)
                overlap = label_dic1_characters & label_dic2_characters
                # variable representing the elements included in either label (union)
                universe = label_dic2_characters | label_dic2_characters
                
                # calculating overlap % if more than one element exists in both labels 
                # setting overlap to > 1 accounts for punctuation, etc.
                if len(overlap) > 1:
                    # the percentage of the first label that is included in the second label
                    result_dic1 = float(len(overlap)) / len(label_dic1_characters) * 100
                    # the percentage of the second label that is included in the first label
                    result_dic2 = float(len(overlap)) / len(label_dic2_characters) * 100
                    # if at least 30% of one label is included in the other label, it counts as overlap
                    if result_dic1 >= 30 or result_dic2 >= 30:
                        # increasing counters by 1
                        text = met_df[met_df['filename']==file].reset_index().data[0]['text']
                        print('file name: {}\n text: {}\n metaphor label: {}\n appraisal label: {}\n overlap: {}'.format(file, 
                              text,
                              text[label_dic1[0]:label_dic1[1]],
                              text[label_dic2[0]:label_dic2[1]],
                              text[list(overlap)[0]: list(overlap)[-1]]))

In [24]:
print("metaphor and negative overlap examples:")
overlap_examples(met_labels, neg_labels, example_files)
print("\n metaphor and positive overlap examples:")
overlap_examples(met_labels, pos_labels, example_files)
print("metaphor and Appreciation overlap examples:")
overlap_examples(met_labels, appr_labels, example_files)
print("\n metaphor and Judgement overlap examples:")
overlap_examples(met_labels, judg_labels, example_files)
print("metaphor and Affect overlap examples:")
overlap_examples(met_labels, aff_labels, example_files)

metaphor and negative overlap examples:
file name: budget_69
 text: Of course we all know, unfortunately, there is a segment of the voting public that worships the ground Harper slithers on. That's who this 'surplus' is aimed at. These numbers have been baked, cooked, fudged, pinched and squeezed to get the result that Dear Leader wanted. Anybody with any working brain cells understands that.
 metaphor label: slithers
 appraisal label: worships the ground Harper slithers on
 overlap: slithers
file name: budget_69
 text: Of course we all know, unfortunately, there is a segment of the voting public that worships the ground Harper slithers on. That's who this 'surplus' is aimed at. These numbers have been baked, cooked, fudged, pinched and squeezed to get the result that Dear Leader wanted. Anybody with any working brain cells understands that.
 metaphor label: slithers
 appraisal label: slithers
 overlap: slithers
file name: budget_69
 text: Of course we all know, unfortunately, there is