In [3]:
import sklearn
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import transformers
import numpy as np
import os
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import json

# Hugging Face Hub id of the metaphor-detection checkpoint. Defined once so the
# model weights and the tokenizer are always loaded from the same repository.
METAPHOR_MODEL_NAME = "lwachowiak/Metaphor-Detection-XLMR"

model = AutoModelForTokenClassification.from_pretrained(METAPHOR_MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(METAPHOR_MODEL_NAME)

# Token-classification ("ner") pipeline. With aggregation_strategy="simple" each
# returned dict carries an 'entity_group' key (e.g. 'LABEL_0' / 'LABEL_1'),
# which is what count_label_1 inspects.
metaphor_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

def count_label_1(entities):
    """Return how many items in ``entities`` are tagged ``'LABEL_1'``.

    Args:
        entities: Iterable of dicts, each with an ``'entity_group'`` key
            (the shape produced by the aggregated pipeline output).

    Returns:
        int: Number of dicts whose ``'entity_group'`` equals ``'LABEL_1'``;
        0 when ``entities`` is empty.
    """
    return sum(1 for span in entities if span['entity_group'] == 'LABEL_1')

# Load the annotated sentences; the file is JSON Lines (one JSON record per line).
df = pd.read_json(
    'fitzgerald_babylon_annotated_v1.jsonl',
    lines=True,
)

# Run the metaphor pipeline on each sentence, one at a time and in row order,
# and keep the raw list of predicted spans alongside the text.
df['model_labels'] = [metaphor_pipeline(sentence) for sentence in df['text']]

In [4]:
# Number of 'LABEL_1' spans the model returned for each sentence.
df['metaphor_counts'] = df['model_labels'].map(count_label_1)

# Collapse the count into a per-sentence verdict:
# 'm' when at least one 'LABEL_1' span was found, otherwise 'l'.
df['metaphor_yes_or_no'] = np.where(df['metaphor_counts'].gt(0), 'm', 'l')

In [5]:
# Preview the first rows to check the model_labels, metaphor_counts and
# metaphor_yes_or_no columns against the original 'label' column.
df.head()

Unnamed: 0,label,sentence_number,text,model_labels,metaphor_counts,metaphor_yes_or_no
0,l,1,And where's Mr. Campbell?,"[{'entity_group': 'LABEL_0', 'score': 0.999957...",0,l
1,l,2,Charlie asked.,"[{'entity_group': 'LABEL_0', 'score': 0.999983...",0,l
2,l,3,Gone to Switzerland.,"[{'entity_group': 'LABEL_0', 'score': 0.999958...",0,l
3,l,4,"Mr. Campbell's a pretty sick man, Mr. Wales.","[{'entity_group': 'LABEL_0', 'score': 0.998679...",0,l
4,l,5,I'm sorry to hear that.,"[{'entity_group': 'LABEL_0', 'score': 0.999972...",1,m


In [6]:
# Mark each sentence 'Yes' when the model's m/l verdict equals the 'label'
# column loaded from the annotated file, and 'No' otherwise.
df['match'] = np.where(
    df['label'].eq(df['metaphor_yes_or_no']),
    'Yes',
    'No',
)

In [7]:
# Preview the first rows again to inspect the 'match' column next to the
# 'label' and 'metaphor_yes_or_no' columns it was derived from.
df.head()

Unnamed: 0,label,sentence_number,text,model_labels,metaphor_counts,metaphor_yes_or_no,match
0,l,1,And where's Mr. Campbell?,"[{'entity_group': 'LABEL_0', 'score': 0.999957...",0,l,Yes
1,l,2,Charlie asked.,"[{'entity_group': 'LABEL_0', 'score': 0.999983...",0,l,Yes
2,l,3,Gone to Switzerland.,"[{'entity_group': 'LABEL_0', 'score': 0.999958...",0,l,Yes
3,l,4,"Mr. Campbell's a pretty sick man, Mr. Wales.","[{'entity_group': 'LABEL_0', 'score': 0.998679...",0,l,Yes
4,l,5,I'm sorry to hear that.,"[{'entity_group': 'LABEL_0', 'score': 0.999972...",1,m,No


In [9]:
# Percentage of sentences where the model verdict agrees ('Yes') or
# disagrees ('No') with the 'label' column.
counts = df['match'].value_counts(normalize=True).mul(100)
counts

Yes    55.775578
No     44.224422
Name: match, dtype: float64