In [1]:
import time

import kagglehub

# Fetch the latest version of the dataset through kagglehub.
# `path` is the local directory that holds the downloaded files.
dataset_handle = "melzohbi/metaphor-detection-vua-wsd-augmented"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Path to dataset files: C:\Users\allex\.cache\kagglehub\datasets\melzohbi\metaphor-detection-vua-wsd-augmented\versions\1


In [2]:
import os
import pandas as pd

# Locate the VUA20 train/test splits inside the downloaded dataset directory
train_tsv = os.path.join(path, "data/VUA20/train.tsv")
test_tsv = os.path.join(path, "data/VUA20/test.tsv")

# Both files are tab-separated
train_df = pd.read_csv(train_tsv, sep='\t')
test_df = pd.read_csv(test_tsv, sep='\t')

# Keep only 617 rows, because testing on the whole dataset would take too long

In [3]:
# Keep only the rows whose 'index' value contains "a1e-fragment01".
# - regex=False: the fragment id is a literal string, not a pattern.
# - na=False: rows with a missing 'index' are excluded instead of breaking the mask.
# - .copy(): later cells add columns to this subset; without an explicit copy
#   pandas treats it as a slice of train_df, emits SettingWithCopyWarning and
#   may not write to the object you expect.
fragment_mask = train_df['index'].str.contains("a1e-fragment01", regex=False, na=False)
tmp_train_df = train_df[fragment_mask].copy()

In [4]:
# Show the filtered subset (rendered as this cell's output)
tmp_train_df

Unnamed: 0,index,label,sentence,POS,FGPOS,w_index,target,word_sense,definition
0,a1e-fragment01 1,0,Latest corporate unbundler reveals laid-back a...,ADJ,JJS,0,Latest,up to the immediate present; most recent or mo...,Near the end of a period of time.
1,a1e-fragment01 1,0,Latest corporate unbundler reveals laid-back a...,ADJ,JJ,1,corporate,of or belonging to a corporation,Of or relating to a corporation.
2,a1e-fragment01 1,0,Latest corporate unbundler reveals laid-back a...,PROPN,NNP,2,unbundler,unbundler,unbundler
3,a1e-fragment01 1,1,Latest corporate unbundler reveals laid-back a...,VERB,VBZ,3,reveals,make visible,To uncover; to show and display that which was...
4,a1e-fragment01 1,0,Latest corporate unbundler reveals laid-back a...,VERB,VBN,4,laid-back,laid-back,laid-back
...,...,...,...,...,...,...,...,...,...
612,a1e-fragment01 30,0,It would be a criticism if I was doing it to i...,PRON,PRP,9,it,it,it
613,a1e-fragment01 30,0,It would be a criticism if I was doing it to i...,ADP,IN,10,to,to,to
614,a1e-fragment01 30,0,It would be a criticism if I was doing it to i...,VERB,VB,11,impoverish,make poor,To make poor.
615,a1e-fragment01 30,0,It would be a criticism if I was doing it to i...,PRON,PRP,12,myself,myself,myself


# Load the model through Ollama. deepseek-r1 (14B) uses all available VRAM; the cell below currently selects llama3.2:3b

In [5]:
from langchain_community.llms import Ollama

# Client for the Ollama server running on this machine.
# Other models that were tried here: "llama3.2:1b", "deepseek-r1:14b"
ollama = Ollama(base_url='http://localhost:11434', model="llama3.2:3b")

  ollama = Ollama(


In [6]:
# Smoke test: send one throwaway question to check that the model answers
smoke_test_reply = ollama.invoke("why is the sky blue")
print(smoke_test_reply)


KeyboardInterrupt



# Create prompt for each row from dataset

In [None]:
def create_prompt(row):
    """Build the yes/no metaphor question for one dataset row.

    Args:
        row: Mapping-like object (for example a DataFrame row) providing the
            keys 'sentence', 'target', 'POS', 'w_index', 'word_sense' and
            'definition'.

    Returns:
        A three-line prompt string: the sentence, a description of the target
        word, and the instruction to answer yes or no without explanation.
    """
    sentence_line = f"Sentence: {row['sentence']}\n"
    target_line = f"Target Word: {row['target']} (POS: {row['POS']}, Word Index: {row['w_index']}, Word Sense: {row['word_sense']}, definition: {row['definition']})\n"
    question = "Is the target word used metaphorically? Provide a yes or no answer without explanation."
    return sentence_line + target_line + question

# Build one prompt per row (axis=1 passes each row to create_prompt),
# then store the result in a new 'prompt' column.
prompt_series = tmp_train_df.apply(create_prompt, axis=1)
tmp_train_df.loc[:, 'prompt'] = prompt_series

# Get predictions

In [None]:
# Add a 'prediction' column, set to None for every row, to hold the model's raw answers
tmp_train_df['prediction'] = None

In [None]:
# Spot-check the prompt of a single row (position 451) before the full run
row = tmp_train_df.iloc[451]
sample_reply = ollama.invoke(row['prompt'])
print(sample_reply)

In [None]:
# Inspect the frame again; it now carries the 'prompt' and 'prediction' columns
tmp_train_df

In [63]:
from tqdm import tqdm

# Query the model once per row and store its stripped answer in 'prediction'.
#
# Iterating over the tqdm wrapper already advances the bar by one per row, so
# there is deliberately no manual update() call here: adding one would count
# every row twice and make the bar reach 100% halfway through the data.
tqdm_progress_bar = tqdm(tmp_train_df.index)
for row_idx in tqdm_progress_bar:
    try:
        prompt = tmp_train_df.loc[row_idx, 'prompt']
        response = ollama.invoke(prompt)
        # If the reply contains a "</think>" marker, keep only the text after
        # the last marker (the Yes / No answer); replies without the marker
        # are left unchanged because split() then returns a single element.
        prediction = response.split("</think>")[-1].strip()
        tmp_train_df.loc[row_idx, 'prediction'] = prediction
        # Show the latest answer next to the bar as a quick sanity check
        tqdm_progress_bar.set_description(prediction)

    except Exception as e:
        # Stop at the first failure; rows answered so far keep their prediction
        print(e)
        break

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@:  50%|█████     | 311/617 [11:06<10:55,  2.14s/it]  


KeyboardInterrupt: 

# Some predictions come back as a run of '@' characters (@@@@@@@@@@@@@@@@@@@@@@@@@). The cause is unclear, so we rerun those rows

# Convert Yes/No to labels

In [None]:
# Add a 'predictionIdx' column, set to None for every row, for the numeric 0/1 label
tmp_train_df['predictionIdx'] = None

In [None]:
import re

# Matches a standalone "yes" or "no" (whole word, any letter case), so words
# that merely contain those letters ("not", "know", "another") are ignored.
_YES_NO_RE = re.compile(r"\b(yes|no)\b", re.IGNORECASE)


def _parse_yes_no(prediction):
    """Map a raw model answer to 1 (yes), 0 (no) or None (no usable answer).

    Args:
        prediction: The stored answer; may be None or another non-string
            placeholder for rows that were never predicted.

    Returns:
        1 if the first standalone yes/no word is "yes", 0 if it is "no",
        and None when the value is not a string or contains neither word.
    """
    if not isinstance(prediction, str):
        return None
    match = _YES_NO_RE.search(prediction)
    if match is None:
        return None
    return 1 if match.group(1).lower() == "yes" else 0


for row_idx in tmp_train_df.index:
    # Compute the label fresh for every row so that an unparseable answer can
    # never inherit the label of the previous row.
    predictionIdx = _parse_yes_no(tmp_train_df.loc[row_idx, 'prediction'])

    if predictionIdx is None:
        # Leave 'predictionIdx' empty: the row has no usable answer
        continue
    tmp_train_df.loc[row_idx, 'predictionIdx'] = predictionIdx

In [None]:
# Inspect the frame after filling in 'predictionIdx'
tmp_train_df

In [None]:
# Score only the rows that actually have a predictionIdx. Rows without one
# (for example when the prediction loop was interrupted) are not model errors
# and must not be counted in the accuracy denominator.
evaluated_df = tmp_train_df[tmp_train_df['predictionIdx'].notna()]
n_evaluated = len(evaluated_df)

# Calculate counts for each class
label_counts = tmp_train_df['label'].value_counts()
prediction_counts = tmp_train_df['predictionIdx'].value_counts()

# Confusion matrix components (label 1 = metaphorical, prediction 1 = "yes")
TP = ((evaluated_df['label'] == 1) & (evaluated_df['predictionIdx'] == 1)).sum()
TN = ((evaluated_df['label'] == 0) & (evaluated_df['predictionIdx'] == 0)).sum()
FP = ((evaluated_df['label'] == 0) & (evaluated_df['predictionIdx'] == 1)).sum()
FN = ((evaluated_df['label'] == 1) & (evaluated_df['predictionIdx'] == 0)).sum()

# Accuracy over evaluated rows only; 0 when nothing was evaluated
accuracy = (TP + TN) / n_evaluated if n_evaluated > 0 else 0

# Precision, Recall, F1-Score (each falls back to 0 on an empty denominator)
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Display results
results = {
    "Label Counts": label_counts.to_dict(),
    "Prediction Counts": prediction_counts.to_dict(),
    "Evaluated Rows": n_evaluated,
    "True Positives": TP,
    "True Negatives": TN,
    "False Positives": FP,
    "False Negatives": FN,
    "Accuracy": accuracy,
    "Precision": precision,
    "Recall": recall,
    "F1-Score": f1_score
}

for metric, value in results.items():
    print(f"{metric}: {value}")