
#### This notebook evaluates each model's performance in terms of recall and similarity scores

In [0]:
import pandas as pd
from collections import Counter
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import hamming_loss
from transformers import BertTokenizer, BertModel
import torch
from sklearn.manifold import TSNE
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from transformers import AutoTokenizer, AutoModel
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics.pairwise import cosine_similarity


##### Loading the results and renaming some columns so the dataframes align

In [0]:
#### BASELINE

# Read the baseline results table and convert it straight to a pandas
# DataFrame. `spark` is not defined in this notebook — presumably the
# session object supplied by the runtime.
results_baseline = (
    spark.read
    .table("dev_data_science.mpst_dataset.results_baseline")
    .toPandas()
)
results_baseline.head()


Unnamed: 0,PROGRAM_ID,TITLE,Generated_Tags,Actual_Tags,Matching_Tags_Count
0,tt0033045,The Shop Around the Corner,"[comedy, romantic, entertaining, flashback]",[romantic],1
1,tt1937113,Call of Duty: Modern Warfare 3,"[violence, flashback, murder, action, suspense...",[good versus evil],0
2,tt0102007,The Haunted,"[paranormal, horror]","[haunting, horror, paranormal]",2
3,tt2005374,The Frozen Ground,"[murder, violence]","[dramatic, murder]",1
4,tt1411238,No Strings Attached,"[romantic, pornographic]","[adult comedy, boring, cute, entertaining, fla...",1


In [0]:
#### PRETRAINED

# Read the pretrained-model results table and convert it straight to a
# pandas DataFrame. `spark` is not defined in this notebook — presumably
# the session object supplied by the runtime.
results_pretrained = (
    spark.read
    .table("dev_data_science.mpst_dataset.results_pretrained2")
    .toPandas()
)
results_pretrained.head()

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,synopsis_source,award_label,tag_count,Generated_Tags
0,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,imdb,No award,1,"[romantic, comedy, dramatic, mystery, sentimen..."
1,tt1937113,Call of Duty: Modern Warfare 3,Hours after the end of the previous game and t...,[good versus evil],test,imdb,No award,1,"[action, suspenseful, dramatic, revenge, viole..."
2,tt0102007,The Haunted,This creepy and scary story centers around The...,"[paranormal, horror, haunting]",test,imdb,No award,3,"[horror, paranormal, dark, haunting, mystery, ..."
3,tt2005374,The Frozen Ground,The film opens in an Anchorage motel room in 1...,"[dramatic, murder]",test,wikipedia,No award,2,"[suspenseful, dark, murder, psychological, mys..."
4,tt1411238,No Strings Attached,"15 years agoWe see two young kids, named Emma ...","[boring, adult comedy, cute, flashback, romant...",test,imdb,No award,6,"[comedy, romantic, adult comedy, feel-good, dr..."


In [0]:
# Use the same ground-truth column name as the baseline frame (`Actual_Tags`)
results_pretrained = results_pretrained.rename({"tags": "Actual_Tags"}, axis="columns")

In [0]:
# Align the column name with the baseline frame's `Matching_Tags_Count`.
# NOTE(review): in the preview of this table `tag_count` looks like the number
# of actual tags rather than the number of matches — confirm. `evaluate_tags`
# overwrites this column with the real intersection count.
results_pretrained = results_pretrained.rename({'tag_count': 'Matching_Tags_Count'}, axis="columns")

In [0]:
#### FINETUNED

# Read the fine-tuned-model results table and convert it straight to a
# pandas DataFrame. `spark` is not defined in this notebook — presumably
# the session object supplied by the runtime.
results_finetuned = (
    spark.read
    .table("dev_data_science.mpst_dataset.results_finetuned")
    .toPandas()
)
results_finetuned.head()

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,synopsis_source,award_label,tag_count,Generated_Tags
0,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,imdb,No award,1,[romantic]
1,tt1937113,Call of Duty: Modern Warfare 3,Hours after the end of the previous game and t...,[good versus evil],test,imdb,No award,1,"[action, violence, revenge, suspenseful, dark]"
2,tt0102007,The Haunted,This creepy and scary story centers around The...,"[paranormal, horror, haunting]",test,imdb,No award,3,[horror]
3,tt2005374,The Frozen Ground,The film opens in an Anchorage motel room in 1...,"[dramatic, murder]",test,wikipedia,No award,2,"[murder, suspenseful, psychological, dark, vio..."
4,tt1411238,No Strings Attached,"15 years agoWe see two young kids, named Emma ...","[boring, adult comedy, cute, flashback, romant...",test,imdb,No award,6,[comedy]


In [0]:
# Align the column name with the baseline frame's `Matching_Tags_Count`.
# NOTE(review): in the preview of this table `tag_count` looks like the number
# of actual tags rather than the number of matches — confirm. `evaluate_tags`
# overwrites this column with the real intersection count.
results_finetuned = results_finetuned.rename({'tag_count': 'Matching_Tags_Count'}, axis="columns")

In [0]:
# Use the same ground-truth column name as the baseline frame (`Actual_Tags`)
results_finetuned = results_finetuned.rename({'tags': 'Actual_Tags'}, axis="columns")


In [0]:
# Load the BERT model and tokenizer
# The same checkpoint name is passed to both `from_pretrained` calls so the
# tokenizer and the encoder come from one checkpoint. `tokenizer` and `model`
# are notebook-level globals that `get_bert_embeddings` reads directly.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Function to get embeddings using BERT
def get_bert_embeddings(text):
    """Embed ``text`` with the notebook-level BERT ``tokenizer`` and ``model``.

    The text is tokenized into PyTorch tensors (with padding and truncation
    enabled), run through the model without gradient tracking, and reduced to
    the hidden state at sequence position 0 (the ``[CLS]`` token).

    Returns
    -------
    torch.Tensor
        Slice ``last_hidden_state[:, 0, :]`` — one vector per input sequence,
        i.e. ``[batch_size, hidden_size]``.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Inference only, so skip autograd bookkeeping during the forward pass
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # [batch_size, seq_length, hidden_size] -> [batch_size, hidden_size]
    return hidden_states[:, 0, :]

# Function to calculate cosine similarity
def calculate_similarity_bert(tags1, tags2):
    """Return the cosine similarity between the embeddings of two tag lists.

    Each tag list is flattened into a single space-separated string, embedded
    with ``get_bert_embeddings``, and the two embeddings are compared with
    ``cosine_similarity``.

    Parameters
    ----------
    tags1, tags2 : iterable of str
        The tag lists to compare.

    Returns
    -------
    The ``[0][0]`` entry of the pairwise similarity matrix.
    """
    # One sentence per tag list; multi-word tags simply contribute more words
    joined_tags = [" ".join(tag_list) for tag_list in (tags1, tags2)]

    # Embed in argument order: tags1 first, then tags2
    embeddings = [get_bert_embeddings(sentence) for sentence in joined_tags]

    # cosine_similarity works on arrays, so convert the tensors first
    pairwise = cosine_similarity(*(embedding.numpy() for embedding in embeddings))
    return pairwise[0][0]

# Function to calculate matching tags count
def compute_matching_tags(row):
    """Count the distinct tags shared by a row's generated and actual tags.

    ``row`` is indexed with the keys ``"Generated_Tags"`` and ``"Actual_Tags"``.
    Both values are converted to sets first, so a repeated tag counts once.
    """
    generated = set(row["Generated_Tags"])
    actual = set(row["Actual_Tags"])
    return len(generated & actual)

# Main function to process any dataframe
def evaluate_tags(dataframe):
    """Score generated tags against actual tags and summarise the results.

    Adds (or overwrites) three columns on ``dataframe`` **in place**:

    * ``Similarity_Score`` — result of ``calculate_similarity_bert`` for the
      row's ``Generated_Tags`` vs. ``Actual_Tags``.
    * ``Matching_Tags_Count`` — result of ``compute_matching_tags`` for the row.
    * ``Recall`` — ``Matching_Tags_Count`` divided by the number of *distinct*
      actual tags.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``Generated_Tags`` and ``Actual_Tags`` columns whose cells
        are iterables of hashable tags.

    Returns
    -------
    tuple
        ``(dataframe, average_recall, average_sim_score)`` — the same (mutated)
        frame, the mean of ``Recall`` and the mean of ``Similarity_Score``.

    Notes
    -----
    A row with no actual tags gets ``NaN`` recall (0 / 0); ``Series.mean``
    skips ``NaN`` by default, so such rows do not affect ``average_recall``.
    """
    # Per-row similarity between the generated and the actual tag lists
    dataframe["Similarity_Score"] = dataframe.apply(
        lambda row: calculate_similarity_bert(row["Generated_Tags"], row["Actual_Tags"]),
        axis=1,
    )

    # Number of distinct tags present in both lists
    dataframe["Matching_Tags_Count"] = dataframe.apply(compute_matching_tags, axis=1)

    # The numerator counts distinct shared tags, so the denominator must count
    # distinct actual tags too; using the raw list length would understate
    # recall for any row whose actual tags contain duplicates.
    distinct_actual_counts = dataframe["Actual_Tags"].apply(lambda tags: len(set(tags)))
    dataframe["Recall"] = dataframe["Matching_Tags_Count"] / distinct_actual_counts

    average_recall = dataframe["Recall"].mean()
    average_sim_score = dataframe["Similarity_Score"].mean()

    return dataframe, average_recall, average_sim_score

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [0]:
#### USE THE FUNCTION ON EACH RESULTS DATAFRAME
# NOTE(review): only `results_finetuned` is evaluated in this cell; pass a
# different results frame to `evaluate_tags` to score another model.
eval_df, avg_recall, average_sim_score = evaluate_tags(results_finetuned)

# Report both averages to six decimal places, one per line
print(
    f"Average Recall: {avg_recall:.6f}",
    f"Average Sim Score: {average_sim_score:.6f}",
    sep="\n",
)

Average Recall: 0.422538
Average Sim Score: 0.863028
