In [1]:
from transformers import BertTokenizer, BertModel
import torch
import ast
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score
from sklearn.metrics.pairwise import cosine_similarity


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialize BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

In [3]:
df = pd.read_excel('data/long_form_data1.xlsx')
df.shape

(150, 5)

In [4]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,ID,Speech,description,Labels
0,0,1471363,It's another pure gray morning. Don't know wha...,"The new MINI Countryman is the largest yet, pr...","[0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, ..."
1,1,1488315,The end of civilization is upon us. Hold your ...,What would you do if the end of the world was ...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, ..."
2,2,1526213,Audi presens can help prepare for and in some ...,As a man speeds down a country road in his Aud...,"[0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, ..."
3,3,1548815,The new Honda Odyssey has tons of available sm...,"On an otherwise peaceful day, two giant monste...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, ..."
4,4,1624211,Hi guys. So this is the all new Chevy Equinox....,Chevy's spokesperson lists off all the feature...,"[0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, ..."
5,5,1625396,I love you so much. We're going to be best fri...,When a woman finds a Lexus GX 460 parked outsi...,"[0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, ..."
6,6,1641167,In this home. We believe there are things that...,Pillsbury believes that there are some things ...,"[0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, ..."
7,7,1661301,Hurry. It's the final days of the Ford year en...,Ford announces that the final days of its Year...,"[1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, ..."
8,8,1667694,This is Charlie and this is Charlie not coughi...,Because Charlie took Delsym 12-Hour Cough Reli...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, ..."
9,9,1671240,Resolution. Number one binge more. Join the UN...,T-Mobile says its first resolution of the new ...,"[1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, ..."


In [5]:
# np.array(df['Labels'])[0]
# df['Labels'][0]
df['Labels'] = df['Labels'].apply(lambda x: ast.literal_eval(x))


In [6]:
# Predefined set of questions
questions = [
    "Is there a call to go online (e.g., shop online, visit the Web)?",
    "Is there online contact information provided (e.g., URL, website)?",
    "Is there a visual or verbal call to purchase (e.g., buy now, order now)?",
    "Does the ad portray a sense of urgency to act (e.g., buy before sales ends, order before ends)?",
    "Is there an incentive to buy (e.g., a discount, a coupon, a sale or 'limited time offer')?",
    "Is there offline contact information provided (e.g., phone, mail, store location)?",
    "Is there mention of something free?",
    "Does the ad mention at least one specific product or service (e.g., model, type, item)?",
    "Is there any verbal or visual mention of the price?",
    "Does the ad show the brand (logo, brand name) or trademark (something that most people know is the brand) multiple times?",
    "Does the ad show the brand or trademark exactly once at the end of the ad?",
    "Is the ad intended to affect the viewer emotionally, either with positive emotion or negative emotion?",
    "Does the ad give you a positive feeling about the brand?",
    "Does the ad have a story arc, with a beginning and an end?",
    "Does the ad have a reversal of fortune, where something changes for the better or worse?",
    "Does the ad have relatable characters?",
    "Is the ad creative/clever?",
    "Is the ad intended to be funny?",
    "Does this ad provide sensory stimulation?",
    "Is the ad visually pleasing?",
    "Does the ad have cute elements like animals, babies, animated characters, etc?"
]

question_embeddings = []
for question in questions:
    encoded_input = tokenizer(question, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        output = model(**encoded_input)
        embeddings = output.last_hidden_state.mean(dim=1).squeeze().numpy()
    question_embeddings.append(embeddings)

# Convert to numpy array for consistency
question_embeddings = np.array(question_embeddings)

In [7]:
# Initialize lists to store evaluation results
speech_list = []
predicted_labels_list = []
recall_list = []
precision_list = []
f1_score_list = []
agreement_percentage_list = []
accuracy_list = []
roc_auc_list = []

In [8]:
# Initialize variable to accumulate total agreements
total_agreement = 0

# Iterate over each row in the dataframe
for index, row in df.iterrows():
    # Get user input text (Speech) and Labels
    user_input_text = row['Speech'] + row['description']
    labels = row['Labels']
    
    # Tokenize and encode the user input text
    encoded_user_input = tokenizer(user_input_text, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        output = model(**encoded_user_input)
        user_input_embedding = output.last_hidden_state.mean(dim=1).squeeze().numpy()

    # Perform vector search using cosine similarity
    similarity_scores = cosine_similarity([user_input_embedding], question_embeddings).flatten()

    # Define a threshold for decision (you can adjust this based on your needs)
    threshold = 0.6

    # Determine answers based on similarity scores
    predicted_answers = []
    for score, question in zip(similarity_scores, questions):
        if score >= threshold:
            predicted_answers.append(1)  # YES
        else:
            predicted_answers.append(0)  # NO

  # Calculate evaluation metrics
    recall = recall_score(labels, predicted_answers, average='binary')
    precision = precision_score(labels, predicted_answers, average='binary')
    f1 = f1_score(labels, predicted_answers, average='binary')
    accuracy = accuracy_score(labels, predicted_answers)
    roc_auc = roc_auc_score(labels, predicted_answers)

    # Append results to lists
    speech_list.append(user_input_text)
    predicted_labels_list.append(predicted_answers)
    recall_list.append(recall)
    precision_list.append(precision)
    f1_score_list.append(f1)
    accuracy_list.append(accuracy)
    roc_auc_list.append(roc_auc)

    # Calculate agreement percentage
    agreement_count = sum([1 for true, pred in zip(labels, predicted_answers) if true == pred])
    total_agreement += agreement_count
    
    # Append agreement percentage to list
    agreement_percentage = (agreement_count / len(labels)) * 100
    agreement_percentage_list.append(agreement_percentage)


In [9]:
# Create a new dataframe with results
results_df = pd.DataFrame({
    'Speech': speech_list,
    'Predicted Labels': predicted_labels_list,
    'Recall': recall_list,
    'Precision': precision_list,
    'F1 Score': f1_score_list,
    'Accuracy': accuracy_list,
    'ROC AUC': roc_auc_list,
    'Agreement Percentage': agreement_percentage_list

})


In [10]:
results_df.head(6)

Unnamed: 0,Speech,Predicted Labels,Recall,Precision,F1 Score,Accuracy,ROC AUC,Agreement Percentage
0,It's another pure gray morning. Don't know wha...,"[1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, ...",0.6,0.428571,0.5,0.428571,0.436364,42.857143
1,The end of civilization is upon us. Hold your ...,"[1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, ...",0.75,0.4,0.521739,0.47619,0.528846,47.619048
2,Audi presens can help prepare for and in some ...,"[1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, ...",0.6,0.4,0.48,0.380952,0.390909,38.095238
3,The new Honda Odyssey has tons of available sm...,"[1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, ...",0.727273,0.571429,0.64,0.571429,0.563636,57.142857
4,Hi guys. So this is the all new Chevy Equinox....,"[1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, ...",0.6,0.428571,0.5,0.428571,0.436364,42.857143
5,I love you so much. We're going to be best fri...,"[1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, ...",0.785714,0.647059,0.709677,0.571429,0.464286,57.142857


In [11]:
results_df['Agreement Percentage'].mean()

51.39682539682539

In [12]:
# results_df.to_clipboard()
results_df.shape

(150, 8)

In [13]:
label_df = pd.DataFrame(results_df['Predicted Labels'].tolist(), columns=questions)


In [14]:
score_df = pd.concat([results_df, label_df], axis=1)
score_df.shape


(150, 29)

In [14]:
# label_df.head()

In [17]:
# score_df.head(2)

In [15]:
score_df.to_excel('results/BERT_results_with_speech_and_description.xlsx', index=False)