# AdVision

In [1]:
import os
import csv
import cv2
from PIL import Image
import pytesseract
import pandas as pd
from collections import Counter
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer, BartForConditionalGeneration, BartTokenizer, pipeline
import torch
from sklearn.metrics import (precision_score, recall_score, f1_score, accuracy_score,
                             confusion_matrix, matthews_corrcoef)
import warnings


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

  warn(


## Part 1: Processing and extracting frames from the videos

Extracts 50 evenly spaced frames from each video file in a specified directory. It defines a function, extract_frames, which reads a video file, calculates the frame interval, and captures frames at regular intervals. The main script iterates through all .mp4 video files in a given directory, calls this function to extract frames, and then saves these frames as PNG images in a new subdirectory named after the video file.

In [2]:
# Function to extract up to `num_frames` frames spread across a video file
def extract_frames(video_path, num_frames=50):
    """Sample frames spread over the whole length of a video.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Maximum number of frames to return (default 50).

    Returns:
        A list of the frames returned by ``cap.read()``, in playback order.
        The list is shorter than ``num_frames`` when a read fails (for example
        because the video has fewer frames), and empty when the video cannot
        be opened or ``num_frames`` is not positive.
    """
    if num_frames <= 0:
        # Nothing requested; also avoids a division by zero further down.
        return []

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            return frames

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames > num_frames:
            # Scale every sample index onto the full frame range. A floored
            # integer stride would only cover the first part of the video
            # whenever total_frames is not a multiple of num_frames.
            positions = [i * total_frames // num_frames for i in range(num_frames)]
        else:
            # Short clip (or no usable frame count): read consecutive frames
            # from the start until a read fails.
            positions = range(num_frames)

        for position in positions:
            # Set the position of the next frame to read
            cap.set(cv2.CAP_PROP_POS_FRAMES, position)
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Release the capture even if reading raised.
        cap.release()
    return frames

# Input videos, and the root folder that receives one sub-folder of frames per video
video_dir = 'sample/sample/'
output_dir = 'extracted_frames/'

# Make sure the output root exists before any frame is written
os.makedirs(output_dir, exist_ok=True)

# Walk the input folder, handling only .mp4 files (adjust the suffix for other formats)
for video_file in os.listdir(video_dir):
    if not video_file.endswith('.mp4'):
        continue

    video_path = os.path.join(video_dir, video_file)
    frames = extract_frames(video_path)

    # Frames of "<name>.mp4" are written to "<output_dir>/<name>/frame_NNNN.png"
    video_name = os.path.splitext(video_file)[0]
    video_output_dir = os.path.join(output_dir, video_name)
    os.makedirs(video_output_dir, exist_ok=True)
    for i, frame in enumerate(frames):
        frame_file = os.path.join(video_output_dir, f'frame_{i:04d}.png')
        cv2.imwrite(frame_file, frame)

    print(f"Extracted {len(frames)} frames from {video_file}")


Extracted 50 frames from 1471363.mp4
Extracted 50 frames from 1488315.mp4
Extracted 50 frames from 1526213.mp4
Extracted 50 frames from 1548815.mp4
Extracted 50 frames from 1624211.mp4
Extracted 50 frames from 1625396.mp4
Extracted 50 frames from 1641167.mp4
Extracted 50 frames from 1661301.mp4
Extracted 50 frames from 1667694.mp4
Extracted 50 frames from 1671240.mp4
Extracted 50 frames from 1671980.mp4
Extracted 50 frames from 1676138.mp4
Extracted 50 frames from 1678735.mp4
Extracted 50 frames from 1683011.mp4
Extracted 50 frames from 1696112.mp4
Extracted 50 frames from 1702594.mp4
Extracted 50 frames from 1702851.mp4
Extracted 50 frames from 1707220.mp4
Extracted 50 frames from 1708967.mp4
Extracted 50 frames from 1710855.mp4
Extracted 50 frames from 1710993.mp4
Extracted 50 frames from 1713794.mp4
Extracted 50 frames from 1723501.mp4
Extracted 50 frames from 1733728.mp4
Extracted 50 frames from 1739116.mp4
Extracted 50 frames from 1742915.mp4
Extracted 50 frames from 1744482.mp4
E

## Part 2: Captioning and Text Recognition from the Individual Frames

This code extracts captions and recognized text from image frames of multiple videos and saves the results to a CSV file. It begins by loading a pre-trained image captioning model, a feature extractor, and a tokenizer. It defines three functions: generate_caption to produce captions for frames using the model, recognize_text to extract text from frames using Tesseract OCR, and process_frames to handle frame processing and result collection. It then iterates through video folders, processes the frames, and writes the video ID, generated captions, and recognized text to a CSV file named captioned_frames.csv.

In [3]:
# Load the image captioning model, its image preprocessor and its tokenizer
# from the same pretrained checkpoint
caption_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
model = VisionEncoderDecoderModel.from_pretrained(caption_checkpoint)
feature_extractor = ViTImageProcessor.from_pretrained(caption_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(caption_checkpoint)

# Run on the GPU when one is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

# Function to generate a caption for a single frame
def generate_caption(frame):
    """Return the caption the captioning model produces for one frame.

    The frame is converted from BGR to RGB, wrapped in a PIL image, run
    through the module-level ``feature_extractor`` and ``model`` (on
    ``device``), and the first generated sequence is decoded with the
    module-level ``tokenizer``.

    Args:
        frame: Image array in BGR channel order.

    Returns:
        The decoded caption with special tokens removed.
    """
    rgb_pixels = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_pixels)
    encoded = feature_extractor(images=pil_image, return_tensors="pt")

    # Generation is capped at 50 new tokens
    generated = model.generate(encoded.pixel_values.to(device), max_new_tokens=50)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Function to run Tesseract OCR on a single frame
def recognize_text(frame):
    """Return the text Tesseract finds in one frame.

    Args:
        frame: Image array in BGR channel order; it is converted to RGB
            before being passed to ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the RGB image.
    """
    return pytesseract.image_to_string(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Function to process frames and generate captions and recognized text
def process_frames(frames_path):
    """Caption and OCR every PNG frame in a folder.

    Args:
        frames_path: Folder containing the extracted ``.png`` frames.

    Returns:
        A list with one ``{"caption": ..., "recognized_text": ...}`` dict per
        readable frame, ordered by file name. Files that ``cv2.imread`` cannot
        decode are skipped (with a printed notice) instead of aborting the run.
    """
    results = []

    for frame_file in sorted(os.listdir(frames_path)):
        if not frame_file.endswith('.png'):  # Frames are saved as PNG
            continue

        frame_path = os.path.join(frames_path, frame_file)
        frame = cv2.imread(frame_path)
        if frame is None:
            # cv2.imread returns None for missing, truncated or corrupt files;
            # passing that on would crash the colour conversion downstream.
            print(f"Skipping unreadable frame {frame_path}")
            continue

        results.append({
            "caption": generate_caption(frame),
            "recognized_text": recognize_text(frame)
        })

    return results

# Function to write one video's per-frame results to the CSV
def save_results_to_csv(results, video_id, csv_writer):
    """Write one ``[video_id, caption, recognized_text]`` row per result.

    Args:
        results: Iterable of dicts with ``'caption'`` and ``'recognized_text'`` keys.
        video_id: Value written in the first column of every row.
        csv_writer: ``csv.writer`` object that receives the rows.
    """
    csv_writer.writerows(
        [video_id, entry['caption'], entry['recognized_text']]
        for entry in results
    )

# Folder holding one sub-folder of frames per video, and the CSV to produce
extracted_frames_dir = 'extracted_frames'
output_csv = 'captioned_frames.csv'

# Write the header row, then one row per processed frame
with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
    csv_writer = csv.writer(file)
    csv_writer.writerow(['video_id', 'caption', 'recognized_text'])

    # Every sub-folder name is used as the video ID; plain files are ignored
    for video_id in os.listdir(extracted_frames_dir):
        video_frames_path = os.path.join(extracted_frames_dir, video_id)
        if not os.path.isdir(video_frames_path):
            continue

        results = process_frames(video_frames_path)
        save_results_to_csv(results, video_id, csv_writer)

        print(f"Processed and saved captions for video ID {video_id}")


config.json:   0%|          | 0.00/4.61k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/228 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/241 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Processed and saved captions for video ID 1471363
Processed and saved captions for video ID 1488315
Processed and saved captions for video ID 1526213
Processed and saved captions for video ID 1548815
Processed and saved captions for video ID 1624211
Processed and saved captions for video ID 1625396
Processed and saved captions for video ID 1641167
Processed and saved captions for video ID 1661301
Processed and saved captions for video ID 1667694
Processed and saved captions for video ID 1671240
Processed and saved captions for video ID 1671980
Processed and saved captions for video ID 1676138
Processed and saved captions for video ID 1678735
Processed and saved captions for video ID 1683011
Processed and saved captions for video ID 1696112
Processed and saved captions for video ID 1702594
Processed and saved captions for video ID 1702851
Processed and saved captions for video ID 1707220
Processed and saved captions for video ID 1708967
Processed and saved captions for video ID 1710855


## Part 3: Summarizing Individual Captions Generated and Texts from Frames and Joining Them with the Video Description and Audio Transcripts

This code generates summaries for combined captions and recognized text from video frames and saves the results to a new CSV file. It starts by loading a pre-trained BART model and tokenizer for text summarization. The generate_summary function processes text inputs to create concise summaries. The script reads an existing CSV file (captioned_frames.csv) containing captions and recognized text, groups the data by video_id, and combines the text for each video. It then generates two summaries per video: one for the combined captions and another for the combined recognized text. Finally, it saves the video IDs along with their summaries into a new CSV file named new.csv.

In [4]:
# Load pre-trained BART model and tokenizer
# NOTE(review): these assignments rebind the module-level names `tokenizer` and
# `model`, which generate_caption() in the captioning cell also reads. Once this
# cell has run, the captioning functions pick up the BART objects instead of the
# captioning model unless the captioning cell is run again first.
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Function to summarise a block of text
def generate_summary(text):
    """Summarise ``text`` with the module-level ``tokenizer`` and ``model``.

    Args:
        text: String to summarise.

    Returns:
        The decoded summary, or a fixed placeholder sentence when ``text`` is
        empty or whitespace only.
    """
    # Nothing to summarise: return the placeholder instead of calling the model
    if len(text.strip()) == 0:
        return "No text available for summarization."

    # Input is prefixed with "summarize: " and truncated to 1024 tokens
    prompt = "summarize: " + text
    inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=1024, truncation=True)

    # Beam search (4 beams), summary length constrained to 40..150 tokens
    summary_ids = model.generate(
        inputs,
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Input CSV (one row per frame) and output CSV (one row per video)
input_csv_path = "captioned_frames.csv"
output_csv_path = "new.csv"

df = pd.read_csv(input_csv_path)


def _join_text(values):
    """Join the non-missing entries of a column into one space-separated string."""
    # Empty strings written to the CSV are read back as NaN; joining them
    # directly would raise TypeError, so drop them first.
    return ' '.join(values.dropna().astype(str))


# Group the DataFrame by 'video_id' and combine 'caption' and 'recognized_text' for each group
grouped = df.groupby('video_id').agg({
    'caption': _join_text,
    'recognized_text': _join_text
}).reset_index()

# Generate summaries for each combined text
summaries_caption = []
summaries_text_recognition = []
video_ids = []

# Iterate over each row in the grouped DataFrame
for i, (index, row) in enumerate(grouped.iterrows()):
    video_ids.append(row['video_id'])
    summaries_caption.append(generate_summary(row['caption']))
    summaries_text_recognition.append(generate_summary(row['recognized_text']))
    print('Processed for video:', i)

# Create a new DataFrame with VideoID and Summary
summary_df = pd.DataFrame({
    'VideoID': video_ids,
    'Caption Summary': summaries_caption,
    'Text Summary': summaries_text_recognition
})

# Save the results to a new CSV file
summary_df.to_csv(output_csv_path, index=False)

print(f"Summaries have been saved to {output_csv_path}")



Processed for video: 0
Processed for video: 1
Processed for video: 2
Processed for video: 3
Processed for video: 4
Processed for video: 5
Processed for video: 6
Processed for video: 7
Processed for video: 8
Processed for video: 9
Processed for video: 10
Processed for video: 11
Processed for video: 12
Processed for video: 13
Processed for video: 14
Processed for video: 15
Processed for video: 16
Processed for video: 17
Processed for video: 18
Processed for video: 19
Processed for video: 20
Processed for video: 21
Processed for video: 22
Processed for video: 23
Processed for video: 24
Processed for video: 25
Processed for video: 26
Processed for video: 27
Processed for video: 28
Processed for video: 29
Processed for video: 30
Processed for video: 31
Processed for video: 32
Processed for video: 33
Processed for video: 34
Processed for video: 35
Processed for video: 36
Processed for video: 37
Processed for video: 38
Processed for video: 39
Processed for video: 40
Processed for video: 41
Pr

### Part 3-A: Join the Columns from Sample.csv to the Summarized Captions

In [5]:
data = pd.read_csv('Sample.csv')

# Start from the three summary columns only, so that re-running this cell does
# not merge onto the description/speech columns added by a previous run.
base_columns = ['VideoID', 'Caption Summary', 'Text Summary']

# One metadata row per video: duplicate ids in Sample.csv would otherwise
# multiply the summary rows during the merge.
metadata = (
    data[['creative_data_id', 'creative_data_description', 'speech']]
    .drop_duplicates(subset='creative_data_id')
)

# Attach description and speech in a single left merge, drop the redundant id
# column, and rename 'speech' to the 'Speech' column name read later on.
summary_df = (
    summary_df[base_columns]
    .merge(metadata, left_on='VideoID', right_on='creative_data_id', how='left')
    .drop(columns=['creative_data_id'])
    .rename(columns={'speech': 'Speech'})
)

# Save the results to a new CSV file
summary_df.to_csv(output_csv_path, index=False)

print(f"Summaries have been saved to {output_csv_path}")

Summaries have been saved to new.csv


## Part 4: Text Classification, Question Answering and converting Confidence score to Yes/No format

This code classifies text data from a CSV file using a zero-shot classification model and saves the results to two separate CSV files. It first reads a CSV file containing video IDs and various text summaries. For each video, it prepares a combined text from the video description, speech, caption summary, and text summary, and then applies a zero-shot classification model to answer a set of predefined questions about the content. The code initializes a BART-based classification pipeline, processes each video's text to generate scores and labels for each question, and finally saves the classification results (scores and labels) to results_scores.csv and results_labeled.csv, respectively.

In [6]:
# Load a text classification model
# NOTE(review): `classifier` is rebound to a zero-shot-classification pipeline in
# the following code cell, so this pipeline is only in effect until that cell runs.
classifier = pipeline('text-classification', model='distilbert-base-uncased-finetuned-sst-2-english')

# Function to classify a text in the context of a question
def classify_text(text, question):
    """Return the label of the first prediction for a question/text prompt.

    Args:
        text: Text to classify.
        question: Question placed on the line before ``text`` in the prompt.

    Returns:
        The ``'label'`` entry of the first item returned by the module-level
        ``classifier``.
    """
    predictions = classifier(f"{question}\n{text}")
    first_prediction = predictions[0]
    return first_prediction['label']


In [7]:
# Silence all warnings for the rest of the session
warnings.filterwarnings('ignore')

# Load the per-video summaries joined with description and speech
df = pd.read_csv('new.csv')

# Build one text blob per video from its description, speech and the two summaries
data_for_classification = [
    {
        'video_id': row['VideoID'],
        'text': (
            f"Video Description: {row['creative_data_description']}\n"
            f"Speech: {row['Speech']}\n"
            f"Caption Summary: {row['Caption Summary']}\n"
            f"Text Summary: {row['Text Summary']}"
        ),
    }
    for index, row in df.iterrows()
]

# Questions posed to the classifier. Each key is used as a column name in the
# score and label tables built below; each value is the question text passed to
# the classifier as its single candidate label.
questions = {
    'call_to_online': "Does the text contain a call to go online (e.g., shop online, visit the Web)?",
    'online_contact_info': "Does the text provide online contact information (e.g., URL, website)?",
    'visual_or_verbal_call_to_purchase': "Does the text include a visual or verbal call to purchase (e.g., buy now, order now)?",
    'urgency_to_act': "Does the text portray a sense of urgency to act (e.g., buy before sales end, order before it ends)?",
    'incentive_to_buy': "Does the text provide an incentive to buy (e.g., a discount, a coupon, a sale, or 'limited time offer')?",
    'offline_contact_info': "Does the text provide offline contact information (e.g., phone, mail, store location)?",
    'mention_of_something_free': "Does the text mention something free?",
    'specific_product_or_service': "Does the text mention at least one specific product or service (e.g., model, type, item)?",
    'mention_of_price': "Does the text mention a price?",
    'brand_shown_multiple_times': "Does the text indicate that the brand (logo, brand name) or trademark is shown multiple times?",
    'brand_shown_once_at_end': "Does the text indicate that the brand or trademark is shown exactly once at the end of the ad?",
    'emotional_intent': "Is the ad intended to affect the viewer emotionally, either with positive or negative emotions, based on the text?",
    'positive_feeling_about_brand': "Does the text give a positive feeling about the brand?",
    'story_arc': "Does the ad have a story arc, with a beginning and an end, based on the text?",
    'reversal_of_fortune': "Does the ad have a reversal of fortune, where something changes for the better or worse?",
    'relatable_characters': "Does the ad have relatable characters based on the text?",
    'creative_or_clever': "Is the ad creative or clever based on the text?",
    'funny_intent': "Is the ad intended to be funny based on the text?",
    'sensory_stimulation': "Does the ad provide sensory stimulation (e.g., cool visuals, arousing music, mouth-watering) based on the text?",
    'visually_pleasing': "Is the ad visually pleasing based on the text?",
    'cute_elements': "Does the ad have cute elements like animals, babies, animated characters, etc., based on the text?"
}

# Initialize classification pipeline
classifier = pipeline('zero-shot-classification', model='facebook/bart-large-mnli')

# Scores strictly above this threshold are labelled 'Yes'; everything else is 'No'
YES_THRESHOLD = 0.72

# One dict per video; the tables are built once after the loop. This replaces
# row-by-row DataFrame.append, which was removed in pandas 2.0 and copied the
# whole frame on every iteration.
score_records = []
label_records = []

for entry in data_for_classification:
    video_id = entry['video_id']
    text = entry['text']

    scores = {'video_id': video_id}
    labels = {'video_id': video_id}
    for question_key, question_text in questions.items():
        # Each question is scored on its own, as the only candidate label
        result = classifier(text, [question_text])
        score = result['scores'][0]
        scores[question_key] = score
        # Label score as 'Yes' or 'No'
        labels[question_key] = 'Yes' if score > YES_THRESHOLD else 'No'

    score_records.append(scores)
    label_records.append(labels)
    print('Processed for ', video_id)

# 'video_id' stays the first column because it is the first key of every record
scores_df = pd.DataFrame(score_records)
labels_df = pd.DataFrame(label_records)

# Save scores and labels to separate CSV files
scores_df.to_csv('results_scores.csv', index=False)
labels_df.to_csv('results_labeled.csv', index=False)


Processed for  1471363
Processed for  1488315
Processed for  1526213
Processed for  1548815
Processed for  1624211
Processed for  1625396
Processed for  1641167
Processed for  1661301
Processed for  1667694
Processed for  1671240
Processed for  1671980
Processed for  1676138
Processed for  1678735
Processed for  1683011
Processed for  1696112
Processed for  1702594
Processed for  1702851
Processed for  1707220
Processed for  1708967
Processed for  1710855
Processed for  1710993
Processed for  1713794
Processed for  1723501
Processed for  1733728
Processed for  1739116
Processed for  1742915
Processed for  1744482
Processed for  1747914
Processed for  1749291
Processed for  1768584
Processed for  1776082
Processed for  1788954
Processed for  1825984
Processed for  1913310
Processed for  1913929
Processed for  1930720
Processed for  1934234
Processed for  1942611
Processed for  1942695
Processed for  1951792
Processed for  1958530
Processed for  1958838
Processed for  1959692
Processed f

## Part 5: Processing the Ground Truth Data

### Part 5-A: Removing Unnecessary Columns

In [8]:
# Path of the survey export (update to point at your Excel file)
input_excel_path = "ground_truth.xlsx"
df = pd.read_excel(input_excel_path)

# Survey columns that are not needed for the Yes/No evaluation
# (update this list with the columns you want to drop)
columns_to_drop = [
    'Timestamp',
    'If "yes" to the above, which of the following emotions is closest to the emotion that the ad was intending the viewer to feel? (Select all that apply.)',
    'If yes to the above, did the ad successfully affect you emotionally, as intended?',
    'If yes to the above, was the ad successfully funny, as intended?',
    'Was there a famous person in this ad? ',
    'If yes to the above, write the name of the famous person, if known.',
    'What happened in this ad? (Answer in 2-3 sentences each)',
    'What was/were the company\'s goal(s) with this ad? Choose (potentially multiple) from:',
    'How successful was the ad in achieving its goal(s)?',
    'How much did you like the ad? (1. Strongly dislike, 2. Dislike, 3. Neither Like or Dislike, 4. Like, 5. Strongly Like)',
    'What was the slogan presented in the ad, if any?',
    'After addressing the specific survey items, write a general description of the ad. You can use answers to the questions above to formulate your answer. Your description should include:\nBrand and Product Identification: \nSpecify the brand and whether a product is being advertised. (1 sentence)\nVisual Elements: Describe what is seen on the screen, including setting, characters, and any text or graphics. (max 2 sentences)\nAuditory Elements: Note what is heard, such as dialogue, voice-over, music, or sound effects. (max 2 sentences)\n',
    'Any additional feedback or things we should be aware of? ',
    'Please enter the video identifier one more time (e.g. 123456789.mp4)',
]
df = df.drop(columns=columns_to_drop)
print(df.columns)


Index(['creative_data_id',
       'Is there a call to go online (e.g., shop online, visit the Web)? ',
       'Is there online contact information provided (e.g., URL, website)? ',
       'Is there a visual or verbal call to purchase (e.g., buy now, order now)?',
       'Does the ad portray a sense of urgency to act (e.g., buy before sales ends, order before ends)? ',
       'Is there an incentive to buy (e.g., a discount, a coupon, a sale or "limited time offer")? ',
       'Is there offline contact information provided (e.g., phone, mail, store location)?',
       'Is there mention of something free? ',
       'Does the ad mention at least one specific product or service (e.g., model, type, item)? ',
       'Is there any verbal or visual mention of the price?',
       'Does the ad show the brand (logo, brand name) or trademark (something that most people know is the brand) multiple times?\n\nFor example, Nike ads often have the "swoosh" logo prominently displayed on shoes and apparel

### Part 5-B: Handling values and Renaming of columns

In [9]:
# Every column after the first one holds the answers to one survey question
remaining_columns = df.columns[1:]

# Short positional names: question_1, question_2, ...
new_column_names = [f'question_{number}' for number in range(1, len(remaining_columns) + 1)]

# Map each long survey header to its short name and apply the renaming
column_renames = dict(zip(remaining_columns, new_column_names))
df = df.rename(columns=column_renames)

# Collapse answers such as 'Yes, Both' or 'Yes, Visual' to plain 'Yes', and 'no' to 'No'
def replace_values(cell):
    """Normalise one survey answer to 'Yes' or 'No' where possible.

    Args:
        cell: A single cell value from an answer column.

    Returns:
        ``'Yes'`` when the answer starts with "yes" and ``'No'`` when it is
        exactly "no" (both case-insensitive, ignoring surrounding whitespace).
        Any other value, including missing or non-string cells, is returned
        unchanged.
    """
    if not isinstance(cell, str):
        # Unanswered questions arrive as NaN/None; calling .lower() on them
        # would raise AttributeError.
        return cell

    normalized = cell.strip().lower()
    if normalized.startswith("yes"):
        return "Yes"
    if normalized == "no":
        return "No"
    return cell

# Normalise the answers in every question column
for col in new_column_names:
    df[col] = df[col].map(replace_values)

# Write the normalised survey answers to disk
df.to_csv('updated_file.csv', index=False)

### Part 5-C: Assigning Values(Yes/No) based on majority vote 

In [10]:
# Normalise the answers in every question column (same step as in the previous cell)
for col in new_column_names:
    df[col] = df[col].map(replace_values)

# Function to reduce all annotators' answers for one video to a majority vote
def determine_ground_truth(group):
    """Return the majority 'Yes'/'No' answer per question for one video.

    Args:
        group: DataFrame with the rows of a single video; it must contain
            every column listed in the module-level ``new_column_names``.

    Returns:
        A ``pd.Series`` indexed by question column. The value is ``'No'`` only
        when 'No' votes strictly outnumber 'Yes' votes; ties (including no
        'Yes'/'No' votes at all) resolve to ``'Yes'``.
    """
    def _majority(votes):
        tally = Counter(votes)
        return 'No' if tally['No'] > tally['Yes'] else 'Yes'

    return pd.Series({
        col: _majority(group[col].tolist())
        for col in new_column_names
    })

# One majority-vote row per video: group the annotator rows by 'creative_data_id',
# reduce each group with determine_ground_truth, and turn the id back into a column
ground_truth_df = (
    df.groupby('creative_data_id')
    .apply(determine_ground_truth)
    .reset_index()
)

# Write the per-video ground truth to disk
ground_truth_df.to_csv('ground_truth_file.csv', index=False)

print(ground_truth_df)

     creative_data_id question_1 question_2 question_3 question_4 question_5  \
0             1471363         No        Yes         No         No         No   
1             1488315         No         No         No         No         No   
2             1526213         No         No         No         No        Yes   
3             1548815         No         No         No         No         No   
4             1624211         No        Yes         No         No         No   
..                ...        ...        ...        ...        ...        ...   
145           3351059         No         No        Yes         No        Yes   
146           3361032         No         No         No         No         No   
147           3414303         No         No         No         No         No   
148           3415261         No        Yes         No         No         No   
149           3422482         No         No        Yes        Yes        Yes   

    question_6 question_7 question_8 qu

## Part 6 : Evaluation of Classification Model

### Part 6-A: Overall Model Evaluation

Evaluates the performance of a zero-shot classification model by comparing its predicted answers against ground truth values. It starts by loading predicted and ground truth CSV files, renaming columns for consistency, and ensuring that the video IDs and columns match between the two datasets. The script then compares predictions and true values for each question, calculating metrics such as precision, recall, F1 score, accuracy, specificity, NPV (negative predictive value), FPR (false positive rate), FNR (false negative rate), and MCC (Matthews correlation coefficient). It also computes the percentage of agreements between predictions and ground truth. Finally, it prints out these evaluation metrics.

In [11]:
# Load the predicted answers and ground truth values
predicted_df = pd.read_csv('results_labeled.csv')
ground_truth_df = pd.read_csv('ground_truth_file.csv')

# Rename the ID column in ground_truth_df to match predicted_df
ground_truth_df = ground_truth_df.rename(columns={'creative_data_id': 'video_id'})

# Get the list of remaining columns from the 2nd column onwards
remaining_columns = predicted_df.columns[1:]

# Create new column names for the predicted DataFrame
new_column_names = [f'question_{i+1}' for i in range(len(remaining_columns))]

# Give both frames the same positional question names. For the ground truth
# this only has an effect if its file still uses the prediction column names.
column_renames = dict(zip(remaining_columns, new_column_names))
predicted_df = predicted_df.rename(columns=column_renames)
ground_truth_df = ground_truth_df.rename(columns=column_renames)

# Align rows by video ID rather than by file order, so that row i of both
# frames refers to the same video in the comparisons below.
predicted_df = predicted_df.sort_values('video_id').reset_index(drop=True)
ground_truth_df = ground_truth_df.sort_values('video_id').reset_index(drop=True)

# Compare as plain lists: an element-wise `==` raises ValueError when the two
# sides differ in length, which would hide the assertion message.
assert list(predicted_df.columns) == list(ground_truth_df.columns), "Columns do not match"
assert predicted_df['video_id'].tolist() == ground_truth_df['video_id'].tolist(), "Video IDs do not match"

# Extract the question columns, skipping the 'video_id' column
questions = predicted_df.columns[1:]  # All columns except 'video_id'

# Pool the encoded predictions and ground truth of every question
answer_codes = {'Yes': 1, 'No': 0}
all_y_pred = []
all_y_true = []
all_agreements = 0  # Number of (video, question) pairs where both sides agree

for question in questions:
    # Encode 'Yes'/'No' as 1/0 for the current question
    y_pred = predicted_df[question].map(answer_codes).values
    y_true = ground_truth_df[question].map(answer_codes).values

    all_agreements += (y_pred == y_true).sum()
    all_y_pred.extend(y_pred)
    all_y_true.extend(y_true)

# Metrics over the pooled answers
overall_precision = precision_score(all_y_true, all_y_pred)
overall_recall = recall_score(all_y_true, all_y_pred)
overall_f1 = f1_score(all_y_true, all_y_pred)
overall_accuracy = accuracy_score(all_y_true, all_y_pred)
overall_mcc = matthews_corrcoef(all_y_true, all_y_pred)

# Rates derived from the confusion matrix counts
conf_matrix = confusion_matrix(all_y_true, all_y_pred)
tn, fp, fn, tp = conf_matrix.ravel()
overall_specificity = tn / (tn + fp)
overall_npv = tn / (tn + fn)
overall_fpr = fp / (fp + tn)
overall_fnr = fn / (fn + tp)

# Share of pooled comparisons on which prediction and ground truth agree
total_comparisons = len(all_y_pred)
agreement_percentage = (all_agreements / total_comparisons) * 100

# Print the overall scores
overall_report = [
    ("Precision", overall_precision),
    ("Recall", overall_recall),
    ("F1 Score", overall_f1),
    ("Accuracy", overall_accuracy),
    ("Specificity", overall_specificity),
    ("NPV", overall_npv),
    ("FPR", overall_fpr),
    ("FNR", overall_fnr),
    ("MCC", overall_mcc),
]
for metric_name, metric_value in overall_report:
    print(f"Overall {metric_name}: {metric_value:.2f}")
print(f"Agreement Percentage: {agreement_percentage:.2f}%")


Overall Precision: 0.59
Overall Recall: 0.83
Overall F1 Score: 0.69
Overall Accuracy: 0.64
Overall Specificity: 0.46
Overall NPV: 0.75
Overall FPR: 0.54
Overall FNR: 0.17
Overall MCC: 0.32
Agreement Percentage: 63.90%


### Part 6-B: Questionwise Evaluation

In [13]:
# Load the predicted answers and ground truth values
predicted_df = pd.read_csv('results_labeled.csv')
ground_truth_df = pd.read_csv('ground_truth_file.csv')

# Rename the ID column in ground_truth_df to match predicted_df
ground_truth_df = ground_truth_df.rename(columns={'creative_data_id': 'video_id'})

# Get the list of remaining columns from the 2nd column onwards
remaining_columns = predicted_df.columns[1:]

# Create new column names for the predicted DataFrame
new_column_names = [f'question_{i+1}' for i in range(len(remaining_columns))]

# Give both frames the same positional question names. For the ground truth
# this only has an effect if its file still uses the prediction column names.
column_renames = dict(zip(remaining_columns, new_column_names))
predicted_df = predicted_df.rename(columns=column_renames)
ground_truth_df = ground_truth_df.rename(columns=column_renames)

# Align rows by video ID rather than by file order, so that row i of both
# frames refers to the same video in the comparisons below.
predicted_df = predicted_df.sort_values('video_id').reset_index(drop=True)
ground_truth_df = ground_truth_df.sort_values('video_id').reset_index(drop=True)

# Compare as plain lists: an element-wise `==` raises ValueError when the two
# sides differ in length, which would hide the assertion message.
assert list(predicted_df.columns) == list(ground_truth_df.columns), "Columns do not match"
assert predicted_df['video_id'].tolist() == ground_truth_df['video_id'].tolist(), "Video IDs do not match"

# Extract the question columns, skipping the 'video_id' column
questions = predicted_df.columns[1:]  # All columns except 'video_id'

# Human-readable question text, looked up by position: questions_list[i] is
# printed as the heading for the i-th question column in the loop below.
questions_list = [
    "Is there a call to go online (e.g., shop online, visit the Web)?",
    "Is online contact information provided (e.g., URL, website)?",
    "Is there a visual or verbal call to purchase (e.g., buy now, order now)?",
    "Does the ad portray a sense of urgency to act (e.g., buy before sales end, order before it ends)?",
    "Is there an incentive to buy (e.g., a discount, a coupon, a sale, or 'limited time offer')?",
    "Is offline contact information provided (e.g., phone, mail, store location)?",
    "Is there mention of something free?",
    "Does the ad mention at least one specific product or service (e.g., model, type, item)?",
    "Is there any verbal or visual mention of the price?",
    "Does the ad show the brand (logo, brand name) or trademark (something that most people know is the brand) multiple times?",
    "Does the ad show the brand or trademark exactly once at the end of the ad?",
    "Is the ad intended to affect the viewer emotionally, either with positive emotion (fun, joy), negative emotion (sad, anxious) or another type of emotion? (Note: You may not personally agree, but assess if that was the intention.)",
    "Does the ad give you a positive feeling about the brand?",
    "Does the ad have a story arc, with a beginning and an end?",
    "Does the ad have a reversal of fortune, where something changes for the better, or changes for the worse?",
    "Does the ad have relatable characters?",
    "Is the ad creative/clever?",
    "Is the ad intended to be funny? (Note: You may not personally agree, but assess if that was the intention.)",
    "Does this ad provide sensory stimulation (e.g., cool visuals, arousing music, mouth-watering)?",
    "Is the ad visually pleasing?",
    "Does the ad have cute elements like animals, babies, animated characters, etc?"
]

# Running totals across all questions
total_agreements = 0
total_comparisons = 0

answer_codes = {'Yes': 1, 'No': 0}

# Report precision, recall, F1 and agreement for each question column in turn
for i, question in enumerate(questions):
    # Encode 'Yes'/'No' as 1/0 for the current question
    y_pred = predicted_df[question].map(answer_codes).values
    y_true = ground_truth_df[question].map(answer_codes).values

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Agreement = rows where prediction and ground truth carry the same code
    agreements = (y_pred == y_true).sum()
    n_rows = len(y_pred)
    total_agreements += agreements
    total_comparisons += n_rows

    print(f"Metrics for Question {i+1}: {questions_list[i]}")
    for metric_name, metric_value in (("Precision", precision), ("Recall", recall), ("F1 Score", f1)):
        print(f"  {metric_name}: {metric_value:.2f}")
    print(f"  Agreement Percentage: {(agreements / n_rows) * 100:.2f}%")
    print()

# Agreement over every (video, question) pair
overall_agreement_percentage = (total_agreements / total_comparisons) * 100

print(f"Overall Agreement Percentage: {overall_agreement_percentage:.2f}%")


Metrics for Question 1: Is there a call to go online (e.g., shop online, visit the Web)?
  Precision: 0.43
  Recall: 0.60
  F1 Score: 0.50
  Agreement Percentage: 65.33%

Metrics for Question 2: Is online contact information provided (e.g., URL, website)?
  Precision: 0.55
  Recall: 0.44
  F1 Score: 0.49
  Agreement Percentage: 58.00%

Metrics for Question 3: Is there a visual or verbal call to purchase (e.g., buy now, order now)?
  Precision: 0.40
  Recall: 0.93
  F1 Score: 0.56
  Agreement Percentage: 46.00%

Metrics for Question 4: Does the ad portray a sense of urgency to act (e.g., buy before sales end, order before it ends)?
  Precision: 0.39
  Recall: 0.55
  F1 Score: 0.45
  Agreement Percentage: 64.67%

Metrics for Question 5: Is there an incentive to buy (e.g., a discount, a coupon, a sale, or 'limited time offer')?
  Precision: 0.57
  Recall: 0.92
  F1 Score: 0.71
  Agreement Percentage: 66.00%

Metrics for Question 6: Is offline contact information provided (e.g., phone, mai

In [19]:
import pandas as pd

# Read the labeled results table from disk
df = pd.read_csv('results_labeled.csv')

# Replacement headers, listed in the positional order of the columns they
# overwrite: the id column first, followed by the 21 question texts.
new_headers = [
    'creative_data_id',
    'Is there a call to go online (e.g., shop online, visit the Web)?',
    'Is online contact information provided (e.g., URL, website)?',
    'Is there a visual or verbal call to purchase (e.g., buy now, order now)?',
    'Does the ad portray a sense of urgency to act (e.g., buy before sales end, order before it ends)?',
    'Is there an incentive to buy (e.g., a discount, a coupon, a sale, or "limited time offer")?',
    'Is offline contact information provided (e.g., phone, mail, store location)?',
    'Is there mention of something free?',
    'Does the ad mention at least one specific product or service (e.g., model, type, item)?',
    'Is there any verbal or visual mention of the price?',
    'Does the ad show the brand (logo, brand name) or trademark (something that most people know is the brand) multiple times?',
    'Does the ad show the brand or trademark exactly once at the end of the ad?',
    'Is the ad intended to affect the viewer emotionally, either with positive emotion (fun, joy), negative emotion (sad, anxious) or another type of emotion? (Note: You may not personally agree, but assess if that was the intention.)',
    'Does the ad give you a positive feeling about the brand?',
    'Does the ad have a story arc, with a beginning and an end?',
    'Does the ad have a reversal of fortune, where something changes for the better, or changes for the worse?',
    'Does the ad have relatable characters?',
    'Is the ad creative/clever?',
    'Is the ad intended to be funny? (Note: You may not personally agree, but assess if that was the intention.)',
    'Does this ad provide sensory stimulation (e.g., cool visuals, arousing music, mouth-watering)?',
    'Is the ad visually pleasing?',
    'Does the ad have cute elements like animals, babies, animated characters, etc?',
]

# Map whatever label currently sits at each position to its replacement.
# Indexing df.columns[position] (instead of zipping) means a file with fewer
# columns than headers still fails with an IndexError rather than being
# silently renamed in part; columns beyond the listed ones keep their names.
header_map = {
    df.columns[position]: header
    for position, header in enumerate(new_headers)
}
df = df.rename(columns=header_map)

# Write the re-headed table to a new CSV, leaving out the row index
df.to_csv('raut.kar_answers.csv', index=False)

