In [None]:
!pip install -q -U google-generativeai
!pip install pillow scikit-learn pickle  

In [None]:
import pathlib
import textwrap

import numpy as np
import pickle
import time

from IPython.display import display
from IPython.display import Markdown

from sklearn.metrics import roc_auc_score
import re

def to_markdown(text):
  """Wrap ``text`` in a Markdown blockquote for notebook display.

  Bullet characters ('•') are rewritten as indented '*' list markers, then
  every line (blank ones included) is prefixed with '> '.

  Returns an ``IPython.display.Markdown`` object built from the result.
  """
  bulleted = text.replace('•', '  *')
  # The always-true predicate makes textwrap.indent prefix empty lines as well.
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
  return Markdown(quoted)

import google.generativeai as genai

### Download link for dataset
https://uark-my.sharepoint.com/:u:/g/personal/haovan_uark_edu/EU3T0FTfNTtBhKNNMo0ZHW0BEn1ZouV9wd1SrcSkW9csBw?e=MESUhd

In [None]:
import os
from getpass import getpass

# Keep the credential out of the notebook file: read it from the GOOGLE_API_KEY
# environment variable, and fall back to an interactive (non-echoing) prompt
# when the variable is unset or empty.
gemini_api_key = os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: ")
genai.configure(api_key=gemini_api_key)

In [None]:
# Print the name of every model that advertises the 'generateContent' method.
content_model_names = (
    candidate.name
    for candidate in genai.list_models()
    if 'generateContent' in candidate.supported_generation_methods
)
for model_name in content_model_names:
    print(model_name)

In [None]:
# One entry per harm category, each with the threshold set to "BLOCK_NONE".
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

In [None]:
# Model handle used for every request in this notebook; the safety settings
# defined above are attached at construction time.
model = genai.GenerativeModel(
    'gemini-1.5-flash-latest',
    safety_settings=safety_settings,
)

In [None]:
import PIL.Image
import json

# Root directory of the dataset; image paths from the metadata are appended to it.
img_dir = '/hateful_memes/'

# The metadata file holds one JSON document per line. Each entry is kept as its
# raw line and decoded later with json.loads. The encoding is fixed to UTF-8 so
# decoding does not depend on the platform default, and blank lines (e.g. a
# trailing newline at end of file) are dropped because json.loads rejects them.
with open(img_dir + 'test_seen.jsonl', 'r', encoding='utf-8') as file:
    test_data_meta = [line for line in file if line.strip()]

In [None]:
# prompt = lambda ocr_text: f"""Hatefulness definition: Hate speech is strictly defined as a direct or indirect attack on people based on characteristics, including ethnicity, race, nationality, immigration status, religion, caste, sex, gender identity, sexual orientation, disability, or disease. The definition of attack includes violent or dehumanizing speech, statements of inferiority, and calls for exclusion or segregation. Mocking hate crime is also considered hate speech.
# Classification criteria: A meme is considered hateful if either the image or text is hateful on its own. A meme is also considered hateful if both the image and text are not hateful individually, but together convey a hateful message. If neither the image nor text is hateful, and they don't create a hateful meaning together, the meme is not hateful.

# You consider visual and textual meanings to classify the meme as hateful or non-hateful based on the hatefulness definition and classification criteria.
# The text written in the image: "{ocr_text}"
# After giving a short explanation, provide the answer with following format:
# Classification:
# Probability of the meme being hateful (from 0 to 1):"""

In [None]:
def prompt(ocr_text):
    """Return the classification prompt sent alongside each meme image.

    The prompt states the hatefulness definition, the classification criteria
    and the answer format the model is asked to follow.

    ``ocr_text`` is accepted so call sites can pass the meme's text, but this
    variant of the prompt does not interpolate it: the returned string is the
    same for every input.
    """
    return """Hatefulness definition: Hate speech is strictly defined as a direct or indirect attack on people based on characteristics, including ethnicity, race, nationality, immigration status, religion, caste, sex, gender identity, sexual orientation, disability, or disease. The definition of attack includes violent or dehumanizing speech, statements of inferiority, and calls for exclusion or segregation. Mocking hate crime is also considered hate speech.
Classification criteria: A meme is considered hateful if either the image or text is hateful on its own. A meme is also considered hateful if both the image and text are not hateful individually, but together convey a hateful message. If neither the image nor text is hateful, and they don't create a hateful meaning together, the meme is not hateful.

You consider visual and textual meanings to classify the meme as hateful or non-hateful based on the hatefulness definition and classification criteria.
After giving a short explanation, provide the answer with following format:
Classification:
Probability of the meme being hateful (from 0 to 1):"""

In [None]:
# Bookkeeping shared with the inference loop. Kept in its own cell so the loop
# cell can be re-run without resetting progress.
responses, unprocessed_ids = [], []  # model answers / image names that yielded no text
last_processed_idx = -1              # index of the last sample already answered

In [None]:
# Query the model for the first 50 test samples, one request at a time.
for idx, json_str in enumerate(test_data_meta[:50]):
    # Resume support: skip samples already answered by an earlier run of this cell.
    if idx <= last_processed_idx:
        continue
    test_sample = json.loads(json_str)
    image_path = img_dir + test_sample['img']
    print(f"Processing image: {test_sample['img']}, actual label: {test_sample['label']}")

    # Open the image in a context manager so its file handle is released as
    # soon as the request has been made, instead of leaking one handle per sample.
    with PIL.Image.open(image_path) as img:
        response = model.generate_content([img, prompt(test_sample['text'])])
    try:
        # Accessing .text can raise when the response holds no usable text
        # (presumably e.g. a blocked candidate -- confirm against the SDK docs).
        answer = response.text
    except Exception as e:
        # Report the cause instead of discarding it, then record a placeholder
        # so `responses` stays index-aligned with the metadata.
        print(f"Could not read response text ({type(e).__name__}: {e})")
        unprocessed_ids.append(test_sample['img'])
        answer = "MODEL ERROR"
    responses.append(answer)
    last_processed_idx = idx
    print(answer)
    print()
    # Pause between requests.
    time.sleep(5)

In [None]:
# Report how many answers were collected and how many of them had no usable text.
print(
    f'Number of responses: {len(responses)}',
    f'Number of failed responses: {len(unprocessed_ids)}',
    sep='\n',
)

In [None]:
from datetime import datetime
import pickle
# Persist the collected answers under a timestamped file name
# (hour_minute_second__day_month_year) so repeated runs do not overwrite each other.
now_time = datetime.now()
run_stamp = now_time.strftime("%H_%M_%S__%d_%m_%Y")
file_name = f'results_gemini_seen_test_{run_stamp}.pkl'

results_payload = {"responses": responses, "unprocessed_ids": unprocessed_ids}
with open(file_name, 'wb') as f:
    pickle.dump(results_payload, f)

In [None]:
# Read the answers back from the results file named by `file_name`.
# NOTE: pickle.load executes arbitrary code from the file; only load files you created.
with open(file_name, 'rb') as f:
    data = pickle.load(f)
responses = data['responses']

In [None]:
# Turn each free-text model answer into a hateful probability and a 0/1 prediction.
actuals = []
predictions = []
prob_list = []
threshold = 0.5

# Raw string: avoids the invalid "\:" / "\." escapes of a plain literal.
# The capture group isolates the first number after the colon that follows the
# "probability of the meme being hateful" label, so digits from the label itself
# (e.g. "(from 0 to 1)") can never be glued onto the parsed value.
prob_pattern = re.compile(r"probability of the meme being hateful.*?:.*?\n?([0-9]+\.?[0-9]*)")
# Phrases used as a fallback when no probability can be parsed.
hateful_keywords = ["classification: hateful", "classified as hateful"]

# Inference may have covered only a prefix of the test set; score just the
# samples that have an answer instead of indexing past the end of `responses`.
if len(responses) < len(test_data_meta):
    print(f'Only {len(responses)} of {len(test_data_meta)} samples have a response; scoring those.')

for idx, (json_str, response) in enumerate(zip(test_data_meta, responses)):
    test_sample = json.loads(json_str)
    actuals.append(test_sample['label'])

    lower_response = response.lower()
    prob_match = prob_pattern.search(lower_response)

    if prob_match is None:
        print(f'>>>>> Could not find probability in response - Idx: {idx} - Res: {lower_response}')

        # No number available: fall back to a hard 0/1 decision from the keywords.
        is_hateful = any(kw in lower_response for kw in hateful_keywords)
        predicted_class = 1 if is_hateful else 0
        prob = 1.0 if is_hateful else 0.0
        print(f'Assigned probability: {prob} <<<<<\n\n' )
    else:
        # A sentence-ending period may be captured with the number ("1."); drop it.
        prob = float(prob_match.group(1).rstrip('.'))
        predicted_class = 1 if prob >= threshold else 0
    prob_list.append(prob)
    predictions.append(predicted_class)

In [None]:
# Accuracy from the thresholded predictions; AUROC from the raw probabilities.
labels_arr = np.array(actuals)
preds_arr = np.array(predictions)
acc = np.mean(labels_arr == preds_arr)
auroc = roc_auc_score(actuals, prob_list)
print(f"Accuracy: {acc}")
print(f"AUROC: {auroc}")