# GPT API to generate meme description

In [None]:
# Install the OpenAI Python SDK, pinned to version 0.28.
!pip install openai==0.28

In [None]:
# Download the EXIST 2024 Memes Dataset
# Please manually download the dataset from the official source and place it in the project directory
# Expected file: EXIST_2024_Memes_Dataset.zip

In [None]:
# Extract the manually downloaded dataset archive into ./data/.
!unzip EXIST_2024_Memes_Dataset.zip -d ./data/

In [None]:
# split the Spanish memes and English memes
import os
import shutil

# Training memes are routed by the first character of their file name:
# names starting with '1' are copied to the Spanish folder and names
# starting with '2' to the English folder. Anything else is left alone.
memes_path = './data/EXIST 2024 Memes Dataset/training/memes'
destination_by_prefix = {
    '1': './data/Spanish_memes',
    '2': './data/English_memes',
}

# Make sure both target folders exist before copying.
for destination in destination_by_prefix.values():
    os.makedirs(destination, exist_ok=True)

for filename in os.listdir(memes_path):
    destination = destination_by_prefix.get(filename[:1])
    if destination is not None:
        shutil.copy(os.path.join(memes_path, filename), os.path.join(destination, filename))

In [None]:
# Copy the test memes whose file names start with '3' into a dedicated
# Spanish test folder (created on demand).
memes_path = './data/EXIST 2024 Memes Dataset/test/memes'
os.makedirs('./data/Spanish_memes_test', exist_ok=True)

for filename in os.listdir(memes_path):
    if not filename.startswith('3'):
        continue
    source_file = os.path.join(memes_path, filename)
    target_file = os.path.join('./data/Spanish_memes_test', filename)
    shutil.copy(source_file, target_file)

In [None]:
import openai

In [None]:
# Read the key from the OPENAI_API_KEY environment variable so the secret does
# not have to be pasted into (and saved with) the notebook. When the variable
# is not set, the placeholder below is used and must be replaced manually.
import os

API_key = os.environ.get('OPENAI_API_KEY', 'YOUR_OPENAI_API_KEY_HERE')

In [None]:
# Register the key with the openai SDK. Note that the request helper defined in
# this notebook receives API_key as an argument and sends it in its own
# Authorization header.
openai.api_key = API_key

In [None]:
import base64
import requests

# Turn a local image file into base64 text.
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

# Prompt configuration per language. The key is the substring looked for in the
# image path; insertion order matters because 'English' is checked before
# 'Spanish'.
_SEXISM_PROMPTS = {
    "English": {
        "seed": 1234,
        "system": "You are an expert in identifying the presence of sexism within memes. Your task is to assess whether the content of a meme portrays or criticizes sexism through marginalization, prejudice against women based on gender stereotypes, beliefs in male superiority, or misogynistic views.",
        "user": "Briefly describe and analyze the content of the meme in one sentence, then label it as 'YES' if the meme embodies or criticizes sexism, or 'NO' if it does not. Please use the following format: 1. Description: [Your description]. 2. Label: [YES/NO].",
    },
    "Spanish": {
        "seed": 4321,
        "system": "Eres un experto en identificar la presencia de sexismo en los memes. Tu tarea es evaluar si el contenido de un meme retrata o critica el sexismo a través de la marginación, prejuicios contra las mujeres basados en estereotipos de género, creencias en la superioridad masculina o vistas misóginas.",
        "user": "Describa y analice brevemente el contenido del meme en una frase, luego etiquételo como 'YES' si el meme encarna o critica el sexismo, o 'NO' si no lo hace. Por favor, utilice el siguiente formato: 1. Description: [Tu descripción]. 2. Label: [YES/NO].",
    },
}


def _build_sexism_payload(base64_image, prompt):
    """Assemble the chat-completions request body for one base64-encoded meme."""
    return {
        "model": "gpt-4-turbo",
        "temperature": 0.75,
        "seed": prompt["seed"],
        "messages": [
            {"role": "system", "content": prompt["system"]},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt["user"]},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low",  # use low resolution to process the image
                        },
                    },
                ],
            },
        ],
    }


def analyze_image_for_sexist_content(image_path, API_key, timeout=120):
    """Ask the chat-completions endpoint to describe and label one meme image.

    The prompt language is chosen from the path: a path containing 'English'
    gets the English prompt, otherwise a path containing 'Spanish' gets the
    Spanish prompt. Exactly one HTTP request is sent per call.

    Args:
        image_path: Path of the local image file; must contain 'English' or
            'Spanish'.
        API_key: OpenAI API key, sent as a Bearer token.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot hang a long batch run.

    Returns:
        The text content of the first choice in the API response.

    Raises:
        ValueError: If the path contains neither 'English' nor 'Spanish'.
        RuntimeError: If the response body does not have the expected
            ``choices[0].message.content`` structure (for example an API
            error payload).
    """
    language = next((name for name in _SEXISM_PROMPTS if name in image_path), None)
    if language is None:
        raise ValueError(
            f"Cannot determine prompt language: {image_path!r} contains neither 'English' nor 'Spanish'"
        )

    base64_image = encode_image(image_path)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_key}"
    }
    payload = _build_sexism_payload(base64_image, _SEXISM_PROMPTS[language])

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    data = response.json()
    try:
        return data['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError) as err:
        # Surface the API's own error body instead of a bare KeyError.
        raise RuntimeError(
            f"Unexpected OpenAI API response (HTTP {response.status_code}): {data}"
        ) from err


In [None]:
# Try the analyzer on a single Spanish test meme; the bare name on the last
# line shows the model's reply as the cell output.
example_path = './data/Spanish_memes_test/310531.jpeg'
example_result = analyze_image_for_sexist_content(example_path, API_key)
example_result

In [None]:
# Try the analyzer on a single Spanish test meme; the bare name on the last
# line shows the model's reply as the cell output.
example_path = './data/Spanish_memes_test/310532.jpeg'
example_result = analyze_image_for_sexist_content(example_path, API_key)
example_result

In [None]:
# Try the analyzer on a single Spanish test meme; the bare name on the last
# line shows the model's reply as the cell output.
example_path = './data/Spanish_memes_test/310533.jpeg'
example_result = analyze_image_for_sexist_content(example_path, API_key)
example_result

In [None]:
# Try the analyzer on a single Spanish test meme; the bare name on the last
# line shows the model's reply as the cell output.
example_path = './data/Spanish_memes_test/310535.jpeg'
example_result = analyze_image_for_sexist_content(example_path, API_key)
example_result

In [None]:
# Try the analyzer on a single Spanish test meme; the bare name on the last
# line shows the model's reply as the cell output.
example_path = './data/Spanish_memes_test/310539.jpeg'
example_result = analyze_image_for_sexist_content(example_path, API_key)
example_result

In [None]:
# Try the analyzer on a single Spanish test meme; the bare name on the last
# line shows the model's reply as the cell output.
example_path = './data/Spanish_memes_test/310540.jpeg'
example_result = analyze_image_for_sexist_content(example_path, API_key)
example_result

In [None]:
# Split the most recent example reply on newlines to inspect how the model
# laid out its answer.
parts = example_result.split('\n')
parts

In [None]:
# Number of newline-separated pieces in that reply.
len(parts)

In [None]:
import os
import pandas as pd

def _parse_description_and_label(response):
    """Split one model reply into a ``(description, label)`` pair.

    Blank lines are dropped and each line is stripped before parsing, so a
    reply that puts an empty line between the description and the label (or
    carries trailing whitespace / '\\r') is still parsed instead of being
    reported as a format error.
    """
    if not response:
        return 'No content returned', 'Unknown'

    segments = [line.strip() for line in response.split('\n') if line.strip()]
    if len(segments) < 2:
        return 'Response format error', 'Unknown'

    description = segments[0].replace('1. Description: ', '').strip()
    # The second non-blank line is expected to look like "2. Label: YES."
    label_info = segments[1].split(': ')
    if len(label_info) == 2:
        label = label_info[1].strip().strip('.')
    else:
        label = 'Format error'
    return description, label


def create_df(path):
    """Run the sexism analyzer over every file in ``path`` and tabulate the replies.

    Files are processed in ascending numeric order of the part of the file
    name before the first '.', so every file name must start with an integer
    (``int()`` raises ValueError otherwise). A failure while analyzing or
    parsing one image is printed and recorded as an 'Error in processing' row
    so the rest of the batch still runs.

    Args:
        path: Directory containing the meme images.

    Returns:
        A ``(df, answers)`` tuple. ``df`` is a DataFrame with the columns
        'id_EXIST' (file name), 'description' and 'reference' (parsed label).
        ``answers`` maps each file name to the raw reply text; images whose
        request failed are absent from it.
    """
    images = [os.path.join(path, f) for f in os.listdir(path)]
    print(f"Total images: {len(images)}")

    sorted_images = sorted(images, key=lambda x: int(os.path.basename(x).split('.')[0]))

    answers = {}
    results = []

    for image_path in sorted_images:
        meme_id = os.path.basename(image_path)
        try:
            response = analyze_image_for_sexist_content(image_path, API_key)
            print(meme_id, response)
            answers[meme_id] = response
            description, label = _parse_description_and_label(response)
        except Exception as e:
            # Best-effort batch: log the failure and keep going.
            print(f"Error processing {image_path}: {str(e)}")
            description = 'Error in processing'
            label = 'Unknown'

        results.append({'id_EXIST': meme_id, 'description': description, 'reference': label})

    df = pd.DataFrame(results)
    return df, answers

In [None]:
# English and Spanish memes are processed separately; this cell handles the
# English training memes.
english_path = "./data/English_memes/"
english_memes_df, english_answers = create_df(english_path)

In [None]:
# Describe and label the Spanish test memes.
spanish_test = "./data/Spanish_memes_test/"
spanish_memes_df_test, spanish_answers_test = create_df(spanish_test)

In [None]:
# Export the Spanish test results: index the rows by the id_EXIST column and
# write them as indented JSON keyed by that index.
file_path = './data/Spanish_Meme_GPT_Description_test.json'
spanish_memes_df_test.set_index('id_EXIST', inplace=True)
spanish_memes_df_test.to_json(path_or_buf=file_path, orient='index', indent=4)
print(f'Data saved to {file_path}')

In [None]:
import json

# Nest every raw reply under an "answer" key (rebinding the same variable),
# then store the mapping as indented JSON.
spanish_answers_test = {
    meme_name: {"answer": raw_reply}
    for meme_name, raw_reply in spanish_answers_test.items()
}

with open('./data/Spanish_Meme_GPT_Initial_Answers_test.json', 'w') as json_file:
    json_file.write(json.dumps(spanish_answers_test, indent=4))

In [None]:
# English and Spanish memes are processed separately; this cell handles the
# Spanish training memes.
spanish_path = "./data/Spanish_memes/"
spanish_memes_df, spanish_answers = create_df(spanish_path)

In [None]:
# Export the Spanish training results: index the rows by the id_EXIST column
# and write them as indented JSON keyed by that index.
file_path = './data/Spanish_Meme_GPT_Description.json'
spanish_memes_df.set_index('id_EXIST', inplace=True)
spanish_memes_df.to_json(path_or_buf=file_path, orient='index', indent=4)
print(f'Data saved to {file_path}')

In [None]:
import json

# Nest every raw reply under an "answer" key (rebinding the same variable),
# then store the mapping as compact JSON.
spanish_answers = {
    meme_name: {"answer": raw_reply}
    for meme_name, raw_reply in spanish_answers.items()
}

with open('./data/Spanish_Meme_GPT_Initial_Answers.json', 'w') as json_file:
    json_file.write(json.dumps(spanish_answers))

Process the test data and save it to a JSON file.

In [None]:
# Test memes are routed by the first character of their file name:
# names starting with '3' are copied to the Spanish test folder and names
# starting with '4' to the English test folder. Anything else is left alone.
memes_path = './data/EXIST 2024 Memes Dataset/test/memes'
test_destination_by_prefix = {
    '3': './data/Spanish_memes_test',
    '4': './data/English_memes_test',
}

# Make sure both target folders exist before copying.
for test_destination in test_destination_by_prefix.values():
    os.makedirs(test_destination, exist_ok=True)

for filename in os.listdir(memes_path):
    test_destination = test_destination_by_prefix.get(filename[:1])
    if test_destination is not None:
        shutil.copy(os.path.join(memes_path, filename), os.path.join(test_destination, filename))

In [None]:
# Describe and label the English test memes.
english_test_path = "./data/English_memes_test"
english_test_memes_df, english_test_answers = create_df(english_test_path)

In [None]:
# Export the English test results: index the rows by the id_EXIST column and
# write them as indented JSON keyed by that index.
file_path = './data/English_Meme_GPT_Description_test.json'
english_test_memes_df.set_index('id_EXIST', inplace=True)
english_test_memes_df.to_json(path_or_buf=file_path, orient='index', indent=4)
print(f'Data saved to {file_path}')

In [None]:
import json
# Nest every raw reply under an "answer" key (rebinding the same variable),
# then store the mapping as compact JSON.
english_test_answers = {
    meme_name: {"answer": raw_reply}
    for meme_name, raw_reply in english_test_answers.items()
}
with open('./data/English_Meme_GPT_Initial_Answers_test.json', 'w') as json_file:
    json_file.write(json.dumps(english_test_answers))

In [None]:
# Describe and label the Spanish test memes. The path must point at the Spanish
# test folder: the analyzer picks its prompt language from the path, and the
# results below are saved under Spanish file names. (The path previously
# pointed at the English test folder by mistake.)
spanish_test_path = "./data/Spanish_memes_test"
spanish_test_memes_df, spanish_test_answers = create_df(spanish_test_path)

In [None]:
# Export the Spanish test results: index the rows by the id_EXIST column and
# write them as indented JSON keyed by that index.
file_path = './data/Spanish_Meme_GPT_Description_test.json'
spanish_test_memes_df.set_index('id_EXIST', inplace=True)
spanish_test_memes_df.to_json(path_or_buf=file_path, orient='index', indent=4)
print(f'Data saved to {file_path}')

In [None]:
import json

# Nest every raw Spanish *test* reply under an "answer" key and store the
# mapping as JSON. The source must be spanish_test_answers; iterating
# spanish_answers would write the training replies into the test file.
spanish_test_answers = {key: {"answer": value} for key, value in spanish_test_answers.items()}

with open('./data/Spanish_Meme_GPT_Initial_Answers_test.json', 'w') as json_file:
    json.dump(spanish_test_answers, json_file)