# Import dependencies

In [None]:
import os
import google.generativeai as genai
import re
import requests
import torch
import json
from transformers import AutoProcessor, AutoModelForCausalLM
import time
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Define variables to deal with Gemini's request rate limit

In [None]:
from datetime import datetime

# Minimum spacing, in seconds, enforced between two consecutive Gemini calls.
# Gemini 1.5 Flash -> 15 RPM (1 req. each 4 s); 4.5 s leaves a small margin.
TIME_BETWEEN_REQUESTS = 4.5

# Timestamp of the most recent request; the notebook start time is used as
# the initial value.
TIME_LAST_REQUEST = datetime.now()

# Gemini queries

In [None]:
def load_llm(api_key=None, model_name="gemini-1.5-flash"):
    """Configure the Gemini client and bind the model to ``large_language_model``.

    The API key is intentionally not stored in the source: it is taken from
    the ``api_key`` argument or, when that is omitted, from the environment.

    Args:
        api_key: Gemini API key. When falsy, ``GOOGLE_API_KEY`` and then
            ``GEMINI_API_KEY`` are read from the environment.
        model_name: Name passed to ``genai.GenerativeModel``.

    Raises:
        RuntimeError: If no key is passed and neither environment variable
            is set.
    """
    global large_language_model

    # NOTE(security): an API key used to be hard-coded on this line. Any key
    # that has been committed to a notebook must be treated as compromised
    # and revoked.
    key = (
        api_key
        or os.environ.get("GOOGLE_API_KEY")
        or os.environ.get("GEMINI_API_KEY")
    )
    if not key:
        raise RuntimeError(
            "No Gemini API key found: pass api_key=... or set the "
            "GOOGLE_API_KEY (or GEMINI_API_KEY) environment variable."
        )

    genai.configure(api_key=key)
    large_language_model = genai.GenerativeModel(model_name)


load_llm()

In [None]:
def general_prompt(descriptions):
    """Build the prompt asking for the Meme shared by several Domain Memes.

    Args:
        descriptions: Iterable of Domain Meme descriptions; each one becomes
            a ``- ``-prefixed bullet line in the prompt.

    Returns:
        The prompt string: a fixed definition of 'Meme' / 'Domain Memes',
        the bulleted descriptions, then the fixed task statement.
    """
    # Fixed text that precedes the bullet list.
    preamble = (
        "A 'Meme' is a representation of a shared thought or idea, expressed through a combination of image and text, "
        "that encapsulates a relatable, humorous, or culturally significant situation. It serves as a 'Template' for creating variations, "
        "called 'Domain Memes', that convey specific instances of the broader concept, enabling collective understanding, communication, and humor.\n\n"
        "I have the descriptions of several Domain Memes of the same Meme:\n\n"
    )

    # Fixed text that follows the bullet list.
    task = (
        "Based on the descriptions, examples, and keywords, identify the unifying concept ('Meme') that connects these 'Domain Memes'. "
        "The 'Meme' should capture the overarching theme or idea that applies to all the 'Domain Memes', highlighting their shared emotional tone, "
        "social context, or cultural relevance. Focus on the meaning, ignore the name of the 'Meme' and its description. "
    )

    # One bullet per description, newline-separated.
    bullet_list = "\n".join(f"- {entry}" for entry in descriptions)

    return f"{preamble}{bullet_list}\n\n{task}"

In [None]:
def domain_memes_to_meme(prompt):
    """Send ``prompt`` to the Gemini model and return the response text.

    Before the request, sleeps as long as needed so that at least
    ``TIME_BETWEEN_REQUESTS`` seconds have passed since the timestamp stored
    in the module-level ``TIME_LAST_REQUEST``, then updates that timestamp.

    Args:
        prompt: Prompt text passed to ``generate_content``.

    Returns:
        The ``text`` attribute of the model response.
    """
    global TIME_LAST_REQUEST

    # Use total_seconds(): timedelta.seconds is only the seconds *component*
    # (it drops days and microseconds), which miscomputes the remaining wait.
    elapsed = (datetime.now() - TIME_LAST_REQUEST).total_seconds()
    if elapsed < TIME_BETWEEN_REQUESTS:
        time.sleep(TIME_BETWEEN_REQUESTS - elapsed)

    # Stamp the request time after any sleep, right before the call.
    TIME_LAST_REQUEST = datetime.now()

    response = large_language_model.generate_content(
        prompt,
        generation_config=genai.types.GenerationConfig(
            temperature=0.5,
        ),
    )

    return response.text

In [None]:
def get_thought(meme):
    """Ask the Gemini model for a three-emotion summary of ``meme``.

    Before the request, sleeps as long as needed so that at least
    ``TIME_BETWEEN_REQUESTS`` seconds have passed since the timestamp stored
    in the module-level ``TIME_LAST_REQUEST``, then updates that timestamp.

    Args:
        meme: Text describing the meme; interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model response. The prompt requests
        the form "Word1, Word2, Word3", but the reply is not validated here.
    """
    global TIME_LAST_REQUEST

    # Use total_seconds(): timedelta.seconds is only the seconds *component*
    # (it drops days and microseconds), which miscomputes the remaining wait.
    elapsed = (datetime.now() - TIME_LAST_REQUEST).total_seconds()
    if elapsed < TIME_BETWEEN_REQUESTS:
        time.sleep(TIME_BETWEEN_REQUESTS - elapsed)

    # Stamp the request time after any sleep, right before the call.
    TIME_LAST_REQUEST = datetime.now()

    summarization = large_language_model.generate_content(
        f"Get the thought behind this meme in a triplet of 3 emotions: {meme}. Ignore concepts which are by nature associated with any meme (for example Irony, Humor, Relatability). The structure of the answer must be: Word1, Word2, Word3",
        generation_config=genai.types.GenerationConfig(
            temperature=1.5,
        ),
    )
    return summarization.text

# Group by Meme and Language, storing Domain Memes in a list

In [None]:
# Load the flat list of per-meme records.
results_path = "results.json"
with open(results_path, "r", encoding="utf-8") as src:
    data = json.load(src)

# Build {meme name: {language: [explanations...]}}; setdefault creates the
# intermediate dict / list the first time a key is seen.
grouped_data = {}
for record in data:
    per_language = grouped_data.setdefault(record.get("Meme name"), {})
    explanations = per_language.setdefault(record.get("Language"), [])
    explanations.append(record.get("Explaination"))

# Persist the grouped structure as human-readable JSON.
grouped_results_path = "grouped_results.json"
with open(grouped_results_path, "w", encoding="utf-8") as dst:
    json.dump(grouped_data, dst, ensure_ascii=False, indent=4)
print(f"Results saved to {grouped_results_path}")

Results saved to grouped_results.json


# Retrieve the Thought for each Meme and Language

In [None]:
# One output row per (meme, language) pair.
columns = ['Meme name', 'Language', 'Meme', 'Thought']
instances_langauges_grouped = []

for meme, explanations_by_language in grouped_data.items():
    for language, explainations in explanations_by_language.items():
        # First derive the unifying Meme from the Domain Meme explanations,
        # then ask for the thought behind that Meme.
        dm_to_meme = domain_memes_to_meme(general_prompt(explainations))
        thought = get_thought(dm_to_meme)
        instances_langauges_grouped.append(
            [meme, language, dm_to_meme, thought]
        )

df = pd.DataFrame(instances_langauges_grouped, columns=columns)

In [None]:
# Write the DataFrame to memes_thoughts.csv without the index column.
df.to_csv('memes_thoughts.csv', index=False)