## 1 - Install Packages and Setup Env

In [None]:
# Core stack for this notebook: transformers + accelerate (model loading),
# langchain, sentencepiece, requests and pillow.
!pip install transformers accelerate langchain sentencepiece requests pillow
# Installs transformers again, this time from the GitHub repository, on top of
# the release installed by the previous line.
!pip install git+https://github.com/huggingface/transformers.git
# NOTE(review): duckduckgo-search is not referenced by any cell shown in this notebook — confirm it is still needed.
!pip install duckduckgo-search

# timm is installed for BLIP-2 (noted by the author as a BLIP-2 requirement);
# the BLIP-2 loading code further down is currently commented out.
!pip install timm


In [None]:
# Installs langchain again together with langchain-community; the latter provides
# the HuggingFacePipeline wrapper imported in the model-loading cell.
!pip install langchain langchain-community

In [None]:
# Deletes the Hugging Face cache directory in the home folder
# (presumably forcing models to be downloaded again on the next load).
!rm -rf ~/.cache/huggingface
# Upgrades the Hub client and the model libraries.
# NOTE(review): this may replace the transformers build installed from GitHub in the first cell — confirm which version is intended.
!pip install --upgrade huggingface_hub transformers accelerate

In [None]:
import re, random
from io import BytesIO

import requests
from PIL import Image

In [None]:
# Authenticate with the Hugging Face Hub without writing the access token into
# the notebook. The token is read from the HF_TOKEN environment variable; when
# that variable is not set, `login` receives token=None and asks for the token
# interactively instead.
import os

from huggingface_hub import login

login(token=os.environ.get("HF_TOKEN"))

## 2 - Install Models

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline   

# Checkpoint used for all text generation in this notebook; the other
# checkpoints are left commented out.
# model_name = "HuggingFaceH4/zephyr-7b-alpha"   
model_name = "TheBloke/vicuna-7B-1.1-HF"  
# model_name = "google/gemma-2b"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" and torch_dtype="auto" leave device placement and dtype
# selection to the library.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")

# Text-generation pipeline limited to 256 newly generated tokens per call,
# wrapped as `llm` so the rest of the notebook can call it as a LangChain LLM.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)
llm = HuggingFacePipeline(pipeline=pipe)


In [None]:
import os

# Imgflip account used by generate_meme for the caption_image endpoint.
# The values are read from the IMGFLIP_USERNAME / IMGFLIP_PASSWORD environment
# variables so real credentials never have to be saved in the notebook; the
# original placeholders remain as the fallback when the variables are not set.
IMGFLIP_USERNAME = os.environ.get("IMGFLIP_USERNAME", "your_username")
IMGFLIP_PASSWORD = os.environ.get("IMGFLIP_PASSWORD", "your_password")

def search_template(keyword):
    """Find an Imgflip meme template whose name contains ``keyword``.

    Downloads the template list from the Imgflip ``get_memes`` endpoint and
    compares ``keyword`` against every template name, ignoring case.

    Args:
        keyword: Text to look for inside the template names.

    Returns:
        The first template entry whose name contains ``keyword``; when no name
        matches, the first template of the list is returned instead.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        RuntimeError: If the response contains no templates at all.
    """
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get("https://api.imgflip.com/get_memes", timeout=30)
    response.raise_for_status()

    memes = response.json()["data"]["memes"]
    if not memes:
        # Without this guard the fallback below would fail with a bare IndexError.
        raise RuntimeError("Imgflip returned an empty template list")

    needle = keyword.lower()
    return next((meme for meme in memes if needle in meme["name"].lower()), memes[0])

def generate_meme(template_id, top_text, bottom_text):
    """Render a captioned meme through the Imgflip ``caption_image`` endpoint.

    The request is authenticated with the module-level ``IMGFLIP_USERNAME``
    and ``IMGFLIP_PASSWORD`` values.

    Args:
        template_id: Identifier of the Imgflip template to caption.
        top_text: Caption sent as ``text0``.
        bottom_text: Caption sent as ``text1``.

    Returns:
        The URL of the generated meme as reported by Imgflip.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        RuntimeError: If Imgflip reports that the request did not succeed
            (for example because of invalid credentials).
    """
    payload = {
        "template_id": template_id,
        "username": IMGFLIP_USERNAME,
        "password": IMGFLIP_PASSWORD,
        "text0": top_text,
        "text1": bottom_text
    }
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.post("https://api.imgflip.com/caption_image", data=payload, timeout=30)
    response.raise_for_status()

    result = response.json()
    if not result.get("success"):
        # A failed call carries an error message instead of a "data" entry, so
        # surface that message rather than failing on the missing key.
        reason = result.get("error_message", "unknown error")
        raise RuntimeError(f"Imgflip caption_image failed: {reason}")
    return result["data"]["url"]


## 3 - Set Tools and Create Agent

In [None]:
from langchain.agents import initialize_agent, Tool
# Expose the two Imgflip helpers as LangChain tools; each description is the
# text attached to the tool for the agent.
# NOTE(review): generate_meme takes three positional arguments, while a plain
# Tool is presumably invoked with a single input string — confirm that the
# GenerateMeme tool is actually callable by the agent.
tools = [
     Tool(
          name="SearchTemplate",
          func=search_template,
          description="Returns an Imgflip meme template based on the searched keyword."
     ),
     Tool(
          name="GenerateMeme",
          func=generate_meme,
          description="Takes Template ID, Top Text, and Bottom Text, and returns a meme URL."
     )
]


In [None]:
from langchain.agents import initialize_agent

# Build an agent of type "zero-shot-react-description" over `tools` and `llm`,
# with verbose output enabled.
# NOTE(review): `agent` is not used by any other cell shown in this notebook;
# meme_agent calls the helper functions directly.
agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    verbose=True
)


In [None]:
# from transformers import Blip2Processor, Blip2ForConditionalGeneration
# from PIL import Image
# import torch

# processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
# blip_model = Blip2ForConditionalGeneration.from_pretrained(
#     "Salesforce/blip2-flan-t5-xl",
#     torch_dtype=torch.float16,
#     device_map={"": "cpu"}   # Use "cuda" if you have a GPU available, otherwise use "cpu" for CPU inference
# )

## 4 - Meme Describer Functions

In [None]:
def describe_image(image_url, style=None):
    """Generate a description of the image at ``image_url``.

    The image is downloaded, converted to RGB and passed, together with the
    ``style`` text, to the module-level ``processor`` and ``blip_model``.

    Args:
        image_url: URL of the image to describe.
        style: Prompt text handed to the processor alongside the image. When
            omitted, a built-in instruction asking for a specific description
            of characters, expressions, emotions and actions is used.

    Returns:
        The decoded model output with special tokens removed.

    Raises:
        requests.HTTPError: If the image download answers with an HTTP error status.
    """
    if style is None:
        style = (
            "Look carefully at the image and describe exactly what is happening. "
            "Mention the characters, their facial expressions, emotions, and actions. "
            "Do NOT explain what a meme is or talk about memes in general. "
            "Be specific about who is doing what."
        )

    # Check the HTTP status and read the decoded body, so a failed download is
    # reported as an HTTP error instead of an image-decoding error. The timeout
    # keeps the notebook from hanging on a stalled connection.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    raw_image = Image.open(BytesIO(response.content)).convert('RGB')

    inputs = processor(raw_image, style, return_tensors="pt").to("cpu")

    out = blip_model.generate(**inputs, max_new_tokens=400)
    return processor.decode(out[0], skip_special_tokens=True)

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
# BLIP image-captioning checkpoint. `processor` and `blip_model` are the
# module-level objects used by describe_image and get_base_caption; the model
# is moved to the CPU.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cpu")

def get_base_caption(image_url):
    """Produce a short caption for the image at ``image_url``.

    The image is downloaded, converted to RGB and captioned with the
    module-level ``processor`` and ``blip_model``.

    Args:
        image_url: URL of the image to caption.

    Returns:
        The decoded caption with special tokens removed.

    Raises:
        requests.HTTPError: If the image download answers with an HTTP error status.
    """
    # Check the HTTP status and read the decoded body, so a failed download is
    # reported as an HTTP error instead of an image-decoding error. The timeout
    # keeps the notebook from hanging on a stalled connection.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    img = Image.open(BytesIO(response.content)).convert("RGB")

    inputs = processor(img, return_tensors="pt").to("cpu")
    out = blip_model.generate(**inputs, max_length=60)
    return processor.decode(out[0], skip_special_tokens=True)

In [30]:
def expand_caption_with_llm(base_caption, llm):
    """Ask ``llm`` to turn a short caption into a detailed scene description.

    Args:
        base_caption: Short caption that is embedded in the rewrite prompt.
        llm: Callable invoked as ``llm(prompt, temperature=0.4)``.

    Returns:
        Whatever ``llm`` returns for the rewrite prompt.
    """
    rewrite_request = f"""
      Short image caption: "{base_caption}"

      Rewrite this into a **detailed scene description** for someone who cannot see the image.

      STRICT RULES:
      - DO NOT mention memes or say "this is a meme".
      - DO NOT write code, DO NOT include functions, DO NOT import anything.
      - Only describe what is visually present.
      - Mention if there are panels (left side vs right side).
      - Describe what each person or animal is doing.
      - Include facial expressions and emotions (angry, confused, smug, etc.).
      - Mention animals, objects, and their positions.
      - Only return one clean paragraph in plain English.

      ONLY return the scene description. NOTHING else.
      """
    sampling_options = {"temperature": 0.4}
    return llm(rewrite_request, **sampling_options)

In [None]:
def describe_image2(image_url, llm):
    """Describe an image in two stages.

    First a short caption is obtained with ``get_base_caption``; that caption
    is then expanded into a detailed description by
    ``expand_caption_with_llm`` using ``llm``.
    """
    return expand_caption_with_llm(get_base_caption(image_url), llm)


## 5 - Caption Parsers

In [None]:
def _clean_caption_line(text):
    """Remove wrapping quotes and a leading ``Speaker:`` label from one caption line."""
    text = text.strip()
    # Quotes wrapping the whole line, e.g. "Me: hello" (single or double).
    text = re.sub(r'^[\'"]+|[\'"]+$', '', text)
    # One-word talker prefixes like Me:, Cat:, John:
    text = re.sub(r'^\s*\w+\s*:\s*', '', text)
    # A quote that only became the first character once the prefix was
    # removed, e.g. Me: "hello"
    text = re.sub(r'^[\'"]+|[\'"]+$', '', text)
    return text.strip()


def extract_top_bottom(text):
    """Extract the meme captions from ``Top text:`` / ``Bottom text:`` lines.

    When ``text`` contains several candidates, the LAST ``Top text:`` line and
    the LAST ``Bottom text:`` line are used. Each caption is cleaned of
    surrounding whitespace, wrapping quotes and a leading one-word speaker
    label.

    Args:
        text: Raw text to search.

    Returns:
        A ``(top, bottom)`` tuple of cleaned strings, or ``(None, None)`` when
        either label has no caption on its own line.
    """
    # `[ \t]*` (rather than `\s*`) keeps the match on the label's own line, so
    # a label with nothing after it cannot swallow the following line.
    all_tops = [
        candidate
        for candidate in re.findall(r"Top text:[ \t]*([^\r\n]+)", text)
        if candidate.strip()
    ]
    all_bottoms = [
        candidate
        for candidate in re.findall(r"Bottom text:[ \t]*([^\r\n]+)", text)
        if candidate.strip()
    ]

    if not (all_tops and all_bottoms):
        return None, None

    return _clean_caption_line(all_tops[-1]), _clean_caption_line(all_bottoms[-1])


In [None]:
def generate_clean_captions(llm, prompt, max_retries=3):
    """Query ``llm`` until it yields a usable top/bottom caption pair.

    Each attempt calls ``llm(prompt, temperature=0.95, top_p=0.95)``, prints
    the raw output and parses it with ``extract_top_bottom``. An attempt is
    rejected when either caption is missing or contains one of the fragments
    that indicate the prompt's own rule text leaked into the answer.

    Args:
        llm: Callable producing the model output for ``prompt``.
        prompt: Prompt text sent unchanged on every attempt.
        max_retries: Maximum number of attempts.

    Returns:
        ``(top, bottom)`` from the first accepted attempt, or ``(None, None)``
        when every attempt was rejected.
    """
    prompt_leak_markers = (
        "MUST", "RULES", "DO NOT", "Top text", "Bottom text",
        "line", "Only return", "STRICT", "Example",
        "First line", "Second line",
    )

    def leaks_prompt(candidate):
        # Case-sensitive substring test.
        # NOTE(review): "line" therefore also matches inside words such as
        # "online" or "deadline" — confirm that this is intended.
        for marker in prompt_leak_markers:
            if marker in candidate:
                return True
        return False

    for attempt_no in range(1, max_retries + 1):
        raw_output = llm(prompt, temperature=0.95, top_p=0.95)
        print(f"+++ Model Output (Attempt {attempt_no}): {raw_output}")

        top, bottom = extract_top_bottom(raw_output)

        # Accept only when both captions exist and neither repeats prompt text.
        if top and bottom and not leaks_prompt(top) and not leaks_prompt(bottom):
            print(f"+++ Clean Caption Found!\nTop: {top}\nBottom: {bottom}")
            return top, bottom

        print(f"+++ Bad caption detected → Retrying... ({attempt_no}/{max_retries})")

    print("+++ Could not generate a clean meme caption after retries.")
    return None, None


## 6 - Agent

In [None]:
NUM_MEMES = 5


def _strip_prompt_echo(prompt, output):
    """Return ``output`` without a leading verbatim copy of ``prompt``."""
    return output[len(prompt):] if output.startswith(prompt) else output


def meme_agent(keyword, retry_limit=3, num_memes=1):
    """Generate memes about ``keyword`` and return their URLs.

    For each of the ``num_memes`` rounds, up to ``retry_limit`` attempts are
    made. One attempt looks up a template with ``search_template``, describes
    the template image, asks the module-level ``llm`` for a top/bottom caption
    in a randomly chosen humor style, renders the meme with ``generate_meme``
    and finally asks ``llm`` to rate the captions from 1 to 10. A round ends
    early once a meme scores 7 or higher.

    Args:
        keyword: Topic of the meme; also used to search for the template.
        retry_limit: Maximum number of attempts per meme.
        num_memes: Number of rounds to run.

    Returns:
        A list with the URL of every meme that was rendered, including memes
        whose score stayed below the threshold.
    """
    meme_urls = []
    for meme_num in range(num_memes):
        print(f"--- Meme {meme_num+1} / {num_memes}")
        for attempt in range(retry_limit):
            print(f"--- Attempt {attempt+1} / {retry_limit}")

            # Select a meme template based on the keyword
            template = search_template(keyword)
            print(f"--> Selected Template: {template['name']}")

            template_url = template["url"]

            # Caption the template once: a short caption first, then the
            # detailed description written by the LLM. The detailed description
            # is reused for the meme prompt instead of running the identical
            # caption-and-expand pipeline a second time.
            base_caption = get_base_caption(template_url)
            print("--- base caption:", base_caption)
            caption_v2 = expand_caption_with_llm(base_caption, llm)
            print("--- caption_v2:", caption_v2)
            image_caption = caption_v2
            print("--- image_caption:", image_caption)

            # Random select a style hint for the meme
            style_hint = random.choice([
                "Make it sarcastic",
                "Make it absurd",
                "Make it dark humor",
                "Make it wholesome but funny",
                "Make it chaotic and silly"
            ])

            prompt = f"""
        You are a witty meme creator. {style_hint}.

        Image description: "{image_caption}"
        Template name: '{template['name']}'

        Write ONE funny meme about '{keyword}' using this image and template.

        RULES:
        - Only return exactly TWO lines.
        - First line MUST start with: Top text:
        - Second line MUST start with: Bottom text:
        - No explanations, no code, no HTML, no hashtags.
        - No quotes around the sentences.
        - Do NOT copy the example.

        EXAMPLE (do not copy!):
        Top text: When Monday hits too hard
        Bottom text: And coffee hasn’t kicked in yet

        Now write your own meme in that exact format.
        """

            # Increase diversity with temperature and top_p
            meme_text = llm(prompt, temperature=0.95, top_p=0.95)
            print("--> Model Output:", meme_text)

            # Some generation backends return the prompt followed by the
            # completion. Drop an echoed prompt before parsing so the example
            # lines inside the prompt are never mistaken for the model's answer.
            top, bottom = extract_top_bottom(_strip_prompt_echo(prompt, meme_text))

            if not top or not bottom:
                print("--> Couldn’t parse Top/Bottom text (model gave junk). Retrying…")
                continue

            print(f"--> Final Top text: {top}")
            print(f"--> Final Bottom text: {bottom}")

            # Generate the meme using the Imgflip API
            meme_url = generate_meme(template["id"], top, bottom)
            meme_urls.append(meme_url)
            print("Meme URL:", meme_url)

            # Calculate humor score
            score_prompt = f"""You wrote these meme lines:

        Top text: "{top}"
        Bottom text: "{bottom}"

        How funny and fitting are these two lines together as a meme, on a scale from 1 (not funny at all) to 10 (extremely funny)?
        Only reply with the number score."""

            score_text = llm(score_prompt, temperature=0.3)
            print("--> Raw humor score response:", score_text)

            # Ignore an echoed prompt here as well: it contains the numbers of
            # the rating scale, which would otherwise be read as the score.
            matches = re.findall(r"\b([1-9]|10)\b", _strip_prompt_echo(score_prompt, score_text))
            # The last number in the reply is taken as the score; 0 means "no score found".
            score = int(matches[-1]) if matches else 0

            print(f"--> Final Score: {score}")

            if score >= 7:
                print("--> A funny meme has been found!\n")
                break
            else:
                print("--> The meme isn’t funny enough, trying again...\n")
        else:
            # Runs only when the attempt loop finished without `break`, i.e. no
            # attempt in this round reached the score threshold.
            print("--> No funny meme could be generated.\n")

    return meme_urls



In [None]:
# Generate NUM_MEMES memes for the keyword "cat", allowing up to 3 attempts per meme.
NUM_MEMES = 5
meme_links = meme_agent("cat", retry_limit=3, num_memes=NUM_MEMES)


In [None]:
# Print every meme URL returned by meme_agent, one per line.
for memelink in meme_links:
  print(memelink)

In [None]:
# import gc
# gc.collect()
# torch.cuda.empty_cache()