In [3]:
!pip3 install -q -U google-generativeai

In [7]:
from dotenv import load_dotenv
import os
# Let python-dotenv populate the process environment, then look up the
# Gemini API key there (the lookup yields None when the variable is unset).
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

In [8]:
import google.generativeai as genai
from IPython.display import Image, display
import PIL.Image

# Register the API key with the SDK, then create the shared model handle
# that every later cell uses through the global name `model`.
genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")

### Prompts for few-shot prompting

In [22]:
# Prompt variant 1: minimal three-step instruction (text alone, image alone,
# then both together), each answered with 1 (sarcastic) or 0 (not sarcastic).
# Ends with an "examples" lead-in so few-shot examples can be appended after it.
# Not referenced by any other cell visible in this notebook.
prompt1 = """
You are given a pair of text and image. Your task is to determine whether sarcasm is present using the following guidelines:

1. First, ignore the image and analyze the text alone to see if it is sarcastic.
2. Next, ignore the text and analyze the image alone to determine if it conveys sarcasm.
3. Finally, consider the text and image together to see if they reinforce each other to create an overall sarcastic impression.

Use 1 to indicate sarcasm and 0 if no sarcasm is detected.

Here are a few examples:
"""

# Prompt variant 2: same three-step task as prompt1, but first distinguishes
# "internal" text (embedded in the image) from "external" text (the caption)
# and spells out how sarcasm can arise in each modality or in their pairing.
# Not referenced by any other cell visible in this notebook.
prompt2 = """
You are given a pair consisting of a text and an image. The text can be of two types:
1. Internal Text: Text embedded within the image, considered as part of the image.
2. External Text: Separate text that accompanies the image but is independent of it.

Sarcasm in External Text:
    - The external text may be sarcastic on its own, without any influence from the image (with or without internal text).
    - Alternatively, the sarcastic nature of the external text may only become apparent when combined with the image (with or without internal text).
Sarcasm in the Image:
    - The image (with or without internal text) may be independently sarcastic, without any influence from the external text.
    - Alternatively, the sarcastic nature of the image (with or without internal text) may only become apparent when combined with the external text.
Sarcasm in the (Text, Image) Pair:
    - Consider both the external text, the internal text (if any), and the image together to determine if they collectively create a sarcastic impression.

Your task is to determine whether sarcasm is present using the following guidelines:

1. First, ignore the image(and any internal text) and analyze the external text alone to see if it is sarcastic.
2. Next, ignore the external text and analyze the image(with or without internal text) alone to determine if it conveys sarcasm.
3. Finally, consider the external text, internal text (if any), and the image together to see if they reinforce each other to create an overall sarcastic impression.

Use 1 to indicate sarcasm and 0 if no sarcasm is detected.

Here are a few examples:
"""

# Prompt variant 3: prompt1's instructions plus an explicit list of five
# label combinations, each shown as the JSON object the model should emit
# (keys: isTextSarcastic?, isImageSarcastic?, isTogetherSarcastic?).
# Not referenced by any other cell visible in this notebook.
prompt3 = """
You are given a pair of text and image. Your task is to determine whether sarcasm is present using the following guidelines:

1. First, ignore the image and analyze the text alone to see if it is sarcastic.
2. Next, ignore the text and analyze the image alone to determine if it conveys sarcasm.
3. Finally, consider the text and image together to see if they reinforce each other to create an overall sarcastic impression.

Use 1 to indicate sarcasm and 0 if no sarcasm is detected.

Possible Cases:
Case 1: Text is sarcastic, Image is sarcastic, Text-Image combination is sarcastic.
{"isTextSarcastic?": 1, "isImageSarcastic?": 1, "isTogetherSarcastic?": 1}

Case 2: Text is not sarcastic, Image is sarcastic, Text-Image combination is sarcastic.
{"isTextSarcastic?": 0, "isImageSarcastic?": 1, "isTogetherSarcastic?": 1}

Case 3: Text is sarcastic, Image is not sarcastic, Text-Image combination is sarcastic.
{"isTextSarcastic?": 1, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}

Case 4: Text is not sarcastic, Image is not sarcastic, Text-Image combination is sarcastic.
{"isTextSarcastic?": 0, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}

Case 5: Text is not sarcastic, Image is not sarcastic, Text-Image combination is not sarcastic.
{"isTextSarcastic?": 0, "isImageSarcastic?": 0, "isTogetherSarcastic?": 0} 

Here are a few examples:
"""
# Prompt variant 4: ties each of the three analysis steps to the name of the
# output field it controls (isTextSarcastic?, isImageSarcastic?,
# isTogetherSarcastic?). This is the variant provide_prompt() places at the
# head of the few-shot message.
prompt4 = """
You are given a pair of text and image. Your task is to determine whether sarcasm is present using the following guidelines:

1. First, analyze the text alone to see if it is sarcastic.
    If text is sarcastic, then isTextSarcastic? = 1. Else, isTextSarcastic? = 0
2. Next, ignore the text and analyze the image alone to determine if it conveys sarcasm.
    If image is sarcastic, then isImageSarcastic? = 1. Else, isImageSarcastic? = 0
3. Finally, consider the text and image together to see if they reinforce each other to create an overall sarcastic impression.
    If combination is sarcastic, then isTogetherSarcastic? = 1. Else, isTogetherSarcastic? = 0

Use 1 to indicate sarcasm and 0 if no sarcasm is detected.
Here are a few examples:
"""

In [23]:
# Default few-shot demonstrations as (caption, image path, expected JSON reply).
# Two examples are given for each of the five label combinations used below.
_FEW_SHOT_EXAMPLES = [
    (
        "Text associated with image is: strange bedfellows why is this so relatable?",
        "Sample Images/ayezy8.png",
        """{"isTextSarcastic?": 0, "isImageSarcastic?": 1, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is: calm down 50 points for actually putting a camera in",
        "Sample Images/epymwl.png",
        """{"isTextSarcastic?": 0, "isImageSarcastic?": 1, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is:  the cable guy installed the cable through our hula hoop that we left out. makes sense. cable companies popularity ranks somewhere between flying cockroaches and dog shit stuck to your shoe.",
        "Sample Images/cable.png",
        """{"isTextSarcastic?": 1, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is: my husband keeps opening new jars of jam before he's even finished the last one. 1 or 2 i could understand, but at this point i would seek help",
        "Sample Images/jam.png",
        """{"isTextSarcastic?": 1, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is: snake violently comitted suicide",
        "Sample Images/snake.png",
        """{"isTextSarcastic?": 0, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is: Ahh, such a shortage of cabs in the city.",
        "Sample Images/Traffic_bounded.png",
        """{"isTextSarcastic?": 0, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is: i put that shit right there, said the jesus calmly god was calling them to heaven!",
        "Sample Images/tumor.png",
        """{"isTextSarcastic?": 1, "isImageSarcastic?": 1, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is: in a time before selfies my apologies, it hath already been snap-chatted to thine peeps milady",
        "Sample Images/oil_painting.png",
        """{"isTextSarcastic?": 1, "isImageSarcastic?": 1, "isTogetherSarcastic?": 1}""",
    ),
    (
        "Text associated with image is: had a great visit today at east texas baptist university!",
        "Sample Images/university.png",
        """{"isTextSarcastic?": 0, "isImageSarcastic?": 0, "isTogetherSarcastic?": 0}""",
    ),
    (
        "Text associated with image is: january 's plant of the month is lotus flowers , a symbol of how great beauty can arise from the most abject of conditions . happy saturday !",
        "Sample Images/lotus.png",
        """{"isTextSarcastic?": 0, "isImageSarcastic?": 0, "isTogetherSarcastic?": 0}""",
    ),
]


def provide_prompt(
    prompt: "str | None" = None,
    examples: "list[tuple[str, str, str]] | None" = None,
) -> list:
    """Build the few-shot message sent ahead of every query.

    The message is a flat list: the instruction prompt first, followed by a
    (caption, opened image, expected reply) triple for each example.

    Args:
        prompt: Instruction text placed at the head of the message. When
            omitted, the notebook-level ``prompt4`` is used.
        examples: Iterable of ``(caption, image_path, expected_reply)``
            tuples. When omitted, the ten default demonstrations are used.

    Returns:
        A list mixing ``str`` entries and ``PIL.Image.Image`` objects, so it
        is not a ``list[str]``.
    """
    instruction = prompt4 if prompt is None else prompt
    shots = _FEW_SHOT_EXAMPLES if examples is None else examples

    message = [instruction]
    for caption, image_path, expected_reply in shots:
        # NOTE(review): the opened images are handed to the caller and are
        # never explicitly closed here.
        message.extend([caption, PIL.Image.open(image_path), expected_reply])
    return message

In [28]:
# Zero-shot query: no instructions or examples, only a direct question and
# the image itself.
response = model.generate_content(
    ["Is this image sarcastic?", PIL.Image.open("Sample Images/beard.png")]
)
response.text

"Yes, the image is sarcastic. The caption implies that the man's beard has moved to his chin to hear the music, which is obviously not possible. This humorous exaggeration creates a sarcastic tone. \n"

In [24]:
def detect_sarcasm(image_path: str, text: str) -> str:
    """Classify one (image, caption) pair using the few-shot prompt.

    The request consists of everything returned by ``provide_prompt()``
    followed by the caption and the image to classify.

    Args:
        image_path: Path of the image file to classify.
        text: Caption that accompanies the image.

    Returns:
        The text of the model's reply, unparsed.
    """
    # Open the query image in a context manager so its file handle is
    # released once the request has completed, instead of leaking one
    # handle per call.
    with PIL.Image.open(image_path) as query_image:
        response = model.generate_content(
            provide_prompt() + [
                f"Text associated with image is: {text}",
                query_image,
            ]
        )
    return response.text


In [30]:
def process_image_text_pairs(image_text_pairs: list[ tuple[ str, str] ]) -> list[ tuple[str,str,str] ]:
    """Run ``detect_sarcasm`` on every (image path, caption) pair, in order.

    Returns a list of ``(image_path, text, model_reply)`` tuples, one per
    input pair.
    """
    return [
        (path, caption, detect_sarcasm(path, caption))
        for path, caption in image_text_pairs
    ]

In [12]:
def main(image_text_pairs=None):
    """Detect sarcasm for a batch of (image path, caption) pairs and print the results.

    Args:
        image_text_pairs: Optional list of ``(image_path, text)`` tuples.
            When omitted, the four sample pairs under ``Sample Images/``
            are used.
    """
    if image_text_pairs is None:
        image_text_pairs = [
            ('Sample Images/bill.png', "helicopter ride to the hospital if they showed me the bill mid flight i would have jumped out."),
            ('Sample Images/airpod.png', "this rock looks like an airpod case it is obviously for rock music"),
            ('Sample Images/beard.png', "so the advice here is if you want to keep your hair on your head you shouldn't play guitar i think this is a compliment, friend"),
            ('Sample Images/sleep.png', "dirt nap sleep is just death being shy"),
        ]

    results = process_image_text_pairs(image_text_pairs)

    for image_path, text, result in results:
        print(f"Image: {image_path}")
        # Label spelling corrected from "assciated".
        print(f"Text associated with image: {text}")
        print("Response:")
        print(result)
        print("\n---\n")

In [13]:
# Run the detector on the sample pairs defined in main() and print each reply.
main()

I0000 00:00:1723186080.942543  496525 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Image: Sample Images/bill.png
Text assciated with image: helicopter ride to the hospital if they showed me the bill mid flight i would have jumped out.
Response:
{"isTextSarcastic?": 1, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}

---

Image: Sample Images/airpod.png
Text assciated with image: this rock looks like an airpod case it is obviously for rock music
Response:
{"isTextSarcastic?": 1, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}

---

Image: Sample Images/beard.png
Text assciated with image: so the advice here is if you want to keep your hair on your head you shouldn't play guitar i think this is a compliment, friend
Response:
{"isTextSarcastic?": 1, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}

---

Image: Sample Images/sleep.png
Text assciated with image: dirt nap sleep is just death being shy
Response:
{"isTextSarcastic?": 1, "isImageSarcastic?": 0, "isTogetherSarcastic?": 1}

---

