In [None]:
import json
import os

from tqdm import tqdm
from PIL import Image
import torch
import argparse
import openai
from models.ModelExperiment import ModelExperiment
from data.Benchmark import Benchmark
import concurrent.futures
from util import *
import time
%load_ext autoreload
%autoreload 2

import openai
import requests
from PIL import Image
from io import BytesIO
import random

### prompting

In [None]:
# Build the benchmark with metadata enabled and pull its puzzles.
# `puzzles` is module-level state: the generation functions and the
# forward-chaining cell further down iterate over it via enumerate().
benchmark = Benchmark(with_metadata=True)
puzzles = benchmark.get_puzzles()

In [None]:
# Multiple-choice prompt templates keyed by a prompt-type string ("1"-"4").
# Templates "1" and "2" take four positional `{}` fields (options A-D).
# Templates "3" and "4" take five: a graph description first, then options A-D.
# NOTE(review): presumably selected through the --prompt_type argument; the
# lookup itself is not visible in this notebook — confirm against query_model.
prompt_template = {
    "1": "Which word/phrase is conveyed in this image from the following options (either A, B, C, or D)?\n(A) {} (B) {} (C) {} (D) {}",
    "2": "You are given a rebus puzzle. It consists of text or icons that is used to convey a word or phrase. It needs to be solved through creative thinking. Which word/phrase is conveyed in this image from the following options (either A, B, C, or D)?\n(A) {} (B) {} (C) {} (D) {}",
    "3": "You are given an image of a rebus puzzle. It consists of text or icons that is used to convey a word or phrase. It needs to be solved through creative thinking. You are also given a description of the graph representation of the puzzle. The nodes are elements that contain text or icons, which are then manipulated through the attributes of their node. The description is as follows:\n{}\nWhich word/phrase is conveyed in this image and description from the following options (either A, B, C, or D)?\n(A) {} (B) {} (C) {} (D) {}",
    "4": "You are given an image of a rebus puzzle. It consists of text or icons that is used to convey a word or phrase. It needs to be solved through creative thinking. You are also given a description of the graph representation of the puzzle. The nodes are elements that contain text or icons, which are then manipulated through the attributes of their node. The edges define spatial relationships between these elements. The description is as follows:\n{}\nWhich word/phrase is conveyed in this image and description from the following options (either A, B, C, or D)?\n(A) {} (B) {} (C) {} (D) {}"
}

In [None]:
def generation_lopp(args):
    """Query the model sequentially for the first ``args.count`` puzzles.

    Iterates the module-level ``puzzles`` in order and calls
    ``query_model(puzzle, index, args)`` for each one.

    Args:
        args: Namespace-like object. Only ``args.count`` (maximum number of
            puzzles to process) is read here; the whole object is forwarded
            to ``query_model``.
    """
    for index, puzzle in enumerate(puzzles):
        # Test the limit *before* querying. The earlier post-call check
        # (`index == args.count`) let one puzzle too many through, and still
        # issued a query when count was 0.
        if index >= args.count:
            break
        query_model(puzzle, index, args)

def generation(args, max_workers=20):
    """Query the model for the first ``args.count`` puzzles using a process pool.

    Each puzzle from the module-level ``puzzles`` is submitted to the pool as
    ``query_model(puzzle, index, args)``. The function returns once every
    submitted task has finished; failures raised inside workers are printed
    with the index of the puzzle that caused them rather than being dropped.

    Args:
        args: Namespace-like object. Only ``args.count`` (maximum number of
            puzzles to process) is read here; the whole object is forwarded
            to ``query_model``.
        max_workers: Size of the process pool. Defaults to 20, the value that
            was previously hard-coded.

    Note:
        Work is pickled to be sent to worker processes, so ``query_model``,
        each puzzle and ``args`` must be picklable.
    """
    pending = {}
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        for index, puzzle in enumerate(puzzles):
            # Limit is checked before submitting so exactly `count` puzzles run.
            if index >= args.count:
                break
            # Previously the pool was created but query_model was called
            # inline, so everything ran serially in the parent process.
            pending[executor.submit(query_model, puzzle, index, args)] = index

        # Surface worker-side errors; an unobserved Future hides its exception.
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                print(error)
                print(f"Error at {pending[future]}")

In [None]:
# Run configuration, expressed as CLI-style arguments parsed from an
# in-notebook list so the same options could be used from a script.
parser = argparse.ArgumentParser(description='Process Data.')
# NOTE(review): presumably the value selects an entry of `prompt_template`
# ("1"-"4"); the lookup is not visible here. The help text used to read
# 'dataset name', which did not describe this option.
parser.add_argument('--prompt_type', type=str, default='1', help='prompt type')
# Default aligned with the 'results/...' directories used elsewhere in this
# notebook (it was 'result/GPT').
parser.add_argument('--folder_path', type=str, default='results/GPT', help='output folder')
parser.add_argument('--count', type=int, default=100, help='number of samples')

argstring = ['--prompt_type', '3',
             '--folder_path', 'results/Gemini_Pro',
             '--count', '1008']
args = parser.parse_args(argstring)

# Time the whole generation pass and report the elapsed wall-clock seconds.
print("Starting Generation...")
time1 = time.time()
generation(args)
time2 = time.time()
print("Generation Completed")
print(f"Generation Time: {time2-time1}")

### forward chaining

In [None]:
# Prompt asking the model to describe every item of a rebus image as JSON
# objects with the keys 'id', 'name', 'description', 'relation' and 'specialy'.
# NOTE(review): the wording and key names contain misspellings ("sepearte",
# "temperal_id", "specialy", "speical"). They are deliberately left as-is: this
# is a runtime string, and the key names may be relied on by already-generated
# results or downstream parsing — confirm before correcting them.
system_prompt = "These image is a rebus puzzle which contain more than one item. An item can be text, icon or shapes. Describe all items in the image in a json file. Each item should have a sepearte json file. The json file should have the following format: {'id': 'temperal_id', 'name' : 'name_of_the_item', 'description': 'description_of_the_item', 'relation': 'location_relation_to_other_items','specialy': 'specialy_of_the_item_if_any'}. For the specialy field should be something you found speical or different from normal items."

In [None]:
# Reasoning prompt with five positional `{}` fields: the JSON item description
# first, followed by the four answer options (A)-(D). It is filled with
# str.format() in the reasoning cell below.
reason_template = """The image is a rebus puzzle which contain a word or phrases. Here is a json file describing the item in the image.
{}
Which word/phrase is conveyed in this image from the following options (either A, B, C, or D)? \n(A) {} (B) {} (C) {} (D) {}
Start your reasoning by mapping each json file to the word in the answer choices. Only pick the answer choice that is consistent with all the json file."""

In [None]:
# Reasoning pass: for every puzzle, load the previously saved JSON description,
# build the multiple-choice reasoning prompt from it and submit the query to a
# process pool.
JSON_FOLDER = 'results/GPT/json_test/'  # loop-invariant, so defined once up front
reasoning_futures = {}
with concurrent.futures.ProcessPoolExecutor(max_workers=20) as executor:
    for i, puzzle in enumerate(puzzles):
        try:
            json_file_path = os.path.join(JSON_FOLDER, f"{i}.json")
            image_path = puzzle['image']
            ### load json file
            with open(json_file_path, 'r') as f:
                data = json.load(f)

            options = data['options']
            # Guard against a saved description that belongs to another puzzle.
            # An explicit error (instead of a bare assert) yields a readable
            # message and is not stripped when Python runs with -O.
            if data['image'] != image_path:
                raise ValueError(
                    f"{json_file_path} describes {data['image']!r}, expected {image_path!r}"
                )
            # NOTE(review): assumes options.values() iterates in A, B, C, D
            # order — confirm against the code that wrote the JSON files.
            prompt_options = [data['response']] + list(options.values())
            # Use a dedicated name: assigning to `system_prompt` here would
            # overwrite the item-description prompt defined earlier in this
            # section and break any cell re-run that depends on it.
            reasoning_prompt = reason_template.format(*prompt_options)
            future = executor.submit(query_model_simple_json, puzzle, reasoning_prompt, i)
            reasoning_futures[future] = i
        except Exception as e:
            print(e)
            print(f"Error at {i}")

    # Report failures raised inside worker processes; the try/except above
    # only sees errors from loading the JSON and submitting the task.
    for future in concurrent.futures.as_completed(reasoning_futures):
        worker_error = future.exception()
        if worker_error is not None:
            print(worker_error)
            print(f"Error at {reasoning_futures[future]}")
