In [8]:
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
import numpy as np
from PIL import Image
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer
from pathlib import Path
import json

from urllib.parse import urlparse
import requests
import math

In [2]:
LLAVA_MODEL = "llava-hf/llava-v1.6-mistral-7b-hf"
MISTRAL_MODEL = "mistralai/Mistral-7B-Instruct-v0.2" 

MISTRAL_DEVICE = "cuda:1"
LLAVA_DEVICE = "cuda:2"

loadedmodels = None

In [29]:
def is_url(url):
    return urlparse(url).scheme != ""

## DEFINE CAPTIONER

In [4]:
class Captioner:
    def __init__(self, llavamodel=LLAVA_MODEL, mistralmodel=MISTRAL_MODEL, llavadevice=LLAVA_DEVICE, mistraldevice=MISTRAL_DEVICE, loadedmodels=None):
        self.mistraldevice = mistraldevice
        self.llavadevice = llavadevice
        if loadedmodels is None:
            loadedmodels = self.loadmodels(llavamodel, mistralmodel, llavadevice, mistraldevice)
        self.llavaprocessor, self.llavamodel, self.mistralprocessor, self.mistralmodel = loadedmodels
        
    def get_loadedmodels(self):
        return self.llavaprocessor, self.llavamodel, self.mistralprocessor, self.mistralmodel
        
    def loadmodels(self, llavamodel, mistralmodel, llavadevice, mistraldevice):
        llavaprocessor = LlavaNextProcessor.from_pretrained(llavamodel)
        llavamodel = LlavaNextForConditionalGeneration.from_pretrained(llavamodel, torch_dtype=torch.float16, low_cpu_mem_usage=True, device_map=llavadevice) 
        
        mistralprocessor = AutoTokenizer.from_pretrained(mistralmodel)
        mistralmodel = AutoModelForCausalLM.from_pretrained(mistralmodel, torch_dtype=torch.float16, device_map=mistraldevice)
        
        return llavaprocessor, llavamodel, mistralprocessor, mistralmodel
    
    def text_to_bits(self, text):
        numvoc = len(self.mistralprocessor.vocab)
        numbitsperword = math.ceil(math.log(numvoc) / math.log(2))
        tokens = self.mistralprocessor(text)["input_ids"][1:]
        bitstr = ""
        for token in tokens:
            tokenstr = f"{token:b}"
            tokenstr = "0"*(numbitsperword - len(tokenstr)) + tokenstr
            bitstr += tokenstr
        return bitstr, tokens
    
    def bits_to_text(self, bits):
        numvoc = len(self.mistralprocessor.vocab)
        numbitsperword = math.ceil(math.log(numvoc) / math.log(2))
        tokens = []
        while len(bits) > 0:
            wordbits = bits[:numbitsperword]
            tokenid = int(wordbits, 2)
            tokens.append(tokenid)
            bits = bits[numbitsperword:]
        text = self.mistralprocessor.decode([1] + tokens, skip_special_tokens=True)
        return text
        
    def describe_image(self, imagepath, short=False, shortlen=16, prompt=None):
        if is_url(imagepath):
            image = Image.open(requests.get(imagepath, stream=True).raw)
        else:
            image = Image.open(imagepath)

        if prompt is None:
            if short:
                prompt = f"[INST] <image> \n Can you please provide a short description of maximum {shortlen} words of the provided image? [/INST]"
            else:
                prompt = f"[INST] <image> \n Can you please describe in a sentence what you see in this image? Remember to mention all objects in the scene, as well as the background.[/INST]"
                
        processed = self.llavaprocessor(prompt, image, return_tensors="pt").to(self.llavadevice)
        # print("input: ", prompt)
        
        out = self.llavamodel.generate(**processed, max_new_tokens=100)

        output = self.llavaprocessor.decode(out[0], skip_special_tokens=True)
        # print("output: ", output)
        
        splits = output.split("[/INST]")
        assert( len(splits) == 2)
        reply = splits[1].strip()
        return reply
    
    def shorten_description(self, text:str, length=14, prompt=None):
        # how = "a comma-separated list of keywords"
        # messages = [
        #     {"role": "user", "content": 
        #         f"Can you please rephrase the following image description as {how}, with up to {length} words. Provide just one option. Put most important keywords first. Do not put the result in quotes. The image description is as follows: {description}"}
        # ]
        if prompt is None:
            how = "a compact headline"
            prompt = f"Gimme a short caption of maximum {length} words from the following image caption. Provide just one option. Do not put the result in quotes. The image caption is as follows: \"{text}\" "
        messages = [
            {"role": "user", "content": prompt}
        ]

        model_inputs = self.mistralprocessor.apply_chat_template(messages, return_tensors="pt").to(self.mistraldevice)

        generated_ids = self.mistralmodel.generate(model_inputs, max_new_tokens=100, do_sample=True)
        output = self.mistralprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        splits = output.split("[/INST]")
        assert( len(splits) == 2)
        reply = splits[1].strip()
        
        # print("Output length: ", len(reply.split()))
        return reply
    
    def reshorten_description(self, text:str, length=14):      # NOT USED
        # how = "a comma-separated list of keywords"
        how = "a compact headline"
        # messages = [
        #     {"role": "user", "content": 
        #         f"Can you please rephrase the following image description as {how}, with up to {length} words. Provide just one option. Put most important keywords first. Do not put the result in quotes. The image description is as follows: {description}"}
        # ]
        messages = [
            {"role": "user", "content": 
                f"Can you please make the provided image caption slightly shorter by omitting less important details? Provide just one option. Do not put the result in quotes. The image caption is as follows: \"{description}\" "}
        ]

        model_inputs = self.mistralprocessor.apply_chat_template(messages, return_tensors="pt").to(self.mistraldevice)

        generated_ids = self.mistralmodel.generate(model_inputs, max_new_tokens=100, do_sample=True)
        output = self.mistralprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        splits = output.split("[/INST]")
        assert( len(splits) == 2)
        reply = splits[1].strip()
        
        # print("Output length: ", len(reply.split()))
        return reply
    
    def check_description(self, imagepath, text:str, prompt=None):
        if is_url(imagepath):
            image = Image.open(requests.get(imagepath, stream=True).raw)
        else:
            image = Image.open(imagepath)
            
        # extra = "Remember to look at the different objects and their characteristics (such as colors, shapes). "
        extra = ""

        if prompt is None:
            # prompt = f"[INST] <image> \n Does the following description describe the content of the provided image well: \"{text}\" . Answer with \"yes\" or \"no\" only. {extra}[/INST]"
            # prompt = f"[INST] <image> \n Are there any significant differences between the provided image and the following description: \"{text}\"? Answer with \"yes\" or \"no\" only. {extra}[/INST]"
            prompt = f"[INST] <image> \n Rate the similarity of the provided image with the following description: \"{text}\" . Answer only with a similarity rating between 1 (=lowest similarity) and 5 (=highest similarity). {extra}[/INST]"
        
        processed = self.llavaprocessor(prompt, image, return_tensors="pt").to(self.llavadevice)
        # print("input: ", prompt)
        
        out = self.llavamodel.generate(**processed, max_new_tokens=100)

        output = self.llavaprocessor.decode(out[0], skip_special_tokens=True)
        # print("output: ", output)
        
        splits = output.split("[/INST]")
        assert( len(splits) == 2)
        reply = splits[1].strip()
        return reply
    
    def check_description_text(self, imagepath=None, text:str=None, imagetext=None):
        assert text is not None
        textlen = len(text.split())
        if imagepath is None:
            assert imagetext is not None
        else:
            assert imagetext is None
            imagetext = self.describe_image(imagepath, short=True, shortlen=textlen)
        
        rating = self.compare_descriptions(imagetext, text)
        return rating
    
    def compare_descriptions(self, textA, textB):
        messages = [
            {"role": "user", "content": 
                f"Are the following two image captions describing the same image or are there things that are different? First caption is \"{textA}\" and the second caption is \"{textB}\". Answer only with a similarity rating between 1 (=lowest similarity) and 5 (=highest similarity)!"}
        ]

        model_inputs = self.mistralprocessor.apply_chat_template(messages, return_tensors="pt").to(self.mistraldevice)

        generated_ids = self.mistralmodel.generate(model_inputs, max_new_tokens=100, do_sample=True)
        output = self.mistralprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        splits = output.split("[/INST]")
        if len(splits) != 2:
            print(len(splits), output)
        reply = splits[-1].strip()
        return reply
    
    def describe_and_check(self, imagepath, text:str):
        if is_url(imagepath):
            image = Image.open(requests.get(imagepath, stream=True).raw)
        else:
            image = Image.open(imagepath)
            
        # extra = "Remember to look at the different objects and their characteristics (such as colors, shapes). "
        extra = ""
            
        prompt = f"[INST] <image> \n Check and describe the differences between the provided image and the following image description: {text}. Conclude your reply with \"yes\" or \"no\" whether the provided image matches the provided description. [/INST]"
        # prompt = f"[INST] <image> \n Are there any significant differences between the provided image and the following description: \"{text}\"? Answer with \"yes\" or \"no\" only. {extra}[/INST]"
        
        processed = self.llavaprocessor(prompt, image, return_tensors="pt").to(self.llavadevice)
        # print("input: ", prompt)
        
        out = self.llavamodel.generate(**processed, max_new_tokens=100)

        output = self.llavaprocessor.decode(out[0], skip_special_tokens=True)
        # print("output: ", output)
        
        splits = output.split("[/INST]")
        assert( len(splits) == 2)
        reply = splits[1].strip()
        return reply
    
    def explain_difference(self, imagepath, text:str):
        if is_url(imagepath):
            image = Image.open(requests.get(imagepath, stream=True).raw)
        else:
            image = Image.open(imagepath)

        # extra = "Remember to look at the different objects and their characteristics (such as colors, shapes). "
        extra = ""            
        
        prompt = f"[INST] <image> \n Does the following description describe the provided image well: \"{text}\" . Make a list of things you checked and elaborate. Conclude your reply with a clear \"yes\" or \"no\". {extra} [/INST]"
        # prompt = f"[INST] <image> \n Are there any significant differences between the provided image and the following description: \"{text}\"? Answer with \"yes\" or \"no\" and make a list of things you checked and elaborate. {extra}[/INST]"
        
        processed = self.llavaprocessor(prompt, image, return_tensors="pt").to(self.llavadevice)
        # print("input: ", prompt)
        
        out = self.llavamodel.generate(**processed, max_new_tokens=200)

        output = self.llavaprocessor.decode(out[0], skip_special_tokens=True)
        # print("output: ", output)
        
        splits = output.split("[/INST]")
        assert( len(splits) == 2)
        reply = splits[1].strip()
        return reply
    
    def check_description_explain(self, imagepath, text:str):
        diff = self.explain_difference(imagepath, text)
        
        messages = [
            {"role": "user", "content": 
                f"Please answer with \"yes\" or \"no\" whether the following paragraph says the image matches the description or not: \"{diff}\". Limit your reply to one word only: either \"Yes\" or \"No\"!"}
        ]

        model_inputs = self.mistralprocessor.apply_chat_template(messages, return_tensors="pt").to(self.mistraldevice)

        generated_ids = self.mistralmodel.generate(model_inputs, max_new_tokens=100, do_sample=True)
        output = self.mistralprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        splits = output.split("[/INST]")
        if len(splits) != 2:
            print(len(splits), output)
        reply = splits[-1].strip()
        return reply, diff
        

In [5]:
## LOAD MODELS FOR CAPTIONER

if loadedmodels is None:
    loadedmodels = Captioner().get_loadedmodels()

Some kwargs in processor config are unused and will not have any effect: num_additional_image_tokens. 


model.safetensors.index.json:   0%|          | 0.00/70.2k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/380M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [33]:
## DEFINE COMPARISON FUNCTIONS

captioner = Captioner(loadedmodels=loadedmodels)


def compare_images(impath1, impath2):
    description = captioner.describe_image(impath1)
    short_description = captioner.shorten_description(description)
    
    checked_short = captioner.check_description(impath2, short_description)
    diff = captioner.explain_difference(impath2, short_description)
    return checked_short, diff

def compare_folder(folder="testimages"):
    descriptions = {}
    shortdescriptions = {}
    for impath in Path(folder).glob("*"):
        impath = str(impath)
        description = captioner.describe_image(impath)
        short_description = captioner.shorten_description(description)
        descriptions[impath] = description
        shortdescriptions[impath] = short_description
        
    # print(shortdescriptions)
    cnt = 0
    
    matrix = {}    
    for impath1 in list(Path(folder).glob("*")):
        impath1 = str(impath1)
        matrix[impath1] = {}
        shortdesc1 = shortdescriptions[impath1]
        for impath2 in list(Path(folder).glob("*")):
            impath2 = str(impath2)
            same = captioner.check_description(impath2, shortdesc1)
            matrix[impath1][impath2] = same
            
        print(f"{cnt}", matrix[impath1])
        cnt += 1
        
    return matrix, descriptions, shortdescriptions

def compare_folder_explain(folder="testimages"):
    descriptions = {}
    shortdescriptions = {}
    for impath in Path(folder).glob("*"):
        impath = str(impath)
        description = captioner.describe_image(impath)
        short_description = captioner.shorten_description(description)
        descriptions[impath] = description
        shortdescriptions[impath] = short_description
        
    print(shortdescriptions)

    matrix = {}   
    diffmatrix = {} 
    for impath1 in list(Path(folder).glob("*")):
        impath1 = str(impath1)
        matrix[impath1] = {}
        diffmatrix[impath1] = {}
        shortdesc1 = shortdescriptions[impath1]
        for impath2 in list(Path(folder).glob("*")):
            impath2 = str(impath2)
            same, diff = captioner.check_description_explain(impath2, shortdesc1)
            matrix[impath1][impath2] = same
            diffmatrix[impath1][impath2] = diff
            
        print(matrix[impath1])
        
    return matrix, diffmatrix, descriptions, shortdescriptions

def compare_folder_text(folder="testimages"):
    descriptions = {}
    shortdescriptions = {}
    shortdescriptions2 = {}
    for impath in Path(folder).glob("*"):
        impath = str(impath)
        description = captioner.describe_image(impath)
        short_description = captioner.shorten_description(description)
        short_description2 = captioner.describe_image(impath, short=True)
        descriptions[impath] = description
        shortdescriptions[impath] = short_description
        shortdescriptions2[impath] = short_description2
        
    # print(shortdescriptions)
    matrix = {}    
    for impath1 in list(Path(folder).glob("*")):
        impath1 = str(impath1)
        matrix[impath1] = {}
        shortdesc1 = shortdescriptions[impath1]
        for impath2 in list(Path(folder).glob("*")):
            impath2 = str(impath2)
            shortdesc2 = shortdescriptions2[impath2]
            same = captioner.check_description_text(text=shortdesc1, imagetext=shortdesc2)
            matrix[impath1][impath2] = same
            
        print(matrix[impath1])
        
    return matrix, descriptions, shortdescriptions, shortdescriptions2

def print_matrix(matrix):
    for k in matrix:
        print("Recognize self: ", k, matrix[k][k])
        others = []
        for kk, v in matrix[k].items():
            try:
                if int(v) > 3 and kk != k:
                    others.append(f"{kk}: {v}")
            except Exception as e:
                pass
        print("Others: ", others)



255 000010001000110000001000100001000101010111010000000100101001000110000000011001010001000100111000000110101110000010011101000000110110110000000110011101000101010101000000000100101101100110110110111000000110001011001001110110101000001000011101111000000110011
['▁An', 'ime', '▁girl', '▁in', '▁white', '▁dress', ',', '▁smiles', '▁at', '▁E', 'iff', 'el', '▁Tower', '▁with', '▁blue', '▁can', '.']
Anime girl in white dress, smiles at Eiffel Tower with blue can.


In [11]:
# RUN COMPARISON ON A FOLDER

matrix, desc, shortdesc = compare_folder("testimages")
print_matrix(matrix)

You may have used the wrong order for inputs. `images` should be passed before `text`. The `images` and `text` inputs will be swapped. This behavior will be deprecated in transformers v4.47.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Expanding inputs for image tokens in LLaVa-NeXT should be done in processing. Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input bec

{'testimages/edgerunners.png': 'Two characters in dark room with neon lights, man wears orange jacket and sunglasses, woman in black and white outfit, man holds pink device.', 'testimages/emmanime.png': 'Anime girl in white dress, blue can, Eiffel Tower: Smiling at viewer.', 'testimages/montstmichel.jpg': 'Majestic castle overlooking serene lake under starry night sky.', 'testimages/castle.jpg': 'Ancient castle on hillside, serene lake, majestic mountain range, partly cloudy sky.', 'testimages/hut.png': 'Cozy cottage in lush forest with lit path and towering trees.', 'testimages/emma2.png': 'Woman in red holding yogurt at Eiffel Tower.', 'testimages/vangogh.jpg': 'Man in coat, beard and mustache against blue-orange dots background.', 'testimages/team.jpg': 'Smiling group poses at restaurant table, wine glasses, cups, pizza box, brick wall, shelf with bottles.', 'testimages/shrooms.jpg': 'Mushrooms of various colors and sizes against a dark backdrop.', 'testimages/paperman1.png': 'Woman

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '5', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '5', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '5', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '5'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '3', 'testimages/castle.jpg': '5', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '5'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '5', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '5', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '5', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '5', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '5', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '5', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '5', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '5', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '5', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '3', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '5', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '5', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '5', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '5', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '5', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '1', 'testimages/castle.jpg': '1', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '5', 'testimages/castlebridge.jpg': '1'}


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin

{'testimages/edgerunners.png': '1', 'testimages/emmanime.png': '1', 'testimages/montstmichel.jpg': '5', 'testimages/castle.jpg': '5', 'testimages/hut.png': '1', 'testimages/emma2.png': '1', 'testimages/vangogh.jpg': '1', 'testimages/team.jpg': '1', 'testimages/shrooms.jpg': '1', 'testimages/paperman1.png': '1', 'testimages/midway.jpg': '1', 'testimages/emma1.png': '1', 'testimages/horse.png': '1', 'testimages/water.jpg': '1', 'testimages/nightmare.png': '1', 'testimages/canyon.jpg': '1', 'testimages/monet.jpg': '1', 'testimages/atat.png': '1', 'testimages/trump.jpg': '1', 'testimages/paperman2.png': '1', 'testimages/car.jpg': '1', 'testimages/castlebridge.jpg': '5'}
Recognize self:  testimages/edgerunners.png 5
Others:  []
Recognize self:  testimages/emmanime.png 5
Others:  []
Recognize self:  testimages/montstmichel.jpg 5
Others:  ['testimages/castlebridge.jpg: 5']
Recognize self:  testimages/castle.jpg 5
Others:  ['testimages/castlebridge.jpg: 5']
Recognize self:  testimages/hut.png 

## EVALUATION METRICS FOR THE FOLDER MATRIX

In [32]:
# how many of images predicted as matching are actually matching (on diagonal)?
def precision(m, minlevel=1, maxlevel=5):   # diagonal_dominance
    m = (m - minlevel) / (maxlevel - minlevel)     # normalize from 1-5 to 0-1
    return m.diag().abs().sum() / m.abs().sum()


# like precision, but with norm
def frobenius_diagonal_ratio(m, minlevel=1, maxlevel=5):
    m = (m - minlevel) / (maxlevel - minlevel)     # normalize from 1-5 to 0-1
    diag_norm = m.diag().norm()
    total_norm = m.norm()
    return diag_norm/total_norm


# how many of the diagonal entries are classified as matching (should be all of them)
def diag_acc(m, minlevel=1, maxlevel=5):
    m = (m - minlevel) / (maxlevel - minlevel)     # normalize from 1-5 to 0-1
    return m.diag().mean()


def evaluate_matrix(matrix):
    """ matrix should ideally be diagonal (5 on the diagonal, 1 everywhere else) """
    name_to_position = {k: v for k, v in zip(matrix.keys(), range(len(matrix.keys())))}
    m = torch.zeros(max(name_to_position.values())+1, max(name_to_position.values())+1)
    for fr, row in matrix.items():
        for to, score in row.items():
            m[name_to_position[fr], name_to_position[to]] = int(score)

    ret = {
        "precision": precision(m),
        "diag_acc": diag_acc(m),
        "frob_diag_ratio": frobenius_diagonal_ratio(m)
    }
    return ret


m = evaluate_matrix(matrix)
print(m)


{'precision': tensor(0.8000), 'diag_acc': tensor(0.9091), 'frob_diag_ratio': tensor(0.9035)}


## DONE

In [None]:



rate, diff = compare_images("testimages2/redforest.png", "testimages2/redforest2.png")
print(rate)
print(diff)

In [16]:
matrix

{'testimages/edgerunners.png': {'testimages/edgerunners.png': '5',
  'testimages/emmanime.png': '1',
  'testimages/montstmichel.jpg': '1',
  'testimages/castle.jpg': '1',
  'testimages/hut.png': '1',
  'testimages/emma2.png': '1',
  'testimages/vangogh.jpg': '1',
  'testimages/team.jpg': '1',
  'testimages/shrooms.jpg': '1',
  'testimages/paperman1.png': '1',
  'testimages/midway.jpg': '1',
  'testimages/emma1.png': '1',
  'testimages/horse.png': '1',
  'testimages/water.jpg': '1',
  'testimages/nightmare.png': '1',
  'testimages/canyon.jpg': '1',
  'testimages/monet.jpg': '1',
  'testimages/atat.png': '1',
  'testimages/trump.jpg': '1',
  'testimages/paperman2.png': '1',
  'testimages/car.jpg': '1',
  'testimages/castlebridge.jpg': '1'},
 'testimages/emmanime.png': {'testimages/edgerunners.png': '1',
  'testimages/emmanime.png': '5',
  'testimages/montstmichel.jpg': '1',
  'testimages/castle.jpg': '1',
  'testimages/hut.png': '1',
  'testimages/emma2.png': '1',
  'testimages/vangogh.j

In [14]:
for k, v in shortdesc.items():
    print(k, len(v.split()), v)

testimages/edgerunners.png 24 Two characters in dark room with neon lights, man wears orange jacket and sunglasses, woman in black and white outfit, man holds pink device.
testimages/emmanime.png 12 Anime girl in white dress, blue can, Eiffel Tower: Smiling at viewer.
testimages/montstmichel.jpg 9 Majestic castle overlooking serene lake under starry night sky.
testimages/castle.jpg 12 Ancient castle on hillside, serene lake, majestic mountain range, partly cloudy sky.
testimages/hut.png 11 Cozy cottage in lush forest with lit path and towering trees.
testimages/emma2.png 8 Woman in red holding yogurt at Eiffel Tower.
testimages/vangogh.jpg 10 Man in coat, beard and mustache against blue-orange dots background.
testimages/team.jpg 16 Smiling group poses at restaurant table, wine glasses, cups, pizza box, brick wall, shelf with bottles.
testimages/shrooms.jpg 10 Mushrooms of various colors and sizes against a dark backdrop.
testimages/paperman1.png 7 Woman in room holds red-marked paper 