### NOUN Dataset + BLIP-2 Multimodal Model Pipeline
#### This notebook contains the pipeline for loading the BLIP2 Opt-2.7b model and running inference on the NOUN Dataset

Note that for this pipeline it is recommended to use a GPU with sufficient RAM.

##### Imports
Import modules, requires the installation of bitsandbytes and accelerate

In [None]:
%pip install bitsandbytes accelerate Pillow git+https://github.com/huggingface/transformers tqdm

In [1]:
import csv
from PIL import Image
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


##### Load model
uses bitsandbytes to allow int8 quanitization for greatly reduced memory usage, allowing the model to be run on Google Colab.

In [2]:
# load processor
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

# load in float16 # load in int8
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b",
                                                      load_in_8bit=True, device_map="auto")
# setup device
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"




Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin\cudart64_110.dll
CUDA SETUP: Highest compute capability among GPUs detected: 5.2
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary c:\Users\Juell\AppData\Local\Programs\Python\Python39\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117_nocublaslt.dll...


  warn(msg)
  warn(msg)
Loading checkpoint shards: 100%|██████████| 2/2 [00:55<00:00, 27.80s/it]


##### Perform inference on NOUN Dataset
Currently uses default hyperparameters

In [None]:
import pandas as pd
from tqdm import tqdm
import pandas as pd
from evaluate import check_colors_and_textures

# Define path to input and output files
input_file = 'data/datasets/dataset_full.csv'
output_file = 'data/datasets/dataset_all_questions.csv'


# Define question for checking textures (unused for now)
QUESTION_1 = "Q: Only answer with yes or no, do you recognize this object? \n A:"
QUESTION_2 = "Q: what do you call the object in this image? \n A:"
QUESTION_3 = "Q: What do you really think this is? \n A:"

# Load data from input file into a pandas DataFrame
data = pd.read_csv(output_file)

tqdm.pandas()

# https://github.com/huggingface/transformers/issues/22146
# the above link contains more information on param tweaking
# beam search:
# model.generate(**inputs, num_beams=5, max_new_tokens=30, repetition_penalty=1.0, length_penalty=1.0, temperature=1)
# nucleus sampling:
# model.generate(**inputs, do_sample=True, top_p=0.9)
# TODO: research how beam search and nucleus sampling work and what other params can be changed

# Define function to generate text using the model


def generate_text(row, decode='greedy'):
    raw_image = Image.open(row[0].replace("\\", "/")).convert("RGB")
    inputs1 = processor(raw_image, return_tensors="pt").to(
        DEVICE, torch.float16)
    inputs2 = processor(raw_image, text=QUESTION_1,
                        return_tensors="pt").to(DEVICE, torch.float16)
    inputs3 = processor(raw_image, text=QUESTION_2,
                        return_tensors="pt").to(DEVICE, torch.float16)
    inputs4 = processor(raw_image, text=QUESTION_3,
                        return_tensors="pt").to(DEVICE, torch.float16)

    if decode == 'greedy':
        # generated_ids1 = model.generate(**inputs1, max_new_tokens=20)
        # generated_ids2 = model.generate(**inputs2, max_new_tokens=20)
        generated_ids3 = model.generate(**inputs3, max_new_tokens=20)
        # generated_ids4 = model.generate(**inputs4, max_new_tokens=20)
    elif decode == 'nucleus':
        generated_ids1 = model.generate(
            **inputs1, do_sample=True, top_p=0.9, max_new_tokens=20)
        generated_ids2 = model.generate(
            **inputs2, do_sample=True, top_p=0.9, max_new_tokens=20)
        generated_ids3 = model.generate(
            **inputs3, do_sample=True, top_p=0.9, max_new_tokens=20)
        generated_ids4 = model.generate(
            **inputs4, do_sample=True, top_p=0.9, max_new_tokens=20)
    elif decode == 'beam':
        #generated_ids1 = model.generate(
            #**inputs1, num_beams=5, max_new_tokens=20, repetition_penalty=1.0, length_penalty=1.0, temperature=1)
        #gen#erated_ids2 = model.generate(
            #**inputs2, num_beams=5, max_new_tokens=20, repetition_penalty=1.0, length_penalty=1.0, temperature=1)
        generated_ids3 = model.generate(
            **inputs3, num_beams=5, max_new_tokens=20, repetition_penalty=1.0, length_penalty=1.0, temperature=1)
        #generated_ids4 = model.generate(
            #**inputs4, num_beams=5, max_new_tokens=20, repetition_penalty=1.0, length_penalty=1.0, temperature=1)

    #generated_text1 = processor.batch_decode(
        #generated_ids1, skip_special_tokens=True)[0].strip()
    #generated_text2 = processor.batch_decode(
        #generated_ids2, skip_special_tokens=True)[0].strip()
    generated_text3 = processor.batch_decode(
        generated_ids3, skip_special_tokens=True)[0].strip()
    # generated_text4 = processor.batch_decode(
    #     generated_ids4, skip_special_tokens=True)[0].strip()
    #match = check_colors_and_textures(generated_text1)

    #print(f"{row[0]} has generated: {generated_text1}")
    return generated_text3 #, generated_text2, generated_text3, generated_text4, match


# Add new columns with generated text using the apply() method and a lambda function
#data['BLIP-2, greedy, name'] = zip(*data.progress_apply(lambda row: generate_text(row, decode='greedy'), axis=1))

# data['BLIP-2, nucleus, caption'], data['BLIP-2, nucleus, bool'], data['BLIP-2, OPT-2.7b nucleus, name'], data[
#     'BLIP-2, nucleus, real'], data['BLIP-2,nucleus, color and textures'] = zip(*data.progress_apply(lambda row: generate_text(row, decode='nucleus'), axis=1))

data['BLIP-2, beam, name'] = zip(*data.progress_apply(lambda row: generate_text(row, decode='beam'), axis=1))

# Write updated data to output file
# data.to_csv(output_file, index=False)

In [None]:
data = pd.read_csv(output_file)
display(data)

In [None]:
display(data)

##### Display dataset

In [14]:
import glob
import random
import base64
import pandas as pd

from PIL import Image
from io import BytesIO
from IPython.display import HTML


def get_thumbnail(path):
    i = Image.open(path)
    i.thumbnail((150, 150), Image.LANCZOS)
    return i

def image_base64(im):
    if isinstance(im, str):
        im = get_thumbnail(im)
    with BytesIO() as buffer:
        im.save(buffer, 'jpeg')
        return base64.b64encode(buffer.getvalue()).decode()

def image_formatter(im):
    return f'<img src="data:image/jpeg;base64,{image_base64(im)}">'

In [15]:
import re
import pandas as pd

data = pd.read_csv('data/datasets/dataset_all_questions.csv')
data.rename(columns={'image_path': 'image'}, inplace=True)
data['image'] = data.image.map(lambda f: get_thumbnail(f))
data['BLIP-2, greedy, color and textures'] = data['BLIP-2, greedy, color and textures'].apply(lambda x: re.sub(r'[^\w]', ' ', x))
data['BLIP-2, nucleus, color and textures'] = data['BLIP-2, nucleus, color and textures'].apply(lambda x: re.sub(r'[^\w]', ' ', x))
data['BLIP-2, beam, color and textures'] = data['BLIP-2, beam, color and textures'].apply(lambda x: re.sub(r'[^\w]', ' ', x))

In [36]:
html_content = data2.to_html(formatters={'image': image_formatter}, escape=False)

with open('data/datasets/sorted_results.html', 'w') as file:
    file.write(html_content)

##### Evaluate model results

In [16]:
display(data)

Unnamed: 0,image,number label,actual name,familiarity score,nameability score,color saliency,texture saliency,"BLIP-2, greedy, caption","BLIP-2, greedy, bool","BLIP-2, greedy, name",...,"BLIP-2, greedy, color and textures","BLIP-2, nucleus, caption","BLIP-2, nucleus, bool","BLIP-2, nucleus, name","BLIP-2, nucleus, real","BLIP-2, nucleus, color and textures","BLIP-2, beam, caption","BLIP-2, beam, bool","BLIP-2, beam, real","BLIP-2, beam, color and textures"
0,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2001,bee have trap,19,50,66,14,a pair of orange plastic toys on a white backg...,"Yes, it is a plastic egg",the object in this image is an orange plastic ...,...,Orange Plastic,puzzle puzzle toy for puppies,1 x plastic yellow egg - yes,a toilet paper roll,"I think it's a big, old, shiny gold plated cop...",,a pair of orange plastic toys on a white backg...,"No, I do not recognize this object.",This is an orange plastic ball with a hole in ...,Orange Plastic
1,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2002,bookend,22,83,59,9,a red metal sculpture with a curved shape,"Yes, I recognize this object.",a red vase,...,Red,a small red sculpture with a curved shape,Yes,a vase,It's an empty vase.,Red,a red metal sculpture on a white background,Unknown,It's a red vase.,Red
2,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2003,fidget toy,59,74,19,0,a set of colorful wooden blocks arranged in a ...,"Yes, it is a wooden block",a stack of multi-colored blocks on a white bac...,...,Wooden,a set of wooden blocks made up of different co...,Yes ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ,the letter k ik-pok-ka-shab,A picture of a line of letters,Red Wooden,a set of colorful wooden blocks on a white bac...,"Yes, I recognize this object.",This is a stack of wooden blocks.,Wooden
3,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2004,pencil sharpener,41,70,48,52,a yellow and orange plastic egg shaped object,"Yes, it is a plastic ball with a hole in the m...",the object in this image is an orange and yell...,...,Orange Yellow Plastic,small okey egg orange yellow,"No, this is not a phone, it is a phone with th...",a yellow and orange object,An orange rubber ball that has been polished b...,Orange Yellow,an orange and yellow plastic ball with a hole ...,Unknown,This is an orange and yellow plastic ball with...,Orange Yellow Plastic
4,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2005,fish tank stone,6,25,81,13,a pink and blue tie dye headband,"Yes, I recognize this object.",a rainbow tie-dye tie-dye tie-dye tie-dye tie-,...,Blue Pink,a pink and yellow tie dye unicorn head,this is a feather headband,rainbow unicorn head,Pink unicorn horn,Yellow Pink,a pink and blue tie dye headband sitting on to...,Unknown,It's a tie-dye headband.,Blue Pink White
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2060,dog toy with removed rope,38,32,55,29,a red plastic toy with four balls inside,"Yes, it is a red jelly bean",a red ball with a bunch of small balls inside ...,...,Red Plastic,a large red heart shaped toy is placed on a wh...,Red ball shaped object with six balls on top,a red flower,A hot red bottle of water.,Red White Lace,a close up of a red plastic toy that is shaped...,"Yes, I recognize this object.",It’s a red jelly bean.,Red Plastic
60,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2061,dog toy,53,53,59,13,a green donut with colorful dots on it,Yes,a donut,...,Green,a toy of a donut made with green colored rubber,"the ring has 6 balls, 3 blue balls and 3 red b...",a rubber donut,A green donut,Red Green Rubber,a green dog toy with colorful dots on it,"Yes, I recognize this object.",It's a donut shaped like a virus.,Green
61,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2062,dog toy,44,56,29,42,an orange plastic ring with spikes on it,"Yes, it is a rubber ball with spikes",an orange rubber ring with spikes on it,...,Orange Plastic,an orange plastic toy is shown on a white back...,This is an animal toy with rubber toys,The 'Orange Spiky Dog Collar' by QwikClip.com,A rubber tire shaped item.,Orange Plastic,an orange rubber ring on a white background,Unknown,This is an orange rubber ring with spikes on it,Orange Rubber
62,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2063,magic loops,25,38,13,19,a metal sculpture with colorful beads on it,"Yes, it is a planet",a solar system.,...,,kiddos magnet system,this is the universe,a planet,The sun.,,a metal sculpture of a globe with multi colore...,"No, I do not recognize this object.",I think it's a planet.,Red


In [None]:
import pandas as pd
from tqdm import tqdm
from evaluate import check_colors_and_textures

# Load dataset into DataFrame
df = pd.read_csv('dataset.csv')

# Add new column name to header row
df.rename(columns={df.columns[-1]: 'BLIP-2, OPT-2.7b evaluation: color and texture'}, inplace=True)

# Add new column data to remaining rows
for i, row in tqdm(df.iterrows(), total=len(df)):
    if pd.notnull(row.iloc[-2]):
        colors, textures = check_colors_and_textures(row.iloc[-2])
        colors = ", ".join(colors) if len(colors) > 0 else None
        textures = ", ".join(textures) if len(textures) > 0 else None
        df.at[i, 'BLIP-2, OPT-2.7b evaluation: color and texture'] = f"{colors}; {textures}"

pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
pd.set_option('display.max_colwidth', None)  # or 199
display(df)

##### Create visualization

In [17]:
import evaluate
import importlib
importlib.reload(evaluate)
from evaluate import colors_to_boolean, textures_to_boolean

data['color greedy'] = data['BLIP-2, greedy, color and textures'].apply(lambda x: colors_to_boolean(x))
data['color nucleus'] = data['BLIP-2, nucleus, color and textures'].apply(lambda x: colors_to_boolean(x))
data['color beam'] = data['BLIP-2, beam, color and textures'].apply(lambda x: colors_to_boolean(x))

data['texture greedy'] = data['BLIP-2, greedy, color and textures'].apply(lambda x: textures_to_boolean(x))
data['texture nucleus'] = data['BLIP-2, nucleus, color and textures'].apply(lambda x: textures_to_boolean(x))
data['texture beam'] = data['BLIP-2, beam, color and textures'].apply(lambda x: textures_to_boolean(x))

get_token_count = lambda text: len(text.split())

data['length greedy'] = data['BLIP-2, greedy, caption'].apply(get_token_count)
data['length nucleus'] = data['BLIP-2, nucleus, caption'].apply(get_token_count)
data['length beam'] = data['BLIP-2, beam, caption'].apply(get_token_count)

In [32]:
column_names = data.columns.tolist()
column_names
order = ['image', 'number label', 'actual name', 'familiarity score', 'nameability score', 'color saliency', 'texture saliency', 'BLIP-2, greedy, caption', 'BLIP-2, beam, caption', 'BLIP-2, nucleus, caption', 'BLIP-2, greedy, bool', 'BLIP-2, beam, bool', 'BLIP-2, nucleus, bool', 'BLIP-2, greedy, name', 'BLIP-2, nucleus, name',
         'BLIP-2, greedy, real', 'BLIP-2, beam, real', 'BLIP-2, nucleus, real', 'BLIP-2, greedy, color and textures', 'BLIP-2, nucleus, color and textures', 'BLIP-2, beam, color and textures', 'color greedy', 'color nucleus', 'color beam', 'texture greedy', 'texture nucleus', 'texture beam', 'length greedy', 'length nucleus', 'length beam']


print(len(order))
print(len(column_names))
print(column_names)
data2 = data.reindex(columns=order)

30
30
['image', 'number label', 'actual name', 'familiarity score', 'nameability score', 'color saliency', 'texture saliency', 'BLIP-2, greedy, caption', 'BLIP-2, greedy, bool', 'BLIP-2, greedy, name', 'BLIP-2, greedy, real', 'BLIP-2, greedy, color and textures', 'BLIP-2, nucleus, caption', 'BLIP-2, nucleus, bool', 'BLIP-2, nucleus, name', 'BLIP-2, nucleus, real', 'BLIP-2, nucleus, color and textures', 'BLIP-2, beam, caption', 'BLIP-2, beam, bool', 'BLIP-2, beam, real', 'BLIP-2, beam, color and textures', 'color greedy', 'color nucleus', 'color beam', 'texture greedy', 'texture nucleus', 'texture beam', 'length greedy', 'length nucleus', 'length beam']


In [35]:
test = 'data/datasets/dataset_full_sorted.csv'
data2.to_csv(test, index=False)

In [34]:
display(data2)

Unnamed: 0,image,number label,actual name,familiarity score,nameability score,color saliency,texture saliency,"BLIP-2, greedy, caption","BLIP-2, beam, caption","BLIP-2, nucleus, caption",...,"BLIP-2, beam, color and textures",color greedy,color nucleus,color beam,texture greedy,texture nucleus,texture beam,length greedy,length nucleus,length beam
0,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2001,bee have trap,19,50,66,14,a pair of orange plastic toys on a white backg...,a pair of orange plastic toys on a white backg...,puzzle puzzle toy for puppies,...,Orange Plastic,True,False,True,True,False,True,10,5,10
1,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2002,bookend,22,83,59,9,a red metal sculpture with a curved shape,a red metal sculpture on a white background,a small red sculpture with a curved shape,...,Red,True,True,True,False,False,False,8,8,8
2,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2003,fidget toy,59,74,19,0,a set of colorful wooden blocks arranged in a ...,a set of colorful wooden blocks on a white bac...,a set of wooden blocks made up of different co...,...,Wooden,False,True,False,True,True,True,10,11,10
3,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2004,pencil sharpener,41,70,48,52,a yellow and orange plastic egg shaped object,an orange and yellow plastic ball with a hole ...,small okey egg orange yellow,...,Orange Yellow Plastic,True,True,True,True,False,True,8,5,12
4,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2005,fish tank stone,6,25,81,13,a pink and blue tie dye headband,a pink and blue tie dye headband sitting on to...,a pink and yellow tie dye unicorn head,...,Blue Pink White,True,True,True,False,False,False,7,8,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2060,dog toy with removed rope,38,32,55,29,a red plastic toy with four balls inside,a close up of a red plastic toy that is shaped...,a large red heart shaped toy is placed on a wh...,...,Red Plastic,True,True,True,True,True,True,8,12,14
60,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2061,dog toy,53,53,59,13,a green donut with colorful dots on it,a green dog toy with colorful dots on it,a toy of a donut made with green colored rubber,...,Green,True,True,True,False,True,False,8,10,9
61,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2062,dog toy,44,56,29,42,an orange plastic ring with spikes on it,an orange rubber ring on a white background,an orange plastic toy is shown on a white back...,...,Orange Rubber,True,True,True,True,True,True,8,10,8
62,<PIL.JpegImagePlugin.JpegImageFile image mode=...,2063,magic loops,25,38,13,19,a metal sculpture with colorful beads on it,a metal sculpture of a globe with multi colore...,kiddos magnet system,...,Red,False,False,True,False,False,False,8,3,12


In [7]:
%pip install plotly

Defaulting to user installation because normal site-packages is not writeable
Collecting plotly
  Downloading plotly-5.14.1-py2.py3-none-any.whl (15.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.3/15.3 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting tenacity>=6.2.0 (from plotly)
  Downloading tenacity-8.2.2-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.14.1 tenacity-8.2.2
Note: you may need to restart the kernel to use updated packages.


In [10]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import json

# Assuming you have a DataFrame called 'data' with columns 'color saliency', 'color beam', and 'Model'

# Determine the category bins based on the range of values in the 'color saliency' column
num_bins = 5
category_bins_texture = pd.cut(data['texture saliency'], bins=num_bins)
category_bins_color = pd.cut(data['color saliency'], bins=num_bins)


# Create a new column in the DataFrame to represent the category bins
data['Category Bin Color'] = category_bins_color.astype(str)
data['Category Bin Texture'] = category_bins_texture.astype(str)

# Group the data by the category bins, boolean values, and model, and calculate the counts for each group

# color
color_nucleus = data.groupby(['Category Bin Color', 'color nucleus']).size().unstack().reset_index().fillna(0)
color_greedy = data.groupby(['Category Bin Color', 'color greedy']).size().unstack().reset_index().fillna(0)
color_beam = data.groupby(['Category Bin Color', 'color beam']).size().unstack().reset_index().fillna(0)

print(color_beam)

color_nucleus['Percentage'] = color_nucleus.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
color_greedy['Percentage'] = color_greedy.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
color_beam['Percentage'] = color_beam.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)


# texture
texture_nucleus = data.groupby(['Category Bin Texture', 'texture nucleus']).size().unstack().reset_index().fillna(0)
texture_greedy = data.groupby(['Category Bin Texture', 'texture greedy']).size().unstack().reset_index().fillna(0)
texture_beam = data.groupby(['Category Bin Texture', 'texture beam']).size().unstack().reset_index().fillna(0)


texture_nucleus['Percentage'] = texture_nucleus.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
texture_greedy['Percentage'] = texture_greedy.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
texture_beam['Percentage'] = texture_beam.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)


# Create the grouped bar chart
fig = go.Figure(layout_yaxis_range=[0,1])


bar_trace_nucleus = go.Bar(
    x=color_nucleus['Category Bin Color'],
    y=color_nucleus['Percentage'],
    name=f'Nucleus Sampling',
)

bar_trace_greedy = go.Bar(
    x=color_nucleus['Category Bin Color'],
    y=color_greedy['Percentage'],
    name=f'Greedy',

)

bar_trace_beam = go.Bar(
    x=color_nucleus['Category Bin Color'],
    y=color_beam['Percentage'],
    name=f'Beam Search',

)

fig.add_trace(bar_trace_nucleus)
fig.add_trace(bar_trace_greedy)
fig.add_trace(bar_trace_beam)

# Create the layout for the grouped bar chart
fig.update_layout(
    width=1000,
    title='Color term usage rate in % per saliency bin',
    xaxis=dict(title='Color saliency in %', tickmode = 'array',
               tickvals = [0, 1, 2, 3, 4],
        ticktext = ['0-20', '21-40', '41-60', '61-80', '81-100']),
    yaxis=dict(title='Rate of using color terms'),
    barmode='group'
)

# Show the plot
fig.show()

# # Save the plot as JSON
fig_json = fig.to_json()
with open('grouped_texture.json', 'w') as file:
    file.write(fig_json)


color beam Category Bin Color  False  True
0               (2.909, 21.2]    2.0   2.0
1                (21.2, 39.4]    3.0  11.0
2                (39.4, 57.6]    2.0  19.0
3                (57.6, 75.8]    2.0  15.0
4                (75.8, 94.0]    0.0   8.0


In [13]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import json

# Assuming you have a DataFrame called 'data' with columns 'color saliency', 'color beam', and 'Model'

# Determine the category bins based on the range of values in the 'color saliency' column
num_bins = 5
category_bins_texture = pd.cut(data['texture saliency'], bins=num_bins)
category_bins_color = pd.cut(data['color saliency'], bins=num_bins)


# Create a new column in the DataFrame to represent the category bins
data['Category Bin Color'] = category_bins_color.astype(str)
data['Category Bin Texture'] = category_bins_texture.astype(str)

# Group the data by the category bins, boolean values, and model, and calculate the counts for each group

# color
color_nucleus = data.groupby(['Category Bin Color', 'color nucleus']).size().unstack().reset_index().fillna(0)
color_greedy = data.groupby(['Category Bin Color', 'color greedy']).size().unstack().reset_index().fillna(0)
color_beam = data.groupby(['Category Bin Color', 'color beam']).size().unstack().reset_index().fillna(0)

print(color_beam)

color_nucleus['Percentage'] = color_nucleus.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
color_greedy['Percentage'] = color_greedy.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
color_beam['Percentage'] = color_beam.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)


# texture
texture_nucleus = data.groupby(['Category Bin Texture', 'texture nucleus']).size().unstack().reset_index().fillna(0)
texture_greedy = data.groupby(['Category Bin Texture', 'texture greedy']).size().unstack().reset_index().fillna(0)
texture_beam = data.groupby(['Category Bin Texture', 'texture beam']).size().unstack().reset_index().fillna(0)


texture_nucleus['Percentage'] = texture_nucleus.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
texture_greedy['Percentage'] = texture_greedy.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)
texture_beam['Percentage'] = texture_beam.apply(lambda row: row[True] / (row[True] + row[False]), axis=1)


# Create the grouped bar chart
fig = go.Figure(layout_yaxis_range=[0,1])


bar_trace_nucleus = go.Bar(
    x=texture_nucleus['Category Bin Texture'],
    y=texture_nucleus['Percentage'],
    name=f'Nucleus Sampling',
)

bar_trace_greedy = go.Bar(
    x=texture_nucleus['Category Bin Texture'],
    y=texture_greedy['Percentage'],
    name=f'Greedy',

)

bar_trace_beam = go.Bar(
    x=texture_nucleus['Category Bin Texture'],
    y=texture_beam['Percentage'],
    name=f'Beam Search',

)

fig.add_trace(bar_trace_nucleus)
fig.add_trace(bar_trace_greedy)
fig.add_trace(bar_trace_beam)

# Create the layout for the grouped bar chart
fig.update_layout(
    width=1000,
    title='Texture term usage rate in % per saliency bin',
    xaxis=dict(title='Texture saliency in %', tickmode = 'array',
               tickvals = [0, 1, 2, 3, 4],
        ticktext = ['0-20', '21-40', '41-60', '61-80', '81-100']),
    yaxis=dict(title='Rate of using texture terms'),
    barmode='group'
)

# Show the plot
fig.show()

# # Save the plot as JSON
fig_json = fig.to_json()
with open('grouped_texture2.json', 'w') as file:
    json.dump(fig_json, file, ensure_ascii=True)


color beam Category Bin Color  False  True
0               (2.909, 21.2]    2.0   2.0
1                (21.2, 39.4]    3.0  11.0
2                (39.4, 57.6]    2.0  19.0
3                (57.6, 75.8]    2.0  15.0
4                (75.8, 94.0]    0.0   8.0


In [60]:
import plotly.graph_objects as go
import pandas as pd

# Assuming your dataframe is called 'df'
# You can load your data into a dataframe using pd.read_csv() or any other method

# Create a list of row indices to use as labels for x-axis
labels = data['number label'].tolist()

# Create a figure object
fig = go.Figure()

# Add the bars for each length column
fig.add_trace(go.Bar(x=labels, y=data['length greedy'], name='Length Greedy'))
fig.add_trace(go.Bar(x=labels, y=data['length nucleus'], name='Length Nucleus'))
fig.add_trace(go.Bar(x=labels, y=data['length beam'], name='Length Beam'))

# Update the layout
fig.update_layout(
    width=1000,
    title='Grouped Bar Chart of token lengths',
    xaxis_title='Object label',
    yaxis_title='Token length',
    barmode='group',  # Set the bar mode to 'group' for grouped bars
    bargap=0.1,  # Adjust the spacing between bars within each group
    bargroupgap=0.3  # Adjust the spacing between groups
)

# Show the plot
fig.show()

# # Save the plot as JSON
fig_json = fig.to_json()
with open('token_length.json', 'w') as file:
    file.write(fig_json)

In [14]:
import pandas as pd
import plotly.express as px


# Count the occurrences of "yes" in each column
count_column1 = data["BLIP-2, greedy, bool"].str.lower().str.count(r'\byes\b').sum() / 64
count_column2 = data["BLIP-2, beam, bool"].str.lower().str.count(r'\byes\b').sum() / 64
count_column3 = data["BLIP-2, nucleus, bool"].str.lower().str.count(r'\byes\b').sum() / 64

# Create a DataFrame for the pie chart
data_plot = pd.DataFrame({
    "Column": ["BLIP-2, greedy, bool", "BLIP-2, beam, bool", "BLIP-2, nucleus, bool"],
    "Count": [count_column1, count_column2, count_column3]
})

# Create the bar chart using Plotly
fig = px.bar(data_plot, x='Column', y='Count')

# Set the axis labels
fig.update_layout(width=1000, title='Model belief in recognizing object', xaxis_title='Decoding strategy', yaxis_title='Count')


# Show the plot
fig.show()

# # Save the plot as JSON
# fig_json = fig.to_json()
# with open('familiarity.json', 'w') as file:
#     file.write(fig_json)

In [65]:
import spacy
nlp = spacy.load('en_core_web_sm')

def calculate_similarity(string1, string2, string3):
    # Process the sentences with spaCy
    doc1 = nlp(string1)
    doc2 = nlp(string2)
    doc3 = nlp(string3)

    # Calculate the average similarity between the three sentences
    similarity1_2 = doc1.similarity(doc2)
    similarity1_3 = doc1.similarity(doc3)
    similarity2_3 = doc2.similarity(doc3)

    average_similarity = (similarity1_2 + similarity1_3 + similarity2_3) / 3
    return average_similarity


In [66]:
# Assuming your dataframe is named 'df' and the two columns are 'column1' and 'column2'
data['similarity'] = data.apply(lambda row: calculate_similarity(row['BLIP-2, greedy, caption'], row['BLIP-2, beam, caption'], row['BLIP-2, nucleus, caption']), axis=1)



[W007] The model you're using has no word vectors loaded, so the result of the Doc.similarity method will be based on the tagger, parser and NER, which may not give useful similarity judgements. This may happen if you're using one of the small models, e.g. `en_core_web_sm`, which don't ship with word vectors and only use context-sensitive tensors. You can always add your own word vectors, or use one of the larger models instead if available.


[W007] The model you're using has no word vectors loaded, so the result of the Doc.similarity method will be based on the tagger, parser and NER, which may not give useful similarity judgements. This may happen if you're using one of the small models, e.g. `en_core_web_sm`, which don't ship with word vectors and only use context-sensitive tensors. You can always add your own word vectors, or use one of the larger models instead if available.


[W007] The model you're using has no word vectors loaded, so the result of the Doc.similarity method wi

In [None]:
data = data.drop(['Category Bin Color', 'Category Bin Texture'], axis=1)
display(data)

# TODO

- implement BERT Score between real and caption
- implement BERT Score between different decoding strategies
- use FROMAGe
- Check if high BERT Score also correlates with proper saliencies, confidence, etc
- Look at extreme cases, very high or very low scores

## BERTScore Evaluation

In [37]:
import bert_score
from bert_score import score

# hide the loading messages
import logging
import transformers
transformers.tokenization_utils.logger.setLevel(logging.ERROR)
transformers.configuration_utils.logger.setLevel(logging.ERROR)
transformers.modeling_utils.logger.setLevel(logging.ERROR)

%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import rcParams

rcParams["xtick.major.size"] = 0
rcParams["xtick.minor.size"] = 0
rcParams["ytick.major.size"] = 0
rcParams["ytick.minor.size"] = 0

rcParams["axes.labelsize"] = "large"
rcParams["axes.axisbelow"] = True
rcParams["axes.grid"] = True

In [38]:
refs = data['actual name']
cands_greedy_caption = data['BLIP-2, greedy, caption']
cands_beam_caption = data['BLIP-2, beam, caption']
cands_nucleus_caption = data['BLIP-2, nucleus, caption']

# cands_greedy_bool = data['BLIP-2, greedy, bool']
# cands_beam_bool = data['BLIP-2, beam, bool']
# cands_nucleus_bool = data['BLIP-2, nucleus, bool']

# cands_greedy_name = data['BLIP-2, greedy, name']
# cands_beam_name = data['BLIP-2, beam, name']
# cands_nucleus_name = data['BLIP-2, nucleus, name']

cands_greedy_real = data['BLIP-2, greedy, real']
cands_beam_real = data['BLIP-2, beam, real']
cands_nucleus_real = data['BLIP-2, nucleus, real']

### Familiarity

In [41]:
#data2['BERTScore greedy real']= score(cands_greedy_real, refs, lang='en', verbose=True)[2]
data2['BERTScore beam real']= score(cands_beam_real, refs, lang='en', verbose=True)[2]
# data2['BERTScore nucleus real']= score(cands_nucleus_real, refs, lang='en', verbose=True)[2]

calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:03<00:00,  3.84s/it]


computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]


### Nameability

In [42]:
data['BERTScore nameability caption'] = (score(cands_greedy_caption, cands_nucleus_caption, lang='en', verbose=True)[2] + score(
    cands_greedy_caption, cands_beam_caption, lang='en', verbose=True)[2] + score(cands_beam_caption, cands_nucleus_caption, lang='en', verbose=True)[2]) / 3

# data['BERTScore nameability real'] = (score(cands_greedy_real, cands_nucleus_real, lang='en', verbose=True)[2] + score(
#     cands_greedy_real, cands_beam_real, lang='en', verbose=True)[2] + score(cands_beam_real, cands_nucleus_real lang='en', verbose=True)[2]) / 3

# data['BERTScore nameability name'] = (score(cands_greedy_name, cands_nucleus_name, lang='en', verbose=True)[2] + score(
#     cands_greedy_name, cands_beam_name, lang='en', verbose=True)[2] + score(cands_beam_name, cands_nucleus_name, lang='en', verbose=True)[2]) / 3

calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:03<00:00,  3.54s/it]


computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]
