In [None]:
!pip install datasets
!pip install sentence_transformers
!pip install pandas
!pip install --upgrade transformers
!pip install torchmetrics --upgrade
!pip install nbformat
!pip install git+https://github.com/openai/CLIP.git
!pip install bert_score
!pip install diffusers

In [3]:

from PIL import Image
import torch
import os
from datasets import load_dataset
from datasets.utils.file_utils import get_datasets_user_agent
import requests
import numpy as np
import pandas as pd
import json
import clip
# from natsort import natsorted
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from sentence_transformers import SentenceTransformer, util
from torchmetrics.multimodal.clip_score import CLIPScore
USER_AGENT = get_datasets_user_agent()
from bert_score import score
from diffusers import StableDiffusionPipeline
from tqdm import tqdm



In [4]:



def calculate_clip_score(image, correct_prompt, revers_prompt, model, preprocess, device):
    """Score one image against a caption and its role-reversed caption with CLIP.

    :param image: Image accepted by ``preprocess``.
    :param correct_prompt: Caption the image is expected to match.
    :param revers_prompt: Caption with the roles swapped.
    :param model: Object exposing ``encode_image`` and ``encode_text``.
    :param preprocess: Callable turning ``image`` into a tensor (a batch axis is added here).
    :param device: Target device, given either as a ``str`` or a ``torch.device``.
    :return: ``(correct_score, reverse_score)``, the cosine similarities multiplied by 100,
             or ``(None, None)`` when any step raises. Callers must check for ``None``.
    """
    try:
        image_input = preprocess(image).unsqueeze(0).to(device)
        text_tokens = clip.tokenize([correct_prompt, revers_prompt]).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image_input)
            text_features = model.encode_text(text_tokens)

            # Out-of-place normalisation: never mutates tensors owned by the model.
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            text_features = text_features / text_features.norm(dim=-1, keepdim=True)

            correct_score = torch.cosine_similarity(image_features, text_features[0].unsqueeze(0)).item()
            reverse_score = torch.cosine_similarity(image_features, text_features[1].unsqueeze(0)).item()

        # `device` may be a torch.device; comparing it with the string 'cuda' is not
        # reliable, so compare the device *type* instead.
        if torch.cuda.is_available() and torch.device(device).type == 'cuda':
            torch.cuda.empty_cache()

        return correct_score * 100, reverse_score * 100

    except Exception as e:
        # Best-effort by design: report the failure and let the caller skip this image.
        print(f"An error occurred: {e}")
        return None, None


In [5]:
def calculate_blip_2(image, prompt, processor ,model, device):
    """Caption ``image`` with ``model`` and compare the caption to ``prompt`` using BERTScore.

    :param image: Image passed to ``processor(images=...)``.
    :param prompt: Reference text the generated caption is compared against.
    :param processor: Callable producing the model inputs; also provides ``batch_decode``.
    :param model: Captioning model exposing ``parameters()``, ``to()`` and ``generate()``.
    :param device: Target device, given either as a ``str`` or a ``torch.device``.
    :return: ``(F1, generated_text)``. ``F1`` is the third value returned by ``score``
             (callers in this notebook call ``.item()`` on it).
    """
    target = torch.device(device)
    img_inputs = processor(images=image, return_tensors="pt").to(device)

    # Compare device types: a parameter on 'cuda:0' is never equal to a bare
    # torch.device('cuda') or to a string, which made the original check always true.
    if next(model.parameters()).device.type != target.type:
        model.to(target)

    with torch.no_grad():
        generated_ids = model.generate(**img_inputs)

    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

    # BERTScore expects lists of candidates and references.
    # NOTE(review): `score` is called once per image/prompt pair; if it reloads its
    # scoring model on every call this is a major cost. Consider a reusable scorer.
    P, R, F1 = score([generated_text], [prompt], lang="en", rescale_with_baseline=True)

    if torch.cuda.is_available() and target.type == 'cuda':
        torch.cuda.empty_cache()
    return F1, generated_text


In [17]:

def csv_to_json (csv_file_path , json_file_path):
    """Convert a CSV file to a JSON Lines file (one JSON record per line).

    Note: the output is line-delimited, so it cannot be read back with a single
    ``json.load`` call.

    :param csv_file_path: Path of the CSV file to read.
    :param json_file_path: Path the JSON Lines output is written to.
    :return: ``json_file_path``, unchanged.
    """
    pd.read_csv(csv_file_path).to_json(json_file_path, orient='records', lines=True)
    print(f"CSV file has been converted to JSON and saved at {json_file_path}")
    return json_file_path

In [13]:
import json
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
import csv
import os

def plot_clip_scores(clip_scores):
    """Show a bar chart of how many scores are strictly positive vs. strictly negative.

    Scores equal to zero are counted in neither bar nor in the total shown in the title.

    :param clip_scores: Iterable of numeric difference scores (iterated twice).
    """
    positive_count = len([value for value in clip_scores if value > 0])
    negative_count = len([value for value in clip_scores if value < 0])
    total = positive_count + negative_count

    plt.bar(['Positive', 'Negative'], [positive_count, negative_count], color=['green', 'red'])
    plt.xlabel('Score Type')
    plt.ylabel('Count')
    plt.title(f'Number of Positive diff scores is {positive_count} out of  {total}')
    plt.show()



def create_csv(file_name, data_list, save_dir='.'):
    """
    Creates a CSV file from a list of dictionaries.

    The column order is taken from the keys of the first dictionary.

    :param file_name: Name of the output CSV file ('.csv' is appended when missing)
    :param data_list: Non-empty list of dictionaries containing the data
    :param save_dir: Directory where the CSV file will be saved (default is current
                     directory); it is created when it does not exist yet
    :raises ValueError: If ``data_list`` is empty
    """
    # Validate first so that an empty list never leaves a directory or file behind.
    if not data_list:
        raise ValueError("Data list is empty")

    if not file_name.endswith('.csv'):
        file_name += '.csv'

    # An empty save_dir means "current directory"; os.makedirs('') would raise.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    file_path = os.path.join(save_dir, file_name)

    fieldnames = list(data_list[0].keys())

    with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data_list)

    print(f"CSV file '{file_path}' created successfully.")


def _resolve_image_path(save_dir, json_file, file_name):
    """Return the image path under save_dir, falling back to the JSON file's directory."""
    candidate = os.path.join(save_dir, file_name)
    if os.path.exists(candidate):
        return candidate
    beside_json = os.path.join(os.path.dirname(os.path.abspath(json_file)), file_name)
    return beside_json if os.path.exists(beside_json) else candidate


def _summarize_scores(label, scores, reverse_scores, diffs, rows, save_dir, plot_file, csv_name, print_data):
    """Report mean/variance, draw the plots and write the per-image CSV for one metric."""
    avg_score = np.mean(scores)
    avg_reverse_score = np.mean(reverse_scores)
    avg_diff = np.mean(diffs)
    var_score = np.var(scores)
    var_reverse_score = np.var(reverse_scores)
    var_diff = np.var(diffs)

    all_scores = [scores, reverse_scores, diffs]
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=all_scores)
    # The stored difference is multiplied by 10 for both metrics, so both are labelled x10.
    plt.xticks(ticks=range(len(all_scores)), labels=[f'{label} Score', f'Reverse {label} Score', f'{label} Diffx10'])
    plt.ylabel('Score')
    plt.title('Distribution of Image-Caption Pair Scores')
    plt.grid(True)
    plt.savefig(f'{save_dir}/{plot_file}')
    plt.show()
    plt.close()  # Close the plot to free up memory

    summary = f"{label} Score: Avrage:{avg_score} var: {var_score},  Reverse {label} Score: avg:{avg_reverse_score} var:{var_reverse_score},  {label} Diff: avg:{avg_diff} var: {var_diff}"
    print(summary)
    print_data.append(summary)
    plot_clip_scores(diffs)
    create_csv(csv_name, rows, save_dir)


def evaluation(save_dir, json_file , metric= "clip", img_col = 'file_name', text_col='text', revers_col = "reverse_text", output_file_name = 'clip_output'):
    """Score generated images against their caption and reverse caption.

    For every entry of ``json_file`` the image is scored with CLIP and/or with
    BLIP-2 captioning followed by BERTScore. Per-image rows go to a CSV, a box plot
    and a positive/negative bar chart are produced, and a text log is written.

    :param save_dir: Directory receiving the CSV, plots and log (created if missing).
                     Images are looked up here first, then next to ``json_file``.
    :param json_file: JSON array of records holding ``img_col``, ``text_col`` and ``revers_col``.
    :param metric: ``"clip"``, ``"blip"`` or ``"both"``.
    :param img_col: Record key holding the image file name.
    :param text_col: Record key holding the caption.
    :param revers_col: Record key holding the reverse caption.
    :param output_file_name: Base name of the CSV and the ``.txt`` log. With
                             ``metric="both"`` the CSVs get ``_clip`` / ``_blip`` suffixes
                             so that they do not overwrite each other.
    :raises ValueError: If ``metric`` is not one of the supported values.
    """
    use_clip = metric in ("clip", "both")
    use_blip = metric in ("blip", "both")
    if not (use_clip or use_blip):
        raise ValueError(f"Unknown metric '{metric}'; expected 'clip', 'blip' or 'both'")

    os.makedirs(save_dir, exist_ok=True)
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)  # Load the entire JSON data

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using {device}")

    print_data = []
    # Each metric keeps its own model variable so that "both" does not clobber one with the other.
    if use_clip:
        print(f"Metric used is CLIP Score using {device}")
        print_data.append(f"Metric used is CLIP Score using {device}")
        clip_model, preprocess = clip.load("ViT-B/32", device=device)
    if use_blip:
        print(f"Metric used is BLIP Score")
        print_data.append(f"Metric used is BLIP Score")
        processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
        blip_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")

    total_clip_score, total_reverse_clip_score, total_diff_clip, output_clip_data = [], [], [], []
    total_blip_score, total_reverse_blip_score, total_diff_blip, output_blip_data = [], [], [], []

    for entry in data:
        img_path = _resolve_image_path(save_dir, json_file, entry[img_col])
        caption = entry[text_col]
        revers_caption = entry[revers_col]

        # Previously a non-image entry silently reused the image of the previous entry.
        if not (img_path.endswith(".jpg") or img_path.endswith(".png")):
            message = f"Warning: skipping {img_path}: not a .jpg/.png file"
            print(message)
            print_data.append(message)
            continue

        with Image.open(img_path) as img:
            if use_clip:
                clip_score, clip_reverse_score = calculate_clip_score(img, caption, revers_caption, clip_model, preprocess, device)
                if clip_score is None or clip_reverse_score is None:
                    # calculate_clip_score reports failures as (None, None).
                    message = f"Warning: CLIP scoring failed for {img_path}; entry skipped"
                    print(message)
                    print_data.append(message)
                else:
                    diff = 10*(clip_score - clip_reverse_score)
                    message = f"Image: {img_path}, Caption: {caption}, CLIP Score: {clip_score}, reverse caption {revers_caption}, reverse score {clip_reverse_score}, diff*10 {diff}"
                    print(message)
                    print_data.append(message)
                    output_clip_data.append({'Image': entry[img_col],'Caption': caption,'Score': clip_score, 'reverse caption':revers_caption ,'reverse score': clip_reverse_score,'diff*10': diff})
                    total_clip_score.append(clip_score)
                    total_reverse_clip_score.append(clip_reverse_score)
                    total_diff_clip.append(diff)

            if use_blip:
                blip_score_dec, generated_caption = calculate_blip_2(img,caption,processor,blip_model,device)
                blip_reverse_score_dec, reverse_generated_caption = calculate_blip_2(img,revers_caption,processor,blip_model,device)
                blip_score = 100*blip_score_dec.item()
                blip_reverse_score = 100*blip_reverse_score_dec.item()
                blip_diff = 10*(blip_score - blip_reverse_score)
                total_blip_score.append(blip_score)
                total_reverse_blip_score.append(blip_reverse_score)
                total_diff_blip.append(blip_diff)
                message = f"Image: {img_path}, Caption: {caption}, Generated Caption {generated_caption} BLIP2 Score: {blip_score}, reverse caption: {revers_caption} , reverse BLIP score {blip_reverse_score}, diff*10 {blip_diff}"
                print(message)
                print_data.append(message)
                output_blip_data.append({'Image': entry[img_col],'Caption': caption,'Score': blip_score, 'reverse caption':revers_caption ,'reverse score': blip_reverse_score, 'diff*10': blip_diff})

    clip_csv_name = f"{output_file_name}_clip" if metric == "both" else output_file_name
    blip_csv_name = f"{output_file_name}_blip" if metric == "both" else output_file_name

    if total_clip_score:
        _summarize_scores('CLIP', total_clip_score, total_reverse_clip_score, total_diff_clip,
                          output_clip_data, save_dir, 'clip_distribution.png', clip_csv_name, print_data)
    if total_blip_score:
        _summarize_scores('BLIP', total_blip_score, total_reverse_blip_score, total_diff_blip,
                          output_blip_data, save_dir, 'blip_distribution.png', blip_csv_name, print_data)

    with open(f'{save_dir}/{output_file_name}.txt', 'w', encoding='utf-8') as file:
        file.write("\n".join(print_data))  # Join the list into a string with line breaks
    if torch.cuda.is_available():
        torch.cuda.empty_cache()



Code for generating the reverse caption


In [None]:
!python -m spacy download en_core_web_trf

In [None]:

!pip install spacy-transformers

In [9]:
import spacy
import spacy_transformers
from google.colab import drive
import os
import pandas as pd
import json

In [None]:
# Load spaCy's transformer-based English pipeline (`en_core_web_trf`, downloaded in the cell above).
# `nlp` is a notebook-level global used by `identify_roles`.
nlp = spacy.load("en_core_web_trf")

def get_full_phrase(token):
    """Return ``token`` together with its direct left children as one space-joined phrase.

    Only the immediate left children (``token.lefts``) are included; the words are
    emitted in sentence order (sorted by their ``.i`` position).
    """
    members = list(token.lefts)
    members.insert(0, token)
    members.sort(key=lambda member: member.i)
    return ' '.join(member.text for member in members)

def identify_roles(sentence):
    """Split a sentence into agent, action, patient and the remaining words.

    The sentence is parsed with the global ``nlp`` pipeline. A token with dependency
    ``nsubj`` becomes the agent, ``dobj`` the patient and a token tagged ``VERB`` the
    action; when several tokens qualify for a role, the last one wins.

    :param sentence: Sentence to analyse.
    :return: ``(agent, action, patient, remainder)`` where the first three are strings
             (empty when not found) and ``remainder`` is a list of the other words,
             without the agent/patient phrases, the action and any '.' token.
    """
    agent, action, patient = "", "", ""
    agent_tokens, patient_tokens = set(), set()
    action_token = None
    leftovers = []

    for tok in nlp(sentence):
        dependency = tok.dep_
        if dependency == "nsubj":
            agent = get_full_phrase(tok)
            agent_tokens.add(tok)
            agent_tokens.update(tok.lefts)
        elif dependency == "dobj":
            patient = get_full_phrase(tok)
            patient_tokens.add(tok)
            patient_tokens.update(tok.lefts)
        elif tok.pos_ == "VERB":
            action, action_token = tok.text, tok
        else:
            leftovers.append(tok)

    # Modifiers of the agent/patient were collected before their head was seen,
    # so they are filtered out here, together with the final period.
    remainder = [
        tok.text
        for tok in leftovers
        if tok not in agent_tokens
        and tok not in patient_tokens
        and tok != action_token
        and tok.text != '.'
    ]

    return agent, action, patient, remainder

def switch_roles(sentence):
    """Build a sentence in which the agent and the patient of ``sentence`` trade places.

    The patient phrase is capitalised and moved to the front, an 's' is appended to the
    verb unless it already ends with one, and the agent phrase is lower-cased.

    :param sentence: Sentence to reverse.
    :return: The reversed sentence ending with '.', or the string ``"Failed"`` when no
             agent or no patient could be identified.
    """
    agent, action, patient, remainder = identify_roles(sentence)

    if not (agent and patient):
        return "Failed"

    verb = action if action.endswith('s') else action + 's'
    pieces = [patient.capitalize(), verb, agent.lower()]

    tail = ' '.join(remainder).strip()
    if tail:
        pieces.append(tail)

    return ' '.join(pieces) + '.'

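# Model Evaluation
 Receives a model ID, a save directory and a captions `.txt` file (or a captions CSV).
 Generates an image for each caption, saves the images into `save_dir`, and returns the path of a JSON file listing each image with its caption and reverse caption, for use by the `evaluation` function.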

In [11]:
def evaluate_model(model_id, captions_txt, save_dir, json_caption, seed=42):
    """Generate one image per caption with Stable Diffusion and record the results.

    Captions come from ``json_caption`` (despite its name, a CSV file with the columns
    ``caption_0`` and ``caption_1``) when it is not ``"none"``; otherwise from
    ``captions_txt`` (one caption per line), in which case the reverse caption is
    produced with ``switch_roles``.

    :param model_id: Model identifier passed to ``StableDiffusionPipeline.from_pretrained``.
    :param captions_txt: Path of a text file with one caption per line, or ``"none"``.
    :param save_dir: Directory receiving the images and ``image_data.json`` (created if missing).
    :param json_caption: Path of the captions CSV, or ``"none"``.
    :param seed: Seed used for every generated image.
    :return: Path of the written ``image_data.json``.
    :raises ValueError: If both ``captions_txt`` and ``json_caption`` are ``"none"``.
    """
    os.makedirs(save_dir, exist_ok=True)

    if json_caption != "none":
        df = pd.read_csv(json_caption)
        # Plain lists so that reverse_captions[i] is positional, not a pandas label lookup.
        captions = df["caption_0"].tolist()
        reverse_captions = df["caption_1"].tolist()
    elif captions_txt != "none":
        with open(captions_txt, 'r', encoding='utf-8') as f:
            captions = [line.strip() for line in f]
        reverse_captions = None
    else:
        raise ValueError("Either captions_txt or json_caption must be provided (both are 'none')")

    torch.manual_seed(seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Evaluating using {device}")
    # Half precision is only used on the GPU; on CPU fall back to float32.
    dtype = torch.float16 if device == "cuda" else torch.float32
    # Load and move the pipeline once instead of calling .to(device) for every caption.
    sd_model = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)

    image_data = []
    for i, caption in enumerate(captions):
        # Blank lines (text file) or missing values (CSV) are not real prompts.
        if not isinstance(caption, str) or not caption.strip():
            print(f"Warning: skipping empty caption at index {i}")
            continue

        if reverse_captions is not None:
            reverse_caption = reverse_captions[i]
            if not isinstance(reverse_caption, str):
                reverse_caption = 'Failed'  # missing value in the CSV
        else:
            reverse_caption = switch_roles(caption)

        with torch.no_grad():
            # A fresh generator per image makes every image reproducible on its own.
            generator = torch.Generator(device).manual_seed(seed)
            image = sd_model(caption, generator=generator).images[0]

        filename = f"image_{i}_seed_{seed}.png"
        filepath = os.path.join(save_dir, filename)
        image.save(filepath)

        if reverse_caption != 'Failed':
            image_data.append({"file_name": filename,"text": caption,"reverse_text": reverse_caption})
        else:
            print(f"Warning: Unable to reverse {caption}")

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must be explicit
    # (the `evaluation` function reads this file back as UTF-8).
    json_file = os.path.join(save_dir, "image_data.json")
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(image_data, f, ensure_ascii=False, indent=4)

    if torch.cuda.is_available() and device == 'cuda':
        torch.cuda.empty_cache()
    return json_file

# Relevant for Colab

In [None]:
# Mount Google Drive at /content/drive so the project paths used below are reachable.
# force_remount=True remounts even if the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

This code uses the original base model to generate images from our benchmark list.
It saves the image name, caption and reverse caption in a JSON file (the reverse caption is generated from the caption using the same function we used in the dataset filtering).


In [None]:
# Project locations on the mounted Google Drive; every later cell builds its paths from these.
# Note: current_dir already ends with '/', so the derived paths contain a harmless '//'.
current_dir = f'/content/drive/My Drive/deep_learning_project/Project_036635803_305673212_312349509/'
evaluation_dir = f'{current_dir}/code/evaluation'
output_dir = f'{current_dir}/output'
# Base seed passed to evaluate_model for image generation.
seed_base =42

# Generating images from the base model for the manual set

In [None]:
# Generate one image per benchmark caption with the base Stable Diffusion model.
base_model_id  = "stabilityai/stable-diffusion-2-1-base"  # Adjust model ID as needed
captions_txt = f'{evaluation_dir}/benchmark.txt'
# "none" means: no captions CSV, so reverse captions are derived with switch_roles.
caption_csv = "none"
save_dir = f'{output_dir}/models_results/base_stable_diffusion/generated_images/manual_set'
# evaluate_model returns the path of the image_data.json it writes into save_dir.
json_file = evaluate_model(base_model_id, captions_txt, save_dir, caption_csv , seed = seed_base )


In [37]:
# Release cached GPU memory left over from image generation before loading the scoring models.
torch.cuda.empty_cache()

Use this to evaluate the generated images

In [None]:
# Base-model benchmark evaluation: locations of the generated images, their JSON index
# and the directory that receives the similarity scores.

save_dir = f'{output_dir}/models_results/base_stable_diffusion/generated_images/manual_set'
json_file = f'{save_dir}/image_data.json'
simularity_dir = f'{output_dir}/models_results/base_stable_diffusion/similarity_score/manual_set'


In [None]:
# CLIP-score evaluation of the base model on the manual benchmark set.
evaluation(simularity_dir,json_file,metric ='clip',output_file_name = "clip_output")

In [None]:
!pip install --upgrade transformers

In [None]:
  # Base benchmark evaluation (BLIP-2 captioning scored with BERTScore)

evaluation(simularity_dir,json_file,metric ='blip', output_file_name = "blip_output")

# Evaluating the base model using Winoground captions


This code uses the original base model to generate images from the Winoground captions we considered "interesting".
It saves the image name, caption and reverse caption in a JSON file (the reverse caption is generated from the caption using the same function we used in the dataset filtering).

In [None]:

# Generate images with the base model from the filtered Winoground captions CSV.
winoground_captions_csv_file = f'{output_dir}/winoground_filtering_output/winoground_filtering_output.csv'
base_model_id  = "stabilityai/stable-diffusion-2-1-base"
# "none" means: no captions text file, captions and reverse captions come from the CSV.
captions_txt_none = "none"

winoground_base_dir = f'{output_dir}/models_results/base_stable_diffusion/generated_images/Winoground'
base_winoground_json_file = evaluate_model(base_model_id, captions_txt_none, winoground_base_dir, winoground_captions_csv_file, seed = seed_base)

In [None]:
# Release cached GPU memory left over from image generation before loading the scoring models.
torch.cuda.empty_cache()

In [None]:
# Base-model Winoground evaluation: locations of the generated images, their JSON index
# and the directory that receives the similarity scores.

winoground_base_dir = f'{output_dir}/models_results/base_stable_diffusion/generated_images/Winoground'
base_winoground_json_file = f'{winoground_base_dir}/image_data.json'
winoground_base_simularity_dir = f'{output_dir}/models_results/base_stable_diffusion/similarity_score/Winoground'



In [None]:
# CLIP-score evaluation of the base model on the Winoground captions.
evaluation(winoground_base_simularity_dir,base_winoground_json_file,metric ='clip',output_file_name = "clip_output")

In [None]:
# Base Winoground evaluation (BLIP-2 captioning scored with BERTScore).
# The output name was misspelled "blip_ouput"; it now matches the "blip_output"
# name used for the manual-set BLIP evaluation.

evaluation(winoground_base_simularity_dir,base_winoground_json_file,metric ='blip', output_file_name = "blip_output")

# Evaluate our trained models using the benchmark list
This code uses our trained models to generate images from our benchmark list. It saves the image name, caption and reverse caption in a JSON file (the reverse caption is generated from the caption using the same function we used in the dataset filtering).

In [None]:
# Release cached GPU memory before loading the fine-tuned models.
torch.cuda.empty_cache()


In [None]:
# Generate images for each fine-tuned model from the benchmark captions

# (model ID, results sub-directory) pairs
model_id = [
  ("nadavo11/actions_model" , "ft_lr_1e-4"),("nadavo11/actions_model5" , "ft_lr_5e-5"),("nadavo11/ImSitu_actions_model", "ft_lr_1e-5")]
captions_txt = f'{evaluation_dir}/benchmark.txt'
bm_trained_json = [None for _ in range(len(model_id))]
trained_bm_dir = [None for _ in range(len(model_id))]
caption_csv = "none"
for j, (model_name, dir_name) in enumerate(model_id):
  trained_bm_dir[j] = f'{output_dir}/models_results/{dir_name}/generated_images/manual_set'
  # The seed was `seed_base+i`, but no `i` exists in this cell (the loop index is `j`),
  # which raises NameError on a fresh kernel. Use seed_base, as the base-model and
  # Winoground cells do, so all models are compared on the same seed.
  bm_trained_json[j] = evaluate_model(model_name, captions_txt, trained_bm_dir[j], caption_csv,  seed = seed_base)


In [None]:

# CLIP-score evaluation of each fine-tuned model on the manual benchmark set.
# (model ID, results sub-directory) pairs
model_id = [
  ("nadavo11/actions_model" , "ft_lr_1e-4"),("nadavo11/actions_model5" , "ft_lr_5e-5"),("nadavo11/ImSitu_actions_model", "ft_lr_1e-5")]
captions_txt = f'{evaluation_dir}/benchmark.txt'
bm_trained_json = [None for _ in range(len(model_id))]
trained_bm_simularity_dir = [None for _ in range(len(model_id))]
caption_csv = "none"
for j, (model_name, dir_name) in enumerate(model_id):
  trained_bm_simularity_dir[j]= f'{output_dir}/models_results/{dir_name}/similarity_score/manual_set'
  # The generation cell writes image_data.json into .../generated_images/manual_set;
  # the path used here previously omitted the `manual_set` sub-directory.
  bm_trained_json[j] = f'{output_dir}/models_results/{dir_name}/generated_images/manual_set/image_data.json'
  evaluation(trained_bm_simularity_dir[j],bm_trained_json[j],metric ='clip')


In [None]:

# BLIP-2 + BERTScore evaluation of each fine-tuned model on the manual benchmark set.
# (model ID, results sub-directory) pairs
model_id = [
  ("nadavo11/actions_model" , "ft_lr_1e-4"),("nadavo11/actions_model5" , "ft_lr_5e-5"),("nadavo11/ImSitu_actions_model", "ft_lr_1e-5")]
captions_txt = f'{evaluation_dir}/benchmark.txt'
bm_trained_json = [None for _ in range(len(model_id))]
trained_bm_simularity_dir = [None for _ in range(len(model_id))]
caption_csv = "none"
for j, (model_name, dir_name) in enumerate(model_id):
  trained_bm_simularity_dir[j]= f'{output_dir}/models_results/{dir_name}/similarity_score/manual_set'
  # The generation cell writes image_data.json into .../generated_images/manual_set;
  # the path used here previously omitted the `manual_set` sub-directory.
  bm_trained_json[j] = f'{output_dir}/models_results/{dir_name}/generated_images/manual_set/image_data.json'
  # Output name fixed from the misspelled "blip_ouput".
  evaluation(trained_bm_simularity_dir[j],bm_trained_json[j],metric ='blip', output_file_name = "blip_output")


# Evaluate the trained models using Winoground captions
This code uses our trained models to generate images from the Winoground captions we considered "interesting".
It saves the image name, caption and reverse caption in a JSON file (the reverse caption is generated from the caption using the same function we used in the dataset filtering).

In [None]:


# Generate images for each fine-tuned model from the filtered Winoground captions CSV.
winoground_captions_csv_file = f'{output_dir}/winoground_filtering_output/winoground_filtering_output.csv'
# (model ID, results sub-directory) pairs
model_id = [
  ("nadavo11/actions_model" , "ft_lr_1e-4"),("nadavo11/actions_model5" , "ft_lr_5e-5"),("nadavo11/ImSitu_actions_model", "ft_lr_1e-5")]
# Not used by this cell: captions come from the CSV (captions_txt_none is passed below).
captions_txt = f'{evaluation_dir}/benchmark.txt'
winoground_trained_json = [None for _ in range(len(model_id))]
trained_winoground_dir = [None for _ in range(len(model_id))]
captions_txt_none = "none"
for j, (model_name, dir_name) in enumerate(model_id):
  trained_winoground_dir[j] = f'{output_dir}/models_results/{dir_name}/generated_images/Winoground'
  # evaluate_model returns the path of the image_data.json written into that directory.
  winoground_trained_json[j] = evaluate_model(model_name, captions_txt_none, trained_winoground_dir[j], winoground_captions_csv_file,  seed = seed_base)


In [None]:

# CLIP-score evaluation of each fine-tuned model on the Winoground captions.
# (model ID, results sub-directory) pairs
model_id = [
  ("nadavo11/actions_model" , "ft_lr_1e-4"),("nadavo11/actions_model5" , "ft_lr_5e-5"),("nadavo11/ImSitu_actions_model", "ft_lr_1e-5")]
# Not used by this cell; kept as in the other cells.
captions_txt = f'{evaluation_dir}/benchmark.txt'
winoground_trained_json = [None for _ in range(len(model_id))]
trained_winoground_dir = [None for _ in range(len(model_id))]
trained_winoground_simularity_dir = [None for _ in range(len(model_id))]

captions_txt_none = "none"
for j, (model_name, dir_name) in enumerate(model_id):
  trained_winoground_dir[j] = f'{output_dir}/models_results/{dir_name}/generated_images/Winoground'
  # JSON index written by the generation cell into the images directory.
  winoground_trained_json[j] = f'{trained_winoground_dir[j]}/image_data.json'
  trained_winoground_simularity_dir[j]= f'{output_dir}/models_results/{dir_name}/similarity_score/Winoground'
  evaluation(trained_winoground_simularity_dir[j],winoground_trained_json[j],metric ='clip')


In [None]:
!pip install --upgrade transformers

In [None]:

# BLIP-2 + BERTScore evaluation of each fine-tuned model on the Winoground captions.
# (model ID, results sub-directory) pairs
model_id = [
  ("nadavo11/actions_model" , "ft_lr_1e-4"),("nadavo11/actions_model5" , "ft_lr_5e-5"),("nadavo11/ImSitu_actions_model", "ft_lr_1e-5")]
captions_txt = f'{evaluation_dir}/benchmark.txt'
winoground_trained_json = [None for _ in range(len(model_id))]
trained_winoground_dir = [None for _ in range(len(model_id))]
trained_winoground_simularity_dir = [None for _ in range(len(model_id))]

captions_txt_none = "none"
for j, (model_name, dir_name) in enumerate(model_id):
  trained_winoground_dir[j] = f'{output_dir}/models_results/{dir_name}/generated_images/Winoground'
  # JSON index written by the generation cell into the images directory.
  winoground_trained_json[j] = f'{trained_winoground_dir[j]}/image_data.json'
  trained_winoground_simularity_dir[j]= f'{output_dir}/models_results/{dir_name}/similarity_score/Winoground'
  # Output name fixed from the misspelled "blip_ouput" so it matches the base-model runs.
  evaluation(trained_winoground_simularity_dir[j],winoground_trained_json[j],metric ='blip', output_file_name = "blip_output")