In [1]:
!pip install lpips
!pip install open-clip-torch
!pip install pytorch_fid
!pip install pytorch_msssim

Collecting lpips
  Downloading lpips-0.1.4-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=0.4.0->lpips)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=0.4.0->lpips)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=0.4.0->lpips)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=0.4.0->lpips)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=0.4.0->lpips)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=0.4.0->lpips)
  Downloading nvidia_cufft

In [2]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from PIL import Image
from skimage import io
from skimage.color import gray2rgb

from skimage.metrics import structural_similarity as ssim
from tabulate import tabulate
import lpips
import open_clip
from pytorch_fid import fid_score
from pytorch_msssim import ssim as torch_ssim

import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models.inception import inception_v3
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F


import warnings
warnings.filterwarnings("ignore")

In [3]:
from google.colab import drive
from google.colab import files
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [4]:
# Add the path of the basefolder where the data is located and the generated samples will be save later in the notebook
basepath = "/content/drive/MyDrive/CAMERA Stable Diffusion/data_tutorial"

In [5]:
# Count number of files in directory
data = 'ROCKS' # Choose among ROCKS, ROOTS, CMC
mode = 'generation' # Can be either "variation", "edit" or "generation"
save_rootpath = os.path.join(basepath, "generated/{}/dall-e-3_{}".format(data, mode))
save_list = [e for e in os.listdir(save_rootpath) if not e.startswith(".")]
print("Number of generated images available: {}".format(len(save_list)))


Number of generated images available: 3


**SSIM**

In [6]:
def compute_ssim(img, gen_img):
  """
  Compute Structural Similarity Index (SSIM) between two images.
  Args:
      img (numpy.ndarray): The original image.
      gen_img (numpy.ndarray): The generated image.
  Returns:
      float: The SSIM value.
  """
  if gen_img.shape != img.shape:
    print("Resizing generated image to input shape i.e. to {}".format(img.shape))
    gen_img = cv2.resize(gen_img, img.shape[:2][::-1])
  return ssim(img, gen_img, data_range=gen_img.max() - gen_img.min(), channel_axis=-1)


def compute_ssim_gpu(img, gen_img):
    """
    Compute SSIM using PyTorch on GPU.
    Inputs must be 4D torch tensors with shape [B, C, H, W] and values in [0, 1].

    Args:
        img (torch.Tensor): The original image tensor.
        gen_img (torch.Tensor): The generated image tensor.
    Returns:
        torch.Tensor: The SSIM tensor.
    """
    if gen_img.shape != img.shape:
      print("Resizing generated image to input shape i.e. to {}".format(img.shape))
      gen_img = F.interpolate(gen_img, size=img.shape[2:], mode='bilinear', align_corners=False)
    return torch_ssim(img, gen_img, data_range=1.0)


def average_ssim_over_folder_gpu(real_path, fake_path, model_prefix="model"):
    transform = transforms.Compose([
        transforms.ToTensor(),  # Converts to [0, 1]
    ])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    real_files = sorted([f for f in os.listdir(real_path) if f.endswith(('.png', '.jpg', '.tif'))])
    all_scores = []

    for real_file in real_files:
      real_img_path = os.path.join(real_path, real_file)
      real_img = transform(Image.open(real_img_path).convert("RGB")).unsqueeze(0).to(device)

      base_name = os.path.splitext(real_file)[0]
      fake_candidates = [f for f in os.listdir(fake_path) if f.startswith(f"{model_prefix}_{base_name}_")]

      if not fake_candidates:
        # print(f"⚠️ No matching fakes found for {real_file} with prefix {model_prefix}")
        continue

      scores = []
      for fake_file in fake_candidates:
        fake_img_path = os.path.join(fake_path, fake_file)
        fake_img = transform(Image.open(fake_img_path).convert("RGB")).unsqueeze(0).to(device)

        score = compute_ssim_gpu(real_img, fake_img).item()
        scores.append(score)

      avg_score = np.mean(scores)
      print(f"{real_file} — Avg SSIM over {len(scores)} fakes: {avg_score:.4f}")
      all_scores.append(avg_score)

    scores = np.array(all_scores)
    overall_avg = np.mean(scores)
    overall_std = np.std(scores)

    print(f"\n🔎 Overall average SSIM: {overall_avg:.4f} ± {overall_std:.4f}")
    return overall_avg, overall_std

**LPIPS**

In [7]:
# GPU device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained LPIPS model (you can choose between 'alex', 'vgg')
loss_fn_alex = lpips.LPIPS(net='alex').to(device)

def compute_lpips_gpu(real_img, fake_img):
  """
  Compute LPIPS between two RGB images using GPU.
  Inputs: PIL or NumPy image arrays.
  Returns: float
  """
  transform = transforms.Compose([
      transforms.ToTensor(),  # Converts to [0, 1]
      transforms.Resize((512, 512)),
      transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)  # Scale to [-1, 1] as LPIPS expects
  ])

  if isinstance(real_img, np.ndarray):
      real_img = Image.fromarray(real_img)
  if isinstance(fake_img, np.ndarray):
      fake_img = Image.fromarray(fake_img)

  real_tensor = transform(real_img).unsqueeze(0).to(device)
  fake_tensor = transform(fake_img).unsqueeze(0).to(device)

  with torch.no_grad():
      d = loss_fn_alex(real_tensor, fake_tensor)
  return d.item()

def average_lpips_over_folder_gpu(real_path, fake_path, model_prefix="model"):
  real_files = sorted([f for f in os.listdir(real_path) if f.endswith(('.png', '.jpg', '.tif'))])
  all_scores = []

  for real_file in real_files:
    real_img_path = os.path.join(real_path, real_file)
    real_img = np.array(Image.open(real_img_path).convert("RGB"))

    base_name = os.path.splitext(real_file)[0]
    fake_candidates = [f for f in os.listdir(fake_path) if f.startswith(f"{model_prefix}_{base_name}_")]

    if not fake_candidates:
      # print(f"⚠️ No matching fakes found for {real_file} with prefix {model_prefix}")
      continue

    scores = []
    for fake_file in fake_candidates:
      fake_img_path = os.path.join(fake_path, fake_file)
      fake_img = np.array(Image.open(fake_img_path).convert("RGB"))

      score = compute_lpips_gpu(real_img, fake_img)
      scores.append(score)

    avg_score = np.mean(scores)
    print(f"{real_file} — Avg LPIPS over {len(scores)} fakes: {avg_score:.4f}")
    all_scores.append(avg_score)

  scores = np.array(all_scores)
  overall_avg = np.mean(scores)
  overall_std = np.std(scores)
  print(f"\n🔎 Overall average LPIPS: {overall_avg:.4f} ± {overall_std:.4f}")
  return overall_avg, overall_std



Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 146MB/s]


Loading model from: /usr/local/lib/python3.11/dist-packages/lpips/weights/v0.1/alex.pth


**FID**

In [8]:
# Run FID over 3–5 different sets of generated samples and report the average ± std.
def compute_fid(path_real, path_fake, n_loops, batch_size=32):
  """
  Compute the Frechet Inception Distance (FID) between two sets of images.
  Args:
      path_real (str): Path to the directory containing the real images.
      path_fake (str): Path to the directory containing the generated images.
      n_loops (int): Number of loops to compute the FID.
      batch_size (int): Batch size for computing the FID.
  Returns:
      tuple: The average and standard deviation of the FID values.
  """
  # Two folders: one with real images, one with generated

  fid_scores = []
  for run in range(n_loops):
    fid_value = fid_score.calculate_fid_given_paths([path_real, path_fake],
                                                  batch_size=batch_size,
                                                  device='cuda',
                                                  dims=2048)
    # dims=2048 Feature Dimension of InceptionV3 Pooling Layer
    fid_scores.append(fid_value)

  mean_fid = np.mean(fid_scores)
  std_fid = np.std(fid_scores)
  print(f"FID = {mean_fid:.2f} ± {std_fid:.2f}")

  return mean_fid, std_fid


**CLIP Score**

***CLIPScore is a metric that measures how well an image aligns with a given text prompt by computing the cosine similarity between their CLIP-encoded embeddings.***

In [9]:
def compute_clipscore(prompt, image_dir):
  """
  Compute CLIP Score between a prompt and a set of images.
  Args:
      prompt (str): The text prompt.
      image_dir (str): Path to the directory containing the images.
  Returns:
      tuple: The average and standard deviation of the CLIP Score values.
  """

  # Load model and tokenizer
  model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
  tokenizer = open_clip.get_tokenizer('ViT-B-32')
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model = model.to(device).eval()

  # Tokenize prompt
  text = tokenizer([prompt]).to(device)

  # Encode text once
  with torch.no_grad():
    text_features = model.encode_text(text)
    text_features /= text_features.norm(dim=-1, keepdim=True)

  # Get list of generated images
  image_files = [f for f in os.listdir(image_dir) if f.lower().endswith((".png", ".jpg", ".tif"))]

  scores = []

  # Compute CLIPScore for each image
  for img_file in image_files:
    image_path = os.path.join(image_dir, img_file)
    image = preprocess(Image.open(image_path).convert("RGB")).unsqueeze(0).to(device)

    with torch.no_grad():
      image_features = model.encode_image(image)
      image_features /= image_features.norm(dim=-1, keepdim=True)

      score = (image_features @ text_features.T).item()
      scores.append(score)
      print(f"{img_file}: {score:.4f}")

  # Compute average and std scores
  scores = np.array(scores)
  avg_score = scores.mean()
  std_score = scores.std()
  print(f"\nAverage CLIPScore: {avg_score:.4f} ± {std_score:.4f}")

  return avg_score, std_score


In [10]:
def get_df_metrics(path_real, path_fake, prompts, models, data):
  """
  Get Table summary of all calculated metrics with mean and standard variation into a dataframe format.
  Args:
    path_real (str): path to the real images
    path_fake (str): path to the fake
    prompts (dict): dictionary with the prompts for each model
    data (str): name of the dataset
  Returns:
    Pandas DataFrame: DataFrame with mean and std of each metric
  """

  rows = []
  for model_name in tqdm(models):
    if model_name.startswith('dall-e'): # harcoded to how I previously set up filenames during inference
      model_prefix = model_name
    else:
      model_prefix = model_name + "_" + data
    print(f"Model: {model_name}")
    path_fake_model = os.path.join(path_fake, model_name)
    prompt = prompts[model_name]
    avg_ssim, std_ssim = average_ssim_over_folder_gpu(path_real, path_fake_model, model_prefix=model_prefix)
    avg_lpips, std_lpips = average_lpips_over_folder_gpu(path_real, path_fake_model, model_prefix=model_prefix)
    mean_fid, std_fid = compute_fid(path_real, path_fake_model, n_loops=1, batch_size=32)

    if prompt == 'N/A':
      clip_mean = 0
      clip_std = 0
    else:
      clip_mean, clip_std = compute_clipscore(prompt, path_fake_model)


    row = {"Model": model_name,
            "CLIPScore_mean": clip_mean, "CLIPScore_std": clip_std,
            "FID_mean": mean_fid, "FID_std": std_fid,
            "LPIPS_mean": avg_lpips, "LPIPS_std": std_lpips,
            "SSIM_mean": avg_ssim, "SSIM_std": std_ssim,

          }
    rows.append(row)

  df = pd.DataFrame(rows)
  # df = df.set_index("Model")
  print("\nSummary Table:")
  print(df)
  return df

In [11]:
metrics = ["SSIM", "LPIPS", "FID", "IS", "CLIPScore"]

def format_df(df, metrics):
  """
  Convert summary DataFrame to Latex table with format mean ± std for all values.
  Args:
    df (DataFrame): DataFrame to convert into a latex table
  Return:
  """

  # Merge mean ± std
  df_combined = pd.DataFrame(index=df.index)
  df_combined['Model'] = df['Model']

  for metric in metrics:
      df_combined[metric] = df[f"{metric}_mean"].map(lambda x: f"{x:.3f}") + " ± " + df[f"{metric}_std"].map(lambda x: f"{x:.3f}")
  # Convert to LaTeX
  latex_table = df_combined.to_latex(
      escape=False,  # allows ± symbol
      caption="Quantitative metrics (mean ± std) for image editing models.",
      label="tab:editing_metrics_pretty",
      column_format="l" + "c" * len(metrics)
  )

  print(latex_table)
  return df_combined, latex_table


## Prompts for each datasets:
- **CMC Dataset**
            prompts = {'controlNET': "X-ray image of a composite material with deformed circles",
                      'stableunclip': "X-ray image of a composite material with deformed circles",
                      'ledits': "small deformed circles",
                      'diffedit': "large circles",
                      'instructpix2pix': "make circles larger",
                      'dall-e-2_edit': "X-ray image of a composite material with deformed circles",
                      'dall-e-2_variation': 'N/A',
                      "dall-e-2_generation": "X-ray image of a composite material with deformed circles",
                      "dall-e-3_generation": "X-ray image of a composite material with deformed circles"}
- **EcoFAB Dataset**
          prompts = {'controlNET': "2D image of growing entangled plant roots",
                    'ledits': "disentangled plant roots",
                    'stableunclip': "2D image of growing entangled plant roots",
                    'diffedit': 'disentangled roots with leaves',
                    'instructpix2pix': "2D image of growing entangled plant roots",
                    'dall-e-2_edit': "2D image of entangled plant roots",
                    'dall-e-2_variation': 'N/A',
                    'dall-e-2_generation': 'microscopy image of entangled plant root in hydroponic system',
                    'dall-e-3_generation': 'microscopy image of entangled plant root in hydroponic system'

          }

- **Rocksample Dataset**
            prompts = {'controlNET': "microCT scan from a rock sample containing large sediment grains seperated by pores",
                      'ledits': "rock fragments and pores",
                      'stableunclip': "microCT scan from a rock sample containing large sediment grains seperated by pores",
                      'diffedit': "large rock fragments and small pores",
                      'instructpix2pix': "large rock fragments and pores",
                      'dall-e-2_edit': "microCT scan from a rock sample containing large sediment grains seperated by pores",
                      'dall-e-2_variation': 'N/A',
                      'dall-e-2_generation': 'microCT scan from a rock sample containing large sediment grains seperated by pores',
                      'dall-e-3_generation': 'microCT scan from a rock sample containing large sediment grains seperated by pores'}

Choose from the following models for image inference: *'controlNET', 'ledits', 'stableunclip', 'diffedit', 'instructpix2pix' 'dall-e-2_edit', 'dall-e-2_variation', 'dall-e-2_variation', 'dall-e-2_generation', 'dall-e-3_generation'*

In [16]:
models = ['controlNET', 'ledits', 'stableunclip', 'diffedit', 'instructpix2pix', 'dall-e-2_edit', 'dall-e-2_variation', 'dall-e-2_generation', 'dall-e-3_generation']


# CMC Metrics Calculation

In [54]:
prompts = {'controlNET': "X-ray image of a composite material with deformed circles",
           'stableunclip': "X-ray image of a composite material with deformed circles",
           'ledits': "small deformed circles",
           'diffedit': "large circles",
           'instructpix2pix': "make circles larger",
           'dall-e-2_edit': "X-ray image of a composite material with deformed circles",
           'dall-e-2_variation': 'N/A',
           "dall-e-2_generation": "X-ray image of a composite material with deformed circles",
           "dall-e-3_generation": "X-ray image of a composite material with deformed circles"}

data = 'CMC'
path_real = os.path.join(basepath, "raw/{}".format(data))
path_fake = os.path.join(basepath, "generated/{}".format(data))

df_cmc = get_df_metrics(path_real, path_fake, prompts, models, data)

  0%|          | 0/9 [00:00<?, ?it/s]

Model: controlNET
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.0199
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.0118
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.0113

🔎 Overall average SSIM: 0.0143 ± 0.0039
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.9633
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.9071
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.8738

🔎 Overall average LPIPS: 0.9148 ± 0.0369



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  4.10it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  4.06it/s]


FID = 522.32 ± 0.00


 11%|█         | 1/9 [00:25<03:22, 25.27s/it]

controlNET_CMC_Wet_raw42_0023_0.png: 0.2431
controlNET_CMC_Wet_raw42_0043_0.png: 0.2583
controlNET_CMC_Wet_raw43_0036_0.png: 0.2542

Average CLIPScore: 0.2518 ± 0.0064
Model: ledits
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.4920
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.5207
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.4825

🔎 Overall average SSIM: 0.4984 ± 0.0163
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.1354
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.1316
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.1501

🔎 Overall average LPIPS: 0.1390 ± 0.0080



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.99it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  4.11it/s]


FID = 106.60 ± 0.00


 22%|██▏       | 2/9 [00:58<03:29, 29.92s/it]

ledits_CMC_Wet_raw42_0023_0.png: 0.2618
ledits_CMC_Wet_raw42_0043_0.png: 0.2720
ledits_CMC_Wet_raw43_0036_0.png: 0.2381

Average CLIPScore: 0.2573 ± 0.0142
Model: stableunclip
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.0479
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.0575
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.0318

🔎 Overall average SSIM: 0.0458 ± 0.0106
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.6151
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.5246
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.4970

🔎 Overall average LPIPS: 0.5456 ± 0.0504



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.82it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.96it/s]


FID = 440.86 ± 0.00


 33%|███▎      | 3/9 [01:29<03:01, 30.26s/it]

stableunclip_CMC_Wet_raw42_0023_0.png: 0.2604
stableunclip_CMC_Wet_raw42_0043_0.png: 0.2289
stableunclip_CMC_Wet_raw43_0036_0.png: 0.2812

Average CLIPScore: 0.2568 ± 0.0215
Model: diffedit
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.5469
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.5318
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.5045

🔎 Overall average SSIM: 0.5277 ± 0.0176
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.1073
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.1189
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.1753

🔎 Overall average LPIPS: 0.1338 ± 0.0297



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.70it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.87it/s]


FID = 183.08 ± 0.00


 44%|████▍     | 4/9 [02:06<02:45, 33.17s/it]

diffedit_CMC_Wet_raw42_0023_0.png: 0.2009
diffedit_CMC_Wet_raw42_0043_0.png: 0.2054
diffedit_CMC_Wet_raw43_0036_0.png: 0.1609

Average CLIPScore: 0.1891 ± 0.0200
Model: instructpix2pix
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.0879
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.1455
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.3741

🔎 Overall average SSIM: 0.2025 ± 0.1236
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.8567
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.7957
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.2885

🔎 Overall average LPIPS: 0.6470 ± 0.2547



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.78it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.78it/s]


FID = 490.37 ± 0.00


 56%|█████▌    | 5/9 [02:28<01:56, 29.18s/it]

instructpix2pix_CMC_Wet_raw42_0023_0.png: 0.1824
instructpix2pix_CMC_Wet_raw42_0043_0.png: 0.2189
instructpix2pix_CMC_Wet_raw43_0036_0.png: 0.2109

Average CLIPScore: 0.2041 ± 0.0157
Model: dall-e-2_edit

🔎 Overall average SSIM: nan ± nan

🔎 Overall average LPIPS: nan ± nan



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.57it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.56it/s]


FID = 157.78 ± 0.00


 67%|██████▋   | 6/9 [02:50<01:19, 26.58s/it]

dalle2_edit_Wet_raw43_0036_1.png: 0.3272
dalle2_edit_Wet_raw42_0043_0.png: 0.3427
dalle2_edit_Wet_raw42_0043_2.png: 0.3181

Average CLIPScore: 0.3293 ± 0.0102
Model: dall-e-2_variation
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.0635
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.0312
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.0752

🔎 Overall average SSIM: 0.0567 ± 0.0186
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.6141
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.6771
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.6358

🔎 Overall average LPIPS: 0.6423 ± 0.0261



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.55it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.85it/s]
 78%|███████▊  | 7/9 [03:13<00:51, 25.52s/it]

FID = 437.08 ± 0.00
Model: dall-e-2_generation
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.0624
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.0472
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.0618

🔎 Overall average SSIM: 0.0571 ± 0.0070
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.9658
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.8033
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.7868

🔎 Overall average LPIPS: 0.8520 ± 0.0808



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  1.55it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.94it/s]


FID = 515.42 ± 0.00


 89%|████████▉ | 8/9 [03:39<00:25, 25.57s/it]

dall-e-2_generation_Wet_raw43_0036_1.png: 0.3352
dall-e-2_generation_Wet_raw42_0023_2.png: 0.3551
dall-e-2_generation_Wet_raw42_0043_1.png: 0.3421

Average CLIPScore: 0.3441 ± 0.0083
Model: dall-e-3_generation
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Wet_raw42_0023.png — Avg SSIM over 1 fakes: 0.0256
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Wet_raw42_0043.png — Avg SSIM over 1 fakes: 0.0202
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Wet_raw43_0036.png — Avg SSIM over 1 fakes: 0.0236

🔎 Overall average SSIM: 0.0231 ± 0.0022
Wet_raw42_0023.png — Avg LPIPS over 1 fakes: 0.8356
Wet_raw42_0043.png — Avg LPIPS over 1 fakes: 0.7225
Wet_raw43_0036.png — Avg LPIPS over 1 fakes: 0.6863

🔎 Overall average LPIPS: 0.7482 ± 0.0636



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.58it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.22it/s]


FID = 451.86 ± 0.00


100%|██████████| 9/9 [04:01<00:00, 26.82s/it]

dall-e-3_generation_Wet_raw43_0036_0.png: 0.3211
dall-e-3_generation_Wet_raw42_0023_0.png: 0.3074
dall-e-3_generation_Wet_raw42_0043_0.png: 0.3495

Average CLIPScore: 0.3260 ± 0.0175

Summary Table:
                 Model  CLIPScore_mean  CLIPScore_std    FID_mean  FID_std  \
0           controlNET        0.251846       0.006402  522.320144      0.0   
1               ledits        0.257311       0.014179  106.599896      0.0   
2         stableunclip        0.256850       0.021489  440.861024      0.0   
3             diffedit        0.189069       0.019975  183.077547      0.0   
4      instructpix2pix        0.204063       0.015675  490.372314      0.0   
5        dall-e-2_edit        0.329300       0.010151  157.783900      0.0   
6   dall-e-2_variation        0.000000       0.000000  437.076672      0.0   
7  dall-e-2_generation        0.344104       0.008263  515.424631      0.0   
8  dall-e-3_generation        0.325994       0.017529  451.858289      0.0   

   LPIPS_mean  LPIPS




In [44]:
df_cmc.head()

Unnamed: 0,Model,CLIPScore_mean,CLIPScore_std,FID_mean,FID_std,IS_mean,IS_std,LPIPS_mean,LPIPS_std,SSIM_mean,SSIM_std
0,controlNET,0.251846,0.006402,522.320144,0.0,,,0.914751,0.036931,0.014345,0.00392
1,ledits,0.257311,0.014179,106.599896,0.0,,,0.139026,0.007964,0.498384,0.016258
2,stableunclip,0.25685,0.021489,440.861024,0.0,,,0.545581,0.050439,0.045763,0.010584
3,diffedit,0.189069,0.019975,183.077547,0.0,,,0.133835,0.029733,0.527732,0.01756
4,instructpix2pix,0.204063,0.015675,490.372314,0.0,,,0.646954,0.254711,0.202485,0.123595


In [55]:
metrics = ["SSIM", "LPIPS", "FID", "CLIPScore"]

os.makedirs(os.path.join(basepath, "csv_metrics_results"), exist_ok=True)

csv_savepath = os.path.join(basepath, "csv_metrics_results/{}_results_diffusion.csv".format(data))
df_cmc.to_csv(csv_savepath, index=False) # Comment if already saved

df_cmc = pd.read_csv(csv_savepath)
df_cmc_formatted, latex_table_cmc = format_df(df_cmc, metrics)
df_cmc_formatted

\begin{table}
\caption{Quantitative metrics (mean ± std) for image editing models.}
\label{tab:editing_metrics_pretty}
\begin{tabular}{lcccc}
\toprule
 & Model & SSIM & LPIPS & FID & CLIPScore \\
\midrule
0 & controlNET & 0.014 ± 0.004 & 0.915 ± 0.037 & 522.320 ± 0.000 & 0.252 ± 0.006 \\
1 & ledits & 0.498 ± 0.016 & 0.139 ± 0.008 & 106.600 ± 0.000 & 0.257 ± 0.014 \\
2 & stableunclip & 0.046 ± 0.011 & 0.546 ± 0.050 & 440.861 ± 0.000 & 0.257 ± 0.021 \\
3 & diffedit & 0.528 ± 0.018 & 0.134 ± 0.030 & 183.078 ± 0.000 & 0.189 ± 0.020 \\
4 & instructpix2pix & 0.202 ± 0.124 & 0.647 ± 0.255 & 490.372 ± 0.000 & 0.204 ± 0.016 \\
5 & dall-e-2_edit & nan ± nan & nan ± nan & 157.784 ± 0.000 & 0.329 ± 0.010 \\
6 & dall-e-2_variation & 0.057 ± 0.019 & 0.642 ± 0.026 & 437.077 ± 0.000 & 0.000 ± 0.000 \\
7 & dall-e-2_generation & 0.057 ± 0.007 & 0.852 ± 0.081 & 515.425 ± 0.000 & 0.344 ± 0.008 \\
8 & dall-e-3_generation & 0.023 ± 0.002 & 0.748 ± 0.064 & 451.858 ± 0.000 & 0.326 ± 0.018 \\
\bottomrule
\end{

Unnamed: 0,Model,SSIM,LPIPS,FID,CLIPScore
0,controlNET,0.014 ± 0.004,0.915 ± 0.037,522.320 ± 0.000,0.252 ± 0.006
1,ledits,0.498 ± 0.016,0.139 ± 0.008,106.600 ± 0.000,0.257 ± 0.014
2,stableunclip,0.046 ± 0.011,0.546 ± 0.050,440.861 ± 0.000,0.257 ± 0.021
3,diffedit,0.528 ± 0.018,0.134 ± 0.030,183.078 ± 0.000,0.189 ± 0.020
4,instructpix2pix,0.202 ± 0.124,0.647 ± 0.255,490.372 ± 0.000,0.204 ± 0.016
5,dall-e-2_edit,nan ± nan,nan ± nan,157.784 ± 0.000,0.329 ± 0.010
6,dall-e-2_variation,0.057 ± 0.019,0.642 ± 0.026,437.077 ± 0.000,0.000 ± 0.000
7,dall-e-2_generation,0.057 ± 0.007,0.852 ± 0.081,515.425 ± 0.000,0.344 ± 0.008
8,dall-e-3_generation,0.023 ± 0.002,0.748 ± 0.064,451.858 ± 0.000,0.326 ± 0.018


In [56]:
# ROCKS
prompts = {'controlNET': "microCT scan from a rock sample containing large sediment grains seperated by pores",
           'ledits': "rock fragments and pores",
           'stableunclip': "microCT scan from a rock sample containing large sediment grains seperated by pores",
           'diffedit': "large rock fragments and small pores",
           'instructpix2pix': "large rock fragments and pores",
           'dall-e-2_edit': "microCT scan from a rock sample containing large sediment grains seperated by pores",
           'dall-e-2_variation': 'N/A',
           'dall-e-2_generation': 'microCT scan from a rock sample containing large sediment grains seperated by pores',
           'dall-e-3_generation': 'microCT scan from a rock sample containing large sediment grains seperated by pores'}


data = 'ROCKS'
path_real = os.path.join(basepath, "raw/{}".format(data))
path_fake = os.path.join(basepath, "generated/{}".format(data))

df_rocks = get_df_metrics(path_real, path_fake, prompts, models, data)

  0%|          | 0/9 [00:00<?, ?it/s]

Model: controlNET
Empty-Fe-Sand-10x_101.png — Avg SSIM over 1 fakes: 0.2129
Empty-Fe-Sand-10x_48.png — Avg SSIM over 1 fakes: 0.1109
Empty-Fe-Sand-10x_7.png — Avg SSIM over 1 fakes: 0.0751

🔎 Overall average SSIM: 0.1330 ± 0.0583
Empty-Fe-Sand-10x_101.png — Avg LPIPS over 1 fakes: 0.8534
Empty-Fe-Sand-10x_48.png — Avg LPIPS over 1 fakes: 0.9055
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 1 fakes: 0.8903

🔎 Overall average LPIPS: 0.8831 ± 0.0218



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.57it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.65it/s]


FID = 506.34 ± 0.00


 11%|█         | 1/9 [00:24<03:18, 24.83s/it]

controlNET_ROCKS_Empty-Fe-Sand-10x_101_0.png: 0.2540
controlNET_ROCKS_Empty-Fe-Sand-10x_7_0.png: 0.2475
controlNET_ROCKS_Empty-Fe-Sand-10x_48_0.png: 0.2256

Average CLIPScore: 0.2424 ± 0.0121
Model: ledits
Empty-Fe-Sand-10x_101.png — Avg SSIM over 1 fakes: 0.4997
Empty-Fe-Sand-10x_48.png — Avg SSIM over 1 fakes: 0.5134
Empty-Fe-Sand-10x_7.png — Avg SSIM over 1 fakes: 0.4951

🔎 Overall average SSIM: 0.5028 ± 0.0078
Empty-Fe-Sand-10x_101.png — Avg LPIPS over 1 fakes: 0.1579
Empty-Fe-Sand-10x_48.png — Avg LPIPS over 1 fakes: 0.1559
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 1 fakes: 0.1585

🔎 Overall average LPIPS: 0.1574 ± 0.0011



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.39it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.66it/s]


FID = 150.63 ± 0.00


 22%|██▏       | 2/9 [00:49<02:54, 24.98s/it]

ledits_ROCKS_Empty-Fe-Sand-10x_101_0.png: 0.2947
ledits_ROCKS_Empty-Fe-Sand-10x_7_0.png: 0.2865
ledits_ROCKS_Empty-Fe-Sand-10x_48_0.png: 0.2814

Average CLIPScore: 0.2875 ± 0.0055
Model: stableunclip

🔎 Overall average SSIM: nan ± nan

🔎 Overall average LPIPS: nan ± nan



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.57it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:02<00:00,  2.22s/it]


FID = 160.81 ± 0.00


 33%|███▎      | 3/9 [01:17<02:36, 26.16s/it]

ledits_ROCKS_Empty-Fe-Sand-10x_101_0.png: 0.3202
ledits_ROCKS_Empty-Fe-Sand-10x_7_0.png: 0.3317
ledits_ROCKS_Empty-Fe-Sand-10x_48_0.png: 0.3370

Average CLIPScore: 0.3296 ± 0.0070
Model: diffedit
Empty-Fe-Sand-10x_101.png — Avg SSIM over 1 fakes: 0.5938
Empty-Fe-Sand-10x_48.png — Avg SSIM over 1 fakes: 0.5877
Empty-Fe-Sand-10x_7.png — Avg SSIM over 1 fakes: 0.5899

🔎 Overall average SSIM: 0.5905 ± 0.0025
Empty-Fe-Sand-10x_101.png — Avg LPIPS over 1 fakes: 0.1022
Empty-Fe-Sand-10x_48.png — Avg LPIPS over 1 fakes: 0.1128
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 1 fakes: 0.1172

🔎 Overall average LPIPS: 0.1108 ± 0.0063



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.58it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:02<00:00,  2.15s/it]


FID = 123.01 ± 0.00


 44%|████▍     | 4/9 [01:45<02:15, 27.02s/it]

diffedit_rocksample_Empty-Fe-Sand-10x_415_3.png: 0.2739
diffedit_rocksample_Empty-Fe-Sand-10x_179_0.png: 0.2706
diffedit_rocksample_Empty-Fe-Sand-10x_0_3.png: 0.2776
diffedit_ROCKS_Empty-Fe-Sand-10x_101_0.png: 0.2734
diffedit_ROCKS_Empty-Fe-Sand-10x_7_0.png: 0.2822
diffedit_ROCKS_Empty-Fe-Sand-10x_48_0.png: 0.2747

Average CLIPScore: 0.2754 ± 0.0037
Model: instructpix2pix
Empty-Fe-Sand-10x_101.png — Avg SSIM over 1 fakes: 0.2960
Empty-Fe-Sand-10x_48.png — Avg SSIM over 1 fakes: 0.2561
Empty-Fe-Sand-10x_7.png — Avg SSIM over 1 fakes: 0.3173

🔎 Overall average SSIM: 0.2898 ± 0.0254
Empty-Fe-Sand-10x_101.png — Avg LPIPS over 1 fakes: 0.7007
Empty-Fe-Sand-10x_48.png — Avg LPIPS over 1 fakes: 0.7278
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 1 fakes: 0.6579

🔎 Overall average LPIPS: 0.6955 ± 0.0288



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.66it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:02<00:00,  2.07s/it]


FID = 344.39 ± 0.00


 56%|█████▌    | 5/9 [02:15<01:52, 28.03s/it]

instructpix2pix_rocksample_Empty-Fe-Sand-10x_415_3.png: 0.3168
instructpix2pix_rocksample_Empty-Fe-Sand-10x_179_0.png: 0.2462
instructpix2pix_rocksample_Empty-Fe-Sand-10x_0_2.png: 0.2974
instructpix2pix_ROCKS_Empty-Fe-Sand-10x_101_0.png: 0.2695
instructpix2pix_ROCKS_Empty-Fe-Sand-10x_7_0.png: 0.2805
instructpix2pix_ROCKS_Empty-Fe-Sand-10x_48_0.png: 0.2439

Average CLIPScore: 0.2757 ± 0.0261
Model: dall-e-2_edit
Empty-Fe-Sand-10x_48.png — Avg SSIM over 1 fakes: 0.6420
Empty-Fe-Sand-10x_7.png — Avg SSIM over 2 fakes: 0.6356

🔎 Overall average SSIM: 0.6388 ± 0.0032
Empty-Fe-Sand-10x_48.png — Avg LPIPS over 1 fakes: 0.2853
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 2 fakes: 0.2363

🔎 Overall average LPIPS: 0.2608 ± 0.0245



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.60it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.51it/s]


FID = 190.89 ± 0.00


 67%|██████▋   | 6/9 [02:39<01:20, 26.77s/it]

dall-e-2_edit_Empty-Fe-Sand-10x_48_1.png: 0.2970
dall-e-2_edit_Empty-Fe-Sand-10x_7_0.png: 0.3109
dall-e-2_edit_Empty-Fe-Sand-10x_7_1.png: 0.3076

Average CLIPScore: 0.3052 ± 0.0059
Model: dall-e-2_variation
Empty-Fe-Sand-10x_48.png — Avg SSIM over 1 fakes: 0.2565
Empty-Fe-Sand-10x_7.png — Avg SSIM over 2 fakes: 0.2668

🔎 Overall average SSIM: 0.2617 ± 0.0052
Empty-Fe-Sand-10x_48.png — Avg LPIPS over 1 fakes: 0.5828
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 2 fakes: 0.6367

🔎 Overall average LPIPS: 0.6098 ± 0.0270



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.58it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.49it/s]
 78%|███████▊  | 7/9 [03:01<00:49, 24.99s/it]

FID = 496.42 ± 0.00
Model: dall-e-2_generation
Empty-Fe-Sand-10x_7.png — Avg SSIM over 3 fakes: 0.1768

🔎 Overall average SSIM: 0.1768 ± 0.0000
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 3 fakes: 0.6010

🔎 Overall average LPIPS: 0.6010 ± 0.0000



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.56it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.61it/s]


FID = 459.89 ± 0.00


 89%|████████▉ | 8/9 [03:25<00:24, 24.72s/it]

dall-e-2_generation_Empty-Fe-Sand-10x_7_0.png: 0.3255
dall-e-2_generation_Empty-Fe-Sand-10x_7_1.png: 0.3281
dall-e-2_generation_Empty-Fe-Sand-10x_7_2.png: 0.3184

Average CLIPScore: 0.3240 ± 0.0041
Model: dall-e-3_generation
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Empty-Fe-Sand-10x_101.png — Avg SSIM over 1 fakes: 0.0407
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Empty-Fe-Sand-10x_48.png — Avg SSIM over 1 fakes: 0.0466
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Empty-Fe-Sand-10x_7.png — Avg SSIM over 1 fakes: 0.0366

🔎 Overall average SSIM: 0.0413 ± 0.0041
Empty-Fe-Sand-10x_101.png — Avg LPIPS over 1 fakes: 0.6811
Empty-Fe-Sand-10x_48.png — Avg LPIPS over 1 fakes: 0.7146
Empty-Fe-Sand-10x_7.png — Avg LPIPS over 1 fakes: 0.6687

🔎 Overall average LPIPS: 0.6881 ± 0.0194



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.53it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.28it/s]


FID = 371.99 ± 0.00


100%|██████████| 9/9 [03:52<00:00, 25.87s/it]

dall-e-3_generation_Empty-Fe-Sand-10x_101_0.png: 0.3135
dall-e-3_generation_Empty-Fe-Sand-10x_48_0.png: 0.2825
dall-e-3_generation_Empty-Fe-Sand-10x_7_0.png: 0.3273

Average CLIPScore: 0.3078 ± 0.0187

Summary Table:
                 Model  CLIPScore_mean  CLIPScore_std    FID_mean  FID_std  \
0           controlNET        0.242369       0.012121  506.342161      0.0   
1               ledits        0.287538       0.005456  150.633435      0.0   
2         stableunclip        0.329626       0.007004  160.810137      0.0   
3             diffedit        0.275425       0.003674  123.006972      0.0   
4      instructpix2pix        0.275704       0.026148  344.388686      0.0   
5        dall-e-2_edit        0.305167       0.005918  190.893401      0.0   
6   dall-e-2_variation        0.000000       0.000000  496.418221      0.0   
7  dall-e-2_generation        0.324015       0.004103  459.892780      0.0   
8  dall-e-3_generation        0.307794       0.018721  371.988036      0.0   

  




In [57]:
metrics = ["SSIM", "LPIPS", "FID", "CLIPScore"]

csv_savepath = os.path.join(basepath, "csv_metrics_results/{}_results.csv".format(data))
df_rocks.to_csv(csv_savepath, index=False)

df_rocks = pd.read_csv(os.path.join(basepath, "csv_metrics_results/{}_results.csv".format(data)))
df_rocks_formatted, latex_table_rocks = format_df(df_rocks, metrics)
df_rocks_formatted


\begin{table}
\caption{Quantitative metrics (mean ± std) for image editing models.}
\label{tab:editing_metrics_pretty}
\begin{tabular}{lcccc}
\toprule
 & Model & SSIM & LPIPS & FID & CLIPScore \\
\midrule
0 & controlNET & 0.133 ± 0.058 & 0.883 ± 0.022 & 506.342 ± 0.000 & 0.242 ± 0.012 \\
1 & ledits & 0.503 ± 0.008 & 0.157 ± 0.001 & 150.633 ± 0.000 & 0.288 ± 0.005 \\
2 & stableunclip & nan ± nan & nan ± nan & 160.810 ± 0.000 & 0.330 ± 0.007 \\
3 & diffedit & 0.590 ± 0.003 & 0.111 ± 0.006 & 123.007 ± 0.000 & 0.275 ± 0.004 \\
4 & instructpix2pix & 0.290 ± 0.025 & 0.695 ± 0.029 & 344.389 ± 0.000 & 0.276 ± 0.026 \\
5 & dall-e-2_edit & 0.639 ± 0.003 & 0.261 ± 0.024 & 190.893 ± 0.000 & 0.305 ± 0.006 \\
6 & dall-e-2_variation & 0.262 ± 0.005 & 0.610 ± 0.027 & 496.418 ± 0.000 & 0.000 ± 0.000 \\
7 & dall-e-2_generation & 0.177 ± 0.000 & 0.601 ± 0.000 & 459.893 ± 0.000 & 0.324 ± 0.004 \\
8 & dall-e-3_generation & 0.041 ± 0.004 & 0.688 ± 0.019 & 371.988 ± 0.000 & 0.308 ± 0.019 \\
\bottomrule
\end{

Unnamed: 0,Model,SSIM,LPIPS,FID,CLIPScore
0,controlNET,0.133 ± 0.058,0.883 ± 0.022,506.342 ± 0.000,0.242 ± 0.012
1,ledits,0.503 ± 0.008,0.157 ± 0.001,150.633 ± 0.000,0.288 ± 0.005
2,stableunclip,nan ± nan,nan ± nan,160.810 ± 0.000,0.330 ± 0.007
3,diffedit,0.590 ± 0.003,0.111 ± 0.006,123.007 ± 0.000,0.275 ± 0.004
4,instructpix2pix,0.290 ± 0.025,0.695 ± 0.029,344.389 ± 0.000,0.276 ± 0.026
5,dall-e-2_edit,0.639 ± 0.003,0.261 ± 0.024,190.893 ± 0.000,0.305 ± 0.006
6,dall-e-2_variation,0.262 ± 0.005,0.610 ± 0.027,496.418 ± 0.000,0.000 ± 0.000
7,dall-e-2_generation,0.177 ± 0.000,0.601 ± 0.000,459.893 ± 0.000,0.324 ± 0.004
8,dall-e-3_generation,0.041 ± 0.004,0.688 ± 0.019,371.988 ± 0.000,0.308 ± 0.019


In [None]:
data = 'ROOTS'
path_real = os.path.join(basepath, "raw/{}".format(data))
path_fake = os.path.join(basepath, "generated/{}".format(data))

prompts = {'controlNET': "2D image of growing entangled plant roots",
           'ledits': "disentangled plant roots",
           'stableunclip': "2D image of growing entangled plant roots",
           'diffedit': 'disentangled roots with leaves',
           'instructpix2pix': "2D image of growing entangled plant roots",
           'dall-e-2_edit': "2D image of entangled plant roots",
           'dall-e-2_variation': 'N/A',
           'dall-e-2_generation': 'microscopy image of entangled plant root in hydroponic system',
           'dall-e-3_generation': 'microscopy image of entangled plant root in hydroponic system'

}

df_roots = get_df_metrics(path_real, path_fake, prompts, models, data)


  0%|          | 0/9 [00:00<?, ?it/s]

Model: controlNET
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg SSIM over 1 fakes: 0.0696
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg SSIM over 1 fakes: 0.1419
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg SSIM over 1 fakes: 0.1796

🔎 Overall average SSIM: 0.1303 ± 0.0457
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg LPIPS over 1 fakes: 0.8261
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg LPIPS over 1 fakes: 0.8770
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg LPIPS over 1 fakes: 0.9832

🔎 Overall average LPIPS: 0.8954 ± 0.0654



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.42it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.35it/s]


FID = 510.53 ± 0.00


 11%|█         | 1/9 [00:27<03:41, 27.73s/it]

controlNET_ROOTS_YY22EX0002EF012_S1-H1_20221105_17_07_EPSON_0.png: 0.2743
controlNET_ROOTS_YY22EX0001EF027_S1-L1_20220820_17_17_EPSON_0.png: 0.3139
controlNET_ROOTS_YY22EX0002EF036_S1-L2_20221108_21_20_EPSON_0.png: 0.2086

Average CLIPScore: 0.2656 ± 0.0434
Model: ledits
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg SSIM over 1 fakes: 0.7832
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg SSIM over 1 fakes: 0.7676
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg SSIM over 1 fakes: 0.7292

🔎 Overall average SSIM: 0.7600 ± 0.0227
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg LPIPS over 1 fakes: 0.2432
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg LPIPS over 1 fakes: 0.3137
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg LPIPS over 1 fakes: 0.2920

🔎 Overall average LPIPS: 0.2830 ± 0.0295



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.46it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.45it/s]


FID = 324.96 ± 0.00


 22%|██▏       | 2/9 [00:55<03:12, 27.49s/it]

ledits_ROOTS_YY22EX0002EF012_S1-H1_20221105_17_07_EPSON_0.png: 0.2893
ledits_ROOTS_YY22EX0001EF027_S1-L1_20220820_17_17_EPSON_0.png: 0.2785
ledits_ROOTS_YY22EX0002EF036_S1-L2_20221108_21_20_EPSON_0.png: 0.2646

Average CLIPScore: 0.2775 ± 0.0101
Model: stableunclip

🔎 Overall average SSIM: nan ± nan

🔎 Overall average LPIPS: nan ± nan



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.36it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


FID = 339.74 ± 0.00


 33%|███▎      | 3/9 [01:22<02:44, 27.38s/it]

ledits_ROOTS_YY22EX0002EF012_S1-H1_20221105_17_07_EPSON_0.png: 0.2940
ledits_ROOTS_YY22EX0001EF027_S1-L1_20220820_17_17_EPSON_0.png: 0.3412
ledits_ROOTS_YY22EX0002EF036_S1-L2_20221108_21_20_EPSON_0.png: 0.3329

Average CLIPScore: 0.3227 ± 0.0206
Model: diffedit
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg SSIM over 1 fakes: 0.9052
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg SSIM over 1 fakes: 0.8791
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg SSIM over 1 fakes: 0.8632

🔎 Overall average SSIM: 0.8825 ± 0.0173
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg LPIPS over 1 fakes: 0.0849
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg LPIPS over 1 fakes: 0.1398
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg LPIPS over 1 fakes: 0.1034

🔎 Overall average LPIPS: 0.1094 ± 0.0228



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.35it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.23it/s]


FID = 190.15 ± 0.00


 44%|████▍     | 4/9 [01:46<02:10, 26.05s/it]

diffedit_ROOTS_YY22EX0002EF012_S1-H1_20221105_17_07_EPSON_0.png: 0.2206
diffedit_ROOTS_YY22EX0001EF027_S1-L1_20220820_17_17_EPSON_0.png: 0.2608
diffedit_ROOTS_YY22EX0002EF036_S1-L2_20221108_21_20_EPSON_0.png: 0.2290

Average CLIPScore: 0.2368 ± 0.0173
Model: instructpix2pix
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg SSIM over 1 fakes: 0.3089
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg SSIM over 1 fakes: 0.3341
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg SSIM over 1 fakes: 0.4023

🔎 Overall average SSIM: 0.3484 ± 0.0395
YY22EX0001EF027_S1-L1_20220820_17_17_EPSON.png — Avg LPIPS over 1 fakes: 0.7353
YY22EX0002EF012_S1-H1_20221105_17_07_EPSON.png — Avg LPIPS over 1 fakes: 0.8027
YY22EX0002EF036_S1-L2_20221108_21_20_EPSON.png — Avg LPIPS over 1 fakes: 0.7755

🔎 Overall average LPIPS: 0.7712 ± 0.0277



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.29it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  3.03it/s]


FID = 524.69 ± 0.00


 56%|█████▌    | 5/9 [02:10<01:41, 25.32s/it]

instructpix2pix_ROOTS_YY22EX0002EF012_S1-H1_20221105_17_07_EPSON_0.png: 0.2579
instructpix2pix_ROOTS_YY22EX0001EF027_S1-L1_20220820_17_17_EPSON_0.png: 0.2738
instructpix2pix_ROOTS_YY22EX0002EF036_S1-L2_20221108_21_20_EPSON_0.png: 0.2428

Average CLIPScore: 0.2581 ± 0.0126
Model: dall-e-2_edit

🔎 Overall average SSIM: nan ± nan

🔎 Overall average LPIPS: nan ± nan



  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:00<00:00,  2.41it/s]





  0%|          | 0/1 [00:00<?, ?it/s][A

In [None]:
metrics = ["SSIM", "LPIPS", "FID", "CLIPScore"]

csv_savepath = os.path.join(basepath, "csv_metrics_results/{}_results.csv".format(data))
df_roots.to_csv(csv_savepath, index=False) # Comment if not needed

df_roots = pd.read_csv(csv_savepath)
df_roots_formatted, latex_table_roots = format_df(df_roots, metrics)
df_roots_formatted


\begin{table}
\caption{Quantitative metrics (mean ± std) for image editing models.}
\label{tab:editing_metrics_pretty}
\begin{tabular}{lccccc}
\toprule
 & SSIM & LPIPS & FID & IS & CLIPScore \\
Model &  &  &  &  &  \\
\midrule
controlNET & 0.085 ± 0.037 & 0.820 ± 0.041 & 380.363 ± 0.000 & 5.790 ± 0.318 & 0.276 ± 0.056 \\
ledits & 0.673 ± 0.056 & 0.343 ± 0.041 & 206.072 ± 0.000 & 4.117 ± 0.159 & 0.316 ± 0.028 \\
stableunclip & 0.391 ± 0.034 & 0.654 ± 0.035 & 218.497 ± 0.000 & 4.943 ± 0.321 & 0.251 ± 0.040 \\
diffedit & 0.843 ± 0.046 & 0.147 ± 0.030 & 64.686 ± 0.000 & 2.352 ± 0.125 & 0.183 ± 0.036 \\
instructpix2pix & 0.520 ± 0.062 & 0.428 ± 0.075 & 143.595 ± 0.000 & 4.335 ± 0.213 & 0.261 ± 0.043 \\
dall-e-2_edit & 0.751 ± 0.028 & 0.293 ± 0.019 & 189.235 ± 0.000 & 2.556 ± 0.198 & 0.306 ± 0.031 \\
dall-e-2_variation & 0.566 ± 0.069 & 0.578 ± 0.035 & 178.433 ± 0.000 & 3.311 ± 0.408 & 0.000 ± 0.000 \\
dall-e-2_generation & 0.282 ± 0.048 & 0.695 ± 0.036 & 405.784 ± 0.000 & 4.468 ± 0.385 & 0.

Unnamed: 0_level_0,SSIM,LPIPS,FID,IS,CLIPScore
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
controlNET,0.085 ± 0.037,0.820 ± 0.041,380.363 ± 0.000,5.790 ± 0.318,0.276 ± 0.056
ledits,0.673 ± 0.056,0.343 ± 0.041,206.072 ± 0.000,4.117 ± 0.159,0.316 ± 0.028
stableunclip,0.391 ± 0.034,0.654 ± 0.035,218.497 ± 0.000,4.943 ± 0.321,0.251 ± 0.040
diffedit,0.843 ± 0.046,0.147 ± 0.030,64.686 ± 0.000,2.352 ± 0.125,0.183 ± 0.036
instructpix2pix,0.520 ± 0.062,0.428 ± 0.075,143.595 ± 0.000,4.335 ± 0.213,0.261 ± 0.043
dall-e-2_edit,0.751 ± 0.028,0.293 ± 0.019,189.235 ± 0.000,2.556 ± 0.198,0.306 ± 0.031
dall-e-2_variation,0.566 ± 0.069,0.578 ± 0.035,178.433 ± 0.000,3.311 ± 0.408,0.000 ± 0.000
dall-e-2_generation,0.282 ± 0.048,0.695 ± 0.036,405.784 ± 0.000,4.468 ± 0.385,0.345 ± 0.021
dall-e-3_generation,0.131 ± 0.027,0.664 ± 0.034,302.775 ± 0.000,4.387 ± 0.227,0.301 ± 0.031


Data
====

In this notebook we will the results obtained from inference of multiple diffusion APIs with tutorials that can be
found on HuggingFace [Hugging Face Diffusers Pipeline
](https://huggingface.co/docs/diffusers/index).

The dataset will download as a file named `scientific_dataset.zip`. The resulting directory
structure should be:

``` {.sh}
/path/to/scientific_dataset
    -> intput_data  
        -> 188242.png
        -> 173822.png
        -> 284702.png
           ...
    -> generated_data  
        -> 188242.png
        -> 173822.png
        -> 284702.png
           ...
```


# Calculate metrics for DCGAN and StyleGAN

In [13]:
from torchvision.transforms import Resize, Compose
from torchvision import transforms
from torchvision.transforms import ToPILImage


# Main function
def evaluate_fake_images_vs_reals_gan(path_real, path_fake, model_name="model", resize_to=None):
    """
    Get Table summary of all calculated metrics for GAN-based models with mean and standard variation into a dataframe format.
    Args:
      path_real (str): path to the real images
      path_fake (str): path to the fake
      model_name (str): name of the model, either stylegan or dcgan
    Returns:
      Pandas DataFrame: DataFrame with mean and std of each metric
    """

    # Init transforms and metrics
    if resize_to:
      transform = Compose([
          transforms.Resize(resize_to),
          transforms.ToTensor(),
          ])
    else:
      transform = Compose([
          transforms.ToTensor(),
      ])
    real_paths = sorted([os.path.join(path_real, f) for f in os.listdir(path_real) if f.lower().endswith((".png", ".jpg", ".tif"))])
    fake_paths = sorted([os.path.join(path_fake, f) for f in os.listdir(path_fake) if f.lower().endswith((".png", ".jpg", ".tif"))])
    print(f"Number of real images: {len(real_paths)}")
    print(f"Number of fake images: {len(fake_paths)}")

    n = min(len(real_paths), len(fake_paths))
    results = []
    ssim_scores, lpips_scores = [], []
    for idx in tqdm(range(n), desc=f"Computing SSIM & LPIPS for {model_name}"):
      real_img = transform(Image.open(real_paths[idx]).convert("RGB"))
      fake_img = transform(Image.open(fake_paths[idx]).convert("RGB"))

      # LPIPS
      to_pil = ToPILImage()

      real_lpips = (to_pil(real_img * 2 - 1))
      fake_lpips = (to_pil(fake_img * 2 - 1))
      lpips_score = compute_lpips_gpu(real_lpips, fake_lpips)
      lpips_scores.append(lpips_score)

      real_img = real_img.unsqueeze(0).to(device)
      fake_img = fake_img.unsqueeze(0).to(device)

      # SSIM
      ssim_score = compute_ssim_gpu(real_img, fake_img).item()
      ssim_scores.append(ssim_score)


    print("Computed average SSIM of {} and average LPIPS of {}".format(str(np.mean(ssim_scores)), str(np.mean(lpips_scores))))
    print("Now computing FID Score")
    # Compute FID and Inception Score
    mean_fid, _ = compute_fid(path_real, path_fake, n_loops=1, batch_size=32)

    # Build summary DataFrame
    summary_df = pd.DataFrame([{
        "Model": model_name,
        "ssim_mean": np.mean(ssim_scores),
        "ssim_std": np.std(ssim_scores),
        "lpips_mean": np.mean(lpips_scores),
        "lpips_std": np.std(lpips_scores),
        "fid_mean": mean_fid,
        "fid_std": 0,
    }])

    return summary_df


In [78]:
data = 'ROOTS'
model = 'dcgan'
path_real = os.path.join(basepath, "raw/{}".format(data))
path_fake = os.path.join(basepath, "generated/{}/{}".format(data, model))

df_roots_dcgan = evaluate_fake_images_vs_reals_gan(path_real, path_fake, model, resize_to=(64,64))


Number of real images: 3
Number of fake images: 3


Computing SSIM & LPIPS for dcgan: 100%|██████████| 3/3 [00:00<00:00, 24.49it/s]

Computed average SSIM of 0.33607587218284607 and average LPIPS of 0.6930538415908813
Now computing FID Score







100%|██████████| 1/1 [00:00<00:00,  2.95it/s]




100%|██████████| 1/1 [00:00<00:00,  2.90it/s]


FID = 457.95 ± 0.00


In [79]:
df_roots_dcgan.head()

Unnamed: 0,Model,ssim_mean,ssim_std,lpips_mean,lpips_std,fid_mean,fid_std
0,dcgan,0.336076,0.154529,0.693054,0.075118,457.946586,0


In [81]:
metrics = ["ssim", "lpips", "fid"]
df_roots_dcgan_formatted, latex_table_ecofab_gan = format_df(df_roots_dcgan, metrics)
df_roots_dcgan_formatted

\begin{table}
\caption{Quantitative metrics (mean ± std) for image editing models.}
\label{tab:editing_metrics_pretty}
\begin{tabular}{lccc}
\toprule
 & Model & ssim & lpips & fid \\
\midrule
0 & dcgan & 0.336 ± 0.155 & 0.693 ± 0.075 & 457.947 ± 0.000 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0,Model,ssim,lpips,fid
0,dcgan,0.336 ± 0.155,0.693 ± 0.075,457.947 ± 0.000


In [82]:
data = 'ROOTS'
model = 'stylegan'

path_real = os.path.join(basepath, "raw/{}".format(data))
path_fake = os.path.join(basepath, "generated/{}/{}".format(data, model))
df_roots_stylegan = evaluate_fake_images_vs_reals_gan(path_real, path_fake, model)
df_roots_stylegan


Number of real images: 3
Number of fake images: 2


Computing SSIM & LPIPS for stylegan: 100%|██████████| 2/2 [00:02<00:00,  1.01s/it]


Computed average SSIM of 0.3831506669521332 and average LPIPS of 0.8230463862419128
Now computing FID Score


100%|██████████| 1/1 [00:00<00:00,  2.97it/s]




100%|██████████| 1/1 [00:00<00:00,  2.62it/s]


FID = 392.39 ± 0.00


Unnamed: 0,Model,ssim_mean,ssim_std,lpips_mean,lpips_std,fid_mean,fid_std
0,stylegan,0.383151,0.115796,0.823046,0.102207,392.388519,0


In [84]:
df_roots_stylegan_formatted, _ = format_df(df_roots_stylegan, metrics)
df_roots_stylegan_formatted


\begin{table}
\caption{Quantitative metrics (mean ± std) for image editing models.}
\label{tab:editing_metrics_pretty}
\begin{tabular}{lccc}
\toprule
 & Model & ssim & lpips & fid \\
\midrule
0 & stylegan & 0.383 ± 0.116 & 0.823 ± 0.102 & 392.389 ± 0.000 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0,Model,ssim,lpips,fid
0,stylegan,0.383 ± 0.116,0.823 ± 0.102,392.389 ± 0.000


In [85]:
df_roots_gan = pd.concat([df_roots_dcgan_formatted, df_roots_stylegan_formatted])
df_roots_gan.to_csv(os.path.join(basepath, "csv_metrics_results/{}_results_gans.csv".format(data)), index=False)

df_roots_gan

Unnamed: 0,Model,ssim,lpips,fid
0,dcgan,0.336 ± 0.155,0.693 ± 0.075,457.947 ± 0.000
0,stylegan,0.383 ± 0.116,0.823 ± 0.102,392.389 ± 0.000


# Rocksample

In [88]:
data = 'ROCKS'
metrics = ["ssim", "lpips", "fid"]

model = 'dcgan'
path_real = os.path.join(basepath, "raw/{}".format(data))
path_fake = os.path.join(basepath, "generated/{}/{}".format(data, model))

df_rocks_dcgan = evaluate_fake_images_vs_reals_gan(path_real, path_fake, model, resize_to=(64,64))
df_rocks_dcgan['model'] = model

# ---------------

model = 'stylegan'
df_rocks_stylegan = evaluate_fake_images_vs_reals_gan(path_real, path_fake, model)
df_rocks_stylegan['model'] = model

df_rocks_gan = pd.concat([df_rocks_dcgan, df_rocks_stylegan])
df_rocks_gan, _ = format_df(df_rocks_gan, metrics)

df_rocks_gan.to_csv(os.path.join(basepath, "csv_metrics_results/{}_results_gans.csv").format(data), index=False)
df_rocks_gan


Number of real images: 3
Number of fake images: 2


Computing SSIM & LPIPS for dcgan: 100%|██████████| 2/2 [00:00<00:00, 18.98it/s]

Computed average SSIM of 0.05831996724009514 and average LPIPS of 1.0038783550262451
Now computing FID Score







100%|██████████| 1/1 [00:00<00:00,  1.07it/s]




100%|██████████| 1/1 [00:00<00:00,  1.79it/s]


FID = 475.63 ± 0.00
Number of real images: 3
Number of fake images: 2


Computing SSIM & LPIPS for stylegan: 100%|██████████| 2/2 [00:00<00:00, 17.16it/s]

Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Computed average SSIM of 0.2758052945137024 and average LPIPS of 1.1946526765823364
Now computing FID Score







100%|██████████| 1/1 [00:00<00:00,  3.11it/s]




100%|██████████| 1/1 [00:00<00:00,  3.76it/s]


FID = 475.63 ± 0.00
\begin{table}
\caption{Quantitative metrics (mean ± std) for image editing models.}
\label{tab:editing_metrics_pretty}
\begin{tabular}{lccc}
\toprule
 & Model & ssim & lpips & fid \\
\midrule
0 & dcgan & 0.058 ± 0.007 & 1.004 ± 0.000 & 475.625 ± 0.000 \\
0 & stylegan & 0.276 ± 0.001 & 1.195 ± 0.013 & 475.625 ± 0.000 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0,Model,ssim,lpips,fid
0,dcgan,0.058 ± 0.007,1.004 ± 0.000,475.625 ± 0.000
0,stylegan,0.276 ± 0.001,1.195 ± 0.013,475.625 ± 0.000


In [89]:
metrics = ["ssim", "lpips", "fid"]
df_rocks_gan

Unnamed: 0,Model,ssim,lpips,fid
0,dcgan,0.058 ± 0.007,1.004 ± 0.000,475.625 ± 0.000
0,stylegan,0.276 ± 0.001,1.195 ± 0.013,475.625 ± 0.000


In [15]:
data = 'CMC'
metrics = ["ssim", "lpips", "fid"]

model = 'dcgan'
path_real = os.path.join(basepath, "raw/{}".format(data))
path_fake = os.path.join(basepath, "generated/{}/{}".format(data, model))

df_cmc_dcgan = evaluate_fake_images_vs_reals_gan(path_real, path_fake, model, resize_to=(64,64))
df_cmc_dcgan['model'] = model

# ---------------

model = 'stylegan'

df_cmc_stylegan = evaluate_fake_images_vs_reals_gan(path_real, path_fake, model)
df_cmc_stylegan['model'] = model

df_cmc_gan = pd.concat([df_cmc_dcgan, df_cmc_stylegan])
df_cmc_gan, _ = format_df(df_cmc_gan, metrics)

df_cmc_gan.to_csv(os.path.join(basepath, "csv_metrics_results/{}_results_gans.csv").format(data), index=False)
df_cmc_gan


Number of real images: 3
Number of fake images: 2


Computing SSIM & LPIPS for dcgan: 100%|██████████| 2/2 [00:00<00:00, 20.21it/s]

Computed average SSIM of 0.04631611704826355 and average LPIPS of 0.55335932970047
Now computing FID Score







100%|██████████| 1/1 [00:00<00:00,  3.10it/s]




100%|██████████| 1/1 [00:00<00:00,  7.42it/s]


FID = 432.87 ± 0.00
Number of real images: 3
Number of fake images: 2


Computing SSIM & LPIPS for stylegan: 100%|██████████| 2/2 [00:00<00:00, 13.84it/s]

Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Resizing generated image to input shape i.e. to torch.Size([1, 3, 512, 512])
Computed average SSIM of 0.061622584238648415 and average LPIPS of 1.0379655361175537
Now computing FID Score







100%|██████████| 1/1 [00:00<00:00,  4.66it/s]




100%|██████████| 1/1 [00:00<00:00,  7.31it/s]


FID = 432.87 ± 0.00
\begin{table}
\caption{Quantitative metrics (mean ± std) for image editing models.}
\label{tab:editing_metrics_pretty}
\begin{tabular}{lccc}
\toprule
 & Model & ssim & lpips & fid \\
\midrule
0 & dcgan & 0.046 ± 0.001 & 0.553 ± 0.005 & 432.867 ± 0.000 \\
0 & stylegan & 0.062 ± 0.002 & 1.038 ± 0.002 & 432.867 ± 0.000 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0,Model,ssim,lpips,fid
0,dcgan,0.046 ± 0.001,0.553 ± 0.005,432.867 ± 0.000
0,stylegan,0.062 ± 0.002,1.038 ± 0.002,432.867 ± 0.000
