In [None]:
# Run with GPU

# load and define globals

import torch
from IPython.display import clear_output
import torchvision
import cv2
import numpy as np
import pandas as pd
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import os

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_size = 2**10 # 2**10

image_size = (100, 100, 3) # 100, 100, 3
encoding_size = 16 # 64

def to_img(x):
    return np.moveaxis(x.numpy() * 255, 0, -1).astype(np.uint8)

# Function to get the length-to-width ratio of the largest contour in an image
def getLWR(img):
    # Convert image to grayscale if it has more than one channel
    if img.shape[2] > 1:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Add a constant border to the image
    img = cv2.copyMakeBorder(img, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=0)
    # Apply binary thresholding
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Invert image if the background is white
    if img[0, 0] == 255:
        img = cv2.bitwise_not(img)
    # Find contours in the image
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Get the largest contour by area
    contour = max(contours, key=cv2.contourArea)
    # Get dimensions of the minimum area rectangle enclosing the contour
    w, h = cv2.minAreaRect(contour)[1]
    # Return the length-to-width ratio
    return max(w, h) / min(w, h)

def getShapeIndex(img):
      # Convert image to grayscale if it has more than one channel
    if img.shape[2] > 1:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Add a constant border to the image
    img = cv2.copyMakeBorder(img, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=0)
    # Apply binary thresholding
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Invert image if the background is white
    if img[0, 0] == 255:
        img = cv2.bitwise_not(img)
    # Find contours in the image
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Get the largest contour by area
    contour = max(contours, key=cv2.contourArea)
    # Initialize variables to store the max width and corresponding height
    max_width = 0
    max_height = 0
    point1 = None
    point2 = None
    # Iterate through each pair of points in the contour
    for i in range(len(contour)):
        for j in range(i + 1, len(contour)):
            # Calculate the distance between the two points in both x and y directions
            dx = contour[j][0][0] - contour[i][0][0]
            dy = contour[j][0][1] - contour[i][0][1]
            # If the distance in the x direction (width) is the largest we've seen
            width = abs(dx)
            if width > max_width:
                max_width = width
                max_height = abs(dy)  # The corresponding height (vertical distance)
                point1 = contour[i][0]
                point2 = contour[j][0]
    return max_height / max_width

def getMeanNonBlackColor(img):
  data = img.reshape(-1, img.shape[-1])
  data = data[np.array([np.all(i != [0,0,0]) for i in data])]
  return np.mean(data, axis = 0)

def getRedness(img):
  # image should be rgb
  data = img.reshape(-1, img.shape[-1])
  data = data[np.array([np.all(i != [0,0,0]) for i in data])]
  # redness is distance from (160,20,20)
  distances = np.linalg.norm(data - [160,20,20], axis = 1)
  distances = 255 - np.mean(distances)
  return distances


img_transform = transforms.Compose([transforms.ToTensor()])

def loader(path):
    return img_transform(cv2.imread(path)[:,:,range(image_size[2])])

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(np.prod(image_size), 400)
        self.fc21 = nn.Linear(400, encoding_size)
        self.fc22 = nn.Linear(400, encoding_size)
        self.fc3 = nn.Linear(encoding_size, 400)
        self.fc4 = nn.Linear(400, np.prod(image_size))

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        if torch.cuda.is_available():
            eps = torch.cuda.FloatTensor(std.size()).normal_()
        else:
            eps = torch.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return F.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

reconstruction_function = nn.MSELoss(reduction='sum')

def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: generating images
    x: origin images
    mu: latent mean
    logvar: latent log variance
    """
    BCE = reconstruction_function(recon_x, x)  # loss
    # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    # KL divergence
    return BCE + KLD

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)
os.chdir('/content/drive/Shareddrives/Strawberries/Image experiment/')

Mounted at /content/drive


In [None]:
for random_seed in range(1,51):

  # load data, create train/test sets, and initialize model
  images = torch.load('fullImages.pt')

  trainKey = np.genfromtxt(str(random_seed) + '_trainKey.csv', delimiter=',', skip_header=0)
  testKey = np.genfromtxt(str(random_seed) + '_testKey.csv', delimiter=',', skip_header=0)
  trainKey = trainKey >= 0
  testKey = testKey >= 0

  trainImages, testImages = images[trainKey], images[testKey]

  trainDataloader = DataLoader(TensorDataset(trainImages), batch_size=batch_size, shuffle=True)
  testDataloader = DataLoader(TensorDataset(testImages), batch_size=batch_size, shuffle=True)

  # load VAE
  model = VAE()
  model.load_state_dict(torch.load(str(random_seed) + "_vae_" + str(encoding_size) + ".pth"))
  model.to(device)

  # check accuracy of the VAE
  pixelMSEs = []
  knownLWRs = []
  predictedLWRs = []
  predictedShapeIndices = []
  knownBs, knownGs, knownRs = [], [], []
  predictedBs, predictedGs, predictedRs = [], [], []
  knownRednesses = []
  predictedRednesses = []
  for batch_idx, data in enumerate(testDataloader):
      data = data[0].to(device)  # Move data to the GPU
      for datum in data:
          original_image = datum.to(device)  # Ensure individual image tensor is on GPU
          knownLWR = getLWR(to_img(original_image.detach().cpu()))  # Move back to CPU for processing
          knownLWRs.append(knownLWR)
          knownR, knownG, knownB = getMeanNonBlackColor(to_img(original_image.detach().cpu()))
          knownBs.append(knownB)
          knownGs.append(knownG)
          knownRs.append(knownR)
          knownRedness = getRedness(to_img(original_image.detach().cpu()))
          knownRednesses.append(knownRedness)

          decoded_image = original_image.view(1, -1).to(device)  # Ensure image is on GPU and properly reshaped
          mu, logvar = model.encode(decoded_image)
          encoded = model.reparametrize(mu, logvar)
          decoded_image = model.decode(encoded)
          decoded_image = decoded_image.view(1, -1)
          decoded_image = to_img(decoded_image.detach().cpu().view(image_size[::-1]))  # Move back to CPU for processing
          # plt.figure()
          # plt.imshow(to_img(original_image.detach().cpu()) - decoded_image)
          # plt.pause(0.0001)
          mse = np.mean(((to_img(original_image.detach().cpu()) - decoded_image) ** 2).flatten())
          pixelMSEs.append(mse.item())
          predictedLWR = getLWR(decoded_image)
          predictedLWRs.append(predictedLWR)
          predictedR, predictedG, predictedB = getMeanNonBlackColor(decoded_image)
          predictedBs.append(predictedB)
          predictedGs.append(predictedG)
          predictedRs.append(predictedR)
          predictedRedness = getRedness(decoded_image)
          predictedRednesses.append(predictedRedness)

  # Calculate correlation coefficient
  print("random seed: " + str(random_seed))
  print("encoding size: " + str(encoding_size))
  print(np.mean(pixelMSEs))
  print(np.corrcoef(knownLWRs, predictedLWRs)[0,1] ** 2)
  print(np.corrcoef(knownBs, predictedBs)[0,1] ** 2)
  print(np.corrcoef(knownGs, predictedGs)[0,1] ** 2)
  print(np.corrcoef(knownRs, predictedRs)[0,1] ** 2)
  print(np.corrcoef(knownRednesses, predictedRednesses)[0,1] ** 2)

  eps = torch.cuda.FloatTensor(std.size()).normal_()


random seed: 1
encoding size: 16
68.20786732401099
0.7987885032783987
0.7433872346421094
0.8389374287408314
0.6685508609051714
0.8459723002323091
random seed: 2
encoding size: 16
68.07116240019067
0.7879076897527151
0.76340564165297
0.8511800709011875
0.6659714453586325
0.8667416172585574
random seed: 3
encoding size: 16
68.23219895361004
0.7988393716942095
0.7639686352002462
0.8567061468315095
0.6853684149242617
0.8628296992005843
random seed: 4
encoding size: 16
68.28667181409295
0.7790289896937266
0.7744474547699409
0.860295467079498
0.6583207020928572
0.8589247667108355
random seed: 5
encoding size: 16
67.88046520213024
0.8016135751318062
0.7455114426531726
0.8504144759755663
0.6965771693275982
0.853850344333122
random seed: 6
encoding size: 16
68.1753867147147
0.7984594465970899
0.733686247396718
0.8535244627304944
0.7051689548076516
0.8578119404315041
random seed: 7
encoding size: 16
68.5781310522443
0.795383587961182
0.7572231585947835
0.8575091275860504
0.6784982671293217
0.858