## Test for "Generate 50 design solutions..." vs. "Generate 25 design solutions..." vs. "Generate 5 design solutions..."

This was all done for the prompt sequence of:
"Generate [X number] of design solutions for lightweight exercise device that can be used while traveling."

Note, generate 50... just generated 50 design solutions directly.

Generate 25 design solutions --> Generate 25 more design solutions for a total of 50 design solutions.

Generate 5.. just followed our methodology in the paper.

In [1]:
from sentence_transformers import SentenceTransformer
import pandas as pd
from torch.utils.tensorboard import SummaryWriter
import torch
import numpy as np
import json
from scipy.spatial import ConvexHull
from sklearn.decomposition import PCA
import tensorflow as tf

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = SentenceTransformer('all-MiniLM-L6-v2')

In [3]:
def convexhull(x, n_components):
    pca = PCA(n_components = n_components)
    reduced_data = pca.fit_transform(x)
    explained_ratio = pca.explained_variance_ratio_
    hull = ConvexHull(reduced_data)
    volume = hull.volume
    return volume, explained_ratio

In [4]:
def percent_change(values):
    lst = []
    for i in range(len(values)):
        percent_change = ((values[i] - values[len(values)-1])/abs(values[len(values)-1]))*100
        lst.append(round(percent_change))
    return lst

In [5]:
def DPP_diversity(x, lambda0=0):
    x = tf.convert_to_tensor(x, dtype='float32')
    
    # Normalize the rows of x to have unit norm, which is required for cosine similarity
    x_normalized = tf.linalg.normalize(x, axis=1)[0]
    
    # Compute the cosine similarity matrix
    S = tf.matmul(x_normalized, tf.transpose(x_normalized))
    
    # Transform cosine similarity values to be non-negative
    S_non_negative = (S + 1.0) / 2.0
    
    # Create a vector of ones with the same length as the number of points in x
    y = tf.ones(np.shape(x)[0])
    
    # Compute the outer product of y with itself, resulting in a matrix of Q where all elements are equal to 1
    Q = tf.tensordot(tf.expand_dims(y, 1), tf.expand_dims(y, 0), 1)
    if lambda0 == 0:
        L = S_non_negative
    else:
        L= S*tf.math.pow(Q, lambda0)
    # Compute the eigenvalues of L
    eig_val, _  = tf.linalg.eigh(L)
    # compute the log-determinant of L using the eigenvalues
    log_det_L = -tf.reduce_mean(tf.math.log(tf.math.maximum(eig_val, 1e-7)))
    return log_det_L.numpy()

In [6]:
def distance_to_centroid(embeddings):
    distances = []
    for i in range(embeddings.shape[0]):
        pca = PCA(n_components = 20)
        embeddings = pca.fit_transform(embeddings)
        mean = np.mean(embeddings[i])
        dist = np.sqrt(np.sum(np.square(np.subtract(embeddings[i], mean))))
        distances.append(dist)
    return np.mean(np.array(distances))

In [7]:
def L2_vectorized(X, Y):
    #Vectorize L2 calculation using x^2+y^2-2xy
    X_sq = np.sum(np.square(X), axis=1)
    Y_sq = np.sum(np.square(Y), axis=1)
    sq = np.add(np.expand_dims(X_sq, axis=-1), np.transpose(Y_sq)) - 2*np.matmul(X,np.transpose(Y))
    sq = np.clip(sq, 0.0, 1e12)
    return np.sqrt(sq)

In [8]:
def calc_distance(X, Y, distance="Euclidean"):
    if distance=="Euclidean":
        return L2_vectorized(X,Y)
    else:
        raise Exception("Unknown distance metric specified")

In [9]:
def gen_gen_distance(embeddings, reduction):
    x = embeddings
    res = calc_distance(x, x, distance = "Euclidean")
    # this sets the diagonal of the matrix to the maximum of elements across the column dimension (axis = 1)
    res = tf.linalg.set_diag(res, tf.reduce_max(res, axis=1))
    # pick the smallest values along the columns
    if reduction == "min":
        scores = tf.reduce_min(res, axis=1)
    # pick the average value along the columns
    elif reduction == "ave":
        scores = tf.reduce_mean(res, axis=1)
    else:
        raise Exception("Unknown reduction method")
    return np.mean(scores.numpy())

In [10]:
# List of CSV files
csv_files = [
    'data/reviewer_test_case.csv'
]

In [11]:
dict_1_DPP = {}
dict_1_convex = {}
dict_1_centroid = {}
dict_1_nearest = {}
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    count = 0
    for column in df.columns:
        # encode the column text data into embeddings
        embeddings = model.encode(df[column].astype(str).tolist())
        # calculate the DPP
        dict_1_DPP[(csv_file, count)] = DPP_diversity(embeddings, lambda0=0)
        # calculate the convex hull
        dict_1_convex[(csv_file, count)] = convexhull(embeddings, n_components = 13)
        # calculate the distance to centroid
        dict_1_centroid[(csv_file, count)] = distance_to_centroid(embeddings)
        # calculate the nearest generated distance (average)
        dict_1_nearest[(csv_file, count)] = gen_gen_distance(embeddings, reduction = "ave")
        count += 1

In [12]:
# DPP percent difference calculation
percent_diff = {}
for key, value in dict_1_DPP.items():
    csv_file = key[0]
    if csv_file not in percent_diff:
        percent_diff[csv_file] = []
    percent_diff[csv_file].append(value)

print("Note order goes (50-design/25-design/5-design)" )
print("")
for csv_file, values in percent_diff.items():
    percent_changes = percent_change(values)
    print(csv_file)
    print(f"Percent difference for {csv_file}:{percent_changes}")

Note order goes (50-design/25-design/5-design)

data/reviewer_test_case.csv
Percent difference for data/reviewer_test_case.csv:[3, -3, 0]


In [13]:
# Nearest Generated Difference percent difference calculation
percent_diff = {}
for key, value in dict_1_nearest.items():
    csv_file = key[0]
    if csv_file not in percent_diff:
        percent_diff[csv_file] = []
    percent_diff[csv_file].append(value)

print("Note order goes (50-design/25-design/5-design)")
print("")
for csv_file, values in percent_diff.items():
    percent_changes = percent_change(values)
    print(f"Percent difference for {csv_file}:{percent_changes}")

Note order goes (50-design/25-design/5-design)

Percent difference for data/reviewer_test_case.csv:[-3, 1, 0]


In [14]:
# Nearest Generated Difference percent difference calculation
percent_diff = {}
for key, value in dict_1_centroid.items():
    csv_file = key[0]
    if csv_file not in percent_diff:
        percent_diff[csv_file] = []
    percent_diff[csv_file].append(value)

print("Note order goes (50-design/25-design/5-design)")
print("")
for csv_file, values in percent_diff.items():
    percent_changes = percent_change(values)
    print(f"Percent difference for {csv_file}:{percent_changes}")

Note order goes (50-design/25-design/5-design)

Percent difference for data/reviewer_test_case.csv:[-4, 0, 0]


In [20]:
# Nearest Generated Difference percent difference calculation
percent_diff = {}
for key, value in dict_1_convex.items():
    csv_file = key[0]
    value1 = value[0]
    if csv_file not in percent_diff:
        percent_diff[csv_file] = []
    percent_diff[csv_file].append(value1)

print("Note order goes (50-design/25-design/5-design)")
print("")
for csv_file, values in percent_diff.items():
    percent_changes = percent_change(values)
    print(f"Percent difference for {csv_file}:{percent_changes}")

Note order goes (50-design/25-design/5-design)

Percent difference for data/reviewer_test_case.csv:[-30, -16, 0]
