In [1]:
import os
import pickle

import numpy as np
import PIL
# "import PIL" alone does not load the PIL.Image submodule used by the cells below.
import PIL.Image
import torch
from ipywidgets import widgets

from improved_aesthetic_predictor import aesthetic_model

  warn(


In [2]:
def cosine_similarity(a, b):
    """Return the cosine similarity between tensors ``a`` and ``b``.

    Thin wrapper that forwards both arguments to
    ``torch.nn.functional.cosine_similarity`` with its default settings.
    """
    functional = torch.nn.functional
    return functional.cosine_similarity(a, b)

def shrink_image(img):
    """Return the result of resizing ``img`` to 224x224 via ``img.resize``."""
    target_size = (224, 224)
    return img.resize(target_size)

def local_image_paths(root=".", extensions=(".jpg",)):
    """Yield the paths of image files found under ``root``, recursively.

    Args:
        root: Directory to walk. Defaults to the current working directory.
        extensions: Filename suffix, or iterable of suffixes, to accept. The
            comparison is case-sensitive. Defaults to ``(".jpg",)``.

    Yields:
        ``os.path.join(directory, filename)`` for every matching file.

    Directories and files are visited in sorted order so that repeated runs
    over the same tree yield the paths in the same sequence; ``os.walk`` on
    its own makes no ordering guarantee.
    """
    suffixes = (extensions,) if isinstance(extensions, str) else tuple(extensions)
    for current_dir, subdirs, filenames in os.walk(root):
        # Sorting in place steers the order in which os.walk descends.
        subdirs.sort()
        for filename in sorted(filenames):
            if filename.endswith(suffixes):
                yield os.path.join(current_dir, filename)
             

In [3]:
# Build the aesthetic predictor once; later cells call aes_model.infer() on it.
aes_model = aesthetic_model.AestheticModel()


In [27]:
def sample_inference() -> aesthetic_model.AestheticInferenceResult:
    """Run the aesthetic model on ``foobar.jpg`` as a smoke test.

    Returns:
        Whatever ``aes_model.infer`` returns for that image.
    """
    # The context manager closes the underlying file once inference is done,
    # instead of leaving the handle open until garbage collection.
    with PIL.Image.open("foobar.jpg") as img:
        return aes_model.infer(img)
    
# Smoke-test the model on one image and show its score and embedding shape.
inference_result = sample_inference()
print(inference_result.aesthetic_score, inference_result.embedding.shape)

[[5.1040144]] (1, 768)


In [8]:
# Materialise the generator into a list; the processing cell below iterates over it.
image_paths = list(local_image_paths())

In [None]:
# Process all of the local images by computing their clip embeddings and aesthetic scores.
# Write the outputs to a bunch of pickle files with the given BATCH_SIZE.

# Accumulates image path -> inference result until a full batch is flushed to disk.
filename_to_embedding = {}
# Number of inference results written to each pickle file.
BATCH_SIZE = 128
# Index of the next batch file to write; it becomes part of the output filename.
batch_no = 0
def flush_batch(local_batch_no, embeddings):
    """Pickle ``embeddings`` to a numbered batch file, then empty the dict.

    Args:
        local_batch_no: Batch index embedded in the output filename.
        embeddings: Mapping to serialise. It is cleared in place after the
            write so the caller can keep filling the same object.
    """
    fn = f"aes_embeddings_scores_{local_batch_no}.pkl"
    print(f"writing {len(embeddings)} outputs to {fn}")
    # Close the file deterministically so the data is flushed to disk before
    # the batch is discarded from memory.
    with open(fn, "wb") as batch_file:
        pickle.dump(embeddings, batch_file)
    embeddings.clear()


for image_path in image_paths:
    # Close each image file as soon as its inference is done so a long run
    # does not accumulate open file handles.
    with PIL.Image.open(image_path) as image:
        aes_inference = aes_model.infer(image)

    filename_to_embedding[image_path] = aes_inference
    if len(filename_to_embedding) == BATCH_SIZE:
        flush_batch(batch_no, filename_to_embedding)
        batch_no += 1

# Write whatever is left over when the image count is not a multiple of BATCH_SIZE.
if len(filename_to_embedding) > 0:
    flush_batch(batch_no, filename_to_embedding)


In [10]:
def load_aes_embeddings_scores_pickles():
    """Load every ``aes_*.pkl`` file in the working directory into one dict.

    Returns:
        A dict formed by merging the dict stored in each pickle file. Files
        are merged in sorted filename order, so if a key occurs in several
        files the value from the file that sorts last wins.
    """
    embeddings_dict = {}

    # os.listdir() returns names in arbitrary order; sort them so the merged
    # dict has the same insertion order on every run.
    embeddings_files = sorted(
        fn for fn in os.listdir() if fn.startswith("aes_") and fn.endswith(".pkl")
    )

    # Load the embeddings from each file and add them to the dictionary
    for embeddings_file in embeddings_files:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were produced by this notebook.
        with open(embeddings_file, "rb") as pickle_file:
            embeddings = pickle.load(pickle_file)
        embeddings_dict.update(embeddings)
    return embeddings_dict
# Merged contents of the batch pickle files (image path -> inference result, as
# written by the batch-processing cell).
embeddings_scores = load_aes_embeddings_scores_pickles()


In [51]:
def max_correlation(needle, haystack):
    """Return the largest dot product between ``needle`` and the rows of ``haystack``.

    Args:
        needle: Vector to compare against every row.
        haystack: Array whose rows are the candidate vectors.

    Returns:
        The maximum of ``np.dot(haystack, needle)``. Only the value is
        returned, not the matching row or its index.

    Note:
        A dot product equals cosine similarity only when the vectors are
        unit-normalised; this function does not normalise its inputs.
    """
    # Dot product of the needle with each row vector in the haystack
    correlations = np.dot(haystack, needle)

    # np.max gives the same value as indexing with argmax, without the extra
    # lookup (which mis-indexes when the result is not one-dimensional).
    return np.max(correlations)


In [21]:
import heapq

# Candidate (path, inference) pairs. No filter is applied at the moment; filters
# tried previously were excluding paths containing '202305' and dropping images
# whose max_correlation against accepted_images_embeddings was >= .95.
restricted_images_set = list(embeddings_scores.items())

# Keep the 1500 candidates with the highest aesthetic score, best first.
top_images_scores = heapq.nlargest(
    1500,
    restricted_images_set,
    key=lambda path_and_inference: path_and_inference[1].aesthetic_score,
)

In [13]:
import matplotlib.pyplot as plt

def render_images(images, rows=4, cols=4):
    """Show ``images`` in a ``rows`` x ``cols`` grid of matplotlib axes.

    Args:
        images: Iterable of PIL images. Each drawn image is shrunk **in
            place** by ``Image.thumbnail`` to fit within 400x400 pixels.
        rows: Number of grid rows (default 4).
        cols: Number of grid columns (default 4).

    At most ``rows * cols`` images are drawn; any further images are ignored
    rather than indexing past the grid. Cells without an image are left blank.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single row or column.
    fig, axes = plt.subplots(rows, cols, figsize=(12, 12), squeeze=False)
    cells = list(axes.flat)

    # Hide ticks and frames everywhere up front so unused cells stay empty.
    for ax in cells:
        ax.axis('off')

    # axes.flat walks the grid row by row, i.e. image i lands in cell
    # (i // cols, i % cols); zip stops at whichever sequence runs out first.
    for ax, image in zip(cells, images):
        # Shrink the image while preserving the aspect ratio
        image.thumbnail((400, 400))
        ax.imshow(image)

    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.05, hspace=0.05)

    # Show the figure
    plt.show()


In [14]:
import numpy as np
from sklearn.metrics.pairwise import cosine_distances

def diversity_score(M):
    """Sum, over the rows of ``M``, of the cosine distance to the nearest other row.

    Args:
        M: 2-D array-like with one vector per row.

    Returns:
        The sum of each row's minimum cosine distance to any other row.
        Returns 0.0 when ``M`` has fewer than two rows, because no row then
        has a neighbour to be compared with.
    """
    # With a single row the only distance is the masked self-distance, which
    # would make the score infinite; with no rows there is nothing to score.
    if np.shape(M)[0] < 2:
        return 0.0

    # Calculate the cosine distances between all pairs of vectors in M
    distances = cosine_distances(M)

    # Set the diagonal elements to infinity to exclude self-distances
    np.fill_diagonal(distances, np.inf)

    # Minimum distance for each vector, summed into a single score
    return np.sum(np.min(distances, axis=1))

# Test case: three mutually orthogonal unit vectors plus one vector halfway
# between the last two. The first vector's nearest neighbour is at distance 1;
# each of the other three has a nearest neighbour at 1 - 1/sqrt(2), so the
# expected score is 1 + 3 * (1 - 1/sqrt(2)) ~= 1.8787.
M = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, .5, .5]])
result = diversity_score(M)
print(result)


1.8786796564403576


In [None]:
def collage_quality(choosen_indexes, aes_inference_scores, diversity_over_quality_weight):
    """Score a candidate collage as a weighted blend of diversity and quality.

    Args:
        choosen_indexes: Positions in ``aes_inference_scores`` of the images
            that make up the collage.
        aes_inference_scores: Sequence of inference results, each exposing
            ``embedding`` and ``aesthetic_score``.
        diversity_over_quality_weight: Weight ``w`` applied to the diversity
            term; the quality term is weighted by ``1 - w``.

    Returns:
        ``diversity * w + quality * (1 - w)``, where diversity is
        ``diversity_score`` of the selected embeddings and quality is the sum
        of the selected aesthetic scores.
    """
    weight = diversity_over_quality_weight
    selected = [aes_inference_scores[index] for index in choosen_indexes]

    embedding_matrix = np.array([inference.embedding.squeeze() for inference in selected])
    diversity = diversity_score(embedding_matrix)

    # Accumulate from 0 with plain addition, exactly as the built-in sum() does.
    quality = 0
    for inference in selected:
        quality = quality + inference.aesthetic_score

    return diversity * weight + quality * (1 - weight)

def find_best_collage(aes_inference_scores, num_images=16, diversity_over_quality_weight=.9):
    """Greedy local search for the image subset with the highest ``collage_quality``.

    Starts from the first ``num_images`` entries and repeatedly tries to
    replace one slot with a candidate that is not yet in the collage, keeping
    any swap that raises the score. The search stops once a full sweep over
    all candidates produces no improvement.

    Args:
        aes_inference_scores: Sequence of inference results to choose from.
        num_images: Number of images in the collage.
        diversity_over_quality_weight: Forwarded to ``collage_quality``.

    Returns:
        List of ``num_images`` indexes into ``aes_inference_scores``. The
        indexes are distinct: a candidate is only ever swapped in when it is
        not already part of the collage.
    """
    best_indexes = list(range(0, num_images))
    best_score = collage_quality(best_indexes, aes_inference_scores, diversity_over_quality_weight)

    num_changes = 0
    changed = True
    while changed:
        changed = False

        for i in range(len(aes_inference_scores)):
            # Skip images already in the collage: re-inserting one is either a
            # no-op (same slot) or shows the same picture twice (other slot).
            if i in best_indexes:
                continue

            for j in range(num_images):
                this_collage = best_indexes.copy()
                this_collage[j] = i

                score = collage_quality(this_collage, aes_inference_scores, diversity_over_quality_weight)
                if score > best_score:
                    best_score = score
                    best_indexes = this_collage
                    changed = True
                    num_changes += 1
                    print(f"new best score {best_score} after {num_changes} changes")
                    # Image i now occupies slot j; trying further slots would
                    # only duplicate it, so move on to the next candidate.
                    break

    return best_indexes

# Search the top-scoring images for a 16-image collage with a diversity weight of .7.
best_indexes = find_best_collage([a[1] for a in top_images_scores], 16, .7)


In [None]:
# Use a dedicated name so the `image_paths` list consumed by the embedding
# cell is not overwritten with just the collage images; re-running that cell
# would otherwise process only these files and rewrite the first batch file.
collage_image_paths = [top_images_scores[i][0] for i in best_indexes]
collage = [PIL.Image.open(path) for path in collage_image_paths]
render_images(collage)