# Create other 'everyday' NFTs

# Prepare everything

In [None]:
import os
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
import imageio
import cv2
from skimage.transform import resize, rescale
from tqdm.notebook import tqdm
import tensorflow as tf

%matplotlib inline

# 1. Prepare images

The aligned photos are all in 4k resolution. That is too much detail for a 'quick' data exploration. Therefore, this step tries to find a reasonable downsampling resolution.

In [None]:
# Collect all images
imgs = sorted(glob("img_small/20*"))

print(f"We found {len(imgs)} images.")

In [None]:
# First and last photo
imgs[0], imgs[-1]

In [None]:
# Months total
n_months = 21*12+4
n_months

In [None]:
# Plot the last of these images
last_img = imageio.imread(imgs[-1])
plt.imshow(last_img)
print(
    f"Each image has the shape {last_img.shape}, where the last dimension are the 3 RGB color channels."
)

Machine learning models love a lot of data, but images in 4k resolution might nonetheless push it a bit. So let's resize this to something a bit more compact and let's make the image shape squared. This is not really required, just a data science habit.

In [None]:
# Get image height and width
height, width = last_img.shape[:2]

# Crop image to a squared shape, centered in the middle
offset = int((width - height) / 3.5)
img_squared = last_img[:, offset:-offset, :]

# Plot the cropped image version
plt.imshow(img_squared);

In [None]:
# Let's explore different resizing shapes
resolutions = [1/i for i in range(1, 5)]

fig, axes = plt.subplots(1, len(resolutions), figsize=(3 * len(resolutions), 4))
for i, res in enumerate(resolutions):
    new_img = rescale(img_squared, res, anti_aliasing=True, multichannel=True)
    axes[i].set_title(f"Resolution: {np.round(res, 3)} {new_img.shape}")
    axes[i].imshow(new_img, interpolation='nearest')
    axes[i].axis("off")
plt.tight_layout()

64 x 64 pixels is definitely too small. Let's go with 256 for now. As the following image shows, the resolution is good enough to see some smallish details, but also small enough to keep the data dimension (256 * 256 * 3 = 196,608 pixel values) to something almost 'manageable'.

In [None]:
# Preview the cropped photo at the chosen scale factor
res = 1/3.
preview = rescale(img_squared, res, anti_aliasing=True, multichannel=True)
plt.figure(figsize=(6, 6))
plt.imshow(preview);

Now that we have explored all this, let's go ahead and prepare the whole dataset. We will take the aligned 4k photos, crop the images to a square and downsize them to 256 x 256 pixels.

In [None]:
# Create output folder for AI prepared images (no error if it already exists)
out_dir = "img_AI_NFT"
os.makedirs(out_dir, exist_ok=True)

# Define image resolution
res = 1/3.

# Prepare images and store them in new folder
for i, in_file in enumerate(tqdm(imgs)):

    # Output files are numbered from 1, following the order of `imgs`
    out_file = os.path.join(out_dir, f"img_{i+1:04d}.png")

    # Skip images that were already converted in an earlier run
    if os.path.exists(out_file):
        continue

    # Trim `offset` columns from both sides. The end column is spelled out
    # because `offset:-offset` would yield an empty image for offset == 0.
    photo = imageio.imread(in_file)
    cropped = photo[:, offset:photo.shape[1] - offset, :]

    # Downscale, then multiply by 255 and cast so the PNG is stored as uint8
    img = (
        255 * rescale(cropped, res, anti_aliasing=True, multichannel=True)
    ).astype("uint8")

    # Store transformed file
    imageio.imwrite(out_file, img)

# 2. Load dataset (and have some fun)

Now that the data is prepared and stored on the hard drive, let's load all of it into memory and have some fun with some data exploration :-)

In [None]:
# Read every prepared PNG (sorted by file name) into one array
png_files = sorted(glob(os.path.join(out_dir, "*png")))
data = np.array([imageio.imread(png) for png in tqdm(png_files)])

## 2.1. Yearly average collage

In [None]:
x, y = (3, 7)
grid_points = np.prod((x, y))
imgs_split = np.array_split(data, grid_points)
print(len(imgs_split))
imgs_averages = np.array([im.mean(0).astype("int") for im in tqdm(imgs_split)])

In [None]:
mosaic = np.concatenate([np.concatenate(imgs_averages[i*x:(i+1)*x], axis=1)
                         for i in range(y)], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Plot a grid of average images
figsize = 45
plt.figure(figsize=(figsize/np.divide(*mosaic.shape[:2]), figsize))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_yearly_vertical.png')

In [None]:
x, y = (7, 3)
grid_points = np.prod((x, y))
imgs_split = np.array_split(data, grid_points)
print(len(imgs_split))
imgs_averages = np.array([im.mean(0).astype("int") for im in tqdm(imgs_split)])

In [None]:
mosaic = np.concatenate([np.concatenate(imgs_averages[i*x:(i+1)*x], axis=1)
                         for i in range(y)], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Plot a grid of average images
figsize = 45
plt.figure(figsize=(figsize, figsize/np.divide(mosaic.shape[1], mosaic.shape[0])))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_yearly_horizontal.png')

## 2.1. Monthly average collage

In [None]:
x, y = (16, 16)
grid_points = np.prod((x, y))
imgs_split = np.array_split(data, grid_points)
print(len(imgs_split))
imgs_averages = np.array([im.mean(0).astype("int") for im in tqdm(imgs_split)])

In [None]:
mosaic = np.concatenate([np.concatenate(imgs_averages[i*x:(i+1)*x], axis=1)
                         for i in range(y)], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Plot a grid of average images
figsize = 45
plt.figure(figsize=(figsize/np.divide(*mosaic.shape[:2]), figsize))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_monthly.png')

## 2.2. Monthly but squared images

In [None]:
x, y = (16, 16)
grid_points = np.prod((x, y))
imgs_split = np.array_split(data, grid_points)
print(len(imgs_split))
imgs_averages = np.array([im.mean(0).astype("int") for im in tqdm(imgs_split)])

In [None]:
min_offset = imgs_averages.shape[2] - imgs_averages.shape[1]
min_offset = min_offset // 2

In [None]:
mosaic = np.concatenate([np.concatenate(imgs_averages[i*x:(i+1)*x, :, min_offset:-min_offset, :], axis=1)
                         for i in range(y)], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Plot a grid of average images
figsize = 45
plt.figure(figsize=(figsize/np.divide(*mosaic.shape[:2]), figsize))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_monthly_squared.png')

## 2.3. Monthly average collage, row per year

In [None]:
x, y = (12, 21)
grid_points = np.prod((x, y))
imgs_split = np.array_split(data, grid_points)
print(len(imgs_split))
imgs_averages = np.array([im.mean(0).astype("int") for im in tqdm(imgs_split)])

In [None]:
mosaic = np.concatenate([np.concatenate(imgs_averages[i*x:(i+1)*x], axis=1)
                         for i in range(y)], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Plot a grid of average images
figsize = 45
plt.figure(figsize=(figsize/np.divide(*mosaic.shape[:2]), figsize))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_monthly_year_per_row.png')

## 2.4. Weekly average collage, row per year

In [None]:
x, y = (30, 37)
grid_points = np.prod((x, y))
imgs_split = np.array_split(data, grid_points)
print(len(imgs_split))
imgs_averages = np.array([im.mean(0).astype("int") for im in tqdm(imgs_split)])

In [None]:
mosaic = np.concatenate([np.concatenate(imgs_averages[i*x:(i+1)*x], axis=1)
                         for i in tqdm(range(y))], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Plot a grid of average images
figsize = 60
plt.figure(figsize=(figsize/np.divide(*mosaic.shape[:2]), figsize))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_week_30x37.png')

In [None]:
x, y = (33, 33)
grid_points = np.prod((x, y))
imgs_split = np.array_split(data, grid_points)
print(len(imgs_split))
imgs_averages = np.array([im.mean(0).astype("int") for im in tqdm(imgs_split)])

In [None]:
min_offset = imgs_averages.shape[2] - imgs_averages.shape[1]
min_offset = min_offset // 2

In [None]:
mosaic = np.concatenate([np.concatenate(imgs_averages[i*x:(i+1)*x, :, min_offset:-min_offset, :], axis=1)
                         for i in tqdm(range(y))], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Plot a grid of average images
figsize = 60
plt.figure(figsize=(figsize/np.divide(*mosaic.shape[:2]), figsize))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_week_33x33_squared.png')

## 2.5. 21 years in 21 seconds video - small

As a homage to my first tweet about this project, let's create a video of all 7777 aligned photos and loop through them in 21 seconds (i.e. 1 year per second). And as in the original post, let's average 60 images for each frame.

In [None]:
imgs = sorted(glob("img_small/20*"))

In [None]:
from skimage import io

# Save images to disk
out_dir = 'img_video_21s'
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

n_steps = 12
smooth = 60
imgs_average = []
for i in tqdm(range(len(imgs)//n_steps)):
    img_files = imgs[i*n_steps:i*n_steps+smooth]
    data_small = np.mean([imageio.imread(f) for f in img_files], axis=0)
            
    # Create out_filename
    out_filename = os.path.join(out_dir, '%04d.jpg' % (i + 1))
    
    # Save composition image
    io.imsave(out_filename, data_small.astype('uint8'))

In [None]:
# Stream all JPEG frames from img_video_21s into ffmpeg (read from the pipe as
# MJPEG at 30 frames per second) and encode them with libx264 into
# video_21s_small.mp4
!cat img_video_21s/*jpg | ffmpeg -f image2pipe -r 30 -vcodec mjpeg -i - -vcodec libx264 video_21s_small.mp4

# 3. Dimensionality reduction

To better understand how the images relate to each other, let's look at two ways of projecting the images from a high-dimensional space (i.e. 256 x 256 pixels = 65536 dimensions), down to two dimensions. To do so, we will use principal component analysis (PCA) and UMAP.

In [None]:
out_dir = 'img_AI_NFT'
offset = 60
data_small = np.array(
    [imageio.imread(d)[::2, offset:-offset:2, :] / 255.0 for d in tqdm(sorted(glob(os.path.join(out_dir, "*png"))))]
)
data_small.shape

In [None]:
# Flatten color images
X_small = np.reshape(data_small, (len(data_small), -1))
X_small.shape

In [None]:
# Establish a supportive 'age' variable
age = np.linspace(0, 256, len(data_small), endpoint=False, dtype="int")
age

## 3.1. Principal Component Analysis (PCA)

In [None]:
# Specify how many PCA components should be kept
n_comp = 50

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Compute PCA components
pca_small = PCA(n_comp)
%time X_pca_small = pca_small.fit_transform(X_small)

In [None]:
# Show scree plot
plt.figure(figsize=(15, 4))
plt.plot(pca_small.explained_variance_ratio_.cumsum())
plt.show()

In [None]:
# How much variance is explained by first X components
pca_small.explained_variance_ratio_.sum()

In [None]:
# Plot first two PCA components with image index color coded
plt.figure(figsize=(10, 8))
plt.scatter(
    X_pca_small[:, 0],
    X_pca_small[:, 1],
    c=age,
    alpha=0.5,
    cmap="Spectral",
)
plt.colorbar();

## 3.2. UMAP

UMAP is another AI approach for projecting high-dimensional data down to just a few dimensions (here 2D). In contrast to PCA, where the dimensions are reduced along the directions of most explained variance, with UMAP the reduction is done in such a way that the points keep their relative distance to each other.

In other words, with UMAP, points in N-dimensional space that were far apart are still far apart, and points that were close are still close.

In [None]:
from umap import UMAP

### UMAP on color images

In [None]:
# Let's compute UMAP projection based on the PCA reduced features
umap_small = UMAP(n_neighbors=15, min_dist=1)
%time X_umap_small = umap_small.fit_transform(X_pca_small)

In [None]:
# Let's plot the UMAP projection
plt.figure(figsize=(12, 9))
plt.scatter(X_umap_small[:, 0], X_umap_small[:, 1], c=age, cmap="Spectral", s=10)
plt.colorbar()
#plt.yticks([])
#plt.xticks([]);

In [None]:
# Lay an evenly spaced x-by-y grid of target points over the UMAP plane.
# "ij" indexing keeps the first coordinate as the slowest-varying one.
x, y = 25, 25
grid_x, grid_y = np.meshgrid(
    np.linspace(-7, 15, x), np.linspace(-3, 15, y), indexing="ij"
)
grid = np.column_stack([grid_x.ravel(), grid_y.ravel()])

# Embedding coloured on a 0-21 scale, with the target grid drawn on top in black
umap_x, umap_y = X_umap_small[:, 0], X_umap_small[:, 1]
years = np.linspace(0, 21, len(data_small))
plt.figure(figsize=(12, 9))
plt.scatter(umap_x, umap_y, c=years, cmap="Spectral", s=10)
plt.colorbar()
plt.scatter(grid[:, 0], grid[:, 1], s=10, c="k")
plt.yticks([])
plt.xticks([]);

In [None]:
# Invert UMAP projection for target grid
%time inv_umap_small = umap_small.inverse_transform(grid)

In [None]:
# Reverse PCA dimensionality reduction
faces_small = np.array(
    [
        p.reshape(data_small.shape[1:]).clip(min=0, max=1)
        for p in tqdm(pca_small.inverse_transform(inv_umap_small))
    ]
)
faces_small.shape

In [None]:
mosaic = np.concatenate([np.concatenate(255*faces_small[i*x:(i+1)*x], axis=1)
                         for i in range(y)], axis=0).astype("uint8")
mosaic.shape

In [None]:
# Draw the reconstructed mosaic: the figure is `figsize` inches tall and its
# width follows the mosaic's aspect ratio
figsize = 45
mosaic_h, mosaic_w = mosaic.shape[:2]
plt.figure(figsize=(figsize / (mosaic_h / mosaic_w), figsize))
plt.imshow(mosaic)
plt.axis("off")
plt.tight_layout()
plt.savefig('noah_mosaic_multiverse_25x25.png')