In [2]:
from ImgPrep import file_names, read_img, reshape_array, split_img
from ImgCluster import process_all_pics, cluster_pixels, cluster_pieces
from Funcs4Testing import display_cluster_imgs, display_img, img_scatter
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
import joblib
import numpy as np

# list of files to read in
files = file_names('puzzle_scans/puzzle_1')
pictures = []      # every puzzle piece from every scan, in read order
pic_avg_rgb = []   # one (pieces, n**2, 3) array per scan, stacked after the loop
pic_arrays = []    # flattened pieces; appended to in the Method 2 cell

# each piece is split into an n by n grid of parts for Method 1;
# defined before the loop because the final assertion reads it as well
n = 3

for picture in files:

    # store picture as array
    pic = read_img(picture)

    # split each image into 20 pieces
    pieces = split_img(pic, 5, 4)
    pictures += pieces
    # a set of shapes holds a single entry only when every piece has the same shape
    # (all() over the shapes themselves was always true: a non-empty tuple is truthy)
    assert len({piece.shape for piece in pieces}) <= 1, 'All arrays are not the same size'

    ########## Method 1: Find Avg RGB values of each sector of puzzle piece
    #----------------------------------------------------------------------
    # split each piece into n by n parts
    chopped = [split_img(piece, n, n) for piece in pieces]
    assert len({part.shape for piece in chopped for part in piece}) <= 1, \
        'All parts of all pieces are not the same size'

    # reshape arrays and find average RGB value of each part of each piece
    avg_rgb = np.zeros((len(chopped), n**2, 3))
    for i, piece in enumerate(chopped):
        for ii, part in enumerate(piece):
            # presumably reshape_array flattens a part to (pixels, 3) - TODO confirm
            flat_part = reshape_array(part, starting_dim = len(part.shape))
            chopped[i][ii] = flat_part
            avg_rgb[i, ii, :] = np.mean(flat_part, axis = 0)

    pic_avg_rgb.append(avg_rgb)

# combine all pictures into one large array for clustering
pic_avg_rgb = np.vstack(pic_avg_rgb)
assert pic_avg_rgb.shape == (300, n**2, 3), \
f'Output array should be size (300,{n**2},3) but is actually size {pic_avg_rgb.shape}'

Best parameters for KMeans model: {'algorithm': 'elkan', 'n_clusters': 14}


In [None]:
########## Method 2: Find RGB values of cluster centers for each puzzle piece
#----------------------------------------------------------------------
# reshape arrays for clustering and find average RGB of total puzzle piece
# rebuild pic_arrays from scratch so re-running this cell does not append duplicates
pic_arrays = []
# the shape has to be one tuple: np.zeros(len(pictures), 3) passes 3 as the dtype and raises
total_avg_rgb = np.zeros((len(pictures), 3))
for indx, piece in enumerate(pictures):
    flat_pic = reshape_array(piece, starting_dim = len(piece.shape))
    total_avg_rgb[indx, :] = np.mean(flat_pic, axis = 0)
    pic_arrays.append(flat_pic)

# filter out scanner background
# NOTE: joblib.load unpickles the file, so only load caches that were generated locally
try:
    filtered_pixels = joblib.load('pickle_files/filtered_pixels.pkl')
except FileNotFoundError:
    # cache missing: recompute instead of failing (the result is not written back)
    filtered_pixels = process_all_pics(pic_arrays)

# find n clusters in each puzzle piece and flatten
try:
    pic_centers = joblib.load('pickle_files/pic_centers.pkl')
except FileNotFoundError:
    # cache missing: recompute instead of failing (the result is not written back)
    pic_centers = cluster_pixels(pixels = filtered_pixels, n_clusters = 3)

# one row per piece, holding that piece's cluster centers laid out as a flat vector
# presumably reshape_array turns each 2-D centers array into 1-D here - TODO confirm
centers_flattened = np.zeros((len(pic_centers), pic_centers[0].shape[0] * pic_centers[0].shape[1]))
for indx, pic in enumerate(pic_centers):
    centers_flattened[indx, :] = reshape_array(pic, starting_dim = len(pic.shape))

# bind features together for final clustering of pieces
feature_set = np.concatenate((total_avg_rgb, centers_flattened), axis = 1)

# embed the final feature set in 2 dimensions with Isomap
# (iso_model holds the embedded coordinates returned by fit_transform, not a fitted estimator)
iso_model = Isomap(n_components = 2).fit_transform(feature_set)

# visual check of kmeans clusters
# NOTE(review): km_model is not assigned in any cell shown here, so this line depends on
# kernel state left over from elsewhere - define it before this call or expect a NameError
display_cluster_imgs(pictures, km_model.labels_, 1)

# visual check of isomap embedding
img_scatter(iso_model, pictures, 25)

In [None]:
########## Method 3: Use PCA to shrink pieces to 2 dimensional array and perform ISOMAP clustering
#----------------------------------------------------------------------
# compress images with PCA
# NOTE: joblib.load unpickles the file, so only load caches that were generated locally
try:
    pics_compressed = joblib.load('pickle_files/pics_compressed.pkl')
except FileNotFoundError:
    # cache missing: recompute instead of failing (the result is not written back)
    # each flattened piece is projected onto its first principal component, giving one
    # value per pixel row; this needs every entry of pic_arrays to have the same length
    pics_compressed = np.zeros((len(pic_arrays), pic_arrays[0].shape[0]))
    for indx, pic in enumerate(pic_arrays):
        pca = PCA(n_components = 1)
        components = pca.fit(pic).components_
        pics_compressed[indx, :] = pic.dot(components.T).reshape(pic.shape[0])

# embed the compressed images in 2 dimensions with Isomap
try:
    iso_pics = joblib.load('pickle_files/iso_pics.pkl')
except FileNotFoundError:
    # cache missing: recompute instead of failing (the result is not written back)
    iso_pics = Isomap(n_components = 2).fit_transform(pics_compressed)

# visual check of isomap embedding
img_scatter(iso_pics, pictures, 25)