In [None]:
#load with pandas, manipulate with numpy, plot with matplotlib
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import cv2
from skimage import filters
from skimage import data, exposure
from sklearn.feature_extraction import image
from sklearn.cluster import spectral_clustering
from scipy import ndimage


from sklearn.mixture import GMM


#ML - we will classify using a naive xgb with stratified cross validation
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss





In [None]:
#filenames
# Folder that the data files are read from (relative to the notebook).
inputFolder = "../input/"
# Name of the training-set JSON file inside inputFolder.
trainSet = 'train.json'
# Test-set loading is disabled throughout this notebook.
#testSet = 'test.json'
# Name for a submission CSV; not referenced by any cell visible in this notebook.
subName = 'iceberg-svd-xgb-3fold.csv'


In [None]:
#load data
# Read the training JSON into a DataFrame; the columns used later in this
# notebook are 'band_1', 'band_2' and 'is_iceberg'.
trainDF = pd.read_json(inputFolder+trainSet)
#testDF = pd.read_json(inputFolder+testSet)

In [None]:
# Preview the first 15 rows of the training frame.
trainDF.head(15)

In [None]:
# Build one 2-D float array per radar band: row j holds the flattened pixel
# list stored in row j of the corresponding DataFrame column.
# Converting the whole column at once replaces the element-wise copy loops.
band1 = trainDF['band_1'].values
im1 = np.array(band1.tolist(), dtype=np.float64)

band2 = trainDF['band_2'].values
im2 = np.array(band2.tolist(), dtype=np.float64)
    


basic view
---

In [None]:

from time import time

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter
import random
from sklearn import manifold, datasets

# Next line to silence pyflakes. This import is needed.
Axes3D

IMG_SIDE = 75      # the flattened bands are treated as 75 x 75 images
SAMPLE_INDEX = 6   # training sample visualised in this cell


def _scale_to_1_255(values):
    """Shift `values` so the minimum is 0, then rescale linearly to [1, 255]."""
    shifted = values - values.min()
    return shifted / shifted.max() * 254 + 1


def _embed_and_plot(fig, slot, estimator, points, point_colors, log_name, title):
    """Fit `estimator` on `points`, print the elapsed time and scatter the
    2-D embedding in cell `slot` of the figure's 2 x 5 grid.

    Returns the embedding produced by ``estimator.fit_transform``.
    """
    t0 = time()
    embedding = estimator.fit_transform(points)
    elapsed = time() - t0
    print("%s: %.2g sec" % (log_name, elapsed))

    ax = fig.add_subplot(2, 5, slot)
    ax.scatter(embedding[:, 0], embedding[:, 1], c=point_colors,
               cmap=plt.cm.Spectral)
    ax.set_title("%s (%.2g sec)" % (title, elapsed))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.axis('tight')
    return embedding


# Named band*_scaled (not `image`) so the sklearn `image` module imported at
# the top of the notebook is not shadowed.
band1_scaled = _scale_to_1_255(im1[SAMPLE_INDEX, :])
band2_scaled = _scale_to_1_255(im2[SAMPLE_INDEX, :])
n_points = IMG_SIDE * IMG_SIDE

# Point cloud with one point per pixel: (column, row, mean of the two scaled
# bands), each coordinate carrying a small random jitter (< 0.01).
# The array is filled completely below, so no generated S-curve is needed.
X = np.empty((n_points, 3))
for xi in range(IMG_SIDE):
    for yi in range(IMG_SIDE):
        idx = xi + yi * IMG_SIDE
        X[idx, 0] = xi + random.random() / 100
        X[idx, 1] = yi + random.random() / 100
        X[idx, 2] = (band1_scaled[idx] / 2 + band2_scaled[idx] / 2
                     + random.random() / 100)
color = band1_scaled
n_neighbors = 15
n_components = 2

fig = plt.figure(figsize=(20, 10))
# Report the real number of points instead of a hard-coded 1000.
plt.suptitle("Manifold Learning with %i points, %i neighbors"
             % (n_points, n_neighbors), fontsize=14)

ax = fig.add_subplot(2, 5, 1, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.view_init(4, -72)

methods = ['standard', 'hessian', 'modified', 'ltsa']
labels = ['LLE', 'Hessian LLE', 'Modified LLE', 'LTSA']

# Estimator arguments are passed by keyword: recent scikit-learn releases
# reject them positionally, and keywords work on older releases too.
for i, (method, label) in enumerate(zip(methods, labels)):
    lle = manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                          n_components=n_components,
                                          eigen_solver='auto',
                                          method=method)
    Y = _embed_and_plot(fig, 2 + i, lle, X, color, method, label)

Y = _embed_and_plot(
    fig, 7,
    manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components),
    X, color, "Isomap", "Isomap")

Y = _embed_and_plot(
    fig, 8,
    manifold.MDS(n_components=n_components, max_iter=100, n_init=1),
    X, color, "MDS", "MDS")

Y = _embed_and_plot(
    fig, 9,
    manifold.SpectralEmbedding(n_components=n_components,
                               n_neighbors=n_neighbors),
    X, color, "SpectralEmbedding", "SpectralEmbedding")

Y = _embed_and_plot(
    fig, 10,
    manifold.TSNE(n_components=n_components, init='pca', random_state=0),
    X, color, "t-SNE", "t-SNE")

plt.show()

In [None]:
import time
import warnings

import numpy as np
import matplotlib.pyplot as plt

from sklearn import cluster, datasets, mixture
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice
# Input for the comparison: band-1 of sample 6 as a 75 x 75 matrix shifted so
# its minimum is 0. Each image row is treated as one 75-dimensional sample.
imagematrix=np.reshape(im1[6,:],(75,75))
imagematrix=imagematrix-imagematrix.min()
no_structure = imagematrix,None
np.random.seed(0)

# ============
# Set up cluster parameters
# ============
plt.figure(figsize=(9 * 2 + 3, 12.5))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)

plot_num = 1

default_base = {'quantile': .3,
                'eps': .3,
                'damping': .9,
                'preference': -200,
                'n_neighbors': 10,
                'n_clusters': 3}

# Named cluster_inputs (not `datasets`) so the `sklearn.datasets` module
# imported at the top of this cell is not shadowed.
cluster_inputs = [(no_structure, {})]

for i_dataset, (dataset, algo_params) in enumerate(cluster_inputs):
    # update parameters with dataset-specific values
    params = default_base.copy()
    params.update(algo_params)

    X, y = dataset

    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)

    # estimate bandwidth for mean shift
    bandwidth = cluster.estimate_bandwidth(X, quantile=params['quantile'])

    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(
        X, n_neighbors=params['n_neighbors'], include_self=False)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)

    # ============
    # Create cluster objects
    # ============
    ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
    two_means = cluster.MiniBatchKMeans(n_clusters=params['n_clusters'])
    ward = cluster.AgglomerativeClustering(
        n_clusters=params['n_clusters'], linkage='ward',
        connectivity=connectivity)
    spectral = cluster.SpectralClustering(
        n_clusters=params['n_clusters'], eigen_solver='arpack',
        affinity="nearest_neighbors")
    affinity_propagation = cluster.AffinityPropagation(
        damping=params['damping'], preference=params['preference'])
    # NOTE(review): newer scikit-learn releases renamed `affinity` to `metric`
    # for AgglomerativeClustering — confirm against the installed version.
    average_linkage = cluster.AgglomerativeClustering(
        linkage="average", affinity="cityblock",
        n_clusters=params['n_clusters'], connectivity=connectivity)
    birch = cluster.Birch(n_clusters=params['n_clusters'])
    gmm = mixture.GaussianMixture(
        n_components=params['n_clusters'], covariance_type='full')

    # The unused DBSCAN instance was dropped; it was never part of this tuple.
    clustering_algorithms = (
        ('MiniBatchKMeans', two_means),
        ('AffinityPropagation', affinity_propagation),
        ('MeanShift', ms),
        ('SpectralClustering', spectral),
        ('Ward', ward),
        ('AgglomerativeClustering', average_linkage),
        ('Birch', birch),
        ('GaussianMixture', gmm)
    )

    for name, algorithm in clustering_algorithms:
        t0 = time.time()

        # catch warnings related to kneighbors_graph
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="the number of connected components of the " +
                "connectivity matrix is [0-9]{1,2}" +
                " > 1. Completing it to avoid stopping the tree early.",
                category=UserWarning)
            warnings.filterwarnings(
                "ignore",
                message="Graph is not fully connected, spectral embedding" +
                " may not work as expected.",
                category=UserWarning)
            algorithm.fit(X)

        t1 = time.time()
        # Estimators exposing labels_ have already clustered X during fit;
        # the others (e.g. the mixture model) need an explicit predict.
        if hasattr(algorithm, 'labels_'):
            # builtin int: the np.int alias was removed from NumPy
            y_pred = algorithm.labels_.astype(int)
        else:
            y_pred = algorithm.predict(X)

        plt.subplot(len(cluster_inputs), len(clustering_algorithms), plot_num)
        if i_dataset == 0:
            plt.title(name, size=18)

        colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                             '#f781bf', '#a65628', '#984ea3',
                                             '#999999', '#e41a1c', '#dede00']),
                                      int(max(y_pred) + 1))))
        # Only the first two standardized features are drawn.
        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])

        plt.xlim(-2.5, 2.5)
        plt.ylim(-2.5, 2.5)
        plt.xticks(())
        plt.yticks(())
        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                 transform=plt.gca().transAxes, size=15,
                 horizontalalignment='right')
        plot_num += 1

plt.show()

In [None]:
import time

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

from sklearn.feature_extraction import image
from sklearn.cluster import spectral_clustering

# Segment the first 15 band-1 images into two regions with spectral
# clustering on the pixel-gradient graph, once per label-assignment strategy.
for xi in range(0,15):
    face=np.reshape(im1[xi,:],(75,75))
    face= face-face.min()
    face= face/face.max()*255
    # Resize to 50% of the original size to speed up the processing and bring
    # the values into [0, 1]. scipy.misc.imresize was removed from SciPy;
    # ndimage.zoom with order=1 (linear interpolation) takes its place. The
    # result is 38 x 38 floats rather than imresize's 37 x 37 8-bit values.
    face = ndimage.zoom(face / 255., 0.5, order=1)

    # Convert the image into a graph with the value of the gradient on the
    # edges.
    graph = image.img_to_graph(face)

    # Take a decreasing function of the gradient: an exponential
    # The smaller beta is, the more independent the segmentation is of the
    # actual image. For beta=1, the segmentation is close to a voronoi
    beta = 2
    eps = 1e-6
    graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps

    # Apply spectral clustering (this step goes much faster if you have pyamg
    # installed)
    N_REGIONS = 2

    for assign_labels in ('kmeans', 'discretize'):
        t0 = time.time()
        labels = spectral_clustering(graph, n_clusters=N_REGIONS,
                                 assign_labels=assign_labels, random_state=1)
        t1 = time.time()
        labels = labels.reshape(face.shape)

        plt.figure(figsize=(5, 5))
        plt.imshow(face, cmap=plt.cm.gray)
        for region in range(N_REGIONS):
            # `contours=1` is not a contour() argument and has been dropped;
            # the `spectral` colormap is available as `nipy_spectral`.
            plt.contour(labels == region,
                        colors=[plt.cm.nipy_spectral(region / float(N_REGIONS))])
        plt.xticks(())
        plt.yticks(())
        title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))
        print(title)
        plt.title(title)
    # one show per image: displays the figures of both strategies
    plt.show()

In [None]:
from sklearn.metrics.pairwise import cosine_similarity


# For each of the first 15 band-1 images, keep the three leading singular
# directions and plot (left) their product and (right) the cosine similarity
# between the rows of the left and right factors.
for xi in range(0,15):
    image=np.reshape(im1[xi,:],(75,75))
    # np.linalg.svd returns (U, s, Vh): the right singular vectors are the
    # ROWS of the third result, so the leading three are Vi[:3, :].
    Ui,si,Vi=np.linalg.svd(image)
    left=Ui[:,:3]        # (75, 3)
    right=Vi[:3,:].T     # (75, 3)
    # The singular values si are deliberately left out, as in the original
    # cell, so this is an unweighted rank-3 product, not a reconstruction.
    imagecore=left.dot(right.T)
    imacos=cosine_similarity(left,right)
    # The correlation matrix of imagecore and its inverse were never used;
    # that matrix has rank <= 3, so inverting it was ill-defined. Dropped.

    fig, ax = plt.subplots(1,2)
    ax[0].imshow(imagecore, cmap='nipy_spectral',interpolation='nearest')
    ax[1].imshow( imacos, cmap='nipy_spectral', interpolation='nearest')
    ax[1].set_xlabel(trainDF.iloc[xi]['is_iceberg'])


In [None]:
import numpy as np
import matplotlib.pyplot as plt

from skimage.restoration import (denoise_tv_chambolle, denoise_bilateral,
                                 denoise_wavelet, estimate_sigma)
from skimage import data, img_as_float, color
from skimage.util import random_noise


# Compare several scikit-image denoisers on one band-1 image.
# NOTE(review): `xi` is not assigned in this cell; it relies on the value left
# behind by a loop in an earlier cell (hidden state) — consider pinning the
# sample index explicitly.
original =np.reshape(im1[xi,:],(75,75))
# shift so the minimum is 0
original = original-original.min()
# average the image with its transpose (makes the matrix symmetric)
original = (original+original.T)/2
#original = original * 255/original.max()
#print(original)
# noise level for the synthetic corruption: four times the image's own std
sigma = original.std()*4
noisy = random_noise(original, var=sigma**2)

# NOTE(review): 'box-forced' is not accepted by recent Matplotlib releases —
# confirm against the installed version.
fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(8, 5), sharex=True,
                       sharey=True, subplot_kw={'adjustable': 'box-forced'})

plt.gray()

# Estimate the average noise standard deviation across color channels.
# NOTE(review): `original` is a 2-D (75, 75) array, so `noisy` is presumably a
# single-channel image too; multichannel=True would then treat its last axis
# as channels — confirm this is intended. Newer scikit-image releases also
# replace `multichannel` with `channel_axis` — verify before upgrading.
sigma_est = estimate_sigma(noisy, multichannel=True, average_sigmas=True)
# Due to clipping in random_noise, the estimate will be a bit smaller than the
# specified sigma.
print("Estimated Gaussian noise standard deviation = {}".format(sigma_est))

# Top row: noisy input followed by TV, bilateral and wavelet denoising.
ax[0, 0].imshow(noisy)
ax[0, 0].axis('off')
ax[0, 0].set_title('Noisy')
ax[0, 1].imshow(denoise_tv_chambolle(noisy, weight=0.1, multichannel=False))
ax[0, 1].axis('off')
ax[0, 1].set_title('TV')
ax[0, 2].imshow(denoise_bilateral(noisy, sigma_color=0.05, sigma_spatial=15,
                multichannel=False))
ax[0, 2].axis('off')
ax[0, 2].set_title('Bilateral')
ax[0, 3].imshow(denoise_wavelet(noisy, multichannel=False))
ax[0, 3].axis('off')
ax[0, 3].set_title('Wavelet denoising')

# Bottom row: the same denoisers with larger TV weight / sigma_color, plus
# the untouched original in the first column.
ax[1, 1].imshow(denoise_tv_chambolle(noisy, weight=0.2, multichannel=False))
ax[1, 1].axis('off')
ax[1, 1].set_title('(more) TV')
ax[1, 2].imshow(denoise_bilateral(noisy, sigma_color=0.1, sigma_spatial=15,
                multichannel=False))
ax[1, 2].axis('off')
ax[1, 2].set_title('(more) Bilateral')
# NOTE(review): convert2ycbcr=False is passed here, yet the panel title below
# says "YCbCr colorspace" — the title looks stale; confirm.
ax[1, 3].imshow(denoise_wavelet(noisy, multichannel=True, convert2ycbcr=False))
ax[1, 3].axis('off')
ax[1, 3].set_title('Wavelet denoising\nin YCbCr colorspace')
ax[1, 0].imshow(original)
ax[1, 0].axis('off')
ax[1, 0].set_title('Original')

fig.tight_layout()

plt.show()

statistical
---
* above mean
*  normalized 

In [None]:
# Two simple statistical views per image: pixels above the image mean (left)
# and the min-max normalized image scaled by its variance (right).
for xi in range(15):
    image = im1[xi, :].reshape(75, 75)
    above_mean = image * (image > image.mean())
    value_span = image.max() - image.min()
    normalized = (image - image.min()) / value_span * image.var()

    fig, ax = plt.subplots(1, 2)
    left_panel, right_panel = ax[0], ax[1]
    left_panel.imshow(above_mean, cmap='nipy_spectral', interpolation='nearest')
    right_panel.imshow(normalized, cmap='nipy_spectral', interpolation='nearest')
    # x-label carries the ground-truth flag of this sample
    right_panel.set_xlabel(trainDF.iloc[xi]['is_iceberg'])


In [None]:
gaussian filter
---
pure noise filter


In [None]:
# Per image: Gaussian-smoothed version (left) and the original pixels kept
# only where 1.3 * pixel is below the smoothed value (right).
for xi in range(15):
    image = im1[xi, :].reshape(75, 75)
    image_gf = ndimage.gaussian_filter(image, 3)
    below_smooth = image * 1.3 < image_gf
    image_perf = image * below_smooth

    fig, ax = plt.subplots(1, 2)
    smooth_panel, masked_panel = ax[0], ax[1]
    smooth_panel.imshow(image_gf)
    masked_panel.imshow(image_perf)
    # x-label carries the ground-truth flag of this sample
    masked_panel.set_xlabel(trainDF.iloc[xi]['is_iceberg'])


In [None]:
gaussian laplace
---

In [None]:
# Laplacian-of-Gaussian response of each image, using the image's own
# standard deviation as the Gaussian sigma; shown with two colormaps.
for xi in range(0,15):
    image=np.reshape(im1[xi,:],(75,75))
    # Called through scipy.ndimage directly: the `ndimage.filters` namespace
    # is deprecated. mode='reflect' / cval=0.0 / output=None are the defaults.
    image_gl=ndimage.gaussian_laplace(image, image.std())
    fig, ax = plt.subplots(1,2)
    ax[0].imshow(image_gl)
    ax[1].imshow(image_gl, cmap='nipy_spectral', interpolation='nearest')
    ax[1].set_xlabel(trainDF.iloc[xi]['is_iceberg'])


In [None]:

#hsobel_text = filters.sobel_h(image)
#val = filters.threshold_otsu(image)

# Per image: pixel-value histogram, pixels above (mean - std), and the
# histogram-equalized image.
for xi in range(0,15):
    image=np.reshape(im1[xi,:],(75,75))
    camera_equalized = exposure.equalize_hist(image)

    threshold = np.mean(image)-image.std()
    binary_img = image > threshold
    # Three panels: the original fourth axis was never drawn on.
    fig, ax = plt.subplots(1,3)
    # Histogram of the current image. The original referenced `img`, which is
    # not defined before this cell; ravel() gives one series of pixel values
    # instead of one series per image column.
    ax[0].hist(image.ravel(), bins=20)
    ax[1].imshow(image*binary_img, cmap='nipy_spectral', interpolation='nearest')
    ax[2].imshow(camera_equalized)




In [None]:
from math import sqrt
from skimage import data
from skimage.feature import blob_dog, blob_log, blob_doh
from skimage.color import rgb2gray

import matplotlib.pyplot as plt

# Run three blob detectors (LoG, DoG, DoH) on each of the first 15 band-1
# images and draw the detected blobs as circles over the image.
for xi in range(0,15):
    image=np.reshape(im1[xi,:],(75,75))
    # The band is already a single-channel 2-D array, so no RGB-to-gray
    # conversion is needed (rgb2gray expects a trailing channel axis).
    image_gray = image
    blobs_log = blob_log(image_gray, max_sigma=30, num_sigma=10, threshold=.1)
    # Compute radii in the 3rd column.
    blobs_log[:, 2] = blobs_log[:, 2] * sqrt(2)
    blobs_dog = blob_dog(image_gray, max_sigma=30, threshold=.0001)
    blobs_dog[:, 2] = blobs_dog[:, 2] * sqrt(2)
    blobs_doh = blob_doh(image_gray, max_sigma=30, threshold=.1)
    blobs_list = [blobs_log, blobs_dog, blobs_doh]
    colors = ['yellow', 'lime', 'red']
    titles = ['Laplacian of Gaussian', 'Difference of Gaussian',
          'Determinant of Hessian']
    sequence = zip(blobs_list, colors, titles)

    # 'box-forced' was removed from Matplotlib; the default `adjustable`
    # works with shared axes in current releases.
    fig, axes = plt.subplots(1, 3, figsize=(9, 3), sharex=True, sharey=True)
    ax = axes.ravel()

    # `edge_color` rather than `color`, which would shadow the skimage
    # `color` module imported earlier in the notebook.
    for idx, (blobs, edge_color, title) in enumerate(sequence):
        ax[idx].set_title(title)
        ax[idx].imshow(image, interpolation='nearest')
        for blob in blobs:
            y, x, r = blob
            c = plt.Circle((x, y), r, color=edge_color, linewidth=2, fill=False)
            ax[idx].add_patch(c)
        ax[idx].set_axis_off()

    plt.tight_layout()
    plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage as ndi

from skimage import feature

# Canny edge detection on the Gaussian-smoothed band-1 image of each of the
# first 15 samples, at two detector scales.
for xi in range(0,15):
    # Band-1 image of sample xi, smoothed with a Gaussian (sigma=3)
    im = np.reshape(im1[xi,:],(75,75))
    im = ndimage.gaussian_filter(im, 3)

    # Compute the Canny filter for two values of sigma
    edges1 = feature.canny(im)
    edges2 = feature.canny(im, sigma=5)

    # display results
    fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(8, 3),
                                    sharex=True, sharey=True)

    ax1.imshow(im, cmap=plt.cm.gray)
    ax1.axis('off')
    ax1.set_title('noisy image', fontsize=20)

    ax2.imshow(edges1, cmap=plt.cm.seismic)
    ax2.axis('off')
    # raw string: '\s' is not a valid escape sequence in a normal literal
    ax2.set_title(r'Canny filter, $\sigma=1$', fontsize=20)

    # the `spectral` colormap is available as `nipy_spectral`
    ax3.imshow(edges2, cmap=plt.cm.nipy_spectral)
    ax3.axis('off')
    # third panel (sigma=5 edges) is titled with the ground-truth flag
    ax3.set_title(trainDF.iloc[xi]['is_iceberg'])

    fig.tight_layout()

    plt.show()

In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt

# Input image: band-1 of training sample 9 as a 75 x 75 array. `image` is
# bound to the same array and is used again further down in this cell.
#img0 = cv2.imread('SanFrancisco.jpg',)
img0 = image = np.reshape(im1[9,:],(75,75))
from skimage.exposure import equalize_hist

equalized_image = equalize_hist(img0)

# Light 3x3 Gaussian blur to suppress noise before taking derivatives.
img = cv2.GaussianBlur(equalized_image,(3,3),0)

# Second-derivative (Laplacian) and first-derivative (Sobel x / y) responses.
laplacian = cv2.Laplacian(img,cv2.CV_64F)
sobelx = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5)  # x
sobely = cv2.Sobel(img,cv2.CV_64F,0,1,ksize=5)  # y

# Draw the four results in a 2 x 2 grid, one titled panel each, ticks hidden.
panels = (
    ('Original', img),
    ('Laplacian', laplacian),
    ('Sobel X', sobelx),
    ('Sobel Y', sobely),
)
for slot, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    plt.imshow(picture, cmap='nipy_spectral')
    plt.title(caption)
    plt.xticks([])
    plt.yticks([])

plt.show()
from skimage.feature import corner_harris,corner_peaks

# More pyplot!
def show_corners(corners,image,title=None):
    """Display corner locations on top of an image and print their count.

    Parameters
    ----------
    corners : sequence of (row, column) pairs
        Corner coordinates; may be empty, in which case only the image is
        drawn.
    image : 2-D array
        Image shown in grayscale underneath the markers.
    title : str, optional
        Figure title; omitted when falsy.
    """
    fig = plt.figure()
    plt.imshow(image,cmap = 'gray')
    # Unpacking zip(*corners) fails on an empty list, so only plot markers
    # when at least one corner was found.
    if len(corners):
        points = np.asarray(corners)
        # column 0 is the row (y), column 1 is the column (x)
        plt.plot(points[:, 1], points[:, 0], 'v')
    if title:
        plt.title(title)
    plt.xlim(0,image.shape[1])
    plt.ylim(image.shape[0],0) # image convention: y axis points downwards
    fig.set_size_inches(np.array(fig.get_size_inches()) * 1.5)
    plt.show()
    print ("Number of corners:",len(corners) )

# Make a checker board: 100 x 100 with 10-pixel squares.
# builtin bool: the np.bool alias was removed from NumPy.
checkers = np.zeros((100,100),dtype=bool)
# indices of every other band of 10 consecutive rows/columns
ind = np.arange(100).reshape((10,10))[::2].flatten()
checkers[ind,:] = True
checkers[:,ind] = np.invert(checkers[:,ind])
checkers = np.where(checkers,1.,0.)

# Run Harris on the synthetic board first, then on the radar image bound to
# `image` earlier in this cell.
checkers_corners = corner_peaks(corner_harris(checkers),min_distance=2)
show_corners(checkers_corners,checkers)
corners = corner_peaks(corner_harris(image),min_distance=2)
show_corners(corners,image,
             title="Harris Corner Algorithm")

In [None]:
# Reduced SVD of the band-1 matrix (one flattened image per row).
# The third result holds the right singular vectors as rows.
U1, s1, V1 = np.linalg.svd(im1, full_matrices=False)
#U2,s2,V2  = np.linalg.svd(im2,full_matrices = 0)
leading_left_shape = U1[:, :100].shape
print(leading_left_shape, V1.shape)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity


Singular reformed images
---

less (noise) is more (iceberg)

In [None]:


# For increasing rank, show sample 13 as the cosine similarity between its
# truncated left singular vector and every pixel's truncated right vector.
for rank in range(3,50,3):
    # Only row 13 is displayed, so compare just that sample instead of
    # building the full (samples x pixels) similarity matrix each time;
    # cosine similarity is computed row by row, so the row is unchanged.
    sample_cs=cosine_similarity(U1[13:14,:rank],V1[:rank,:].T)
    image=np.reshape(sample_cs[0,:],(75,75))
    fig, ax = plt.subplots(1,3)
    ax[0].imshow(image)
    ax[1].imshow(image, cmap='nipy_spectral', interpolation='nearest')
    ax[2].imshow(image, cmap='gray', interpolation='nearest')



In [None]:
# Print sample 13 as a 75 x 75 matrix, then histogram-equalize the whole
# band-1 matrix at once and recompute its reduced SVD.
sample_13 = im1[13, :].reshape(75, 75)
print(sample_13)
im1ce = exposure.equalize_hist(im1)
U1, s1, V1 = np.linalg.svd(im1ce, full_matrices=False)

Camera exposure equalizing
---

In [None]:
# Same rank sweep as before, now on the SVD of the equalized band-1 matrix.
for rank in range(3,50,3):
    # Only row 13 is displayed, so compare just that sample instead of
    # building the full (samples x pixels) similarity matrix each time;
    # cosine similarity is computed row by row, so the row is unchanged.
    sample_cs=cosine_similarity(U1[13:14,:rank],V1[:rank,:].T)
    image=np.reshape(sample_cs[0,:],(75,75))
    fig, ax = plt.subplots(1,3)
    ax[0].imshow(image)
    ax[1].imshow(image, cmap='nipy_spectral', interpolation='nearest')
    ax[2].imshow(image, cmap='gray', interpolation='nearest')

In [None]:
from sklearn.preprocessing import normalize
def distanc(X,Y):
    """Element-wise angle (radians) between the vectors (X[i], Y[i], 0) and (1, 0, 0).

    Parameters
    ----------
    X, Y : array-like of equal length
        First and second vector components for every element.

    Returns
    -------
    numpy.ndarray
        A NEW float array of angles in [0, pi]. The inputs are not modified
        (the previous version wrote the result into ``X`` itself). Elements
        where both components are 0 yield NaN, as before.
    """
    xs = np.asarray(X, dtype=float)
    ys = np.asarray(Y, dtype=float)
    # cos(angle) = x / |(x, y, 0)| because the reference vector is the x axis
    cosines = xs / np.sqrt(xs * xs + ys * ys)
    # clip guards arccos against values a hair outside [-1, 1] from rounding
    return np.arccos(np.clip(cosines, -1.0, 1.0))

def unit_vector(vector):
    """Scale `vector` to unit Euclidean length and return the result."""
    length = np.linalg.norm(vector)
    return vector / length

def angle_between(v1, v2):
    """ Angle in radians between the vectors 'v1' and 'v2'.

    Both vectors are normalized first; the cosine is clipped to [-1, 1]
    before taking the arc cosine::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793
    """
    cosine = np.dot(unit_vector(v1), unit_vector(v2))
    return np.arccos(np.clip(cosine, -1.0, 1.0))

# Per-sample "angle image": for every pixel, the angle of the
# (band_1, band_2) pair as computed by distanc.
# A fresh array is allocated: `ima = im1` only aliased im1, so filling ima
# destroyed the band-1 data.
ima = np.empty_like(im1)
for xi in range(0,len(im1)):
    # pass a copy so im1 stays intact even if the helper writes into its
    # first argument
    ima[xi] = distanc(im1[xi,:].copy(), im2[xi,:])

In [None]:
# Reduced SVD of the angle-image matrix, followed by the same rank sweep for
# sample 13.
U1,s1,V1  = np.linalg.svd(ima,full_matrices = 0)

for rank in range(3,50,3):
    # Only row 13 is displayed, so compare just that sample instead of
    # building the full (samples x pixels) similarity matrix each time;
    # cosine similarity is computed row by row, so the row is unchanged.
    sample_cs=cosine_similarity(U1[13:14,:rank],V1[:rank,:].T)
    image=np.reshape(sample_cs[0,:],(75,75))
    fig, ax = plt.subplots(1,3)
    ax[0].imshow(image)
    ax[1].imshow(image, cmap='nipy_spectral', interpolation='nearest')
    ax[2].imshow(image, cmap='gray', interpolation='nearest')