In [None]:
import numpy as np
import cv2
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D
from sklearn.cluster import KMeans
from tensorflow.keras.applications.vgg16 import preprocess_input


def load_cifar10():
    """Load CIFAR-10 through ``cifar10.load_data()``.

    Returns:
        A flat 4-tuple ``(x_train, y_train, x_test, y_test)`` built from the
        two ``(images, labels)`` pairs that ``cifar10.load_data()`` returns.
    """
    train_split, test_split = cifar10.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split
    return x_train, y_train, x_test, y_test

def _resize_and_preprocess(images, batch_size, target_size):
    """Resize ``images`` and apply VGG16 preprocessing, one batch at a time."""
    height, width = target_size
    processed = np.zeros((len(images), height, width, 3), dtype=np.float32)
    for start in range(0, len(images), batch_size):
        stop = start + batch_size
        # cv2.resize takes the destination size as (width, height).
        resized = np.stack(
            [cv2.resize(img, (width, height)) for img in images[start:stop]]
        )
        # Keep the 8-bit pixel representation between the resize and the
        # VGG16 preprocessing step.
        resized = resized.astype(np.uint8, copy=False)
        # preprocess_input is applied to the whole 4-D batch at once.
        processed[start:stop] = preprocess_input(resized)
    return processed


def preprocess_cifar10(x_train, x_test, batch_size=32, target_size=(128, 128)):
    """Resize both image sets and apply VGG16 ``preprocess_input`` to them.

    Each split is processed in batches: every image of a batch is resized
    with ``cv2.resize`` and the batch is then passed through
    ``preprocess_input``. Only one resized batch is held in memory at a time;
    no full-size uint8 copy of the resized data set is allocated.

    Args:
        x_train: Sequence/array of training images (indexable, sliceable,
            supports ``len``).
        x_test: Sequence/array of test images, same requirements.
        batch_size: Number of images handled per step; must be >= 1.
        target_size: ``(height, width)`` of the resized images. Defaults to
            ``(128, 128)``.

    Returns:
        Tuple ``(x_train_preprocessed, x_test_preprocessed)`` of float32
        arrays with shape ``(len(x), height, width, 3)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently return
        # all-zero arrays; fail loudly instead.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    x_train_preprocessed = _resize_and_preprocess(x_train, batch_size, target_size)
    x_test_preprocessed = _resize_and_preprocess(x_test, batch_size, target_size)
    return x_train_preprocessed, x_test_preprocessed


# Fetch the raw CIFAR-10 images and labels.
x_train, y_train, x_test, y_test = load_cifar10()

# Resize and VGG16-preprocess both splits, working in batches of 32 images.
x_train_preprocessed, x_test_preprocessed = preprocess_cifar10(
    x_train,
    x_test,
    batch_size=32,
)


# VGG16 with ImageNet weights and include_top=False, built for 128x128 RGB input.
vgg16 = VGG16(
    input_shape=(128, 128, 3),
    include_top=False,
    weights='imagenet',
)

# Run both splits through the network to obtain their feature maps.
x_train_features = vgg16.predict(x_train_preprocessed)
x_test_features = vgg16.predict(x_test_preprocessed)

# Collapse every feature map into a single row vector per image.
x_train_flattened = x_train_features.reshape(len(x_train_features), -1)
x_test_flattened = x_test_features.reshape(len(x_test_features), -1)

# Cluster the training features into 10 groups with a fixed random_state.
kmeans = KMeans(random_state=42, n_clusters=10)
y_train_pred = kmeans.fit_predict(x_train_flattened)

# Compute the purity score of the clustering
def purity_score(y_true, y_pred):
    """Compute the purity of a clustering against ground-truth labels.

    Every cluster is assigned to the class that occurs most often inside it;
    purity is the fraction of samples that belong to their cluster's majority
    class: ``sum_k max_j |cluster_k & class_j| / N``.

    Args:
        y_true: Ground-truth class labels (array-like). The input is
            flattened, so a column of shape ``(N, 1)`` is accepted.
        y_pred: Cluster assignments (array-like), flattened the same way.
            Labels on either side may be arbitrary values; they do not have
            to be integers in ``0..9``.

    Returns:
        Purity in ``[0, 1]`` as a float; ``nan`` when both inputs are empty.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` hold a different number of
            labels.
    """
    true_labels = np.asarray(y_true).ravel()
    pred_labels = np.asarray(y_pred).ravel()
    if true_labels.shape[0] != pred_labels.shape[0]:
        raise ValueError(
            "y_true and y_pred must have the same number of labels, got "
            f"{true_labels.shape[0]} and {pred_labels.shape[0]}"
        )
    if true_labels.size == 0:
        # Purity is undefined without samples.
        return float("nan")

    # Map arbitrary label values onto dense 0..K-1 indices.
    _, true_idx = np.unique(true_labels, return_inverse=True)
    _, pred_idx = np.unique(pred_labels, return_inverse=True)

    # contingency[c, k] = number of samples of class c placed in cluster k.
    contingency = np.zeros((true_idx.max() + 1, pred_idx.max() + 1), dtype=np.int64)
    np.add.at(contingency, (true_idx, pred_idx), 1)

    # Majority class count per cluster: reduce over the class axis (axis 0).
    # Reducing over axis 1 would instead pick the best cluster per class.
    return contingency.max(axis=0).sum() / contingency.sum()

# Score the k-means assignments against the training labels and report it.
purity = purity_score(y_true=y_train, y_pred=y_train_pred)
print("The purity score of the clustering is:", purity)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pick the pair of preprocessed training images to compare.
# NOTE(review): both picks use index 2, so the image is compared with itself
# and the similarity printed below is expected to be ~1.0. Change one of the
# indices to compare two different images.
img1 = x_train_preprocessed[2]
img2 = x_train_preprocessed[2]

# Feed each image to VGG16 as a batch of one and flatten the output to 1-D.
img1_batch = img1.reshape((1, 128, 128, 3))
img2_batch = img2.reshape((1, 128, 128, 3))
img1_features = vgg16.predict(img1_batch).ravel()
img2_features = vgg16.predict(img2_batch).ravel()

# cosine_similarity expects 2-D inputs; the single entry of the resulting
# 1x1 matrix is the similarity between the two feature vectors.
similarity_matrix = cosine_similarity(
    img1_features[np.newaxis, :],
    img2_features[np.newaxis, :],
)
similarity = similarity_matrix[0][0]

print("The cosine similarity between the two images is:", similarity)



In [None]:
import matplotlib.pyplot as plt

# Show the raw (un-preprocessed) training images, one figure per image.
# NOTE(review): both entries are x_train[2], so the same picture is displayed
# twice. Change one index to display two different images.
for image in (x_train[2], x_train[2]):
    plt.imshow(image)
    plt.show()