In [None]:
# for loading/processing the images  
from keras.preprocessing.image import load_img 
from keras.preprocessing.image import img_to_array 
from keras.applications.vgg16 import preprocess_input 

# models 
from keras.applications.vgg16 import VGG16 
from keras.models import Model

# clustering and dimension reduction
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# for everything else
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle

In [75]:
AliPC = True
KianaPC = False
path=""

if AliPC:
  # path = r"E:\\University\\Term 8\\Computational Intelligence\\Project\\1\\Main-Project\\ML\\dataset"
  path = r"E:\\University\\Term 8\\Computational Intelligence\\Project\\1\\Main-Project\\ML\\dataset\\original-dataset"
else:
  path = r"/content/drive/MyDrive/dataset"


# change the working directory to the path where the images are located
os.chdir(path)

# this list holds all the image filename
images = []

# creates a ScandirIterator aliased as files
with os.scandir(path) as files:
  # loops through each file in the directory
    for file in files:
        # if file.name.endswith('.jpg'):
          # adds only the image files to the images list
            images.append(file.name)

In [None]:
if KianaPC:
    from google.colab import drive
    drive.mount('/content/drive')

In [76]:
print(images[:-1])

['subject01.centerlight', 'subject01.glasses', 'subject01.happy', 'subject01.leftlight', 'subject01.noglasses', 'subject01.normal', 'subject01.rightlight', 'subject01.sad', 'subject01.sleepy', 'subject01.surprised', 'subject01.wink', 'subject02.centerlight', 'subject02.glasses', 'subject02.happy', 'subject02.leftlight', 'subject02.noglasses', 'subject02.normal', 'subject02.rightlight', 'subject02.sad', 'subject02.sleepy', 'subject02.surprised', 'subject02.wink', 'subject03.centerlight', 'subject03.glasses', 'subject03.happy', 'subject03.leftlight', 'subject03.noglasses', 'subject03.normal', 'subject03.rightlight', 'subject03.sad', 'subject03.sleepy', 'subject03.surprised', 'subject03.wink', 'subject04.centerlight', 'subject04.glasses', 'subject04.happy', 'subject04.leftlight', 'subject04.noglasses', 'subject04.normal', 'subject04.rightlight', 'subject04.sad', 'subject04.sleepy', 'subject04.surprised', 'subject04.wink', 'subject05.centerlight', 'subject05.glasses', 'subject05.happy', 's

In [77]:
# load the model first and pass as an argument
model = VGG16()
model = Model(inputs = model.inputs, outputs = model.layers[-2].output)

def extract_features(file, model):
    # load the image as a 224x224 array
    img = load_img(file, target_size=(224,224))
    # convert from 'PIL.Image.Image' to numpy array
    img = np.array(img) 
    # reshape the data for the model reshape(num_of_samples, dim 1, dim 2, channels)
    reshaped_img = img.reshape(1,224,224,3) 
    # prepare image for model
    imgx = preprocess_input(reshaped_img)
    # get the feature vector
    features = model.predict(imgx)
    return features

In [93]:
data = {}

if AliPC:
    p = r"E:\\University\\Term 8\\Computational Intelligence\\Project\\1\\Main-Project\\ML\\dataset\\dataset.pkl"
else:
    p = r"/content/drive/MyDrive/dataset.pkl"

    
# loop through each image in the dataset
for image in images:
    # try to extract the features and update the dictionary
    try:
        feat = extract_features(image,model)
        data[image] = feat
        print(image)
    # if something fails, save the extracted features as a pickle file (optional)
    except Exception as ex:
        with open(p,'wb') as file:
            pickle.dump(data,file)
            print(f"Exception: {ex}")
          
with open(p,'wb') as file:
    pickle.dump(data,file)
# get a list of the filenames
filenames = np.array(list(data.keys()))

# get a list of just the features
feat = np.array(list(data.values()))
feat.shape
(210, 1, 4096)

# reshape so that there are 210 samples of 4096 vectors
feat = feat.reshape(-1,4096)
feat.shape
(210, 4096)

# get the unique labels (from the image_labels.csv)
# df = pd.read_csv('image_labels.csv')
# label = df['label'].tolist()
# unique_labels = list(set(label))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
subject01.centerlight
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
subject01.glasses
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
subject01.happy
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
subject01.leftlight
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
subject01.noglasses
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
subject01.normal
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
subject01.rightlight
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
subject01.sad
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
subject01.sleepy
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step
subject01.surprised
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
subject01.wi

(210, 4096)

In [None]:
pca = PCA(n_components=165, random_state=22)
pca.fit(feat)
x = pca.transform(feat)

print(f"Components before PCA: {feat.shape[1]}")
print(f"Components after PCA: {pca.n_components}")

Components before PCA: 4096
Components after PCA: 165


In [92]:
kmeans = KMeans(n_clusters=15, random_state=22)
kmeans.fit(x)

# kmeans.labels_

In [94]:
# holds the cluster id and the images { id: [images] }
groups = {}
for file, cluster in zip(filenames,kmeans.labels_):
    if cluster not in groups.keys():
        groups[cluster] = []
        groups[cluster].append(file)
    else:
        groups[cluster].append(file)


# view the filenames in cluster clusterNum
clusterNum = 2
groups[clusterNum]

# for image in groups[clusterNum]:
#     im = mpl.image.imread(path+'\\\\'+image)
#     plt.imshow(im)
#     print(path+'\\\\'+image)



['subject01.rightlight',
 'subject03.rightlight',
 'subject04.rightlight',
 'subject05.rightlight',
 'subject08.rightlight',
 'subject09.rightlight',
 'subject11.leftlight',
 'subject15.leftlight']

In [64]:
import shutil

# Create a folder to save the images
if AliPC:
    output_folder = "E:\\University\\Term 8\\Computational Intelligence\\Project\\1\\Main-Project\\ML\\dataset\\output\\"
else:
    output_folder = "/content/drive/MyDrive/output"

os.makedirs(output_folder, exist_ok=True)

# holds the cluster id and the images { id: [images] }
groups = {}
for file, cluster in zip(filenames, kmeans.labels_):
    if cluster not in groups.keys():
        groups[cluster] = []
    groups[cluster].append(file)

# Save the images
for cluster_id, images in groups.items():
    cluster_folder = os.path.join(output_folder, f"cluster_{cluster_id}")
    os.makedirs(cluster_folder, exist_ok=True)
    for image_file in images:
        shutil.copy(image_file, cluster_folder)

print("Images saved successfully.")


Images saved successfully.
