## Import Libraries:

In [7]:
import os
import cv2
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from PIL import Image as Im
from PIL import ImageTk, ImageOps
import tkinter as tk
from tkinter import messagebox
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from IPython.display import display

## All defined functions:

In [8]:
# Step 1: Read the folder
def load_images_from_folder(folder_path):
    """Load every readable JPEG/PNG image found directly inside a folder.

    Args:
        folder_path: Directory to scan. Only its direct entries are listed;
            sub-directories are not searched.

    Returns:
        A list of ``(filename, image)`` tuples, where ``image`` is the value
        returned by ``cv2.imread`` for that file. Files for which
        ``cv2.imread`` returns ``None`` are skipped.
    """
    images = []
    for filename in os.listdir(folder_path):
        # Compare in lower case so names such as "IMG_0001.JPG" are not
        # silently ignored by the extension filter.
        if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
            continue
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append((filename, img))
    return images

# Step 2: Process images and collect face data
def detect_faces(images, cascade_path=None):
    """Detect frontal faces and return them as 180x180 grayscale crops.

    Args:
        images: Iterable of ``(filename, image)`` pairs. Each image is
            converted with ``cv2.COLOR_BGR2GRAY`` before detection.
        cascade_path: Optional path to a Haar cascade XML file. When omitted,
            the frontal-face cascade shipped with the OpenCV Python package
            (``cv2.data.haarcascades``) is used, so the function does not
            depend on a machine-specific path.

    Returns:
        A list of ``(faceId, filename, resized_face)`` tuples. ``faceId`` is
        ``"face1"``, ``"face2"``, ... numbered across all images in the order
        the faces are found.

    Raises:
        IOError: If the cascade file could not be loaded.
    """
    if cascade_path is None:
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(cascade_path)
    # Constructing a classifier from a bad path does not raise; check
    # explicitly so the failure is reported here with a clear message.
    if face_cascade.empty():
        raise IOError("Could not load Haar cascade from: " + str(cascade_path))

    face_data = []
    for filename, img in images:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
        for (x, y, w, h) in faces:
            # Crop the detected rectangle and normalise it to a fixed size so
            # every face has the same shape.
            resized_face = cv2.resize(gray[y:y+h, x:x+w], (180, 180))
            faceId = "face" + str(len(face_data) + 1)
            face_data.append((faceId, filename, resized_face))
    return face_data

# Module-level labelling state shared between show_image() and its callbacks.
current_image_index = 0
filename = ""
user_choices = {}


def show_image(face_data):
    """Show each face in a Tkinter window and record a Yes/No answer for it.

    Args:
        face_data: Sequence of ``(faceId, filename, face)`` tuples; ``face``
            is passed to ``PIL.Image.fromarray``.

    Side effects:
        Resets the module-level ``current_image_index`` to 0 and stores one
        entry per answered face in the module-level ``user_choices`` dict
        (``faceId -> 1`` for "Yes", ``faceId -> 0`` for "No"). Blocks in the
        Tk main loop until the window is destroyed.
    """
    global current_image_index, user_choices, filename
    current_image_index = 0

    def render(face_pixels):
        """Put one face, scaled to 300x300, into the image label."""
        preview = Im.fromarray(face_pixels).resize((300, 300))
        tk_photo = ImageTk.PhotoImage(preview)
        image_label.config(image=tk_photo)
        # Keep a reference on the widget so the photo is not garbage collected.
        image_label.image = tk_photo

    def advance():
        """Display the next face, or finish when none are left."""
        global current_image_index
        if current_image_index >= len(face_data):
            messagebox.showinfo("Finished", "All images are done!")
            root.destroy()
            return
        render(face_data[current_image_index][2])
        current_image_index += 1

    def record(choice):
        """Store the answer for the face currently on screen, then move on."""
        # The index was already advanced when the face was shown, hence -1.
        shown_face_id = face_data[current_image_index - 1][0]
        user_choices[shown_face_id] = choice
        advance()

    def answer_yes():
        record(1)

    def answer_no():
        record(0)

    # Main window with the image on top and the two answer buttons below.
    root = tk.Tk()
    root.title("Image Viewer")

    image_label = tk.Label(root)
    image_label.pack()

    yes_button = tk.Button(root, text="Yes", command=answer_yes)
    no_button = tk.Button(root, text="No", command=answer_no)
    yes_button.pack(side=tk.LEFT)
    no_button.pack(side=tk.RIGHT)

    # Show the first face, then hand control to Tk.
    advance()
    root.mainloop()

# Step 4: Append user choices to the existing dataframe
def append_user_choices(dataframe, user_choices):
    """Write the recorded answers into the frame's 'Consider' column.

    Args:
        dataframe: Frame with 'FaceId' and 'Consider' columns; modified in place.
        user_choices: Mapping of face id to the value to store in 'Consider'.

    Returns:
        The same ``dataframe`` object. Rows whose 'FaceId' is not a key of
        ``user_choices`` are left unchanged.
    """
    face_ids = dataframe['FaceId']
    for face_id, choice in user_choices.items():
        matching_rows = face_ids == face_id
        dataframe.loc[matching_rows, 'Consider'] = choice
    return dataframe

# Step 5: Prepare a convolution model for face recognition
def create_face_recognition_model(input_shape):
    """Build and compile a small CNN that outputs one 'consider' probability.

    Args:
        input_shape: Shape of a single sample. Either ``(rows, cols, channels)``
            or ``(rows, cols)``; a 2-D shape gets a single channel appended
            because ``Conv2D`` needs a channel axis.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    input_shape = tuple(input_shape)
    if len(input_shape) == 2:
        input_shape = input_shape + (1,)

    model = keras.Sequential([
        keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        # One sigmoid unit: the labels are a single 0/1 value per face, which
        # is what binary cross-entropy expects and what the 0.5 thresholding
        # elsewhere in this file assumes.
        keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

def show_verified_result(faces):
    """Classify faces with the module-level ``model`` and display each one.

    Faces predicted as 1 are displayed with a 20-pixel red border; all other
    faces are displayed unchanged.

    Args:
        faces: Array of face images accepted by ``model.predict``; each item
            is also passed to ``PIL.Image.fromarray`` for display.

    Returns:
        The integer array obtained by thresholding ``model.predict(faces)``
        at 0.5 (same shape as the prediction output).
    """
    predictions = model.predict(faces)

    # Apply a threshold of 0.5 to convert probabilities to binary predictions
    binary_predictions = (predictions >= 0.5).astype(int)

    for index, face_pixels in enumerate(faces):
        picture = Im.fromarray(face_pixels)
        if binary_predictions[index] == 1:
            # A single-channel image cannot hold a red border (the fill is
            # reduced to a gray level), so switch to RGB before expanding.
            picture = ImageOps.expand(picture.convert('RGB'), border=20, fill='red')
        display(picture)
    return binary_predictions


def identify_images(folder_path):
    """Detect faces in a folder's images and label each with the model's prediction.

    Args:
        folder_path: Directory passed to ``load_images_from_folder``.

    Returns:
        A DataFrame with one row per detected face and the columns 'FaceId',
        'Filename', 'Face' and 'Consider'. 'Consider' holds the thresholded
        prediction returned by ``show_verified_result``. When no face is
        detected the (empty) frame is returned without calling the model.
    """
    images = load_images_from_folder(folder_path)

    face_data = detect_faces(images)

    df_final = pd.DataFrame(data={
        'FaceId': [faceId for faceId, _, _ in face_data],
        'Filename': [filename for _, filename, _ in face_data],
        'Face': [face for _, _, face in face_data],
        'Consider': [0] * len(face_data)
    })

    # Nothing to classify: predicting on an empty array is not meaningful.
    if not face_data:
        return df_final

    # Convert the 'Face' column into one NumPy array of all faces
    X = np.array(df_final['Face'].tolist())

    predicted_labels = show_verified_result(X)
    # Flatten so the column receives exactly one scalar label per face.
    df_final['Consider'] = np.asarray(predicted_labels).reshape(-1)
    return df_final

def move_images(source, destination, dataframe=None):
    """Move every file that has at least one face marked 'Consider' == 1.

    Args:
        source: Directory whose direct entries are examined.
        destination: Directory the selected files are moved into; it is
            created if it does not exist. A file of the same name already
            present there may be overwritten.
        dataframe: Frame with 'Filename' and 'Consider' columns. Defaults to
            the module-level ``df_final``.
    """
    import shutil  # local import: only this function needs it

    if dataframe is None:
        dataframe = df_final

    # Build the set of selected names once instead of scanning the frame
    # again for every file in the folder.
    selected = set(dataframe.loc[dataframe['Consider'] == 1, 'Filename'])

    os.makedirs(destination, exist_ok=True)

    for f in os.listdir(source):
        if f in selected:
            print("consider as 1")
            src_path = os.path.join(source, f)
            dst_path = os.path.join(destination, f)
            # shutil.move also works when source and destination are on
            # different drives, where os.rename fails.
            shutil.move(src_path, dst_path)



## Read the files and process them:

In [9]:
if __name__ == "__main__":
    # Folder that is scanned for photos, and folder that selected photos are
    # moved into by the final move_images(source, destination) call.
    source = "C:\\Users\\ravik\\Desktop\\My images\\test"
    destination = 'C:\\Users\\ravik\\Desktop\\My images\\my pics'
    
    # Step 1: Read the folder
    images = load_images_from_folder(source)
    
    # Step 2: Process images and collect face data
    face_data = detect_faces(images)
    # Create a DataFrame to store face data: one row per detected face.
    # 'Consider' starts at 0 for every face and is filled in after labelling.
    df = pd.DataFrame(data={
        'FaceId': [faceId for faceId, _, _ in face_data],
        'Filename': [filename for _, filename, _ in face_data],
        'Face': [face for _, _, face in face_data],
        'Consider': [0] * len(face_data)
    })
    print(df)
    
    # Step 3: Label the faces interactively. The answers are stored in the
    # module-level user_choices dict, not returned.
    show_image(face_data)        

    # Step 4: Append user choices to the existing dataframe
    df = append_user_choices(df, user_choices)
    # NOTE(review): bare expression; it has no effect when this runs as a script.
    df
    
    # Step 5: Prepare a convolution model for face recognition

    # Stack faces and labels into arrays and hold out 20% as a test split.
    # The fixed random_state makes the split reproducible.
    X = np.array(df['Face'].tolist())
    y = np.array(df['Consider'].tolist())
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # Shape of a single training sample (no batch axis).
    input_shape = X_train[0].shape

    FaceId                 Filename  \
0    face1      20161006_222700.jpg   
1    face2      20161007_203010.jpg   
2    face3      20161007_203010.jpg   
3    face4      20161007_203010.jpg   
4    face5      20161007_203010.jpg   
..     ...                      ...   
74  face75  IMG_20220905_163224.jpg   
75  face76  IMG_20220905_163224.jpg   
76  face77  IMG_20220905_163224.jpg   
77  face78  IMG_20220905_163224.jpg   
78  face79  IMG_20220905_163224.jpg   

                                                 Face  Consider  
0   [[88, 87, 83, 81, 80, 85, 85, 77, 84, 80, 78, ...         0  
1   [[254, 254, 250, 254, 252, 254, 243, 210, 157,...         0  
2   [[174, 174, 174, 175, 175, 175, 175, 175, 175,...         0  
3   [[111, 87, 80, 76, 65, 64, 54, 46, 37, 31, 33,...         0  
4   [[185, 184, 186, 186, 186, 186, 185, 185, 184,...         0  
..                                                ...       ...  
74  [[217, 217, 205, 189, 174, 158, 143, 127, 111,...         0  
75 

## Train the CNN on the labelled faces

In [4]:
# Train a CNN on the labelled faces held in `df`
# ('Face' = per-face pixel array, 'Consider' = 0/1 label).

# Convert the 'Face' column into a list of per-face pixel arrays
X = df['Face'].tolist()

# Filter out empty arrays, remembering which rows were kept so the labels
# can be filtered the same way and stay aligned with the faces.
has_pixels = np.array([len(x) > 0 for x in X], dtype=bool)
non_empty_X = [x for x, keep in zip(X, has_pixels) if keep]

# Determine the maximum sequence length (number of rows in each face)
max_length = max(len(lst) for lst in non_empty_X)

# Pad the sequences to make them uniform in length
X_padded = pad_sequences(non_empty_X, maxlen=max_length, padding='post', dtype='float32')

# Reshape the input data for CNN: add the trailing single-channel axis
X_padded = X_padded.reshape(X_padded.shape[0], X_padded.shape[1], X_padded.shape[2], 1)

# The 'Consider' column holds the labels
y = df['Consider'].values[has_pixels]

# Hold out a test split BEFORE training. Fitting on every face and then
# evaluating on a subset of those same faces reports a meaningless accuracy.
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

# Create the model; the input shape is taken from the data itself
# (rows, cols, 1) so non-square crops are described correctly.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=X_padded.shape[1:]))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 6: Train the model on the training split only
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on faces it has not been trained on
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 1.0


In [6]:
# Score the test split with the trained model (one sigmoid output per face)
predictions = model.predict(X_test)

# Outputs of at least 0.5 become label 1, everything else label 0
binary_predictions = np.greater_equal(predictions, 0.5).astype(int)



## Identify and move the images:

In [None]:
# Detect and classify the faces in `source`; the resulting frame must be
# bound to the module-level name df_final because move_images() reads it.
df_final = identify_images(source)
print(df_final)

# Move every file with at least one face predicted as 1 into `destination`.
move_images(source,destination)