<a href="https://colab.research.google.com/github/santoshr/FirstRepository/blob/master/FacialRecognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
pip install -r requirements.txt

Collecting git+https://github.com/bedapudi6788/NudeNet (from -r requirements.txt (line 9))
  Cloning https://github.com/bedapudi6788/NudeNet to /tmp/pip-req-build-fomp4_mn
  Running command git clone --filter=blob:none --quiet https://github.com/bedapudi6788/NudeNet /tmp/pip-req-build-fomp4_mn
  Resolved https://github.com/bedapudi6788/NudeNet to commit 6ccc81c6c305cccfd46d92b414f8a5c0a816574d
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting deepface (from -r requirements.txt (line 4))
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting pymongo (from -r requirements.txt (line 10))
  Downloading pymongo-4.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting flask-cors>=4.0.1 (from deepface->-r requirements.txt (line 4))
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting mtcnn>=0.1.0 (from deepface->-r requirements.txt (line 4))
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5

In [8]:
from ultralytics import YOLO
import cv2
import os
import torch
from nudenet import NudeDetector

# Select the device used for YOLO face detection.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# NudeNet detector, constructed with its defaults (it is not explicitly
# placed on `device` here).
classifier = NudeDetector()

# Thresholds handed to the YOLO face detector.
CONFIDENCE_THRESHOLD = 0.8
IOU_THRESHOLD = 0.5  # IoU threshold passed to YOLO via `iou=`

# NudeNet classes that cause a frame to be flagged, and the minimum score
# a detection of one of those classes must exceed.
NUDITY_CLASSES = frozenset({
    "BUTTOCKS_EXPOSED", "FEMALE_BREAST_EXPOSED", "FEMALE_GENITALIA_EXPOSED",
    "MALE_GENITALIA_EXPOSED", "FEMALE_BREAST_COVERED", "BUTTOCKS_COVERED",
})
NUDITY_SCORE_THRESHOLD = 0.6

# Size (width, height) every saved face crop is resized to.
FACE_SIZE = (160, 160)

filename = 'Amrapali S02E04.mp4'
video_folder = f'detections/{filename}'
faces = f'{video_folder}/faces'
flagged_nudity = f'{video_folder}/flagged_nudity'
# Create the output directories if they don't exist.
for directory in (video_folder, faces, flagged_nudity):
    os.makedirs(directory, exist_ok=True)

# Load the pretrained YOLOv8 face-detection model on the selected device.
model = YOLO('yolov8l-face.pt').to(device)

# Stream per-frame results from the video file.
results = model(filename, stream=True, conf=CONFIDENCE_THRESHOLD, iou=IOU_THRESHOLD)

# `enumerate` keeps the frame index in step with the stream even for frames
# that are skipped with `continue` below.
for frame_idx, result in enumerate(results):
    # Use the frame the detector actually ran on instead of decoding the
    # video a second time; this guarantees boxes and pixels belong together.
    frame = result.orig_img

    # Check the frame for nudity. Every matching detection is saved, and the
    # frame is then excluded from face extraction.
    nudity_found = False
    for nudity_result in classifier.detect(frame):
        if (nudity_result['class'] in NUDITY_CLASSES
                and nudity_result['score'] > NUDITY_SCORE_THRESHOLD):
            print(f"Nudity detected in frame {frame_idx} with class = {nudity_result['class']} and score = {nudity_result['score']}")

            # Save the flagged frame in its own directory.
            nudity_path = f"{flagged_nudity}/frame_{frame_idx}_{nudity_result['class']}_{nudity_result['score']:.2f}.jpg"
            cv2.imwrite(nudity_path, frame)
            nudity_found = True

    if nudity_found:
        # Skip face extraction and the annotated-frame dump for this frame.
        continue

    # Bounding boxes and confidences, moved to the CPU (a no-op when they
    # already live there).
    detections = result.boxes
    boxes = detections.xyxy.cpu()
    confidences = detections.conf.cpu()

    for i, (box, conf) in enumerate(zip(boxes, confidences)):
        conf = float(conf)
        if conf < CONFIDENCE_THRESHOLD:
            continue  # Only keep high-confidence detections

        print(f"CONFIDENCE = {conf}")
        x1, y1, x2, y2 = map(int, box)
        # Negative coordinates would be interpreted as "from the end" by
        # numpy slicing, so clamp them to the frame origin.
        x1, y1 = max(x1, 0), max(y1, 0)
        cropped_face = frame[y1:y2, x1:x2]
        if cropped_face.size == 0:
            # Integer truncation can collapse a tiny box to zero area, and
            # cv2.resize raises on empty input.
            continue

        resized_face = cv2.resize(cropped_face, FACE_SIZE)
        cropped_face_path = f'{faces}/frame_{frame_idx}_face_{i}_{conf:.2f}.jpg'
        cv2.imwrite(cropped_face_path, resized_face)

    # Save the frame with the detections drawn on it. `save` renders the
    # annotations itself, so no separate `plot()` call is needed.
    frame_save_path = f'{video_folder}/frame_{frame_idx}.jpg'
    result.save(frame_save_path)

print("Processing complete. Detections saved in the 'detections' folder.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
CONFIDENCE = 0.803111732006073
video 1/1 (frame 30841/34169) /content/Amrapali S02E04.mp4: 544x960 2 faces, 44.7ms
CONFIDENCE = 0.850353479385376
CONFIDENCE = 0.8008624911308289
video 1/1 (frame 30842/34169) /content/Amrapali S02E04.mp4: 544x960 2 faces, 43.8ms
CONFIDENCE = 0.8484117388725281
CONFIDENCE = 0.8098267316818237
video 1/1 (frame 30843/34169) /content/Amrapali S02E04.mp4: 544x960 2 faces, 45.3ms
CONFIDENCE = 0.8554372191429138
CONFIDENCE = 0.8140369653701782
video 1/1 (frame 30844/34169) /content/Amrapali S02E04.mp4: 544x960 2 faces, 43.7ms
CONFIDENCE = 0.8489636182785034
CONFIDENCE = 0.8121837973594666
video 1/1 (frame 30845/34169) /content/Amrapali S02E04.mp4: 544x960 2 faces, 44.3ms
CONFIDENCE = 0.8422912955284119
CONFIDENCE = 0.8150705099105835
video 1/1 (frame 30846/34169) /content/Amrapali S02E04.mp4: 544x960 2 faces, 44.1ms
CONFIDENCE = 0.8382552266120911
CONFIDENCE = 0.8341455459594727
video 1/1 (frame 

In [None]:
from deepface import DeepFace
import os
import cv2
import numpy as np
import pickle
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity


# Name of the source video; it determines the per-video detections folder.
filename = 'Amrapali S02E04.mp4'
video_folder = f'detections/{filename}'
# Directory containing the cropped face images to embed.
face_dir = f'{video_folder}/faces'

# The embedding and image-path lists are created inside save_embeddings().


def get_stable_embedding(img, rotation_deg=15.0):
    """Return a unit-length ArcFace embedding averaged over simple augmentations.

    Three embeddings are computed with DeepFace (ArcFace model, face
    detection not enforced) and averaged: the image as given, the image
    rotated by ``rotation_deg`` degrees about its centre, and the image
    mirrored horizontally.

    Args:
        img: Image array as returned by ``cv2.imread``.
        rotation_deg: Angle in degrees of the small in-plane rotation used
            for the rotated augmentation.

    Returns:
        The averaged embedding as a numpy array, scaled to unit L2 norm
        (left unscaled if its norm is zero).
    """
    def _embed(image):
        # First (only) representation returned for the image.
        return DeepFace.represent(image, model_name="ArcFace", enforce_detection=False)[0]['embedding']

    # Rotate by a small angle around the image centre while keeping the
    # original canvas size. A quarter-turn would present a sideways face to
    # the model and pollute the average.
    height, width = img.shape[:2]
    rotation_matrix = cv2.getRotationMatrix2D((width / 2.0, height / 2.0), rotation_deg, 1.0)
    rotated_image = cv2.warpAffine(img, rotation_matrix, (width, height),
                                   borderMode=cv2.BORDER_REPLICATE)

    # Horizontal mirror.
    flipped_image = cv2.flip(img, 1)

    # Average the embeddings to create a more stable representation.
    stable_embedding = np.mean(
        [_embed(img), _embed(rotated_image), _embed(flipped_image)], axis=0
    )

    # Normalize to unit length; skip the division for a zero vector so the
    # result never contains NaNs.
    norm = np.linalg.norm(stable_embedding)
    if norm > 0:
        stable_embedding = stable_embedding / norm

    return stable_embedding

def save_embeddings(face_dir, pickle_path='face_embeddings.pkl'):
    """Embed every ``.jpg`` in ``face_dir`` and pickle the result.

    Files are processed in sorted name order so repeated runs produce the
    same ordering. Images that cannot be read are reported and skipped; a
    path is recorded only once its embedding has been computed, so the two
    returned sequences always line up index by index.

    Args:
        face_dir: Directory containing the cropped face images.
        pickle_path: Destination file for the pickled
            ``(embeddings, image_paths)`` tuple.

    Returns:
        ``(embeddings, image_paths)`` where ``embeddings`` is the
        L2-normalized embedding matrix and ``image_paths`` the matching list
        of file paths.

    Raises:
        ValueError: If no readable ``.jpg`` image is found in ``face_dir``.
    """
    embeddings = []
    image_paths = []

    # Iterate through each image in the directory.
    for image_name in sorted(os.listdir(face_dir)):
        if not image_name.endswith(".jpg"):  # Process only .jpg files
            continue

        img_path = os.path.join(face_dir, image_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping unreadable image: {img_path}")
            continue

        embeddings.append(get_stable_embedding(img))
        image_paths.append(img_path)

    if not embeddings:
        raise ValueError(f"No readable .jpg face images found in '{face_dir}'")

    # Normalize all embeddings to have unit length, ensuring consistency.
    embeddings = normalize(embeddings, norm='l2')

    print(f"Extracted {len(embeddings)} face embeddings with augmentation and normalization.")

    # Save the embeddings and image paths for use in clustering.
    with open(pickle_path, 'wb') as f:
        pickle.dump((embeddings, image_paths), f)

    print(f"Embeddings saved to {pickle_path}")
    return embeddings, image_paths

# Bind the result so the notebook does not echo the whole embedding matrix
# and path list as the cell's output.
embeddings, image_paths = save_embeddings(face_dir, pickle_path='face_embeddings.pkl')


24-11-04 13:39:53 - Directory /root/.deepface has been created
24-11-04 13:39:53 - Directory /root/.deepface/weights has been created
24-11-04 13:39:56 - arcface_weights.h5 will be downloaded to /root/.deepface/weights/arcface_weights.h5


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5
To: /root/.deepface/weights/arcface_weights.h5
100%|██████████| 137M/137M [00:03<00:00, 39.3MB/s]


In [None]:
import pickle
from sklearn.cluster import AgglomerativeClustering
import os
import shutil
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt

def find_optimal_clusters_silhouette(embeddings, max_k=10):
    """Plot and return silhouette scores for k = 2 .. ``max_k`` clusters.

    For each candidate k the embeddings are clustered with Ward-linkage
    agglomerative clustering and the silhouette score of the resulting
    labelling is recorded.

    Args:
        embeddings: Embedding matrix to cluster.
        max_k: Largest number of clusters to evaluate (inclusive).

    Returns:
        List of silhouette scores, one per k starting at k = 2.
    """
    candidate_ks = range(2, max_k + 1)  # Silhouette needs at least 2 clusters

    def _score_for(n_clusters):
        # Cluster with the requested k and score the resulting labels.
        clusterer = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
        assigned = clusterer.fit_predict(embeddings)
        return silhouette_score(embeddings, assigned)

    silhouette_scores = [_score_for(k) for k in candidate_ks]

    # Plot score against number of clusters.
    plt.figure(figsize=(8, 6))
    plt.plot(candidate_ks, silhouette_scores, 'bx-')
    plt.xlabel('Number of clusters')
    plt.ylabel('Silhouette Score')
    plt.title('Silhouette Score Method for Optimal K')
    plt.show()

    return silhouette_scores



def plot_dendrogram(embeddings, method='ward'):
    """Draw a dendrogram of the hierarchical clustering of ``embeddings``.

    Args:
        embeddings: Embedding matrix to cluster.
        method: Linkage method forwarded to ``scipy.cluster.hierarchy.linkage``.
    """
    # Linkage matrix describing the hierarchical merge order.
    linkage_matrix = linkage(embeddings, method=method)

    # Render the merge tree.
    plt.figure(figsize=(10, 7))
    dendrogram(linkage_matrix)
    plt.title('Dendrogram for Optimal Cluster Selection')
    plt.xlabel('Sample index')
    plt.ylabel('Distance')
    plt.show()



# Load the face embeddings and image paths from the pickle file.
# NOTE: pickle.load can execute arbitrary code; only load a file that was
# produced by this notebook, never one from an untrusted source.
with open('face_embeddings.pkl', 'rb') as f:
    embeddings, image_paths = pickle.load(f)

# Every embedding must have exactly one source image, otherwise the copy
# loop below would silently mislabel or drop files.
if len(embeddings) != len(image_paths):
    raise ValueError(
        f"Mismatched pickle contents: {len(embeddings)} embeddings "
        f"but {len(image_paths)} image paths"
    )

# Optional diagnostics for choosing the number of clusters:
# plot_dendrogram(embeddings)
# find_optimal_clusters_silhouette(embeddings)

# Number of clusters (adjust based on the expected number of people)
num_clusters = 10  # Change this to your preference

# Apply Agglomerative clustering. The requested cluster count is capped at
# the number of samples, since more clusters than samples cannot be formed.
agg_clustering = AgglomerativeClustering(
    n_clusters=min(num_clusters, len(embeddings)), linkage='ward'
)
labels = agg_clustering.fit_predict(embeddings)

# Create a directory for storing clustering results. It is emptied first so
# that images copied by an earlier run (with different cluster assignments)
# are not mixed into the new result.
output_dir = 'agg_clusters'
shutil.rmtree(output_dir, ignore_errors=True)
os.makedirs(output_dir, exist_ok=True)

# Organize images into cluster folders.
for image_path, label in zip(image_paths, labels):
    cluster_dir = os.path.join(output_dir, f'cluster_{label}')
    os.makedirs(cluster_dir, exist_ok=True)

    # Copy the image to the corresponding cluster folder.
    shutil.copy(image_path, cluster_dir)

print(f"Agglomerative clustering complete. Images organized in '{output_dir}'.")
