In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from keras.src.legacy.saving import legacy_h5_format

In [2]:
import os
import random
import shutil
  
# Paths
src_folder = r"D:\Automatic_Face_Labelling\Dataset\UTKFace"  # where your original UTKFace images are
labeled_folder = r"D:\Automatic_Face_Labelling\Dataset\labeled"
unlabeled_folder = r"D:\Automatic_Face_Labelling\Dataset\unlabeled"

os.makedirs(labeled_folder, exist_ok=True)
os.makedirs(unlabeled_folder, exist_ok=True)

# Fraction of the dataset to treat as "unlabeled"
UNLABELED_SPLIT = 0.5  # 50% unlabeled

# Fixed seed: without it every re-run of this cell draws a different split and
# copies it on top of the previous one, so the same image can end up in BOTH
# the labeled and the unlabeled folder.
SPLIT_SEED = 42

# Collect the images in sorted order first. os.listdir() returns entries in
# arbitrary order, so the seed alone would not make the shuffle reproducible.
# The extension list matches the one used by the image loaders in this notebook.
images = sorted(
    f for f in os.listdir(src_folder)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)
# A private Random instance keeps the global random state untouched.
random.Random(SPLIT_SEED).shuffle(images)

# Split: the first part becomes "unlabeled", the remainder stays labeled
split_index = int(len(images) * UNLABELED_SPLIT)
unlabeled_imgs = images[:split_index]
labeled_imgs = images[split_index:]

# Copy (not move) the files, so the source folder is left intact
for img in labeled_imgs:
    shutil.copy(os.path.join(src_folder, img), os.path.join(labeled_folder, img))

for img in unlabeled_imgs:
    shutil.copy(os.path.join(src_folder, img), os.path.join(unlabeled_folder, img))

print(f"Labeled: {len(labeled_imgs)}, Unlabeled: {len(unlabeled_imgs)}")


Labeled: 11854, Unlabeled: 11854


In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing import image

# ====== 1. Paths ======
# Folders read by the two loader functions defined below. They are the same
# locations the dataset-split cell copies images into.
# labeled_dir:   images whose filenames are parsed for an ethnicity label.
# unlabeled_dir: images that are loaded without reading any label.
labeled_dir = r"D:\Automatic_Face_Labelling\Dataset\labeled"
unlabeled_dir = r"D:\Automatic_Face_Labelling\Dataset\unlabeled"

# ====== 2. Load Labeled Images from UTKFace ======
def load_utkface_images(folder, target_size=(64, 64)):
    """Load UTKFace images from ``folder`` together with their ethnicity labels.

    Filenames are expected to look like ``[age]_[gender]_[race]_[date&time].jpg``.
    The third underscore-separated field is parsed as an integer and used as
    the label. Files with another extension, or with fewer than four
    underscore-separated fields, are ignored. A file whose label cannot be
    parsed or whose image cannot be loaded is reported with a "Skipping:"
    message and left out.

    Files are visited in sorted filename order. ``os.listdir`` gives no
    ordering guarantee, and a stable order is what makes a later seeded
    train/test split reproducible.

    Args:
        folder: Directory to scan (not recursive).
        target_size: Size passed through to ``image.load_img``.

    Returns:
        ``(images, labels)`` as NumPy arrays with matching first dimension.
        Pixel values are divided by 255.0.
    """
    images = []
    labels = []

    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        # UTKFace format: [age]_[gender]_[race]_[date&time].jpg
        parts = filename.split("_")
        if len(parts) < 4:  # skip malformed names
            continue

        try:
            race = int(parts[2])  # ethnicity is the 3rd value

            file_path = os.path.join(folder, filename)
            img = image.load_img(file_path, target_size=target_size)
            img_array = image.img_to_array(img) / 255.0
        except Exception as e:
            # Best effort: one bad file must not abort the whole load.
            print("Skipping:", filename, "Error:", e)
            continue

        # Append both together so images[i] always belongs to labels[i].
        images.append(img_array)
        labels.append(race)

    return np.array(images), np.array(labels)

# Load labeled data
# X_labeled receives the image arrays and y_labeled the integer parsed from the
# third field of each filename.
# NOTE(review): images are loaded at 64x64 here and resized to 128x128 in a
# later cell — confirm the upscale is intended rather than loading at 128x128.
X_labeled, y_labeled = load_utkface_images(labeled_dir, target_size=(64, 64))

# ====== 3. Load Unlabeled Images (no labels) ======
def load_images_unlabeled(folder, target_size=(64, 64)):
    """Load every image in ``folder`` without reading any label.

    Only files ending in .png, .jpg or .jpeg (case-insensitive) are
    considered. A file that cannot be loaded is reported with a "Skipping:"
    message and left out, the same best-effort policy the labeled loader
    uses, so a single corrupt file does not abort the whole load.

    Files are visited in sorted filename order so repeated runs return the
    images in the same order.

    Args:
        folder: Directory to scan (not recursive).
        target_size: Size passed through to ``image.load_img``.

    Returns:
        NumPy array of the loaded images, with pixel values divided by 255.0.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        file_path = os.path.join(folder, filename)
        try:
            img = image.load_img(file_path, target_size=target_size)
            img_array = image.img_to_array(img) / 255.0
        except Exception as e:
            print("Skipping:", filename, "Error:", e)
            continue

        images.append(img_array)
    return np.array(images)

# Read every image from the unlabeled folder; no labels are parsed for these.
X_unlabeled = load_images_unlabeled(unlabeled_dir, target_size=(64, 64))

# Quick summary of what both loaders returned.
for caption, value in (
    ("Labeled images shape:", X_labeled.shape),
    ("Labeled ethnicity labels shape:", y_labeled.shape),
    ("Unique ethnicity labels:", np.unique(y_labeled)),
    ("Unlabeled images shape:", X_unlabeled.shape),
):
    print(caption, value)


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def preprocess_ethnicity_image(img):
    """Return ``img`` resized to 128x128 with ``cv2.resize``; nothing else is changed."""
    target_size = (128, 128)
    return cv2.resize(img, target_size)

# Run every image of both sets through preprocess_ethnicity_image and
# re-pack the results as arrays (unlabeled set first, then labeled set).
X_unlabeled_ethnicity = np.array(list(map(preprocess_ethnicity_image, X_unlabeled)))
X_labeled_ethnicity = np.array(list(map(preprocess_ethnicity_image, X_labeled)))

# Visualization
def visualize_ethnicity_images(images, num=10):
    """Show the first ``num`` images in a two-row grid.

    Args:
        images: Indexable collection of images accepted by ``plt.imshow``.
        num: Maximum number of images to show. Fewer are shown when
            ``images`` holds fewer than ``num`` entries.
    """
    # Never index past the end of the collection.
    count = min(num, len(images))
    if count <= 0:
        print("No images to display.")
        return

    # Ceiling division: two rows must hold all `count` panels. The previous
    # `num // 2` gave too few columns for odd counts and zero columns for 1.
    cols = (count + 1) // 2

    plt.figure(figsize=(15, 5))
    for i in range(count):
        plt.subplot(2, cols, i + 1)
        plt.imshow(images[i])   # Already RGB + normalized
        plt.axis("off")
        plt.title(f"Image {i}")
    plt.show()

# Example usage: preview the first 10 resized images from the unlabeled set
visualize_ethnicity_images(X_unlabeled_ethnicity, num=10)


In [None]:
from tensorflow.keras.models import load_model
import numpy as np

# Load the pre-trained classifier that will label the unlabeled images.
ethnicity_model = load_model(r"D:\Automatic_Face_Labelling\models\nationality_model.h5")

# Score every resized unlabeled image with the model.
ethnicity_preds = ethnicity_model.predict(X_unlabeled_ethnicity)

# Pseudo label = position of the largest score along axis 1 of the predictions.
ethnicity_pseudo = ethnicity_preds.argmax(axis=1)

print("Pseudo labels generated.")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def display_images_with_ethnicity(images, ethnicity_preds, num=10, class_names=None):
    """Show a random sample of images, each titled with its predicted class.

    Args:
        images: Array of images; each one is multiplied by 255 and cast to
            uint8 for display, so values are expected in the 0-1 range.
        ethnicity_preds: Per-class scores with one row per image; the
            predicted class is the argmax along axis 1.
        num: Maximum number of images to show. Fewer are shown when
            ``images`` holds fewer than ``num`` entries.
        class_names: Optional sequence mapping a class index to a display
            name. Indices outside it fall back to the numeric label.
    """
    # Never ask for more panels than there are images.
    count = min(num, len(images))
    if count <= 0:
        print("No images to display.")
        return

    preds = np.argmax(ethnicity_preds, axis=1)

    # Sample without replacement so the same image is never shown twice.
    chosen = np.random.choice(len(images), size=count, replace=False)

    # Ceiling division: two rows must hold all `count` panels. The previous
    # `num // 2` gave too few columns for odd counts and zero columns for 1.
    cols = (count + 1) // 2

    plt.figure(figsize=(15, 6))
    for slot, idx in enumerate(chosen, start=1):
        # Convert to 0–255 range for display
        disp_img = (images[idx] * 255).astype("uint8")

        label = preds[idx]
        title = f"Pred: {label}"
        if class_names is not None and label < len(class_names):
            title = f"Pred: {class_names[label]}"

        plt.subplot(2, cols, slot)
        plt.imshow(disp_img)
        plt.axis("off")
        plt.title(title, fontsize=10)

    plt.tight_layout()
    plt.show()


In [None]:
# Suppose you have ethnicity_preds from your model
# class_names maps a predicted class index to the text shown in each title.
# NOTE(review): the pseudo labels are later concatenated with labels parsed
# from UTKFace filenames, so the model's class order has to agree with that
# encoding. UTKFace's published key is 0=White, 1=Black, 2=Asian, 3=Indian,
# 4=Others — confirm this list matches what nationality_model.h5 outputs.
class_names = ["African", "American", "Asian", "Indian", "Others"]  # adjust to your dataset

# Show 10 randomly chosen unlabeled images with their predicted class name.
display_images_with_ethnicity(X_unlabeled_ethnicity, ethnicity_preds, num=10, class_names=class_names)


In [None]:
# Largest score per prediction row, used as the confidence of that prediction.
confidences = ethnicity_preds.max(axis=1)
# True where the confidence is strictly above 0.8.
mask = np.greater(confidences, 0.8)

# Apply the same mask to images and pseudo labels so they stay paired.
X_confident, y_confident = X_unlabeled_ethnicity[mask], ethnicity_pseudo[mask]


In [None]:
# Combine labeled and pseudo-labeled data.
# Only the confident subset of the unlabeled images is appended, so the labels
# appended next to it must be the ones filtered by the same mask (y_confident).
# Appending the full ethnicity_pseudo array would pair X_confident with the
# pseudo labels of different images.
X_combined = np.concatenate([X_labeled_ethnicity, X_confident], axis=0)
y_combined = np.concatenate([y_labeled, y_confident], axis=0)

# Row i of X_combined must correspond to y_combined[i].
assert len(X_combined) == len(y_combined), "images and labels are out of sync"

print("Final dataset shape:", X_combined.shape, y_combined.shape)


In [None]:
import warnings

print("Images:", X_combined.shape)
print("Labels:", y_combined.shape)

# Ensure same length.
# Truncating only equalizes the two counts; it cannot restore the pairing
# between an image and its label. A mismatch therefore means the arrays were
# built inconsistently, so say so loudly instead of hiding it.
min_len = min(len(X_combined), len(y_combined))
if len(X_combined) != len(y_combined):
    warnings.warn(
        f"X_combined has {len(X_combined)} rows but y_combined has "
        f"{len(y_combined)}; truncating both to {min_len}. Labels may no "
        "longer correspond to their images - check how the arrays were combined."
    )
X_combined = X_combined[:min_len]
y_combined = y_combined[:min_len]


In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Inputs to the split:
#   X_combined : image array (labeled + confidently pseudo-labeled images)
#   y_combined : one integer class id per image
#
# Hold out 20% as the test part. Stratifying on the labels keeps the class
# balance the same in both parts; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X_combined,
    y_combined,
    stratify=y_combined,
    random_state=42,
    test_size=0.2,
)

print("Train shape:", X_train.shape, y_train.shape)
print("Test shape:", X_test.shape, y_test.shape)


In [None]:
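# No pixel normalization happens in this cell: the images were already divided by 255.0 when they were loaded. Only the labels are encoded below.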

# One-hot encode labels if using categorical crossentropy
from tensorflow.keras.utils import to_categorical

# Width of the one-hot vectors. It is derived from the largest class id, not
# from the number of distinct ids: when some id is missing from the data,
# len(np.unique(...)) is smaller than (max id + 1) and to_categorical cannot
# place the larger ids.
num_classes = int(np.max(y_combined)) + 1  # number of ethnicity classes

# Encode only while the labels are still 1-D integer ids, so re-running this
# cell does not encode arrays that are already one-hot.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes)

print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


In [None]:
from tensorflow.keras.optimizers import Adam
# Resume from the previously saved fine-tuned model when it exists. That file
# is only written by the save cell at the end of this notebook, so on a first
# run it may be missing. In that case keep training the model already in
# memory (the one that produced the pseudo labels) instead of failing.
if os.path.exists("Ethnicity_lebelling.h5"):
    ethnicity_model = load_model("Ethnicity_lebelling.h5")
else:
    print("Ethnicity_lebelling.h5 not found; fine-tuning the in-memory ethnicity_model instead.")

# categorical_crossentropy expects the one-hot labels built in the previous cell.
ethnicity_model.compile(optimizer=Adam(1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# NOTE(review): the held-out split is used as validation data here and it also
# contains pseudo-labeled samples, so the reported validation accuracy is not
# an independent estimate — confirm whether a separate clean test set exists.
history = ethnicity_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=64
)


In [None]:
  # Save the fine-tuned model under the same filename the training cell loads,
  # so a later run can resume from these weights.
  # NOTE(review): this line carries two leading spaces; presumably tolerated
  # inside a notebook cell, but it would be an IndentationError in a plain
  # .py script — confirm and dedent.
  ethnicity_model.save("Ethnicity_lebelling.h5")