In [1]:
!pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
import splitfolders
import cv2
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras import layers, models
import kagglehub
import os
import random
import shutil
import pickle
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
import seaborn as sns

In [3]:
#constraints
#classes
max_classes_num = 130

#images
img_size = (224,224)
batch_size = 32

#files settings paths, model/classes files
download_dir = kagglehub.dataset_download("thedevastator/anime-face-dataset-by-character-name")
input_dir = "/kaggle/modifiedInput/AnimeFaceRecognition"
output_dir = "/kaggle/output/AnimeFaceRecognition"
dataset_source_dir = os.path.join(download_dir, "dataset")

#variables
#classes
classes_num = 5

#model
epochs_num = 25

#files settings paths, model/classes files
model_name = "anime_face_recognition_model"
model_output_dir = "/kaggle/output/models"

Downloading from https://www.kaggle.com/api/v1/datasets/download/thedevastator/anime-face-dataset-by-character-name?dataset_version_number=1...


100%|██████████| 57.9M/57.9M [00:00<00:00, 95.7MB/s]

Extracting files...





In [8]:
if os.path.exists(dataset_source_dir):
  print(f"Dataset downloaded successfully in {dataset_source_dir}")

  os.makedirs(input_dir, exist_ok=True)
  dataset_target_dir = os.path.join(input_dir, "dataset")
  os.makedirs(dataset_target_dir, exist_ok=True)
  os.makedirs(model_output_dir, exist_ok=True)

  all_classes = os.listdir(dataset_source_dir)
  selected_classes = random.sample(all_classes, classes_num % max_classes_num)
  selected_classes.sort()

  for class_name in selected_classes:
    class_source_dir = os.path.join(dataset_source_dir, class_name)
    class_target_dir = os.path.join(dataset_target_dir, class_name)

    shutil.copytree(class_source_dir, class_target_dir)
    print(f"Copied {class_name} class")

  with open(output_dir + "/" + model_name + "_class_names.pkl", "wb") as f:
    pickle.dump(selected_classes, f)
    print(f"Class names saved to: {model_output_dir}")

  print("Dataset copied successfully")

  split_ratio = (0.8, 0.1, 0.1)

  splitfolders.ratio(
      dataset_target_dir,
      output=output_dir,
      seed=500,
      ratio=split_ratio,
      group_prefix=None
  )
else:
  print("Dataset download failed.")

Dataset downloaded successfully in /root/.cache/kagglehub/datasets/thedevastator/anime-face-dataset-by-character-name/versions/1/dataset
Copied asia_argento class
Copied formidable_(azur_lane) class


FileExistsError: [Errno 17] File exists: '/kaggle/modifiedInput/AnimeFaceRecognition/dataset/laffey_(azur_lane)'

In [None]:
train_datagen = ImageDataGenerator(
  preprocessing_function=preprocess_input,
  rotation_range=20,
  width_shift_range=0.2,
  height_shift_range=0.2,
  shear_range=0.2,
  zoom_range=0.2,
  horizontal_flip=True,
  fill_mode='nearest'
)

In [None]:
#Data augmentation for test data
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
#Data augmentation form validation data
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
train_dir = os.path.join(output_dir, "train")
test_dir = os.path.join(output_dir, "test")
valid_dir = os.path.join(output_dir, "val")

train_data = train_datagen.flow_from_directory(
  train_dir,
  target_size=img_size,
  batch_size=batch_size,
  class_mode='categorical'
)

test_data = test_datagen.flow_from_directory(
  test_dir,
  target_size=img_size,
  batch_size=batch_size,
  class_mode='categorical'
)

valid_data = valid_datagen.flow_from_directory(
  valid_dir,
  target_size=img_size,
  batch_size=batch_size,
  class_mode='categorical'
)

In [None]:
#batch of images and labels
images, labels = next(valid_data)

#select a random image from the batch
idx = np.random.randint(0, images.shape[0] - 1)

plt.imshow(images[idx])
plt.show()

In [None]:
from keras.applications.resnet50 import ResNet50
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_size[0], img_size[1], 3))

#freeze the convolutional base
base_model.trainable = False

In [None]:
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(train_data.num_classes, activation='softmax')
])

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(train_data, epochs=epochs_num, validation_data=valid_data)

In [None]:
test_loss, test_accuracy = model.evaluate(test_data)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

In [None]:
def predict_img(image, model):
  test_img = cv2.imread(image)
  test_img = cv2.resize(test_img, img_size)
  test_img = np.expand_dims(test_img, axis=0)
  result=model.predict(test_img)
  r=np.argmax(result)
  print(selected_classes[r])

In [None]:
#predict_img("/kaggle/output/AnimeFaceRecognition/test/shirogane_naoto/", model)

In [None]:
def plot_training_history(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Plot accuracy
    ax1.plot(history.history['accuracy'], label='Training Accuracy', color='blue')
    ax1.plot(history.history['val_accuracy'], label='Validation Accuracy', color='red')
    ax1.set_title('Model Accuracy Over Epochs')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Plot loss
    ax2.plot(history.history['loss'], label='Training Loss', color='blue')
    ax2.plot(history.history['val_loss'], label='Validation Loss', color='red')
    ax2.set_title('Model Loss Over Epochs')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

def generate_confusion_matrix(model, test_data, class_names):
    # Get predictions
    predictions = model.predict(test_data)
    predicted_classes = np.argmax(predictions, axis=1)

    # Get true labels
    true_classes = test_data.classes

    # Generate confusion matrix
    cm = confusion_matrix(true_classes, predicted_classes)

    # Plot confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.show()

    return cm, predicted_classes, true_classes

def generate_classification_report(true_classes, predicted_classes, class_names):
    report = classification_report(true_classes, predicted_classes,
                                 target_names=class_names, output_dict=True)

    # Convert to DataFrame for better visualization
    import pandas as pd
    df_report = pd.DataFrame(report).transpose()

    print("Classification Report:")
    print("=" * 60)
    print(df_report.round(3))

    return df_report

def visualize_predictions(model, test_data, class_names, num_images=9):
    # Get a batch of test images
    test_images, test_labels = next(test_data)
    predictions = model.predict(test_images)
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(test_labels, axis=1)

    # Plot predictions
    plt.figure(figsize=(15, 15))
    for i in range(min(num_images, len(test_images))):
        plt.subplot(3, 3, i + 1)

        # Preprocess image for display
        img = test_images[i]
        if img.max() <= 1.0:
            img = (img * 255).astype('uint8')

        plt.imshow(img)

        # Determine color based on prediction correctness
        color = 'green' if predicted_classes[i] == true_classes[i] else 'red'
        confidence = predictions[i][predicted_classes[i]] * 100

        plt.title(f'True: {class_names[true_classes[i]]}\n'
                 f'Pred: {class_names[predicted_classes[i]]}\n'
                 f'Conf: {confidence:.1f}%', color=color)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
print("Generating training history plots...")
plot_training_history(history)

print("\nGenerating confusion matrix...")
cm, predicted_classes, true_classes = generate_confusion_matrix(model, test_data, selected_classes)

print("\nGenerating classification report...")
report_df = generate_classification_report(true_classes, predicted_classes, selected_classes)

print("\nVisualizing sample predictions...")
# Reset test_data generator
test_data.reset()
visualize_predictions(model, test_data, selected_classes, num_images=9)

# Additional metrics
print("\n" + "="*60)
print("SUMMARY STATISTICS")
print("="*60)
print(f"Total test samples: {len(true_classes)}")
print(f"Number of classes: {len(selected_classes)}")
print(f"Overall accuracy: {(predicted_classes == true_classes).mean():.4f}")

# Per-class accuracy
for i, class_name in enumerate(selected_classes):
    class_mask = true_classes == i
    if class_mask.sum() > 0:
        class_accuracy = (predicted_classes[class_mask] == true_classes[class_mask]).mean()
        print(f"{class_name}: {class_accuracy:.4f} ({class_mask.sum()} samples)")

In [None]:
model.save(model_output_dir + "/" + model_name + ".h5")
model.save(model_output_dir + "/" + model_name + ".keras")

print(f"Model saved to {model_output_dir}")