In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all directories under the input directory

import os
# Print the path of every directory found beneath the Kaggle input root.
for walk_entry in os.walk('/kaggle/input'):
    # os.walk yields (dirpath, dirnames, filenames); only the path is shown.
    print(walk_entry[0])

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install mtcnn

In [None]:
import os
from PIL import Image
import matplotlib.pyplot as plt
from mtcnn.mtcnn import MTCNN

In [None]:
def load_image(filename):
    """Load an image file and return its pixels as an RGB numpy array.

    Parameters
    ----------
    filename : str or path-like
        Location of the image; passed unchanged to ``Image.open``.

    Returns
    -------
    numpy.ndarray
        ``np.asarray`` of the image after conversion to ``'RGB'`` mode.
    """
    # Image.open is lazy and holds the file open; the context manager releases
    # the handle deterministically, even if the conversion raises.
    with Image.open(filename) as image:
        # convert() returns a new in-memory image, so it outlives the file.
        rgb_image = image.convert('RGB')
    return np.asarray(rgb_image)

In [None]:
def extract_face(model, pixels, required_size=(80, 80)):
    """Detect the first face in ``pixels`` and return it cropped and resized.

    Parameters
    ----------
    model : object
        Detector exposing ``detect_faces(pixels)``. Each detection is read as a
        mapping whose ``'box'`` entry unpacks to ``(x, y, width, height)``.
    pixels : numpy.ndarray
        Image array indexed as ``pixels[row, column]``.
    required_size : tuple of int, optional
        Size handed to ``Image.resize``; defaults to ``(80, 80)``.

    Returns
    -------
    numpy.ndarray or None
        The resized face crop, or ``None`` when no face is detected or the
        detected box does not overlap the image.
    """
    faces = model.detect_faces(pixels)
    # skip cases where we could not detect a face
    if len(faces) == 0:
        return None

    # Only the first detection is used.
    x1, y1, width, height = faces[0]['box']
    # Compute the far corner from the raw box first, then clamp every edge to
    # the image origin. The detector may report negative coordinates; abs()
    # would shift the crop window instead of trimming it at the border.
    x2, y2 = x1 + width, y1 + height
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = max(x2, 0), max(y2, 0)

    face_pixels = pixels[y1:y2, x1:x2]
    # A degenerate or off-image box gives an empty crop, which cannot be
    # converted to an image; treat it like "no face found".
    if face_pixels.size == 0:
        return None

    # resize pixels to the model size
    image = Image.fromarray(face_pixels)
    image = image.resize(required_size)

    return np.asarray(image)

In [None]:
def load_faces(directory, n_faces):
    """Load images from ``directory`` and collect up to ``n_faces`` faces.

    Files are visited in sorted name order. Each one is read with
    ``load_image`` and passed to ``extract_face``; images for which no face
    is returned are skipped.

    Parameters
    ----------
    directory : str
        Directory containing the image files (with or without a trailing
        path separator).
    n_faces : int
        Maximum number of faces to collect.

    Returns
    -------
    numpy.ndarray
        ``np.asarray`` of the collected face arrays.
    """
    model = MTCNN()
    faces = []

    # os.listdir returns entries in arbitrary order; sorting makes the
    # selected subset the same on every run.
    for filename in sorted(os.listdir(directory)):
        # stop once we have enough (checked before loading so that
        # n_faces <= 0 reads nothing)
        if len(faces) >= n_faces:
            break

        # os.path.join works whether or not `directory` ends with a separator
        pixels = load_image(os.path.join(directory, filename))
        face = extract_face(model, pixels)
        if face is None:
            continue
        faces.append(face)

    return np.asarray(faces)


In [None]:
def plot_faces(faces, n=10):
    """Plot loaded faces on an ``n`` x ``n`` grid of subplots.

    Parameters
    ----------
    faces : sequence
        Indexable collection of images accepted by ``plt.imshow``.
    n : int, optional
        Number of rows and columns in the grid; defaults to 10.

    At most ``n * n`` faces are drawn; when ``faces`` holds fewer, only those
    are drawn and the remaining grid cells stay empty.
    """
    # Bound the loop by what is actually available to avoid an IndexError.
    n_shown = min(len(faces), n * n)
    for i in range(n_shown):
        # subplot positions are 1-based
        plt.subplot(n, n, 1 + i)
        plt.axis('off')
        # Plotting raw pixel data
        plt.imshow(faces[i])
    plt.show()

In [None]:
directory = '/kaggle/input/celeba-dataset/img_align_celeba/img_align_celeba/'
# Upper bound on the number of faces to extract; lower it for a quick trial run.
N_FACES = 50000

# Load images and extract up to N_FACES faces
faces = load_faces(directory, N_FACES)
print('Loaded: ', faces.shape)

# Persist the result before plotting so the expensive extraction is not lost
# if the plotting step fails.
np.savez_compressed('img_celeba.npz', faces)

# Plotting faces
plot_faces(faces)