In [None]:
# Images obtained from the "Labeled Faces in the Wild" dataset (LFW)
# http://vis-www.cs.umass.edu/lfw/

In [82]:
import os
from shutil import copyfile

In [100]:
def create_train_test_data(
    rootdir = '..\data\lfw\lfw_mtcnnpy_160',
    trainImageCount = 2,
    testImageCount = 1,
    totalPeople = 100,
    duplicateSingleTrainImage = True):

    totalImageCount = trainImageCount + testImageCount
    newdir = '../data/newdir_train' + str(trainImageCount) + '_test' + str(testImageCount) + '_total' + str(totalPeople)
    traindir = newdir + '/train'
    testdir = newdir + '/test'
    directories = os.listdir(rootdir)

    personGroupCount = 0

    print('Check if directory exists...')
    
    if os.path.exists(newdir):
        print("Exists... bombing out")
        return
    else:
        print('Creating directory...')
        os.makedirs(newdir)
        os.makedirs(traindir)
        os.makedirs(testdir)


    print('Iterating through photos')

    for d in directories:    
        subdir = os.path.join(rootdir, d)

        if os.path.isdir(subdir):
            files = os.listdir(subdir)

            if len(files) >= totalImageCount:
                personGroupCount += 1
                print('Found valid PersonGroup:', d)
                fileCount = 0

                for f in files:
                    fileCount += 1
                    
                    fileToCopy = os.path.join(subdir, f)

                    if fileCount == totalImageCount:
                        personGroupTestDir = os.path.join(testdir, d)
                        
                        if not os.path.exists(personGroupTestDir):
                            os.makedirs(personGroupTestDir)
                            
                        copyfile(fileToCopy, os.path.join(personGroupTestDir, f))
                            
                    elif fileCount < totalImageCount:
                        personGroupTrainDir = os.path.join(traindir, d)
                        
                        if not os.path.exists(personGroupTrainDir):
                            os.makedirs(personGroupTrainDir)
                            
                        copyfile(fileToCopy, os.path.join(personGroupTrainDir, f))
                        
                        # If there's only 1 
                        if duplicateSingleTrainImage and trainImageCount == 1:
                            file_name_split = os.path.splitext(f)
                            copyfile(fileToCopy, os.path.join(personGroupTrainDir, file_name_split[0] + "_2." + file_name_split[1]))
                            
                    else:
                        break

                if personGroupCount >= totalPeople:
                    break

    print('Total PersonGroup:', personGroupCount)
    
    return

create_train_test_data(trainImageCount=1, totalPeople = 1000)

Check if directory exists...
Creating directory...
Iterating through photos
Found valid PersonGroup: Aaron_Peirsol
Found valid PersonGroup: Aaron_Sorkin
Found valid PersonGroup: Abdel_Nasser_Assidi
Found valid PersonGroup: Abdoulaye_Wade
Found valid PersonGroup: Abdullah
Found valid PersonGroup: Abdullah_al-Attiyah
Found valid PersonGroup: Abdullah_Gul
Found valid PersonGroup: Abdullatif_Sener
Found valid PersonGroup: Abel_Pacheco
Found valid PersonGroup: Abid_Hamid_Mahmud_Al-Tikriti
Found valid PersonGroup: Adam_Sandler
Found valid PersonGroup: Adam_Scott
Found valid PersonGroup: Adel_Al-Jubeir
Found valid PersonGroup: Adolfo_Aguilar_Zinser
Found valid PersonGroup: Adolfo_Rodriguez_Saa
Found valid PersonGroup: Adrian_McPherson
Found valid PersonGroup: Adrian_Nastase
Found valid PersonGroup: Adrien_Brody
Found valid PersonGroup: Ahmad_Masood
Found valid PersonGroup: Ahmed_Chalabi
Found valid PersonGroup: Ahmet_Necdet_Sezer
Found valid PersonGroup: Aicha_El_Ouafi
Found valid PersonGroup

Found valid PersonGroup: Carlos_Bianchi
Found valid PersonGroup: Carlos_Ghosn
Found valid PersonGroup: Carlos_Manuel_Pruneda
Found valid PersonGroup: Carlos_Menem
Found valid PersonGroup: Carlos_Mesa
Found valid PersonGroup: Carlos_Moya
Found valid PersonGroup: Carlos_Ortega
Found valid PersonGroup: Carlos_Quintanilla_Schmidt
Found valid PersonGroup: Carlos_Ruiz
Found valid PersonGroup: Carlos_Vives
Found valid PersonGroup: Carlo_Ancelotti
Found valid PersonGroup: Carly_Fiorina
Found valid PersonGroup: Carl_Reiner
Found valid PersonGroup: Carmen_Electra
Found valid PersonGroup: Carolina_Kluft
Found valid PersonGroup: Carolina_Moraes
Found valid PersonGroup: Caroline_Kennedy
Found valid PersonGroup: Carolyn_Dawn_Johnson
Found valid PersonGroup: Carol_Burnett
Found valid PersonGroup: Carol_Moseley_Braun
Found valid PersonGroup: Carrie-Anne_Moss
Found valid PersonGroup: Carson_Daly
Found valid PersonGroup: Carson_Palmer
Found valid PersonGroup: Cate_Blanchett
Found valid PersonGroup: Cath

Found valid PersonGroup: Ernie_Eves
Found valid PersonGroup: Ernie_Fletcher
Found valid PersonGroup: Ethan_Hawke
Found valid PersonGroup: Eunice_Barber
Found valid PersonGroup: Evander_Holyfield
Found valid PersonGroup: Evan_Rachel_Wood
Found valid PersonGroup: Eva_Dimas
Found valid PersonGroup: Eve_Pelletier
Found valid PersonGroup: Fabiola_Zuluaga
Found valid PersonGroup: Fabrice_Santoro
Found valid PersonGroup: Farouk_al-Sharaa
Found valid PersonGroup: Faye_Dunaway
Found valid PersonGroup: Fayssal_Mekdad
Found valid PersonGroup: Federico_Trillo
Found valid PersonGroup: Felipe_Perez_Roque
Found valid PersonGroup: Felix_Mantilla
Found valid PersonGroup: Ferenc_Madl
Found valid PersonGroup: Fernando_Gonzalez
Found valid PersonGroup: Fernando_Henrique_Cardoso
Found valid PersonGroup: Fernando_Vargas
Found valid PersonGroup: Fidel_Castro
Found valid PersonGroup: Filippo_Inzaghi
Found valid PersonGroup: Flavia_Delaroli
Found valid PersonGroup: Flor_Montulo
Found valid PersonGroup: Frances

Found valid PersonGroup: James_Schultz
Found valid PersonGroup: James_Smith
Found valid PersonGroup: James_Traficant
Found valid PersonGroup: James_Wolfensohn
Found valid PersonGroup: Jamie_Villafane
Found valid PersonGroup: Jamling_Norgay
Found valid PersonGroup: Jan-Michael_Gambill
Found valid PersonGroup: Janet_Napolitano
Found valid PersonGroup: Janet_Thorpe
Found valid PersonGroup: Jane_Fonda
Found valid PersonGroup: Jane_Kaczmarek
Found valid PersonGroup: Jane_Pauley
Found valid PersonGroup: Janica_Kostelic
Found valid PersonGroup: Jan_Ullrich
Found valid PersonGroup: Jason_Alexander
Found valid PersonGroup: Jason_Jennings
Found valid PersonGroup: Jason_Kidd
Found valid PersonGroup: Jason_Lezak
Found valid PersonGroup: Javier_Solana
Found valid PersonGroup: Javier_Weber
Found valid PersonGroup: Jayson_Williams
Found valid PersonGroup: Jay_Garner
Found valid PersonGroup: Jay_Leno
Found valid PersonGroup: Jay_Rasulo
Found valid PersonGroup: Jean-Claude_Braquet
Found valid PersonGro

Found valid PersonGroup: Kifah_Ajouri
Found valid PersonGroup: Kiki_Vandeweghe
Found valid PersonGroup: Kimi_Raikkonen
Found valid PersonGroup: Kim_Clijsters
Found valid PersonGroup: Kim_Dae-jung
Found valid PersonGroup: Kim_Jin-sun
Found valid PersonGroup: Kim_Jong-Il
Found valid PersonGroup: Kim_Ryong-sung
Found valid PersonGroup: Kim_Yong-il
Found valid PersonGroup: King_Abdullah_II
Found valid PersonGroup: Kirk_Ferentz
Found valid PersonGroup: Kirk_Johnson
Found valid PersonGroup: Kjell_Magne_Bondevik
Found valid PersonGroup: Klaus_Zwickel
Found valid PersonGroup: Kobe_Bryant
Found valid PersonGroup: Kofi_Annan
Found valid PersonGroup: Kosuke_Kitajima
Found valid PersonGroup: Kristanna_Loken
Found valid PersonGroup: Kristen_Breitweiser
Found valid PersonGroup: Kristin_Davis
Found valid PersonGroup: Kurt_Busch
Found valid PersonGroup: Kurt_Russell
Found valid PersonGroup: Kurt_Warner
Found valid PersonGroup: Kwon_Yang-sook
Found valid PersonGroup: Laila_Ali
Found valid PersonGroup: 