In [None]:
# Images obtained from the "Labeled Faces in the Wild" dataset (LFW)
# http://vis-www.cs.umass.edu/lfw/

In [82]:
import os
from shutil import copyfile

In [102]:
def create_train_test_data(
    rootdir = '..\data\lfw\lfw_mtcnnpy_160',
    trainImageCount = 2,
    testImageCount = 1,
    totalPeople = 100,
    duplicateSingleTrainImage = True):

    totalImageCount = trainImageCount + testImageCount
    newdir = '../data/newdir_train' + str(trainImageCount) + '_test' + str(testImageCount) + '_total' + str(totalPeople)
    traindir = newdir + '/train'
    testdir = newdir + '/test'
    directories = os.listdir(rootdir)

    personGroupCount = 0

    print('Check if directory exists...')
    
    if os.path.exists(newdir):
        print("Exists... bombing out")
        return
    else:
        print('Creating directory...')
        os.makedirs(newdir)
        os.makedirs(traindir)
        os.makedirs(testdir)


    print('Iterating through photos')

    for d in directories:    
        subdir = os.path.join(rootdir, d)

        if os.path.isdir(subdir):
            files = os.listdir(subdir)

            if len(files) >= totalImageCount:
                personGroupCount += 1
                print('Found valid PersonGroup:', d)
                fileCount = 0

                for f in files:
                    fileCount += 1
                    
                    fileToCopy = os.path.join(subdir, f)

                    if fileCount == totalImageCount:
                        personGroupTestDir = os.path.join(testdir, d)
                        
                        if not os.path.exists(personGroupTestDir):
                            os.makedirs(personGroupTestDir)
                            
                        copyfile(fileToCopy, os.path.join(personGroupTestDir, f))
                            
                    elif fileCount < totalImageCount:
                        personGroupTrainDir = os.path.join(traindir, d)
                        
                        if not os.path.exists(personGroupTrainDir):
                            os.makedirs(personGroupTrainDir)
                            
                        copyfile(fileToCopy, os.path.join(personGroupTrainDir, f))
                        
                        # If there's only 1 
                        if duplicateSingleTrainImage and trainImageCount == 1:
                            file_name_split = os.path.splitext(f)
                            copyfile(fileToCopy, os.path.join(personGroupTrainDir, file_name_split[0] + "_2." + file_name_split[1]))
                            
                    else:
                        break

                if personGroupCount >= totalPeople:
                    break

    print('Total PersonGroup:', personGroupCount)
    
    return

create_train_test_data(trainImageCount=2, totalPeople = 1000)

Check if directory exists...
Creating directory...
Iterating through photos
Found valid PersonGroup: Aaron_Peirsol
Found valid PersonGroup: Abdoulaye_Wade
Found valid PersonGroup: Abdullah
Found valid PersonGroup: Abdullah_al-Attiyah
Found valid PersonGroup: Abdullah_Gul
Found valid PersonGroup: Abel_Pacheco
Found valid PersonGroup: Abid_Hamid_Mahmud_Al-Tikriti
Found valid PersonGroup: Adam_Sandler
Found valid PersonGroup: Adel_Al-Jubeir
Found valid PersonGroup: Adolfo_Aguilar_Zinser
Found valid PersonGroup: Adrien_Brody
Found valid PersonGroup: Ahmed_Chalabi
Found valid PersonGroup: Aicha_El_Ouafi
Found valid PersonGroup: Ai_Sugiyama
Found valid PersonGroup: Akbar_Hashemi_Rafsanjani
Found valid PersonGroup: Akhmed_Zakayev
Found valid PersonGroup: Alan_Greenspan
Found valid PersonGroup: Alastair_Campbell
Found valid PersonGroup: Albert_Costa
Found valid PersonGroup: Alec_Baldwin
Found valid PersonGroup: Alejandro_Avila
Found valid PersonGroup: Alejandro_Toledo
Found valid PersonGroup: 

Found valid PersonGroup: Eduardo_Duhalde
Found valid PersonGroup: Eduard_Shevardnadze
Found valid PersonGroup: Edward_Kennedy
Found valid PersonGroup: Edward_Lu
Found valid PersonGroup: Edwina_Currie
Found valid PersonGroup: Edwin_Edwards
Found valid PersonGroup: Ed_Rosenthal
Found valid PersonGroup: Ed_Smart
Found valid PersonGroup: Eileen_Coparropa
Found valid PersonGroup: Elena_Bovina
Found valid PersonGroup: Eliane_Karp
Found valid PersonGroup: Elijah_Wood
Found valid PersonGroup: Elizabeth_Dole
Found valid PersonGroup: Elizabeth_Hurley
Found valid PersonGroup: Elizabeth_Smart
Found valid PersonGroup: Elsa_Zylberstein
Found valid PersonGroup: Elton_John
Found valid PersonGroup: Emanuel_Ginobili
Found valid PersonGroup: Emmanuelle_Beart
Found valid PersonGroup: Emma_Thompson
Found valid PersonGroup: Emma_Watson
Found valid PersonGroup: Enrique_Bolanos
Found valid PersonGroup: Eric_Robert_Rudolph
Found valid PersonGroup: Erika_Harold
Found valid PersonGroup: Erik_Morales
Found valid 

Found valid PersonGroup: Kalpana_Chawla
Found valid PersonGroup: Kamal_Kharrazi
Found valid PersonGroup: Kate_Hudson
Found valid PersonGroup: Kate_Winslet
Found valid PersonGroup: Katherine_Harris
Found valid PersonGroup: Kathleen_Kennedy_Townsend
Found valid PersonGroup: Katie_Harman
Found valid PersonGroup: Keanu_Reeves
Found valid PersonGroup: Keith_Bogans
Found valid PersonGroup: Kelly_Clarkson
Found valid PersonGroup: Kelvin_Sampson
Found valid PersonGroup: Kemal_Dervis
Found valid PersonGroup: Kenneth_Bowersox
Found valid PersonGroup: Ken_Macha
Found valid PersonGroup: Kevin_Costner
Found valid PersonGroup: Kevin_Spacey
Found valid PersonGroup: Kimi_Raikkonen
Found valid PersonGroup: Kim_Clijsters
Found valid PersonGroup: Kim_Dae-jung
Found valid PersonGroup: Kim_Jong-Il
Found valid PersonGroup: Kim_Ryong-sung
Found valid PersonGroup: Kim_Yong-il
Found valid PersonGroup: King_Abdullah_II
Found valid PersonGroup: Kirk_Johnson
Found valid PersonGroup: Kjell_Magne_Bondevik
Found val

Found valid PersonGroup: Peter_Struck
Found valid PersonGroup: Pete_Carroll
Found valid PersonGroup: Pete_Sampras
Found valid PersonGroup: Petria_Thomas
Found valid PersonGroup: Phan_Van_Khai
Found valid PersonGroup: Pierce_Brosnan
Found valid PersonGroup: Pierre_Pettigrew
Found valid PersonGroup: Placido_Domingo
Found valid PersonGroup: Princess_Caroline
Found valid PersonGroup: Prince_Charles
Found valid PersonGroup: Prince_Claus
Found valid PersonGroup: Prince_Harry
Found valid PersonGroup: Prince_Naruhito
Found valid PersonGroup: Prince_Willem-Alexander
Found valid PersonGroup: Pupi_Avati
Found valid PersonGroup: Queen_Beatrix
Found valid PersonGroup: Queen_Elizabeth_II
Found valid PersonGroup: Queen_Latifah
Found valid PersonGroup: Queen_Rania
Found valid PersonGroup: Rachel_Griffiths
Found valid PersonGroup: Rachel_Hunter
Found valid PersonGroup: Rafael_Ramirez
Found valid PersonGroup: Rainer_Schuettler
Found valid PersonGroup: Ralf_Schumacher
Found valid PersonGroup: Ranil_Wickr

Found valid PersonGroup: Yao_Ming
Found valid PersonGroup: Yasar_Yakis
Found valid PersonGroup: Yashwant_Sinha
Found valid PersonGroup: Yasser_Arafat
Found valid PersonGroup: Yevgeny_Kafelnikov
Found valid PersonGroup: Yoko_Ono
Found valid PersonGroup: Yoriko_Kawaguchi
Found valid PersonGroup: Yu_Shyi-kun
Found valid PersonGroup: Zhang_Ziyi
Found valid PersonGroup: Zhu_Rongji
Found valid PersonGroup: Zico
Found valid PersonGroup: Zinedine_Zidane
Found valid PersonGroup: Zoran_Djindjic
Total PersonGroup: 901
