In [1]:
# Install the split-folders package using the pip module of the interpreter that
# runs this kernel (sys.executable), rather than whatever `pip` is first on PATH.
# NOTE(review): no version is pinned here; consider pinning one for reproducibility.
import sys
!{sys.executable} -m pip install split-folders



In [2]:
import splitfolders

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

[split-folders repository on GitHub](https://github.com/jfilter/split-folders/tree/master/splitfolders)

### Before processing:

### Folder structure

![Folder structure](pics/initial_data_folder_structure.png)

### Every folder contains 100 "*.jpg" files.

```
initial_data
    ├───Aemilian
    │       Aemilian_19441944.100.27130.obv.width175.jpg
    │       Aemilian_19441944.100.27130.rev.width175.jpg
    │       Aemilian_19441944.100.27131.obv.width175.jpg
    │       .
    │       .
    │       .
    │       Aemilian_ID70925vs_thumb.jpg
    │
    ├───Balbinus
    │       Balbinus_19221922.34.1.obv.width175.jpg
    │       Balbinus_19221922.34.1.rev.width175.jpg
    │       Balbinus_19221922.38.79.obv.width175.jpg
    │       .
    │       .
    │       .
    │       Balbinus_ID69442vs_thumb.jpg
    │
    ├───Claudius
    │       Claudius_18219473rs_thumb.jpg
    │       Claudius_18219473vs_thumb.jpg
    │       Claudius_18219474rs_thumb.jpg
    │       .
    │       .
    │       .
    │       Claudius_19411941.131.697.rev.width175.jpg
    │
    ├───Domitian
    │       Domitian_18201656vs_thumb.jpg
    │       Domitian_18202808rs_thumb.jpg
    │       Domitian_18202808vs_thumb.jpg
    │       .
    │       .
    │       .
    │       Domitian_18231804rs_thumb.jpg
    │
    └───Elagabalus
            Elagabalus_19051905.57.565.obv.width175.jpg
            Elagabalus_19051905.57.565.rev.width175.jpg
            Elagabalus_19051905.57.566.obv.width175.jpg
            .
            .
            .
            Elagabalus_19351935.117.623.rev.width175.jpg
```

### Processing:

Splitting by ratio example: **splitfolders.ratio("input_folder", output="output", seed=1337, ratio=(.8, .1, .1), group_prefix=None)**

Splitting by fixed amounts: **splitfolders.fixed("input_folder", output="output", seed=1337, fixed=(100, 100), oversample=False, group_prefix=None)**

In [3]:
# Split "initial_data" into train / val / test subsets (80% / 10% / 10%) and
# write them to "splitted_data"; every option not listed keeps its default value.
splitfolders.ratio(
    "initial_data",
    output="splitted_data",
    seed=1337,
    ratio=(.8, .1, .1),
)

Copying files: 500 files [00:00, 1205.61 files/s]


### After processing:

### Folder structure of the training data
![Train data](pics/splitted_train_data.png)
### Folder structure of the validation data
![Val data](pics/splitted_val_data.png)
### Folder structure of the test data
![Test data](pics/splitted_test_data.png)


```
.
├───initial_data
│   ├───Aemilian
│   │       Aemilian_19441944.100.27130.obv.width175.jpg
│   │       Aemilian_19441944.100.27130.rev.width175.jpg
│   │       Aemilian_19441944.100.27131.obv.width175.jpg
│   │       .
│   │       .
│   │       .
│   │       Aemilian_ID70925vs_thumb.jpg
│   │
│   ├───Balbinus
│   │       Balbinus_19221922.34.1.obv.width175.jpg
│   │       Balbinus_19221922.34.1.rev.width175.jpg
│   │       Balbinus_19221922.38.79.obv.width175.jpg
│   │       .
│   │       .
│   │       .
│   │       Balbinus_ID69442vs_thumb.jpg
│   │
│   ├───Claudius
│   │       Claudius_18219473rs_thumb.jpg
│   │       Claudius_18219473vs_thumb.jpg
│   │       Claudius_18219474rs_thumb.jpg
│   │       .
│   │       .
│   │       .
│   │       Claudius_19411941.131.697.rev.width175.jpg
│   │
│   ├───Domitian
│   │       Domitian_18201656vs_thumb.jpg
│   │       Domitian_18202808rs_thumb.jpg
│   │       Domitian_18202808vs_thumb.jpg
│   │       .
│   │       .
│   │       .
│   │       Domitian_18231804rs_thumb.jpg
│   │
│   └───Elagabalus
│           Elagabalus_19051905.57.565.obv.width175.jpg
│           Elagabalus_19051905.57.565.rev.width175.jpg
│           Elagabalus_19051905.57.566.obv.width175.jpg
│           .
│           .
│           .
│           Elagabalus_19351935.117.623.rev.width175.jpg
│
└───splitted_data
    ├───test
    │   ├───Aemilian
    │   │       Aemilian_19441944.100.27140.rev.width175.jpg
    │   │       Aemilian_19841984.37.10.obv.width175.jpg
    │   │       Aemilian_19941994.124.5.obv.width175.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Aemilian_ID70916vs_thumb.jpg
    │   │
    │   ├───Balbinus
    │   │       Balbinus_19351935.117.112.rev.width175.jpg
    │   │       Balbinus_19441944.100.11394.obv.width175.jpg
    │   │       Balbinus_19441944.100.12995.obv.width175.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Balbinus_ID4980vs_thumb.jpg
    │   │
    │   ├───Claudius
    │   │       Claudius_18219483vs_thumb.jpg
    │   │       Claudius_18220048rs_thumb.jpg
    │   │       Claudius_18220050rs_thumb.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Claudius_19291929.66.11.rev.width175.jpg
    │   │
    │   ├───Domitian
    │   │       Domitian_18204487rs_thumb.jpg
    │   │       Domitian_18209851vs_thumb.jpg
    │   │       Domitian_18211393vs_thumb.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Domitian_18231801rs_thumb.jpg
    │   │
    │   └───Elagabalus
    │           Elagabalus_19051905.57.576.rev.width175.jpg
    │           Elagabalus_19231923.151.55.obv.width175.jpg
    │           Elagabalus_19231923.151.57.obv.width175.jpg
    │           .
    │           .
    │           .
    │           Elagabalus_19351935.117.620.rev.width175.jpg
    │
    ├───train
    │   ├───Aemilian
    │   │       Aemilian_19441944.100.27130.obv.width175.jpg
    │   │       Aemilian_19441944.100.27130.rev.width175.jpg
    │   │       Aemilian_19441944.100.27131.obv.width175.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Aemilian_ID70925vs_thumb.jpg
    │   │
    │   ├───Balbinus
    │   │       Balbinus_19221922.34.1.obv.width175.jpg
    │   │       Balbinus_19221922.34.1.rev.width175.jpg
    │   │       Balbinus_19221922.38.79.obv.width175.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Balbinus_ID69442vs_thumb.jpg
    │   │
    │   ├───Claudius
    │   │       Claudius_18219473rs_thumb.jpg
    │   │       Claudius_18219473vs_thumb.jpg
    │   │       Claudius_18219474rs_thumb.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Claudius_19411941.131.697.rev.width175.jpg
    │   │
    │   ├───Domitian
    │   │       Domitian_18201656vs_thumb.jpg
    │   │       Domitian_18202808rs_thumb.jpg
    │   │       Domitian_18202808vs_thumb.jpg
    │   │       .
    │   │       .
    │   │       .
    │   │       Domitian_18231804rs_thumb.jpg
    │   │
    │   └───Elagabalus
    │           Elagabalus_19051905.57.565.obv.width175.jpg
    │           Elagabalus_19051905.57.565.rev.width175.jpg
    │           Elagabalus_19051905.57.566.obv.width175.jpg
    │           .
    │           .
    │           .
    │           Elagabalus_19351935.117.623.rev.width175.jpg
    │
    └───val
        ├───Aemilian
        │       Aemilian_19441944.100.27136.rev.width175.jpg
        │       Aemilian_19441944.100.27144.obv.width175.jpg
        │       Aemilian_19751975.226.45.rev.width175.jpg
        │       .
        │       .
        │       .
        │       Aemilian_ID70919rs_thumb.jpg
        │
        ├───Balbinus
        │       Balbinus_19351935.117.105.rev.width175.jpg
        │       Balbinus_19351935.117.133.obv.width175.jpg
        │       Balbinus_19441944.100.11387.rev.width175.jpg
        │       .
        │       .
        │       .
        │       Balbinus_ID69410rs_thumb.jpg
        │
        ├───Claudius
        │       Claudius_18219479vs_thumb.jpg
        │       Claudius_18219487rs_thumb.jpg
        │       Claudius_18220043vs_thumb.jpg
        │       .
        │       .
        │       .
        │       Claudius_19351935.117.361.obv.width175.jpg
        │
        ├───Domitian
        │       Domitian_18203229rs_thumb.jpg
        │       Domitian_18204662vs_thumb.jpg
        │       Domitian_18207452rs_thumb.jpg
        │       .
        │       .
        │       .
        │       Domitian_18231802vs_thumb.jpg
        │
        └───Elagabalus
                Elagabalus_19051905.57.571.rev.width175.jpg
                Elagabalus_19051905.57.579.obv.width175.jpg
                Elagabalus_19161916.192.302.rev.width175.jpg
                .
                .
                .
                Elagabalus_19351935.117.622.obv.width175.jpg
```

### Now the data is ready for further processing. For example, with the `ImageDataGenerator` class the pixel values of the images can be rescaled automatically, augmented versions of the images can be generated on the fly, etc.

In [4]:
# Example of using the data with ImageDataGenerator

# Training generator: rescales pixel values by 1/255 and applies random
# augmentation (shear, zoom, horizontal flip) to every batch it yields.
datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# Evaluation generator: rescaling only. Validation and test images must not be
# randomly augmented, otherwise the reported metrics are computed on distorted
# inputs and change from run to run.
eval_datagen = ImageDataGenerator(rescale=1./255)

# flow_from_directory prints "Found N images belonging to K classes.", so each
# print below acts as a caption for the line that follows it in the output.
print('Total number of training images:')
train_generator = datagen.flow_from_directory(
        'splitted_data/train',
        target_size=(150, 150),
        batch_size=32,
        class_mode='categorical')

print('Total number of validation images:')
validation_generator = eval_datagen.flow_from_directory(
        'splitted_data/val',
        target_size=(150, 150),
        batch_size=32,
        class_mode='categorical')

print('Total number of tesing images:')
test_generator = eval_datagen.flow_from_directory(
        'splitted_data/test',
        target_size=(150, 150),
        batch_size=32,
        class_mode='categorical',
        # Keep the files in directory order so that predictions made from this
        # generator line up with test_generator.classes / .filenames.
        shuffle=False)

Total number of training images:
Found 400 images belonging to 5 classes.
Total number of validation images:
Found 50 images belonging to 5 classes.
Total number of tesing images:
Found 50 images belonging to 5 classes.


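Example of how to use the generators in model training (note: `fit_generator` is deprecated since TensorFlow 2.1; in newer versions pass the generators to `model.fit` with the same arguments):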

``` python
model.fit_generator(train_generator, 
                    epochs=..., 
                    shuffle=True, 
                    validation_data=validation_generator,
                    callbacks=...
                    )
```