# Train model with part of old data

In [1]:
import tensorflow as tf
import os
from utils.helper_utils import HelperUtils
from configuration import Configuration
from utils.testing_util import TestingUtil
from pathlib import Path
import time
from pandas.core.frame import DataFrame

In [2]:
def train_model(model_name, data_dir):
    print(f'Training model {model_name} using data from {data_dir}')
    train_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        validation_split=Configuration.validation_split,
        subset="training",
        seed=123,
        image_size=(Configuration.img_height, Configuration.img_width),
        batch_size=Configuration.batch_size)

    val_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        validation_split=Configuration.validation_split,
        subset="validation",
        seed=123,
        image_size=(Configuration.img_height, Configuration.img_width),
        batch_size=Configuration.batch_size)

    model, class_names = HelperUtils.load_model(Configuration.base_model_name)
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)

    model.compile(optimizer=opt,
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    model.fit(train_ds, validation_data=val_ds, epochs=Configuration.epochs)
    HelperUtils.save_model(model_name, model, class_names)
    return model, class_names

Train models

In [3]:
trained_models = []
for data_directory in Path(Configuration.update_data_location).glob('*'):
    model_name = str(data_directory).split(os.sep)[-1]
    start_time = time.time()
    model, class_names = train_model(model_name, data_directory)
    train_time = time.time() - start_time
    trained_models.append((model_name, model, data_directory, train_time, class_names))

Training model 00percent using data from D:\Studia\magisterka\sem2\IUI\data\subsets\00percent
Found 774 files belonging to 329 classes.
Using 620 files for training.
Found 774 files belonging to 329 classes.
Using 154 files for validation.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Saving model 00percent and it's 432 classes
INFO:tensorflow:Assets written to: ./models/00percent\assets
Training model 05percent using data from D:\Studia\magisterka\sem2\IUI\data\subsets\05percent
Found 4601 files belonging to 425 classes.
Using 3681 files for training.
Found 4601 files belonging to 425 classes.
Using 920 files for validation.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Saving model 05percent and it's 432 classes
INFO:tensorflow:Assets written to: ./models/05percent\assets
Training model 100percent using data from D:\Studia\magisterka\sem2\IUI\data\subsets\

Test trained models

In [4]:
%%capture
results = []
for model in trained_models:
    test_results = TestingUtil.test_with_test_set(model[1], model[4])
    correct = len(list(filter(lambda x: x[-1] == 'Yes', test_results)))
    total = len(test_results)
    results.append((model[0], model[2], f'{correct}/{total}', '{:.2f}%'.format(correct / total * 100), model[3]))

Display test results

In [9]:
results.sort(key = lambda x: x[0])
print(DataFrame([(x[0], x[2], x[3], x[4]) for x in results], columns=['Model name', 'Test results', 'Percentage test results', 'Training time']))

   Model name Test results Percentage test results  Training time
0   00percent        4/311                   1.29%      13.492703
1   05percent       14/311                   4.50%      27.527681
2  100percent      279/311                  89.71%     389.667623
3   10percent      283/311                  91.00%      46.699300
4   20percent      275/311                  88.42%      87.470977
5   25percent      274/311                  88.10%     105.738362
6   33percent      286/311                  91.96%     137.483157
7   50percent      273/311                  87.78%     200.742078
