------- Load packages and libraries

In [1]:
import os # Interact with the file system
os.environ['TF_CPP_MIN_LOG_LEVEL']='3'
import time # Time access and conversions
import json # Transmit structured data interchange format
import numpy as np # Work with arrays 
import pandas as pd # Data manipulation : pd.Dataframe
import tensorflow as tf # To create deep learning models
import tensorflow_datasets as tfds  
from matplotlib import pyplot as plt # Added for conf_matrix
import seaborn as sns # Added for conf_matrix
from ImageHelper import blobFromImage, imageFromBlob # Pre-processing images for pre-trained keras model
from backgroundGenerator import BackgroundGenerator
from sklearn.metrics import classification_report, confusion_matrix 

----------- configure access to GPU

In [2]:
# Restrict TensorFlow to the first GPU and configure its memory handling.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))

if not physical_devices:
    # Indexing physical_devices[0] would raise IndexError on a CPU-only host.
    print("No GPU detected; skipping GPU configuration.", flush=True)
else:
    try:
        # Device visibility can only be changed before the TF runtime is
        # initialised, so it lives inside the try block: re-running this cell
        # then reports the problem instead of aborting.
        tf.config.set_visible_devices(physical_devices[0], 'GPU')
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
            # NOTE(review): TensorFlow may refuse to combine memory growth with
            # a virtual-device memory limit on the same GPU -- confirm which of
            # the two settings is actually wanted. memory_limit is in MB.
            tf.config.experimental.set_virtual_device_configuration(gpu,[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=25000)])
    except (RuntimeError, ValueError) as err:
        # RuntimeError: runtime already initialised; ValueError: invalid device
        # or conflicting configuration. Show the reason instead of hiding it.
        print("Invalid device or cannot modify virtual devices once initialized.", flush=True)
        print(f"Reason: {err}", flush=True)

Num GPUs Available:  2


In [3]:
# Distribution strategy that places everything on "/gpu:0". The model is later
# built inside strategy.scope(), and batchsize is derived from
# strategy.num_replicas_in_sync.
strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")

Parameters to connect to the database

In [4]:
# Connection settings handed to every BackgroundGenerator. Each value can be
# overridden through the environment variable named in the os.getenv call.
#
# SECURITY NOTE(review): the fallback values below embed a real host, the root
# user and a password in the notebook. Prefer exporting DB_URL / DB_PORT /
# DB_USER / DB_PW and removing these defaults before sharing the file.
mysqlSettings = {
    "db_url": os.getenv('DB_URL', '10.182.129.115'),
    # Environment variables are always strings; convert so the port has the
    # same type (int) whether it comes from the environment or the default.
    "db_port": int(os.getenv('DB_PORT', '3309')),
    "db_user": os.getenv('DB_USER', 'root'),
    "db_pw": os.getenv('DB_PW', '31415swisens')
}

Global parameters (Make sure to choose a unique name for each run!)

In [5]:
# Name of this training run; it is embedded in every output path below, so a
# reused name overwrites the checkpoints and figures of an earlier run.
modelName = "14Pol_Rain_noSpores_second_10Mparam_test"

# TensorBoard root folder (the run name is appended where the callback is built).
tensorboardLogFolder = "/scratch/nina/logs"
# Per-run output folders; both end with a trailing slash because file names
# are appended to them by plain string concatenation later on.
checkpointFolder = f"/scratch/nina/checkpoints/{modelName}/"
confMatFolder = f"/scratch/nina/confusion_matrix/{modelName}/"

In [6]:
# --- Chunk handling ----------------------------------------------------------
# Number of events requested per dataset for one chunk. TF trains on a chunk
# for `epochsPerDatasetChunk` epochs before moving on to the next one. Choose
# the size according to your hardware (RAM, GPU, GPU memory).
chunksize = 256
# Number of chunks each generator keeps cached in the background.
chunkPrefetch = 2
epochsPerDatasetChunk = 5

# --- Batch size --------------------------------------------------------------
BATCH_SIZE_PER_REPLICA = 64
# Global batch size: per-replica size times the number of replicas in sync.
batchsize = strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA

Set this to True to also include the fluorescence (FL) measurements in the training

In [7]:
# Number of fluorescence configurations; each FL model input has
# n_fl_configs * 6 values.
n_fl_configs = 26
# When True, the three FL columns (avg, corrPha, corrMag) are fed to the model
# in addition to the two images.
with_fluorescence = False

In [8]:
    # Class names used as row/column labels of the confusion matrix.
    # The position of a name in this list is its class index, so the order
    # must match the integer labels in labelList, and the number of entries
    # must equal NUM_CLASSES.
    # NOTE(review): this cell is indented as a whole; it presumably relies on
    # the notebook dedenting cell input and would not run as a plain script.
    target_names = ['alnus',
                    'betula',
                    'carpinus',
                    'corylus',
                    'cupressus',
                    'fagus',
                    'fraxinus',
                    'pinaceae',
                    'platanus',
                    'poaceae',
                    'populus',
                    'quercus',
                    'taxus',                    
                    'ulmus',
                    # Spore classes, currently disabled:
                    #'alternaria',
                    #'fusarium',
                    'rain'                
                   ]    


Configure the training set

In [9]:
# Dataset IDs used for training; one BackgroundGenerator is created per entry.
# Entry i is paired with labelList[i], so both lists must stay the same length
# and in the same order.
# Trailing comments follow the pattern: <class index><class> '<dataset name>'
# <event count>. The rain comments still say 16 although labelList assigns
# class 14 to those datasets (presumably a leftover from when the two spore
# classes were enabled).
datasetList = [
    #POLLEN DATASETS
    '11ea8493-7107-8db4-9bf7-ae7b87f820b4',#0alnus 'alnus_20200220_p5_1_benoit' 3990
    '11ea847a-f995-790c-830f-ae7b87f820b4',#0alnus 'alnus_20200218_p2_1_benoit' 4966
    '11ea8475-957e-347c-985a-ae7b87f820b4',#0alnus 'alnus_20200214_p4_1_benoit' 3474 TOTAL ALNUS=12'430
    '11ea8897-f50e-66a2-9876-ae7b87f820b4',#1betula 'betula_20200406_p2_1_benoit' 5770
    '11ea8632-18ed-7210-985a-ae7b87f820b4',#1betula 'betula_20200407_p4_2_benoit' 6533
    '11ea8632-1eb2-2452-bc84-ae7b87f820b4',#1betula 'betula_20200406_p4_1_benoit' 2173 TOTAL BETULA=14'476
    '11ea8f77-4ee3-aef4-b330-ae7b87f820b4',#2carpinus 'carpinus_20200319_p5_2_fiona' 643
    '11ea8f6d-3e75-9fe6-b46e-ae7b87f820b4',#2carpinus 'carpinus_20200319_p2_2_fiona' 664
    '11ea8f6d-1562-211a-8192-ae7b87f820b4',#2carpinus 'carpinus_20200319_p2_3_fiona' 545
    '11ea8f6c-b78c-d076-a542-ae7b87f820b4',#2carpinus 'carpinus_20200319_p4_2_fiona' 395 TOTAL CARPINUS=2'247
    '11ea8498-b729-d4e6-bc84-ae7b87f820b4',#3corylus 'corylus_20200225_p2_2_benoit' 3736
    '11ea8498-b083-cb92-a1a5-ae7b87f820b4',#3corylus 'corylus_20200225_p2_1_benoit' 500
    '11ea8498-afa9-cec4-a877-ae7b87f820b4',#3corylus 'corylus_20200225_p5_1_benoit' 3578 TOTAL CORYLUS=7'814
    '11ea8fa9-6c12-723a-b3dd-ae7b87f820b4',#4cupressus 'cupressus_20200317_p5_1_fiona' 421
    '11ea8fa8-fafa-aeb4-ac46-ae7b87f820b4',#4cupressus 'cupressus_20200317_p2_1_fiona' 2340
    '11ea8fa8-d163-dce2-b1cb-ae7b87f820b4',#4cupressus 'cupressus_20200317_p4_1_fiona' 583 TOTAL CUPRESSUS=3'344
    '11ea8636-313b-a6e4-a69e-ae7b87f820b4',#5fagus 'fagus_20200413_p4_1_benoit' 2759
    '11ea8635-ef91-6ab2-a877-ae7b87f820b4',#5fagus 'fagus_20200407_p2_1_benoit' 3410
    '11ea8635-eb18-6ee0-9876-ae7b87f820b4',#5fagus 'fagus_20200413_p5_1_benoit' 4143 TOTAL FAGUS=10'312
    '11ea857e-7bc5-60a0-842e-ae7b87f820b4',#6fraxinus 'fraxinus_20200402_p5_2_benoit' 5703
    '11ea857b-3d52-9034-830f-ae7b87f820b4',#6fraxinus 'fraxinus_20200330_p4_1_benoit' 2621
    '11ea857b-150e-c372-bc84-ae7b87f820b4',#6fraxinus 'fraxinus_20200330_p2_1_benoit' 1712 TOTAL FRAXINUS=10'036
    '11ea8af3-c533-f39e-8b25-ae7b87f820b4',#7pinaceae 'picea_20200423_p2_1_fiona' 1826
    '11ea8af1-91fc-9a46-8b25-ae7b87f820b4',#7pinaceae 'picea_20200423_p4_1_fiona' 2375
    '11ea8af0-83dc-6d66-b06c-ae7b87f820b4',#7pinaceae 'picea_20200423_p5_1_fiona' 1969
    '11ea863d-acf6-0ade-985a-ae7b87f820b4',#7pinaceae 'pinus_20200421_p5_1_benoit' 3403
    '11ea863c-2449-be52-8814-ae7b87f820b4',#7pinaceae 'pinus_20200421_p2_1_benoit' 8582 TOTAL PINACEAE=18'155
    '11ea8b83-25c9-8194-90d1-ae7b87f820b4',#8platanus 'platanus_20200417_p4_1_benoit' 5603
    '11ea8881-3721-9aa8-a907-ae7b87f820b4',#8platanus 'platanus_20200417_p2_1_benoit' 5544 TOTAL PLATANUS=11'147
    '11ea990f-ee01-8334-b3dd-ae7b87f820b4',#9poaceae 'gram_20200518_p2_1_benoit' 1229
    '11ea990c-b2bc-fe96-b46e-ae7b87f820b4',#9poaceae 'gram_20200518_p5_1_benoit' 1508  TOTAL POACEAE initial=4'909
    '11eb5fd9-961a-313e-ac56-ae7b87f820b4',#9poaceae 'POCclean_cynosurus_20200520_p4_1_fiona' 5895
    '11eb5fd9-dd36-0a20-88f3-ae7b87f820b4',#9poaceae 'POCclean_cynosurus_20200520_p2_1_' 6248
    '11ebe542-660e-0206-80be-ae7b87f820b4',#9poaceae 'poaceae_dactylis_fresh_p19_2021_tri_Nina' 3110
    '11eb5fc3-03fa-6da2-8b42-ae7b87f820b4',#9poaceae 'POCclean_dactylis_20200518_p4_1' 1127
    '11ebe540-187e-9a0c-b0e2-ae7b87f820b4',#9poaceae 'poaceae_trisetum_fresh_p19_2021_tri_Nina' 1377
    '11ea8893-edfb-ca84-a877-ae7b87f820b4',#10populus 'populus_20200327_p5_1_benoit' 657
    '11ea84a0-e89b-43b8-a69e-ae7b87f820b4',#10populus 'populus_20200327_p2_benoit' 508
    '11ea84a0-a2f0-ab8c-a877-ae7b87f820b4',#10populus 'populus_20200327_p4_benoit' 2913 TOTAL POPULUS=4'078
    '11ea863e-1fea-0f7c-a1a5-ae7b87f820b4',#11quercus 'quercus_20200421_p4_1_benoit' 3824
    '11ea863e-1b86-8226-a1a5-ae7b87f820b4',#11quercus 'quercus_20200421_p2_1_benoit' 4768
    '11ea863d-f388-a038-a1a5-ae7b87f820b4',#11quercus 'quercus_20200421_p5_1_benoit' 2519 TOTAL QUERCUS=11'111
    '11ea8477-cede-e7dc-897d-ae7b87f820b4',#12taxus 'taxus_20200218_p4_1_benoit' 4872
    '11ea8477-b584-b690-830f-ae7b87f820b4',#12taxus 'taxus_20200218_p2_1_benoit' 5593
    '11ea8494-33a5-2e4e-bc84-ae7b87f820b4',#12taxus 'taxus_20200220_p5_1_benoit' 3411 TOTAL TAXUS=13'876
    '11ea849c-df8f-d95e-897d-ae7b87f820b4',#13ulmus 'ulmus_20200311_p4_2_benoit' 3289
    '11ea849c-db7b-2170-8b0f-ae7b87f820b4',#13ulmus 'ulmus_20200311_p2_2_benoit' 2392
    '11ea849a-0e25-4018-8814-ae7b87f820b4',#13ulmus 'ulmus_20200304_p5_1_benoit' 4844 TOTAL ULMUS=10'525
    # SPORES DATASETS
    #'11ebf9db-f2e9-98cc-bc67-ae7b87f820b4',#14Alternaria solani 'alternaria_solani_sophie_clean' event counts 2767
    #'11ec01b9-d571-ea8e-b7e1-ae7b87f820b4',#15Fusarium graminearum 'fusarium_graminearum_p1' event count 25054
    # RAIN DATASETS
    '11ebe542-f782-c172-bf10-ae7b87f820b4',#16Rain 'P5_Payerne_Rain_28_04' event counts 389
    '11ebeabd-e224-d5c4-8b63-ae7b87f820b4',#16Rain 'P5_Payerne_Rain_30_04_AM' event counts 2786
    '11ebedec-0da5-47ac-8066-ae7b87f820b4',#16Rain 'P5_Payerne_Rain_30_04_PM' event counts 3179
    '11ebee15-1fea-4c68-9cd6-ae7b87f820b4'#16Rain 'P16_Locarno_Rain_29_04' event counts 7691 TOTAL PLUIE = 14045
]

In [10]:
# Class label for every entry of datasetList, in the same order (52 entries).
# Written as "label repeated once per dataset of that class"; the counts must
# be kept in step with datasetList whenever a dataset is added or removed.
labelList = (
    [0] * 3       # alnus
    + [1] * 3     # betula
    + [2] * 4     # carpinus
    + [3] * 3     # corylus
    + [4] * 3     # cupressus
    + [5] * 3     # fagus
    + [6] * 3     # fraxinus
    + [7] * 5     # pinaceae (picea + pinus)
    + [8] * 2     # platanus
    + [9] * 7     # poaceae
    + [10] * 3    # populus
    + [11] * 3    # quercus
    + [12] * 3    # taxus
    + [13] * 3    # ulmus
    # spore datasets are disabled, so no labels are emitted for them here
    + [14] * 4    # rain ("pluie")
)

In [11]:
# Number of output classes: size of the softmax layer, depth of the one-hot
# labels and side length of the confusion matrix. The confusion matrix is
# labelled with target_names, so this must equal len(target_names).
NUM_CLASSES = 15 

Configure testset:<br>
Valid values:<br>
fromFirstChunk:   First chunk of data is used as test set<br>
fromDataset:      Get test data from a dataset defined below

In [12]:
# "fromFirstChunk": the first chunk of every training dataset is reserved and
#                   used as the test set.
# "fromDataset":    the test set is read from testsetList / testsetLabels.
testsetMode = "fromFirstChunk"

Define the datasets for testsetMode="fromDataset"

In [13]:
# Dataset IDs used as test data when testsetMode == "fromDataset"
# (none configured at the moment).
testsetList = []

# Labels corresponding to the testsetList, entry by entry. The default is one
# label per class: 0-13 for the pollen taxa and 14 for rain (the two spore
# classes are disabled).
testsetLabels = list(range(15))


In [14]:
def processFlInput(input):
    """Build the 1-D convolutional branch applied to one fluorescence input.

    Two rounds of (Conv1D(32, 3), Conv1D(32, 3), MaxPool1D(2)) followed by a
    Flatten layer. No activation is passed to the convolutions.

    Args:
        input: Keras tensor to run the branch on.

    Returns:
        The flattened output tensor of the branch.
    """
    keras_layers = tf.keras.layers
    # Layers are instantiated in the order they are applied.
    branch = (
        keras_layers.Conv1D(32, 3),
        keras_layers.Conv1D(32, 3),
        keras_layers.MaxPool1D(2),
        keras_layers.Conv1D(32, 3),
        keras_layers.Conv1D(32, 3),
        keras_layers.MaxPool1D(2),
        keras_layers.Flatten(),
    )
    tensor = input
    for layer in branch:
        tensor = layer(tensor)
    return tensor

In [15]:
def get_compiled_model(nClasses, with_fluorescence=False, n_fl_configs=1,strategy=strategy):
    """Build and compile the two-image (optionally + fluorescence) classifier.

    Each of the two 200x200x1 image inputs goes through its own, identically
    structured convolutional branch. The flattened branch outputs (plus the
    three fluorescence branches when enabled) are concatenated and passed
    through Dense(64) -> Dropout(0.2) -> Dense(nClasses) -> Softmax.

    Args:
        nClasses: Number of units of the final dense/softmax layer.
        with_fluorescence: If True, add three inputs of shape
            (n_fl_configs*6, 1), each processed by processFlInput.
        n_fl_configs: Number of fluorescence configurations.
        strategy: tf.distribute strategy whose scope is used to create and
            compile the model. The default is bound when this function is
            defined.

    Returns:
        The compiled tf.keras.Model (Adam with learning rate 5e-5,
        categorical cross-entropy loss, accuracy metric).
    """
    L = tf.keras.layers

    # (filters, kernel edge length, number of stacked convolutions) per stage;
    # every stage ends with 2x2 max-pooling and Dropout(0.2).
    stages = ((64, 5, 2), (64, 3, 2), (128, 3, 3), (256, 3, 3))

    def imageBranch(tensor):
        # Convolutional feature extractor applied to one image input.
        for nFilters, edge, nConvs in stages:
            for _ in range(nConvs):
                tensor = L.Conv2D(nFilters, (edge, edge), padding='same', activation='relu')(tensor)
            tensor = L.MaxPool2D((2, 2), strides=(2, 2), padding='same')(tensor)
            tensor = L.Dropout(0.2)(tensor)
        return tensor

    with strategy.scope():
        # Inputs are created first: both images, then (optionally) the three
        # fluorescence inputs in the order avg, pha, corrMag.
        imageInputs = [L.Input((200, 200, 1)) for _ in range(2)]
        flInputs = []
        if with_fluorescence:
            flInputs = [L.Input((n_fl_configs*6, 1)) for _ in range(3)]

        # Image processing: the first branch is built completely before the
        # second one, then both are flattened.
        branchOutputs = [imageBranch(image) for image in imageInputs]
        features = [L.Flatten()(branchOut) for branchOut in branchOutputs]

        # FL processing
        features.extend(processFlInput(flInput) for flInput in flInputs)

        merged = L.Concatenate()(features)

        # Classification head
        hidden = L.Dense(64)(merged)
        hidden = L.Dropout(0.2)(hidden)
        scores = L.Dense(nClasses)(hidden)
        output = L.Softmax()(scores)

        # With fluorescence enabled, flInputs is non-empty and all five
        # inputs become part of the model.
        model = tf.keras.Model(inputs=imageInputs + flInputs, outputs=output)

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.00005),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=['accuracy'],
        )

    return model

######################################################## DO NOT CHANGE CODE AFTER THIS LINE

In [16]:
def getPrepareFunc(with_fluorescence, label):
    """Create the per-chunk preprocessing function for one dataset.

    Args:
        with_fluorescence: If True, the "avg", "corrPha" and "corrMag" columns
            are decoded and scaled as well.
        label: Class label written into the "label" column of every row.

    Returns:
        A function processDf(df) that modifies the given DataFrame in place
        and returns it.
    """
    def processFLColumn(x, mapping=lambda x: x):
        # x is a JSON object with consecutive string keys "0", "1", ...; the
        # lists stored under those keys are concatenated in key order and
        # every value is passed through `mapping`.
        x = json.loads(x)
        result = []
        i = 0
        while str(i) in x:
            result.extend(x[str(i)])
            i += 1
        return [mapping(a) for a in result]

    def decodeImage(blob):
        # np.float (an alias of the builtin float, i.e. float64) was removed
        # in NumPy 1.24; np.float64 is the equivalent explicit dtype.
        pixels = np.array(imageFromBlob(blob), dtype=np.float64)
        # Divide by the largest 16-bit value (presumably 16-bit images, which
        # scales the pixels to [0, 1]).
        return pixels / (2**16 - 1)

    def processDf(df):
        for imageColumn in ("img0", "img1"):
            df[imageColumn] = df[imageColumn].apply(decodeImage)
        if with_fluorescence:
            df["avg"] = df["avg"].apply(
                processFLColumn,
                mapping = lambda x: x/0.5
            )
            df["corrPha"] = df["corrPha"].apply(
                processFLColumn,
                mapping = lambda x: x/np.pi
            )
            df["corrMag"] = df["corrMag"].apply(
                processFLColumn,
                mapping = lambda x: x/0.5
            )
        df["label"] = label
        return df
    return processDf

In [17]:
def datasetFromItList(itList, num_classes, batchsize, first=False):
    """Fetch one chunk from every iterator and turn it into a batched tf.data.Dataset.

    Reads the module-level `with_fluorescence` and `n_fl_configs` settings to
    decide whether the fluorescence columns are part of the model input.

    Args:
        itList: Iterators yielding pandas DataFrames with the columns "img0",
            "img1", "label" (and "avg", "corrPha", "corrMag" when fluorescence
            is enabled).
        num_classes: Depth of the one-hot encoded labels.
        batchsize: Batch size of the returned dataset.
        first: If True, use it.getFirst() instead of next(it) for each iterator.

    Returns:
        A tf.data.Dataset of ((inputs...), one_hot_label) pairs in batches of
        `batchsize`, with the rows shuffled once.

    Raises:
        ValueError: If itList is empty.
    """
    chunks = [it.getFirst() if first else next(it) for it in itList]
    if not chunks:
        raise ValueError("datasetFromItList received an empty itList; there is no data to build a dataset from.")
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0,
    # and quadratic because every call copies the accumulated frame).
    df = pd.concat(chunks)

    print("Randomizing the sample in the set", flush=True)
    df = df.sample(frac=1).reset_index(drop=True)

    print("Building TF-Dataset", flush=True)
    nEvents = len(df)

    def stacked(column, *shape):
        # Stack the per-row values of `column` into one array of the given shape.
        return np.array(df[column].to_list()).reshape((nEvents,) + shape)

    inputs = (stacked("img0", 200, 200, 1), stacked("img1", 200, 200, 1))
    if with_fluorescence:
        inputs += (
            stacked("avg", n_fl_configs*6, 1),
            stacked("corrPha", n_fl_configs*6, 1),
            stacked("corrMag", n_fl_configs*6, 1),
        )
    datasetData = tf.data.Dataset.from_tensor_slices(inputs)
    datasetLabels = tf.data.Dataset.from_tensor_slices(
        tf.one_hot(df["label"].values, num_classes)
    )

    # Pair the inputs with the labels and split into batches.
    dataset = tf.data.Dataset.zip((datasetData, datasetLabels)).batch(batchsize)
    return dataset

In [18]:
# One background generator per training dataset; entry k of datasetList is
# labelled with entry k of labelList. When the test set is taken from the
# first chunk, that chunk is reserved by the generator.
itList = [
    BackgroundGenerator(
        datasetId,
        with_fl=with_fluorescence,
        prefetch=chunkPrefetch,
        mysqlSettings=mysqlSettings,
        chunksize=chunksize,
        reserveFirst=(testsetMode == "fromFirstChunk"),
        prepareFunc=getPrepareFunc(
            with_fluorescence=with_fluorescence,
            label=labelList[position],
        ),
    )
    for position, datasetId in enumerate(datasetList)
]


next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  139
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
next batch elements:  256
Iterator Empty!!
Restarting iterator
next batch elements:  256
n

In [19]:
# Generators for an explicitly configured test set (testsetMode == "fromDataset").
if testsetMode=="fromDataset":
    testItList = []
    for i, dataset in enumerate(testsetList):
        testItList.append(
            BackgroundGenerator(
                dataset,
                # Same FL setting as the training generators, so the columns
                # read by the prepare function are requested.
                with_fl=with_fluorescence,
                mysqlSettings=mysqlSettings,
                chunksize=chunksize,
                # Test datasets are labelled from testsetLabels. The previous
                # code indexed labelList (the training labels), which pairs
                # test datasets with unrelated labels.
                prepareFunc=getPrepareFunc(with_fluorescence=with_fluorescence, label=testsetLabels[i])
            )
        )

In [20]:
# Progress marker: all generators above have been constructed.
print("Iterators are built")

Iterators are built


In [21]:
# Build and compile the network under the configured distribution strategy,
# then print its layer summary.
print("Building model...", flush=True)
model = get_compiled_model(
    nClasses=NUM_CLASSES,
    with_fluorescence=with_fluorescence,
    n_fl_configs=n_fl_configs,
    strategy=strategy,
)
print("Model is built:", flush=True)
model.summary()

Building model...
Model is built:
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 200, 200, 1) 0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 200, 200, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 200, 200, 64) 1664        input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_10 (Conv2D)              (None, 200, 200, 64) 1664        input_2[0][0]                    
____________________________________________________________

In [22]:

# Keras callbacks passed to every model.fit call in the training loop.
# TensorBoard logs are written to <tensorboardLogFolder>/<modelName>.
logger = tf.keras.callbacks.TensorBoard(log_dir=f"{tensorboardLogFolder}/{modelName}")
# Checkpoint callback writing to checkpointFolder. The same path is reused for
# every save, so presumably each save overwrites the previous one -- confirm.
saver = tf.keras.callbacks.ModelCheckpoint(filepath=checkpointFolder)


In [23]:
# Build the fixed test set that is used for validation in every training loop.
print("Building testset", flush=True)
if testsetMode=="fromFirstChunk":
    # Use the first chunk of every training generator.
    testset = datasetFromItList(itList=itList, batchsize=batchsize, num_classes=NUM_CLASSES, first=True)
elif testsetMode=="fromDataset":
    testset = datasetFromItList(itList=testItList, batchsize=batchsize, num_classes=NUM_CLASSES)
else:
    # Fail here instead of leaving `testset` undefined (or stale from an
    # earlier run of this cell) and crashing later inside the training loop.
    raise ValueError(f"Unknown testsetMode {testsetMode!r}; expected 'fromFirstChunk' or 'fromDataset'.")


Building testset
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data ready
Fetching first data...
Data rea

loss, acc = model.evaluate(testset, verbose=2)<br>
model.load_weights(f"{checkpointFolder}/{modelName}")

In [24]:
# Endless training loop: fetch the next chunk from every dataset, train on it
# for epochsPerDatasetChunk epochs, evaluate on the fixed test set and keep the
# best weights. Interrupt the kernel to stop it.
datasetCounter = 0
bestAccuracy = 0.00
bestValAccuracy = 0.00
confusion_final = np.zeros((NUM_CLASSES,NUM_CLASSES)) # NB: added bench.py (sophie)
# Fixed label set so the confusion matrix is always NUM_CLASSES x NUM_CLASSES,
# even when a class is missing from both the test labels and the predictions.
classLabels = list(range(NUM_CLASSES))
# savefig / save_weights do not create missing folders.
os.makedirs(confMatFolder, exist_ok=True)
os.makedirs(checkpointFolder, exist_ok=True)
while True:

    # Reset confusion matrix to zeros every 5 loops
    if (datasetCounter % 5 == 0):
        confusion_final = np.zeros((NUM_CLASSES,NUM_CLASSES))

    print("Prepare next chunk for training...", flush=True)
    trainingSet = datasetFromItList(itList=itList, batchsize=batchsize, num_classes=NUM_CLASSES)
    print(f"Training model on the current TF-Dataset (nr: {datasetCounter})", flush=True)
    history=model.fit(trainingSet, validation_data=testset,  epochs=epochsPerDatasetChunk, verbose=1, callbacks=[logger, saver])

    # CONFUSION MATRIX
    Y_pred = model.predict(testset)
    y_pred = np.argmax(Y_pred, axis = 1)
    # One-hot test labels, concatenated over all batches, converted to class ids.
    true_categories = tf.concat([y for x, y in testset], axis=0)
    np_testset = tfds.as_numpy(true_categories)
    np_testset = np.argmax(np_testset,axis=1)
    print('True_class argmax')
    print(np_testset[0:chunksize])
    print('Pred_class_argmax')
    print(y_pred[0:chunksize])
    print('Confusion Matrix')
    confusion_temp=confusion_matrix(y_pred = y_pred, y_true = np_testset, labels = classLabels)
    confusion_final=(confusion_final+confusion_temp)
    df_conf_mat = pd.DataFrame(confusion_final, columns = target_names, index = target_names)
    # Normalized confusion matrix: each row is divided by its total. Rows
    # without any sample keep zeros instead of producing 0/0 = NaN.
    rowTotals = confusion_final.sum(axis=1)[:, np.newaxis]
    rowTotals[rowTotals == 0] = 1
    confusion_final_norm = np.around(confusion_final.astype('float') / rowTotals, decimals=2)
    df_conf_mat_norm = pd.DataFrame(confusion_final_norm, columns = target_names, index = target_names)
    print(df_conf_mat_norm)
    figure = plt.figure(figsize=(8, 8))
    sns.heatmap(df_conf_mat_norm, annot=True,cmap=plt.cm.Blues)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the axis labels are set so they are part of the layout.
    plt.tight_layout()
    plt.show()


    # Retrieve accuracy of this while loop for storing best weights
    accuraciesOfThisLoop = history.history['accuracy']
    valAccuraciesOfThisLoop = history.history['val_accuracy']
    # Compare the accuracy and valAcc of the last epoch with bestAccuracy
    idx = len(accuraciesOfThisLoop)-1
    print("Loop while number: ", datasetCounter, "current bestAccuracy: ", bestAccuracy, "accuraciesOfThisLoop : ", accuraciesOfThisLoop[idx], "valAccuraciesOfThisLoop: ", valAccuraciesOfThisLoop[idx], "acc - val_acc: ", (accuraciesOfThisLoop[idx] - valAccuraciesOfThisLoop[idx]))
    # Keep the weights when the training accuracy improved and the gap to the
    # validation accuracy stays below 0.1.
    if (bestAccuracy < accuraciesOfThisLoop[idx]) and ((accuraciesOfThisLoop[idx] - valAccuraciesOfThisLoop[idx]) < 0.1):

        # bestAccuracy is compared against the training accuracy above, so it
        # stores the training accuracy (it used to be overwritten with the
        # validation accuracy, which made the comparison meaningless).
        bestAccuracy = round(float(accuraciesOfThisLoop[idx]), 3)
        bestValAccuracy = round(float(valAccuraciesOfThisLoop[idx]), 3)
        print("The weights are saved...")
        model.save_weights(checkpointFolder + "weights.best.hdf5")
        print("The confusion matrix is saved...")
        figure.savefig(confMatFolder +'Conf_mat_valAcc_'+str(bestValAccuracy)+'.jpg', bbox_inches = 'tight')
        print('\n')

    # Release the figure; the loop never ends, so figures would pile up.
    plt.close(figure)
    datasetCounter += 1


Prepare next chunk for training...
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
done
fetching data
d

KeyboardInterrupt: 