In [1]:
# Hide TensorFlow info and warning log messages
# (set here, in the first cell, before TensorFlow is imported in the next cell)
import os
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')  # Level 2: info and warnings not displayed

In [2]:
import numpy as np
#import skimage.io
import matplotlib.pyplot as plt
#import pathlib
#import tqdm
#import annotation, misc, hyspec_io, image_render
#import skimage.exposure
import tensorflow as tf
#import pathlib

In [3]:
# Hide all GPUs from TensorFlow (an empty device list for the 'GPU' device type),
# in case TensorFlow tries to use GPUs and raises errors
tf.config.set_visible_devices(devices=[], device_type='GPU')

In [22]:
# Paths (Note: raw strings are used so that Windows backslashes need no escaping)
tiles_dataset_path = (
    r'D:\Larvik_Olberg\Hyperspectral\20210825\OlbergAreaS'
    r'\5c_Rad_Georef_SGC_PCA_Tiles\20210825_Olberg_PCA_TrainValDataset'
)

In [67]:
# Load the saved tiles dataset (or, rather, a pointer to the dataset on disk)
# NOTE(review): newer TensorFlow versions deprecate tf.data.experimental.load
# in favor of tf.data.Dataset.load - confirm against the installed version.
dataset = tf.data.experimental.load(path=tiles_dataset_path)

In [30]:
# Show dataset details: element specification and number of tiles
n_tiles = int(dataset.cardinality())  # Convert the cardinality to a plain Python int
print(
    f'Dataset specification: {dataset}',
    f'Number of tiles: {n_tiles}',
    sep='\n',
)

Dataset specification: <_LoadDataset element_spec=(TensorSpec(shape=(128, 128, 8), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128), dtype=tf.int32, name=None))>
Number of tiles: 459


In [28]:
# Shuffle the tiles, using a shuffle buffer as large as the whole dataset
shuffled_dataset = dataset.shuffle(n_tiles)

In [13]:
# Take only the first 3 elements of the shuffled dataset for quick inspection
small_dataset = shuffled_dataset.take(count=3)

In [15]:
# Print shape and Python type of each label tile in the small dataset
for image_tile, label_tile in small_dataset.as_numpy_iterator():
    print(label_tile.shape, type(label_tile), sep='\n')

(128, 128)
<class 'numpy.ndarray'>
(128, 128)
<class 'numpy.ndarray'>
(128, 128)
<class 'numpy.ndarray'>


In [68]:
def kfold_generator(dataset, k, seed=None):
    """ Generator for K-fold splitting into training and validation datasets

    The dataset is shuffled once and split into k consecutive folds. Each fold
    is used exactly once as validation dataset, while the remaining folds are
    merged into the training dataset. If the number of datapoints is not
    divisible by k, the first (n_datapoints % k) folds get one extra datapoint,
    so that every datapoint is used for validation exactly once (same
    convention as KFold in scikit-learn).

    # Arguments:
    dataset    Tensorflow dataset with known, finite cardinality
    k          Number of folds, 2 <= k <= number of datapoints
               (see https://scikit-learn.org/stable/modules/cross_validation.html)
    seed       Optional random seed for the shuffling (default None: no fixed seed).
               Set it to get the same splits every time the generator is created.

    # Yields:
    training_dataset      Tensorflow dataset
    validation_dataset    Tensorflow dataset

    # Raises:
    ValueError    If the dataset cardinality is unknown or infinite, or if k
                  is outside the valid range. Since this is a generator, the
                  error is raised when iteration starts.

    # Notes:
    The generator yields k sets of training and validation datasets when iterated over.

    # Example use:
    dataset = tf.data.Dataset.from_tensor_slices((np.arange(9),np.arange(9)%3))
    for data,label in dataset.as_numpy_iterator():
        print(f'Data: {data}, label: {label}')
    for training_dataset, validation_dataset in kfold_generator(dataset,3):
        print('----')
        for data,label in training_dataset.as_numpy_iterator():
            print(f'Training data: {data}, label: {label}')
        for data,label in validation_dataset.as_numpy_iterator():
            print(f'Validation data: {data}, label: {label}')
    """
    # Work with a plain Python int so that fold boundaries are ordinary integers
    n_datapoints = int(dataset.cardinality())
    if n_datapoints < 0:
        # tf.data reports infinite / unknown cardinality as negative sentinel values
        raise ValueError('Dataset cardinality must be known and finite for K-fold splitting '
                         f'(got cardinality {n_datapoints})')
    if not 2 <= k <= n_datapoints:
        raise ValueError(f'k must be between 2 and the number of datapoints ({n_datapoints}), got {k}')

    # Shuffle once with a fixed order (no reshuffling), so that all folds are
    # cut from the same permutation and do not overlap.
    shuffled = dataset.shuffle(n_datapoints, seed=seed, reshuffle_each_iteration=False)

    # Distribute any remainder over the first folds instead of discarding it
    base_fold_size, n_larger_folds = divmod(n_datapoints, k)
    fold_start = 0
    for i in range(k):
        fold_size = base_fold_size + (1 if i < n_larger_folds else 0)
        fold_end = fold_start + fold_size
        validation_dataset = shuffled.skip(fold_start).take(fold_size)
        # Merge parts before/after validation dataset to create training dataset
        training_dataset = shuffled.take(fold_start).concatenate(shuffled.skip(fold_end))
        yield (training_dataset, validation_dataset)
        fold_start = fold_end

In [69]:
# Split the dataset into 3 folds and report the size of each training/validation pair
for training_dataset, validation_dataset in kfold_generator(dataset, k=3):
    print(
        f'Size of training dataset: {training_dataset.cardinality()}',
        f'Size of validation dataset: {validation_dataset.cardinality()}',
        sep='\n',
    )
    

Size of training dataset: 306
Size of validation dataset: 153
Size of training dataset: 306
Size of validation dataset: 153
Size of training dataset: 306
Size of validation dataset: 153


In [42]:
# Sanity check for the edge case used in kfold_generator for the first fold:
# concatenating onto an empty dataset (take(0)).
a = dataset.take(0)          # Empty dataset
b = dataset.skip(3).take(3)  # Elements 3, 4 and 5
c = a.concatenate(b)         # Empty dataset followed by b

In [44]:
# Show the number of elements in each of the three test datasets, one per line
print(a.cardinality(), b.cardinality(), c.cardinality(), sep='\n')

tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(3, shape=(), dtype=int64)
tf.Tensor(3, shape=(), dtype=int64)


In [70]:
# Small toy dataset for testing: data 0..8 with labels cycling through 0, 1, 2
# NOTE: this rebinds `dataset`, which until here referred to the loaded tiles dataset
dataset = tf.data.Dataset.from_tensor_slices(tensors=(np.arange(9), np.arange(9) % 3))

In [71]:
# Print every (data, label) pair of the toy dataset, in dataset order
for data,label in dataset.as_numpy_iterator():
    print(f'Data: {data}, label: {label}')

Data: 0, label: 0
Data: 1, label: 1
Data: 2, label: 2
Data: 3, label: 0
Data: 4, label: 1
Data: 5, label: 2
Data: 6, label: 0
Data: 7, label: 1
Data: 8, label: 2


In [72]:
# Run 3-fold splitting on the toy dataset and print the contents of every
# training/validation pair, to verify visually that the folds do not overlap
for training_dataset, validation_dataset in kfold_generator(dataset,3):
    print('----')  # Separator between folds
    for data,label in training_dataset.as_numpy_iterator():
        print(f'Training data: {data}, label: {label}')
    for data,label in validation_dataset.as_numpy_iterator():
        print(f'Validation data: {data}, label: {label}')
        

----
Training data: 7, label: 1
Training data: 0, label: 0
Training data: 3, label: 0
Training data: 4, label: 1
Training data: 1, label: 1
Training data: 2, label: 2
Validation data: 6, label: 0
Validation data: 5, label: 2
Validation data: 8, label: 2
----
Training data: 6, label: 0
Training data: 5, label: 2
Training data: 8, label: 2
Training data: 4, label: 1
Training data: 1, label: 1
Training data: 2, label: 2
Validation data: 7, label: 1
Validation data: 0, label: 0
Validation data: 3, label: 0
----
Training data: 6, label: 0
Training data: 5, label: 2
Training data: 8, label: 2
Training data: 7, label: 1
Training data: 0, label: 0
Training data: 3, label: 0
Validation data: 4, label: 1
Validation data: 1, label: 1
Validation data: 2, label: 2
