In [1]:
!pip install wandb
import wandb
from wandb.keras import WandbMetricsLogger

import keras
from keras.callbacks import Callback
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
from keras.layers import Input, Conv2D, Conv2DTranspose, Add, Activation
from keras.models import Model
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import math
from torch.utils.data import Dataset, DataLoader, TensorDataset
from pathlib import Path
import torch
from tqdm import tqdm
from collections import defaultdict
import os
import zipfile
import random
from datetime import datetime

# List the GPUs TensorFlow can see (an empty list means the runtime has no GPU attached).
visible_gpus = tf.config.list_physical_devices('GPU')
print('GPU name: ', visible_gpus)

GPU name:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# Resolve dataset / checkpoint locations for either Google Colab (Drive-backed) or a local run.
# Only a missing `google.colab` package selects the local fallback; a failing Drive mount or a
# missing archive on Colab now surfaces as its own error instead of being silently swallowed.
try:
    from google.colab import drive
except ImportError:
    # Local run: directories live one level above the current working directory.
    cwd = Path().cwd().parent
    checkpoint_path = cwd / 'model_checkpoints'
    hyperparameter_path = cwd / 'hyperparameter_logs'
    checkpoint_path.mkdir(parents=True, exist_ok=True)
    datasets_path = cwd / 'data'
else:
    drive.mount('/content/drive/')
    cwd = Path().cwd()

    # Per-user locations of the dataset archive on Google Drive.
    moritz = "/content/drive/My Drive/Colab/CV23_Data/preprocessed_data/Datasets.zip"
    david = "/content/drive/My Drive/CV23_Data/preprocessed_data/Datasets.zip"
    # Trailing underscore so the builtin `max` is not shadowed for the rest of the notebook.
    max_ = cwd / 'drive' / 'My Drive' / 'cvue23' / 'Datasets.zip'
    waad = cwd / 'drive' / 'My Drive' / 'CV24' / 'Datasets.zip'

    # Extract the archive into /tmp; the context manager closes the file even if extraction fails.
    with zipfile.ZipFile(max_, 'r') as zip_ref:
        zip_ref.extractall("/tmp")

    checkpoint_path = cwd / 'drive' / 'My Drive' / 'cvue23' / 'model_checkpoints'
    hyperparameter_path = cwd / 'drive' / 'My Drive' / 'cvue23' / 'hyperparameter_logs'
    checkpoint_path.mkdir(parents=True, exist_ok=True)
    # NOTE(review): presumably resolves to /tmp (the extraction target) when cwd is /content — confirm.
    datasets_path = cwd.parent.parent / 'tmp'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [3]:
# Locate the three dataset splits below the dataset root.
train_set_path, val_set_original_path, val_set_crop_path = (
    datasets_path / split_name
    for split_name in ('training_set', 'validation_set_original', 'validation_set_cropped')
)

# Stop right here if any of the split directories is missing.
assert all(
    split_dir.exists()
    for split_dir in (train_set_path, val_set_original_path, val_set_crop_path)
)

In [6]:
# Pose names by label index: the integer stored under 'pose' in a sample file indexes this list.
map_label_to_name = ['no_person', 'idle','sitting', 'laying']

class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` serving ``(x, y)`` batches from a directory of sample files.

    Every file in ``basedir`` is read with ``np.load`` and must provide the entries
    ``x`` and ``y`` (divided by 255 on load) plus the scalar entries ``pose`` and ``trees``,
    which are used for filtering and for the statistics shown by ``print_info``.

    Args:
        basedir: Directory containing the sample files.
        batch_size: Samples per batch. ``None`` serves the whole (filtered) set as one batch.
        included_poses: Pose names (entries of ``map_label_to_name``) to keep; ``None`` keeps all.
        included_trees: ``trees`` values to keep; ``None`` keeps all.
        shuffle: Shuffle the directory listing once, before filtering.
        only_use_n: Stop collecting after this many samples passed the filters.
        random_rotation: Stored on the instance but currently not applied anywhere.
        random_flip: Flip each sample left-right with probability 0.5 (x and y together).

    Raises:
        ValueError: If ``basedir`` does not exist, ``batch_size`` is not positive, or a name in
            ``included_poses`` is not in ``map_label_to_name``.
    """

    def __init__(
        self,
        basedir: Path,
        batch_size: int = None,
        included_poses: list = None,
        included_trees: list = None,
        shuffle=False,
        only_use_n: int = None,
        random_rotation: bool = False,
        random_flip: bool = False,
    ):
        super().__init__()

        if not basedir.exists():
            raise ValueError('Datafolder does not exist. Add it to your drive and try again. Maybe restart the runtime.')
        if batch_size is not None and batch_size <= 0:
            raise ValueError(f'batch_size must be a positive integer or None, got {batch_size!r}')

        self.basedir = basedir
        self.batch_size = batch_size
        self.included_poses = [map_label_to_name.index(pose) for pose in included_poses] if included_poses is not None else None
        self.included_trees  = included_trees
        self.filenames = self.__filter(shuffle, only_use_n)
        self.random_flip = random_flip
        self.random_rotation = random_rotation

    def __filter(self, shuffle, only_use_n):
        """Scan ``basedir`` and return the names of the files that pass the pose/trees filters.

        Also fills the ``*_distribution`` counters (all scanned files) and the
        ``*_distribution_filtered`` counters (accepted files only).
        """
        files = []
        self.pose_distribution = defaultdict(int)
        self.trees_distribution = defaultdict(int)
        self.pose_distribution_filtered = defaultdict(int)
        self.trees_distribution_filtered = defaultdict(int)

        unfiltered = list(self.basedir.iterdir())

        if shuffle:
            random.shuffle(unfiltered)

        total = len(unfiltered) if only_use_n is None else only_use_n

        for path in tqdm(unfiltered, total=total):
            # Checked before loading the next file so that only_use_n=0 yields an empty set.
            if only_use_n is not None and len(files) == only_use_n:
                break

            loaded = np.load(path)
            pose, trees = loaded['pose'].item(), loaded['trees'].item()

            self.pose_distribution[pose] += 1
            self.trees_distribution[trees] += 1

            if self.included_poses is not None and pose not in self.included_poses:
                continue

            if self.included_trees is not None and trees not in self.included_trees:
                continue

            files.append(path.name)
            self.pose_distribution_filtered[pose] += 1
            self.trees_distribution_filtered[trees] += 1

        return files

    def load(self, path):
        """Load one sample file and return ``(x, y)``, each divided by 255."""
        loaded = np.load(path)
        x = loaded['x'] / 255
        y = loaded['y'] / 255
        return x, y

    def __len__(self):
        """Number of batches per epoch."""
        if self.batch_size is None:
            # Without a batch size __getitem__ returns the whole set for any index, so there is
            # exactly one batch; reporting the sample count here made Keras iterate over the
            # complete set once per sample.
            return 1 if self.filenames else 0

        return math.ceil(len(self.filenames) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch ``idx`` as stacked arrays ``(X, Y)``; ``idx`` is ignored when unbatched."""
        if self.batch_size is None:
            batch = self.filenames
        else:
            low = idx * self.batch_size
            high = min(low + self.batch_size, len(self.filenames))
            batch = self.filenames[low:high]

        X, Y = [],[]
        for fname in batch:
            x,y = self.load(self.basedir / fname)

            # One coin toss per sample so that input and target are flipped together.
            if self.random_flip and bool(random.getrandbits(1)):
                x, y = np.fliplr(x), np.fliplr(y)

            X.append(x)
            Y.append(y)

        return np.stack(X), np.stack(Y)

    @staticmethod
    def _print_distribution(title, key_header, distribution, key_to_label=None):
        """Print one two-column table: a title, a header row and one row per counter entry."""
        print(title)
        print("{:<15} {:<15}".format(key_header, 'number of samples'))
        for key, value in distribution.items():
            label = key if key_to_label is None else key_to_label(key)
            print("{:<15} {:<15}".format(label, value))

    def print_info(self):
        """Print the sample count, the shape of the first sample and the pose/trees statistics."""
        print()
        if self.filenames:
            shape = self.load(self.basedir / self.filenames[0])[0].shape
            print(f'{len(self.filenames)} samples with shape : {shape}')
        else:
            # No file passed the filters, so there is no sample to take a shape from.
            print('0 samples')

        sections = (
            ('Pose distribution total', 'pose', self.pose_distribution, map_label_to_name.__getitem__),
            ('Pose distribution filtered', 'pose', self.pose_distribution_filtered, map_label_to_name.__getitem__),
            ('Trees distribution total', 'num trees per ha', self.trees_distribution, None),
            ('Trees distribution filtered', 'num trees per ha', self.trees_distribution_filtered, None),
        )
        for position, (title, key_header, distribution, key_to_label) in enumerate(sections):
            if position:
                print()
            self._print_distribution(title, key_header, distribution, key_to_label)


In [66]:
print('Validation Dataset')
# 128 cropped validation samples drawn from a shuffled directory listing; no batch size is given.
validation_data = DataGenerator(val_set_crop_path, shuffle=True, only_use_n=128)
validation_data.print_info()

Validation Dataset


 99%|█████████▉| 127/128 [00:00<00:00, 1770.58it/s]


128 samples with shape : (128, 128, 6)
Pose distribution total
sitting         37             
laying          43             
idle            31             
no_person       17             

Pose distribution filtered
sitting         37             
laying          43             
idle            31             
no_person       17             

Trees distribution total
num trees per ha number of samples
0               13             
100             76             
200             39             

Trees distribution filtered
num trees per ha number of samples
0               13             
100             76             
200             39             





In [67]:
print('Training Dataset')

# Training samples that show a person ('no_person' is filtered out), in batches of 128,
# with random left-right flips as augmentation.
person_poses = ['idle','sitting', 'laying']
train_data = DataGenerator(
    train_set_path,
    included_poses=person_poses,
    batch_size=128,
    random_flip=True,
    shuffle=True,
)
train_data.print_info()

Training Dataset


100%|██████████| 27281/27281 [00:15<00:00, 1757.02it/s]


24632 samples with shape : (128, 128, 6)
Pose distribution total
idle            8182           
sitting         8237           
no_person       2649           
laying          8213           

Pose distribution filtered
idle            8182           
sitting         8237           
laying          8213           

Trees distribution total
num trees per ha number of samples
0               2807           
200             8133           
100             16341          

Trees distribution filtered
num trees per ha number of samples
0               2552           
200             7371           
100             14709          





In [68]:
def encoder(x, num_features, num_layers, residual_every=2):
    """Stack ``num_layers`` 3x3 ReLU convolutions on ``x`` and collect skip tensors.

    The first convolution uses stride 2; the remaining ``num_layers - 1`` use stride 1.
    Counting those remaining layers from 1, the output of every ``residual_every``-th
    one is recorded.

    Returns:
        ``(x, residuals)``: the final tensor and the recorded tensors in creation order.
    """
    # Stride-2 entry convolution.
    x = Conv2D(num_features, kernel_size=3, strides=2, padding='same', activation='relu')(x)

    skip_tensors = []
    for depth in range(1, num_layers):
        x = Conv2D(num_features, kernel_size=3, padding='same', activation='relu')(x)
        if depth % residual_every:
            continue
        skip_tensors.append(x)

    return x, skip_tensors

def decoder(x, num_features, num_layers, residuals, residual_every=2):
    """Stack transposed convolutions on ``x``, adding the encoder's skip tensors back in.

    Layer positions continue the encoder's count (``num_layers + 1`` onwards). At every
    position divisible by ``residual_every`` the most recently recorded skip tensor is
    popped from ``residuals`` (the caller's list is consumed) and added before the ReLU.
    A final stride-2 transposed convolution produces the 1-channel output.

    Raises:
        ValueError: If skip tensors are left over after the loop.
    """
    for position in range(num_layers + 1, 2 * num_layers):
        x = Conv2DTranspose(num_features, kernel_size=3, padding='same')(x)

        merge_here = position % residual_every == 0
        if merge_here and residuals:
            x = Add()([x, residuals.pop()])

        x = Activation('relu')(x)

    if residuals:
        raise ValueError('There are unused residual connections')

    # Stride-2 transposed convolution down to a single output channel.
    return Conv2DTranspose(1, kernel_size=3, strides=2, padding='same')(x)

def REDNet(num_layers, num_features, channel_size):
    '''Assemble the encoder/decoder network with the Keras functional API.

    The input has ``channel_size`` channels and unspecified height and width. The returned
    model is named ``REDNet<2 * num_layers>`` and outputs a single channel.
    '''
    net_input = Input(shape=(None, None, channel_size))

    encoded, skips = encoder(net_input, num_features, num_layers)
    decoded = decoder(encoded, num_features, num_layers, skips)

    # Global skip connection: a 1x1 transposed convolution maps the input to one channel
    # so that it can be added to the decoder output.
    input_skip = Conv2DTranspose(1, kernel_size=1, padding='same')(net_input)
    merged = Add()([decoded, input_skip])
    net_output = Activation('relu')(merged)

    return Model(inputs=net_input, outputs=net_output, name=f'REDNet{num_layers*2}')

In [70]:
# Build the network: 11 encoder + 11 decoder layers, 64 feature maps, 6 input channels.
model = REDNet(num_layers=11, num_features=64, channel_size=6)

# Staircase exponential decay: the learning rate starts at 1e-3 and is multiplied by 0.9
# after every 1000 optimizer steps.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.9, staircase=True,
)
opt = keras.optimizers.Adam(learning_rate=lr_schedule)
loss = keras.losses.MeanSquaredError(reduction="sum_over_batch_size")

# Compile with mean squared error as the training objective.
model.compile(optimizer=opt, loss=loss)

In [71]:
# Output directory for this run; later cells place checkpoints, the CSV log and the final
# model inside it.
checkpoint_dir = Path.cwd() / 'checkpoints_from_scratch'
# Create it up front so that writers which do not create parent directories themselves
# do not fail on a fresh runtime.
checkpoint_dir.mkdir(parents=True, exist_ok=True)

#model.load_weights((Path.cwd() / 'checkpoints' / '04-0.01.keras'))

In [72]:
# Callbacks shared by every training run in this notebook.
callbacks = []
# Write a checkpoint only when the monitored metric improves; epoch number and validation
# loss are part of the file name.
callbacks.append(
    ModelCheckpoint(
        (checkpoint_dir / 'ep{epoch:02d}_loss{val_loss:.4f}.keras').as_posix(),
        save_best_only=True,
    )
)
# Append one row of metrics per epoch to a CSV file.
callbacks.append(CSVLogger(checkpoint_dir / 'training_max.csv', append=True))
# Currently disabled:
#keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0005),
#keras.callbacks.ProgbarLogger(count_mode='steps'),
#WandbMetricsLogger(),

# First training run: 20 epochs on the training set, validated on `validation_data`.
history = model.fit(
    train_data,
    epochs=20,
    validation_data=validation_data,
    shuffle=True,
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [75]:
# train on the dataset
# Continues training the same `model` for 10 more epochs with the same callbacks.
# No `initial_epoch` is passed, so the epoch counter starts again at 1 (see "Epoch 1/10" below).
# NOTE(review): with the CSV logger in append mode, the logged epoch numbers presumably
# repeat those of the previous run — verify before plotting the log.
history = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=10,
    callbacks=callbacks,
    shuffle=True,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [76]:
# Show the optimizer's current learning rate (the notebook displays the last expression of a cell).
model.optimizer.learning_rate

<tf.Variable 'current_learning_rate:0' shape=() dtype=float32, numpy=0.00059048994>

In [83]:
# train on the dataset
# Resumes the epoch counter: `initial_epoch=31` with `epochs=40` runs the epochs shown as
# 32/40 .. 40/40 below, i.e. 9 epochs.
# NOTE(review): the two earlier runs add up to 20 + 10 = 30 epochs, so `initial_epoch=30`
# may have been intended — confirm.
# NOTE(review): this cell's execution count (83) is higher than that of the re-compile cell
# further down (82), so it presumably ran after the optimizer was replaced — verify before
# relying on a top-to-bottom re-run.
history = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=40,
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
    initial_epoch=31,
)

Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [80]:
# Show the optimizer's current learning rate again, after the additional training.
model.optimizer.learning_rate

<tf.Variable 'current_learning_rate:0' shape=() dtype=float32, numpy=0.0004782968>

In [82]:
# Replace the decaying schedule with a constant learning rate of 1e-4.
# Compiling again attaches a freshly constructed Adam instance to the model.
opt = keras.optimizers.Adam(learning_rate=0.0001)
loss = keras.losses.MeanSquaredError(reduction="sum_over_batch_size")

model.compile(optimizer=opt, loss=loss)

In [84]:
# Continue training with the epoch counter set to 49: runs the epochs shown as 50/60 .. 60/60.
# NOTE(review): the previous run visible in this notebook ended at epoch 40; epochs 41-49
# are not accounted for here — confirm the intended `initial_epoch`.
history = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=60,
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
    initial_epoch=49,
)

Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


Now train on the full-size validation set, holding out a split of it for validation.

In [100]:
# Fine-tuning set: the first 4815-128 files (in directory-listing order) of the original,
# full-size validation set, in batches of 32 with random left-right flips.
# NOTE(review): 4815 looks like the total number of validation samples and 128 the size of
# the hold-out below — confirm. Whether this selection is disjoint from that hold-out depends
# on both directories listing matching samples in the same order — verify.
full_train_data = DataGenerator(
    val_set_original_path,
    batch_size=32,
    only_use_n=4815-128,
    random_flip=True,
    shuffle=False,
)

100%|█████████▉| 4686/4687 [00:02<00:00, 1816.27it/s]


In [101]:
# Hold-out split: scan the cropped validation set unshuffled, then keep only the last 128
# file names of the listing.
# NOTE(review): random_flip=True makes this validation data non-deterministic between
# evaluations — confirm that is intended.
valset = DataGenerator(val_set_crop_path, random_flip=True, shuffle=False)
valset.filenames = valset.filenames[-128:]

100%|██████████| 4815/4815 [00:02<00:00, 1791.33it/s]


In [102]:
# Fine-tune on the full-size validation data for the epochs shown as 61/70 .. 70/70,
# validating on the 128-sample hold-out `valset`.
# The shared `callbacks` list is reused, so checkpoints and CSV rows go to the same
# locations as in the earlier runs.
history = model.fit(
    full_train_data,
    validation_data=valset,
    epochs=70,
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
    initial_epoch=60,
)

Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


In [103]:
# Save the model in its final state (after the fine-tuning run) next to the checkpoints.
model.save(checkpoint_dir/'with_retrain.model.keras')