# Abstract Crowd Count Trainer

In [1]:
# Import all necessary modules
# Arrange by alphabetical order
# imports first, then import as, then from imports.
import cv2
import os
import shutil
import sys
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from scipy.io import loadmat
from tqdm import tqdm
from utils.callbacks import CalculateScore
from utils.generator import CustomGenerator
from utils.utils import gen_density_map_gaussian
%matplotlib inline
plt.ioff()

## GPU check

Before we do anything, we'll make sure that our GPU is being used otherwise it'll be sad to leave your model to train for hours only to come back and find out that your model had been training on CPU the whole time and you're only at epoch 8.

In [2]:
physical_device = tf.config.experimental.list_physical_devices('GPU')
if len(physical_device) == 0:
    print("No GPU detected!")
    sys.exit(-1)

## Parameters and model

| Parameter       | Type      | Comment                                                                                                         |
|-----------------|-----------|-----------------------------------------------------------------------------------------------------------------|
| `dataset_path`  | `string`  | Path to your dataset, i.e. `data/ShanghaiTech/part_B`.                                                          |
| `generate_dmap` | `boolean` | If True, generates the density map of the dataset in `{dataset_path}/density-map` with filename `DMAP_{n}.npy`. |
| `epochs`        | `int`     | Number of epochs to train for.                                                                                  |
| `network_name`  | `string`  | Name of your network that you are training. Used while saving model.                                            |
| `loss_name`     | `string`  | Name of your network's loss function. Use while saving model.                                                   |

In [3]:
# Dataset path should look like that
# of the original dataset, e.g.
#
# ShanghaiTech/
# ├── part_A
# │   ├── test_data
# │   │   ├── ground-truth
# │   │   └── images
# │   └── train_data
# │   │   ├── ground-truth
# │   │   └── images
# └── part_B
#     ├── test_data
#     │   ├── ground-truth
#     │   └── images
#     └── train_data
#         ├── ground-truth
#         └── images
dataset_path = "data/ShanghaiTech/part_B"
generate_dmap = False
epochs = 300
network_name = "CSRNet"
loss_name = "MSE"

## Model compilation

Define your model here. The model should be compiled with your preferred optimiser, loss function, etc.

In [4]:
from keras.optimizers import Adam
from models.CSRNet import CSRNet

optimizer = Adam(lr=1e-5)
model = CSRNet(input_shape=(None, None, 3))
model.compile(optimizer=optimizer, loss='mse')
# model.summary()

## Custom functions for generator

Since people may deal with different datasets, custom input and output reading
functions need to be written for the generator. The data reading function should
accept just one `path` argument.

In [5]:
# List all files in a directory for generator
def list_files(path):
    return [os.path.join(path, x) for x in os.listdir(path)]

# Custom data loading function for generator
def get_input_data(path):
    img = cv2.imread(path)

    # Some pre-processing
    img = img / 255.0
    img[:,:,0] = (img[:,:,0]-0.485) / 0.229
    img[:,:,1] = (img[:,:,1]-0.456) / 0.224
    img[:,:,2] = (img[:,:,2]-0.406) / 0.225

    return img

def get_output_data(path):
    return np.load(path)

generator = CustomGenerator(
    list_files(os.path.join(dataset_path, "train_data", "images")),
    list_files(os.path.join(dataset_path, "train_data", "density-map")), 
    get_input_data, get_output_data
)

## (Optional) Generate density map

This generates numpy files which store the density map value with prefix "DMAP_" stored in `ShanghaiTech/part_{dataset}/{train_or_test}_data/density-map

In [6]:
if generate_dmap:
    data_folders = [dataset_path + folder for folder in ['train_data', 'test_data']]
    
    IMAGE_FOLDER_NAME = "images"
    GT_FOLDER_NAME = "ground-truth"
    DMAP_FOLDER_NAME = "density-map"

    IMAGE_PREFIX = "IMG"
    GT_PREFIX = "GT_IMG"
    DMAP_PREFIX = "DMAP"

    # Create necessary folders
    for folder in data_folders:
        os.makedirs(os.path.join(folder, DMAP_FOLDER_NAME), exist_ok=True)

    for folder in data_folders:
        for file_ in tqdm(os.listdir(os.path.join(folder, IMAGE_FOLDER_NAME))):
            img_path = os.path.join(folder, IMAGE_FOLDER_NAME, file_)
            gt_path = img_path.replace(IMAGE_PREFIX, GT_PREFIX).replace('jpg', 'mat').replace(IMAGE_FOLDER_NAME, GT_FOLDER_NAME)
            dmap_path = img_path.replace(IMAGE_PREFIX, DMAP_PREFIX).replace('jpg', 'npy').replace(IMAGE_FOLDER_NAME, DMAP_FOLDER_NAME)

            image = cv2.imread(img_path)
            points = loadmat(gt_path)['image_info'][0, 0][0, 0][0]
            
            dmap = utils.gen_density_map_gaussian(image, points)
            np.save(dmap_path, dmap)

## Training

Run this and go grab coffee and exercise and play games.

In [7]:
train_gen = generator.train_generator()
test_gen = generator.test_generator()

model.fit(
    train_gen,
    epochs=1,
    verbose=1,
    steps_per_epoch=generator.steps_per_epoch,
    callbacks=[CalculateScore(generator)]
)

Validating...: 400it [00:53,  7.43it/s]
Epoch 1 - MAE: 61.760101318359375, MSE: 96.96749877929688



<tensorflow.python.keras.callbacks.History at 0x7fe6d4700d30>