In [1]:
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow.keras.layers as layers

In [2]:
img_size=(240, 320)
img_channels = 3
batch_size=32

In [None]:
model_name = 'simple_covnet_model.h5'

In [19]:
train_size = 1000
validation_size = 500
test_size = 500

# Load the DataSet

Dataset citation:
- From Semi-Supervised to Transfer Counting of Crowds
C. C. Loy, S. Gong, and T. Xiang
in Proceedings of IEEE International Conference on Computer Vision, pp. 2256-2263, 2013 (ICCV)
- Cumulative Attribute Space for Age and Crowd Density Estimation
K. Chen, S. Gong, T. Xiang, and C. C. Loy
in Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, pp. 2467-2474, 2013 (CVPR, Oral)
- Crowd Counting and Profiling: Methodology and Evaluation
C. C. Loy, K. Chen, S. Gong, T. Xiang
in S. Ali, K. Nishino, D. Manocha, and M. Shah (Eds.), Modeling, Simulation and Visual Analysis of Crowds, Springer, vol. 11, pp. 347-382, 2013
- Feature Mining for Localised Crowd Counting
K. Chen, C. C. Loy, S. Gong, and T. Xiang
British Machine Vision Conference, 2012 (BMVC)

In [3]:
dataset_path = '/Users/olove/Library/CloudStorage/OneDrive-Personal/AI datasets/CrowdCounter'

In [4]:
labels_df = pd.read_csv(dataset_path + '/labels.csv')
labels_df['image_name'] = labels_df['id'].map('seq_{:06d}.jpg'.format)
labels_df.drop("id", axis=1,inplace=True)
display(labels_df)

Unnamed: 0,count,image_name
0,35,seq_000001.jpg
1,41,seq_000002.jpg
2,41,seq_000003.jpg
3,44,seq_000004.jpg
4,41,seq_000005.jpg
...,...,...
1995,27,seq_001996.jpg
1996,27,seq_001997.jpg
1997,25,seq_001998.jpg
1998,26,seq_001999.jpg


In [18]:
labels_df = labels_df.sample(frac=1).reset_index(drop=True)
display(labels_df)

Unnamed: 0,count,image_name
0,25,seq_000943.jpg
1,24,seq_000645.jpg
2,20,seq_000533.jpg
3,22,seq_000627.jpg
4,29,seq_001303.jpg
...,...,...
1995,19,seq_000653.jpg
1996,18,seq_000474.jpg
1997,38,seq_000187.jpg
1998,26,seq_000190.jpg


In [4]:
if (train_size+validation_size+test_size) != len(labels_df):
    print('Dataset size is different from specified class sizes')
    exit(1)

training_df = labels_df[:train_size]
validation_df = labels_df[train_size:train_size+validation_size]
test_df = labels_df[train_size+validation_size:]

# Define the model

In [5]:
inputs = keras.Input(shape= img_size + (img_channels,))

## Custom Simple Covnet

Downsizing using strides instead of MaxPolling in order to conserve location data

In [11]:
x = layers.Rescaling(1./255)(inputs)
x = layers.Conv2D(filters=32, kernel_size=3, strides=2, activation="relu")(inputs)
x = layers.Conv2D(filters=64, kernel_size=3, strides=2, activation="relu")(x)
x = layers.Conv2D(filters=128, kernel_size=3, strides=2, activation="relu")(x)
x = layers.Flatten()(x)
# x = layers.Dropout(0.5)(x)

# Output Layer

In [12]:
outputs = layers.Dense(128, activation="relu")(x)
outputs = layers.Dropout(0.5)(outputs)
outputs = layers.Dense(1)(outputs)

In [13]:
model = keras.Model(inputs=inputs, outputs=outputs)

In [14]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 240, 320, 3)]     0         
                                                                 
 conv2d_6 (Conv2D)           (None, 119, 159, 32)      896       
                                                                 
 conv2d_7 (Conv2D)           (None, 59, 79, 64)        18496     
                                                                 
 conv2d_8 (Conv2D)           (None, 29, 39, 128)       73856     
                                                                 
 flatten_2 (Flatten)         (None, 144768)            0         
                                                                 
 dense_2 (Dense)             (None, 128)               18530432  
                                                                 
 dropout_3 (Dropout)         (None, 128)               0   

# Train model

In [None]:
model.compile(loss="mse", optimizer="rmsprop", metrics=["mae"])
# TODO: Try mae vs accuracy. mae should be better since we are adjusting it to get closer to the actual value

In [None]:
callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_accuracy", patience=4
    ),
    keras.callbacks.ModelCheckpoint(
        filepath=model_name,
        monitor="val_loss",
        save_best_only=True
    )
]

In [None]:
model.fit(train_images, train_labels,
        epochs=10,
        callbacks = callbacks_list,
        validation_data=(val_images, val_labels)
          )