# Addestramento MobileNet-V2 + ConvLSTM e MobileNet-V2 + BiLSTM



*   MobileNet-V2 + BiLSTM: 0.221144 kWh
*   MobileNet-V2 + ConvLSTM: 0.174687 kWh



## 1 Operazioni preliminari e Data Pre-Processing (DPP)


In [None]:
# Mount Google Drive at /content/gdrive (Colab-only helper); force_remount=True
# asks for a fresh mount even if the drive was already mounted.

from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

In [None]:
!pip install codecarbon

In [None]:
import os

def delete_npy_files(directory):
    """Delete every ``.npy`` file located directly inside ``directory``.

    Only regular files at the top level of the folder are removed:
    sub-directories are neither deleted nor traversed, even if their name
    ends with ``.npy``.

    Parameters
    ----------
    directory : str
                Path of the folder to clean up. If it is not an existing
                directory a message is printed and nothing is deleted.
    """
    if not os.path.isdir(directory):
        print(f"{directory} non è una directory valida.")
        return

    count = 0
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if filename.endswith('.npy') and os.path.isfile(file_path):
            os.remove(file_path)
            count += 1
            print(f"File {file_path} cancellato.")

    # Report the total exactly once, after the scan. It used to be printed
    # after every single deletion and never when no file matched.
    print(f"{count} file .npy cancellati da {directory}")


# Remove the .npy files stored directly in the root of the mounted Drive.
path = "/content/gdrive/My Drive/"
delete_npy_files(path)

In [None]:

import os
import cv2
import numpy as np

def count_chunks(videoBasePath):
    """Count the 16-frame chunks available in a dataset.

    The dataset is expected to be organised as
    ``<videoBasePath>/<Violence|NonViolence>/<cam1|cam2>/<video files>``.
    The number of chunks of a video is the frame count reported by OpenCV
    (``cv2.CAP_PROP_FRAME_COUNT``) integer-divided by 16, so trailing frames
    that do not fill a whole chunk are ignored.

    Parameters
    ----------
    videoBasePath : str
                    Base path of the dataset

    Returns
    -------
    cnt : int
          number of 16-frame chunks in the dataset
    """

    folders = ['Violence', 'NonViolence']
    cams = ['cam1', 'cam2']
    cnt = 0

    for folder in folders:
        for camName in cams:
            path = os.path.join(videoBasePath, folder, camName)

            for videofile in os.listdir(path):
                video = cv2.VideoCapture(os.path.join(path, videofile))
                try:
                    numframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                finally:
                    # Always free the capture handle: one is opened per video.
                    video.release()
                # Clamp at zero so an unknown (negative) frame count cannot
                # lower the running total.
                cnt += max(numframes, 0) // 16

    return cnt

def _iter_video_chunks(video, max_chunks, chunk_len=16, frame_size=(224, 224)):
    """Yield at most ``max_chunks`` float32 arrays of ``chunk_len`` resized frames read from ``video``."""
    frames = []
    produced = 0
    while produced < max_chunks:
        ret, img = video.read()
        if not ret:
            # End of stream (or decoding failure): an incomplete chunk is dropped.
            break
        frames.append(cv2.resize(img, frame_size))
        if len(frames) == chunk_len:
            yield np.array(frames, dtype=np.float32)
            frames = []
            produced += 1


def preprocessVideos(videoBasePath, mainDir, featureBasePath, verbose=True):
    """Preprocess all the videos.

    It extracts samples from the videos organised in ``Violence`` and
    ``NonViolence`` folders, each split into ``cam1`` and ``cam2``.
    Every sample is a chunk of 16 consecutive frames, each resized to
    224x224 and stored as float32, saved as its own ``.npy`` file in
    ``featureBasePath``. The list of sample file names and the matching
    labels (1 for ``Violence``, 0 otherwise) are saved as ``filenames.npy``
    and ``labels.npy`` in ``mainDir``.
    According to the original notes, the AIRTLab dataset yields 3537 chunks.

    Only complete chunks are written: if a video decodes fewer frames than
    OpenCV reports, the missing chunks are skipped, and labels are collected
    together with the file names so the two lists always have the same length.

    Parameters
    ----------
    videoBasePath : str
                    Pathname to the base of the video repository, which contains two directories,
                    Violence and NonViolence, which are divided into cam1 and cam2.
    mainDir: str
             Pathname to store the files with sample filenames and labels.
    featureBasePath : str
                      it is the pathname of a base where the numpy arrays have to be saved.
    verbose : bool
              if True print debug logs (default True)

    """

    folders = ['Violence', 'NonViolence']
    cams = ['cam1', 'cam2']
    fileNames = []
    labels = []

    for folder in folders:
        label = 1 if folder == 'Violence' else 0
        for camName in cams:
            path = os.path.join(videoBasePath, folder, camName)

            # os.listdir order is arbitrary; sorting keeps the sample order
            # (and therefore any seeded split built on it) reproducible.
            for videofile in sorted(os.listdir(path)):
                filePath = os.path.join(path, videofile)
                video = cv2.VideoCapture(filePath)
                try:
                    numframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                    fps = int(video.get(cv2.CAP_PROP_FPS))
                    chunks = max(numframes, 0) // 16
                    if verbose:
                        print(filePath)
                        print("*** [Video Info] Number of frames: {} - fps: {} - chunks: {}".format(numframes, fps, chunks))
                    # Frames are streamed chunk by chunk, so a whole video is
                    # never held in memory as float32.
                    for chunk_cnt, X in enumerate(_iter_video_chunks(video, chunks), start=1):
                        filename = folder + '_' + camName + '_' + videofile + '_chunk_' + str(chunk_cnt) + '.npy'
                        np.save(os.path.join(featureBasePath, filename), X)
                        fileNames.append(filename)
                        labels.append(label)
                finally:
                    video.release()

    # float64 matches the dtype of the label array written previously.
    npLabels = np.array(labels, dtype=np.float64)

    fileNamesNp = os.path.join(mainDir, 'filenames.npy')
    np.save(fileNamesNp, fileNames)

    labelsNp = os.path.join(mainDir, 'labels.npy')
    np.save(labelsNp, npLabels)

    if verbose:
        print("** Labels **")
        print(npLabels.shape)
        print('\n****\n')
        print("** Samples **")
        print(len(fileNames))
        print('\n****\n')

## 2 Esecuzione Data Pre-Processing (DPP)


In [None]:
# Folders that store the samples (the 16-frame sub-sequences) used for
# training, validation and test.
#!rm -rf airtlabDataset
#!mkdir airtlabDataset
#!mkdir airtlabDataset/features
#!mkdir airtlabDataset/results


paths = ["/airtlabDataset", "/airtlabDataset/features", "/airtlabDataset/results"]
for path in paths:
    # exist_ok=True keeps the cell safely re-runnable without a separate
    # existence check; makedirs also creates any missing parent folder.
    os.makedirs(path, exist_ok=True)

In [None]:
# Run the pre-processing on the dataset stored on Drive: filenames.npy and
# labels.npy go to /airtlabDataset, the chunk files to /airtlabDataset/features.
preprocessVideos('/content/gdrive/My Drive/Dataset/AirtLab-Dataset', '/airtlabDataset', '/airtlabDataset/features', True)

## 3 Esperimento


In [None]:

import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
from sklearn.metrics import roc_curve, auc, accuracy_score, confusion_matrix, classification_report
from keras.callbacks import EarlyStopping
import matplotlib.pylab as plt
import os
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, Flatten, ConvLSTM2D, TimeDistributed, Bidirectional, LSTM
#from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as vgg16_preprocess_input
#from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input as mobilenet_v2_preprocess_input
from keras.utils import Sequence

class DataGen(Sequence):
    """ A sequence of data for training/test/validation, loaded from disk
    batch by batch. Extends the tensorflow.keras.utils.Sequence: https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence

    Attributes
    ----------
    base_path : str
                path to the folder including the samples.
    filenames : sequence of str
                sample filenames, relative to base_path.
    labels : sequence
             sample labels, aligned with filenames.
    batch_size : int
                 batch size to load samples
    Preprocess_input : callable
                       function applied to every sample right after it is
                       loaded with np.load.

    """

    def __init__(self, base_path, filenames, labels, batch_size, Preprocess_input, **kwargs):
        # Initialise the base class; any extra keyword argument is forwarded
        # to it untouched.
        super().__init__(**kwargs)
        self.base_path = base_path
        self.filenames = filenames
        self.labels = labels
        self.batch_size = batch_size
        self.Preprocess_input = Preprocess_input

    def __len__(self):
        """Return the number of batches; the last one may be smaller than batch_size."""
        # int() turns the NumPy scalar into a plain Python integer.
        return int(np.ceil(len(self.filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load, preprocess and return the (samples, labels) pair of batch ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_files = self.filenames[start:stop]
        batch_y = self.labels[start:stop]

        batch_x = [
            self.Preprocess_input(np.load(os.path.join(self.base_path, file_name)))
            for file_name in batch_files
        ]
        return np.array(batch_x), np.array(batch_y)

def GetPretrainedModel(ModelConstructor, input_shape=(224, 224, 3), print_summary=True):
    """ Builds a pretrained 2D CNN with the Imagenet weights and without its
    top layers, freezing all of its layers.

    Parameters
    ----------
    ModelConstructor : Callable[[bool], [str], [tuple], Sequential]
                       Function that download the pretrained model, i.e. one of the Keras applications:
                       https://keras.io/api/applications/
                       The arguments are include_top, weights, and input_shape.
    input_shape : tuple
                  The input shape for the pretrained model.
    print_summary : bool
                    If True prints the model summary.

    Returns
    -------
    model : Sequential
          The instantiated model.
    """

    model = ModelConstructor(include_top=False, weights="imagenet", input_shape=input_shape)

    # Freeze every layer: the pretrained weights are not updated by training.
    for layer in model.layers:
        layer.trainable = False

    # Honour the documented flag, which used to be silently ignored.
    if print_summary:
        model.summary()

    return model

def getLSTMModel(getConvModel, ModelConstructor, pretrained_input_shape=(224, 224, 3), verbose=True):
    """Creates the time-distributed CNN + BiLSTM + fully connected layers
    end-to-end model object with the sequential API: https://keras.io/models/sequential/

    The model takes chunks of 16 frames, each with shape
    pretrained_input_shape, and outputs a single sigmoid unit.

    Parameters
    ----------
    getConvModel : Callable[Callable[[bool], [str], [tuple], Sequential], [tuple], [bool], Sequential]
                Function that instantiates the pretrained Convolutional model
                to be applied in a time distributed fashion.
    ModelConstructor : Callable[[bool], [str], [tuple], Sequential]
                       Function that download the pretrained model, i.e. one of the Keras applications:
                       https://keras.io/api/applications/
                       The arguments are include_top, weights, and input_shape.
    pretrained_input_shape : tuple
                             The input shape of a single frame for the pretrained model.
    verbose : bool
              if True prints the model summary (default True)

    Returns
    -------
    model : Sequential
            The instantiated and compiled model
    """
    # Derive the chunk shape from the frame shape so the two cannot disagree
    # (identical to the former hard-coded (16, 224, 224, 3) for the default).
    chunk_shape = (16,) + tuple(pretrained_input_shape)
    conv_model = getConvModel(ModelConstructor, pretrained_input_shape, verbose)

    model = Sequential()
    model.add(TimeDistributed(conv_model, input_shape=chunk_shape))

    # One flat feature vector per frame, then the sequence is summarised by
    # the BiLSTM (return_sequences=False keeps only the final output).
    model.add(TimeDistributed(Flatten()))
    model.add(Bidirectional(LSTM(units=128, return_sequences=False)))
    #model.add(LSTM(units=128, return_sequences=False))

    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))

    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    if verbose:
        model.summary()
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

def getConvLSTMModel(getConvModel, ModelConstructor, pretrained_input_shape=(224, 224, 3), verbose=True):
    """Creates the time-distributed CNN + ConvLSTM + fully connected layers
    end-to-end model object with the sequential API: https://keras.io/models/sequential/

    The model takes chunks of 16 frames, each with shape
    pretrained_input_shape, and outputs a single sigmoid unit.

    Parameters
    ----------
    getConvModel : Callable[Callable[[bool], [str], [tuple], Sequential], [tuple], [bool], Sequential]
                Function that instantiates the pretrained Convolutional model
                to be applied in a time distributed fashion.
    ModelConstructor : Callable[[bool], [str], [tuple], Sequential]
                       Function that download the pretrained model, i.e. one of the Keras applications:
                       https://keras.io/api/applications/
                       The arguments are include_top, weights, and input_shape.
    pretrained_input_shape : tuple
                             The input shape of a single frame for the pretrained model.
    verbose : bool
              if True prints the model summary (default True)

    Returns
    -------
    model : Sequential
            The instantiated and compiled model
    """
    # Derive the chunk shape from the frame shape so the two cannot disagree
    # (identical to the former hard-coded (16, 224, 224, 3) for the default).
    chunk_shape = (16,) + tuple(pretrained_input_shape)
    conv_model = getConvModel(ModelConstructor, pretrained_input_shape, verbose)

    model = Sequential()
    model.add(TimeDistributed(conv_model, input_shape=chunk_shape))

    # The ConvLSTM consumes the per-frame feature maps directly, so no
    # per-frame flattening is applied before it.
    model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3)))

    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu'))

    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    if verbose:
        model.summary()
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

from keras.callbacks import ModelCheckpoint, EarlyStopping
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split





from codecarbon import track_emissions
@track_emissions(project_name="MV2")
def runEndToEndExperiment(getLSTMModel, getConvModel, ModelConstructor, pretrained_input_shape, Preprocess_input, batchSize, datasetBasePath, npyBasePath, featuresPath, samplesMMapName, lablesMMapName, endToEndModelName, rState, savePath):
    """Runs the tests with end to end models.

    Five stratified shuffle splits (80% train / 20% test) are evaluated. In
    each split 12.5% of the training part is held out for validation, a new
    model is trained with early stopping and checkpointing, and it is then
    scored on the test part. The ROC curves of all splits, the mean ROC and
    the per-split metrics are plotted/printed, and the plot is saved as a PDF
    named after endToEndModelName in the current working directory.

    Parameters
    ----------
    getLSTMModel : Callable[[Callable[Callable[[bool], [str], [tuple], Sequential],
                   [tuple], [bool], Sequential]], [Callable[[bool], [str], [tuple],
                   Sequential]], [tuple], [bool], Sequential]
                   Function that instantiates the model to be tested. The parameters
                   are a function that returns the Convolutional model to be tested
                   in a time distributed fashion, the pretrained model constructor,
                   its input shape, and a boolean for verbose output
    getConvModel : Callable[Callable[[bool], [str], [tuple], Sequential], [tuple], [bool], Sequential]
                Function that instantiates the pretrained Convolutional model
                to be applied in a time distributed fashion.
    ModelConstructor : Callable[[bool], [str], [tuple], Sequential]
                       Function that download the pretrained model, i.e. one of the Keras applications:
                       https://keras.io/api/applications/
                       The arguments are include_top, weights, and input_shape.
    pretrained_input_shape : tuple
                             The input shape for the pretrained model.
    Preprocess_input : callable
                       Preprocessing function applied to every loaded sample
                       before it is fed to the model.
    batchSize : int
                Batch size to be used for training and testing
    datasetBasePath : str
                      Pathname to the base of the video repository. It is kept
                      for interface compatibility and is not read here.
    npyBasePath : str
                  Pathname where the files with sample filenames and labels are
                  stored.
    featuresPath : str
                  Folder (relative to npyBasePath) containing the actual files
                  with the samples.
    samplesMMapName : str
                      Name of the file storing the list with sample filenames.
    lablesMMapName : str
                     Name of the file storing the list of sample labels.
    endToEndModelName : str
                        Model name to be used in the AUC-ROC plot.
    rState : int, RandomState instance or None
             Controls the randomness of the training and testing indices produced.
             Pass an int for reproducible output across multiple function calls.
    savePath : str
               Path to the directory where the model and weights will be saved.
               It is created if it does not exist.
    """
    # X holds the sample file names, y the matching labels. The chunk count
    # is no longer recomputed here: its value was never used, and computing
    # it meant re-opening every video of the dataset.
    X = np.load(os.path.join(npyBasePath, samplesMMapName))
    y = np.load(os.path.join(npyBasePath, lablesMMapName))

    # Folder with the sample files; it does not change across splits.
    filepath = os.path.join(npyBasePath, featuresPath)

    # Checkpoints and final models are written here during the splits, so
    # make sure the folder exists before any training starts.
    os.makedirs(savePath, exist_ok=True)

    nsplits = 5
    cv = StratifiedShuffleSplit(n_splits=nsplits, train_size=0.8, random_state=rState)

    tprs = []
    aucs = []
    scores = []
    sens = np.zeros(shape=(nsplits))
    specs = np.zeros(shape=(nsplits))
    f1Scores = np.zeros(shape=(nsplits))
    mean_fpr = np.linspace(0, 1, 100)
    plt.figure(num=1, figsize=(10, 10))

    for i, (train, test) in enumerate(cv.split(X, y), start=1):

        # 12.5% of the 80% training part, i.e. 10% of all samples, for validation.
        X_train, X_val, y_train, y_val = train_test_split(X[train], y[train], test_size=0.125, random_state=rState)

        training_batch_generator = DataGen(filepath, X_train, y_train, batchSize, Preprocess_input)
        validation_batch_generator = DataGen(filepath, X_val, y_val, batchSize, Preprocess_input)
        test_batch_generator = DataGen(filepath, X[test], y[test], batchSize, Preprocess_input)

        # A new model per split; the summary is printed for the first split only.
        model = getLSTMModel(getConvModel, ModelConstructor, pretrained_input_shape, i == 1)

        # "{epoch:02d}" is left as a literal placeholder that ModelCheckpoint
        # fills in when it writes a file.
        checkpoint_path = os.path.join(savePath, f"best_model_fold_{i}_epoch_{{epoch:02d}}.h5")
        mc = ModelCheckpoint(checkpoint_path, monitor='val_loss', mode='min', save_best_only=True, save_weights_only=True, verbose=1)

        es = EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=1, restore_best_weights=True)

        model.fit(x=training_batch_generator, validation_data=validation_batch_generator, epochs=50, verbose=1, callbacks=[es, mc])

        print("Computing scores...")
        evaluation = model.evaluate(x=test_batch_generator)
        scores.append(evaluation)
        print("Computing probs...")
        probas = model.predict(x=test_batch_generator, verbose=1).ravel()

        fpr, tpr, thresholds = roc_curve(y[test], probas)
        # Resample the curve on a common FPR grid so the splits can be averaged.
        tprs.append(np.interp(mean_fpr, fpr, tpr))
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=2, alpha=0.3, label='ROC split %d (AUC = %0.4f)' % (i, roc_auc))

        y_pred = np.round(probas)
        report = classification_report(y[test], y_pred, target_names=['non-violent', 'violent'], output_dict=True)
        # Sensitivity is the recall of the positive class, specificity the
        # recall of the negative one.
        sens[i - 1] = report['violent']['recall']
        specs[i - 1] = report['non-violent']['recall']
        f1Scores[i - 1] = report['violent']['f1-score']

        print('confusion matrix split ' + str(i))
        print(confusion_matrix(y[test], y_pred))
        print(classification_report(y[test], y_pred, target_names=['non-violent', 'violent']))
        print('Loss: ' + str(evaluation[0]))
        print('Accuracy: ' + str(evaluation[1]))
        print('\n')

        final_model_path = os.path.join(savePath, f'final_model_fold_{i}.h5')
        model.save(final_model_path)

        # Drop the reference to the trained model before the next split builds a new one.
        del model

    plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8)

    mean_tpr = np.mean(tprs, axis=0)
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    plt.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.4f $\pm$ %0.4f)' % (mean_auc, std_auc), lw=2, alpha=.8)

    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2, label=r'$\pm$ 1 std. dev.')

    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.xlabel('False Positive Rate', fontsize=18)
    plt.ylabel('True Positive Rate', fontsize=18)
    plt.title('Cross-Validation ROC of ' + endToEndModelName + ' model', fontsize=18)
    plt.legend(loc="lower right", prop={'size': 15})

    # Each evaluation is [loss, accuracy], matching the compiled metrics.
    np_scores = np.array(scores)
    losses = np_scores[:, 0:1]
    accuracies = np_scores[:, 1:2]
    print('Losses')
    print(losses)
    print('Accuracies')
    print(accuracies)
    print('Sensitivities')
    print(sens)
    print('Specificities')
    print(specs)
    print('F1-scores')
    print(f1Scores)
    print("Avg loss: {0} +/- {1}".format(np.mean(losses), np.std(losses)))
    print("Avg accuracy: {0} +/- {1}".format(np.mean(accuracies), np.std(accuracies)))
    print("Avg sensitivity: {0} +/- {1}".format(np.mean(sens), np.std(sens)))
    print("Avg specificity: {0} +/- {1}".format(np.mean(specs), np.std(specs)))
    print("Avg f1-score: {0} +/- {1}".format(np.mean(f1Scores), np.std(f1Scores)))

    plt.savefig(endToEndModelName.replace('+', '') + '.pdf')
    plt.show()


In [None]:
from keras.applications import MobileNetV2
from keras.applications.mobilenet_v2 import preprocess_input as mobilenet_v2_preprocess_input

# MobileNetV2 + BiLSTM experiment: batch size 8, random state 42; models and
# weights are saved to the MobileNetV2_BiLSTM folder on Drive.
runEndToEndExperiment(getLSTMModel,
                      GetPretrainedModel,
                      MobileNetV2, (224, 224, 3),
                      mobilenet_v2_preprocess_input,
                      8,
                      '/content/gdrive/My Drive/Dataset/AirtLab-Dataset',
                      '/airtlabDataset',
                      'features',
                      'filenames.npy',
                      'labels.npy',
                      'MobileNetV2 + BiLSTM',
                      42,
                      '/content/gdrive/My Drive/MobileNetV2_BiLSTM')


In [None]:
from keras.applications import MobileNetV2
from keras.applications.mobilenet_v2 import preprocess_input as mobilenet_v2_preprocess_input

# MobileNetV2 + ConvLSTM experiment: batch size 8, random state 42; models and
# weights are saved to the MobileNetV2_ConvLSTM folder on Drive.
runEndToEndExperiment(getConvLSTMModel,
                      GetPretrainedModel,
                      MobileNetV2,
                       (224, 224, 3),
                      mobilenet_v2_preprocess_input,
                      8,
                      '/content/gdrive/My Drive/Dataset/AirtLab-Dataset',
                      '/airtlabDataset',
                      'features',
                      'filenames.npy',
                      'labels.npy',
                      'MobileNetV2 + ConvLSTM',
                      42,
                      '/content/gdrive/My Drive/MobileNetV2_ConvLSTM')

In [None]:
from keras import __version__
from keras import backend as K
import sklearn

# Report the library versions in use for this run.
print('Using Keras version:', __version__, 'backend:', K.backend())

if K.backend() == "tensorflow":
    import tensorflow as tf
    # An empty device name means that no GPU was found: show "None" instead.
    device_name = tf.test.gpu_device_name() or "None"
    print('Using TensorFlow version:', tf.__version__, ', GPU:', device_name)

print('The scikit-learn version is {}.'.format(sklearn.__version__))

In [None]:
!nvidia-smi