In [None]:
!pip install codecarbon

# MobileNet-V3 small + convLSTM

In [None]:
# definitions of two end-to-end models + definitions of experiments

import os
import time

import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
from sklearn.metrics import roc_curve, auc, accuracy_score, confusion_matrix, classification_report
from keras.callbacks import EarlyStopping
import matplotlib.pylab as plt
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, Flatten, ConvLSTM2D, TimeDistributed, Bidirectional, LSTM
from keras.utils import Sequence
from tensorflow.keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
import tensorflow as tf



# Custom callback that reports the duration of every prediction batch
class InferenceTimeCallback(Callback):
    """Keras callback that prints how long each prediction batch takes.

    After each batch the following attributes are set on the instance:
    ``start_time`` and ``end_time`` (``time.perf_counter()`` readings, only
    meaningful as a difference) and ``inference_time`` (their difference,
    in seconds).
    """

    def on_predict_batch_begin(self, batch, logs=None):
        """Record the clock reading at the start of prediction batch ``batch``."""
        # perf_counter is monotonic, so the interval cannot be distorted by
        # wall-clock adjustments the way time.time() can.
        self.start_time = time.perf_counter()

    def on_predict_batch_end(self, batch, logs=None):
        """Compute and print the elapsed time of prediction batch ``batch``."""
        self.end_time = time.perf_counter()
        self.inference_time = self.end_time - self.start_time
        print(f"Inference time for batch {batch}: {self.inference_time} seconds")




### **Test sui tempi di inferenza per l'ultimo split sul modello ConvLSTM**

In [None]:
# Mount Google Drive at /content/gdrive (force_remount=True re-mounts even if it is already mounted)

from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

In [None]:
# Load the saved MobileNet-V3 Small + ConvLSTM model (final_model_fold_5.h5) from Google Drive

from tensorflow.keras.models import load_model

MobileNetV3Small_ConvLSTM= load_model('/content/gdrive/MyDrive/Modelli/MobileNet_V3_Small/ConvLSTM/final_model_fold_5.h5')


# Print the model summary
MobileNetV3Small_ConvLSTM.summary()


In [None]:
# Funzioni per il pre processamento dei video e l'inferenza

import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import time

def load_keras_model(model_path):
    """Load the Keras model stored at ``model_path`` and return it."""
    return load_model(model_path)

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video into clips of ``batch_size`` resized float32 frames.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        input_shape: Pair whose second item is passed to ``cv2.resize`` as
            the width and whose first item is passed as the height.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of arrays, each holding ``batch_size`` consecutive frames.
        Trailing frames that do not fill a whole clip are dropped; the list
        is empty when fewer than ``batch_size`` frames were decoded.
    """
    # cv2.resize expects its target size as (width, height).
    target_size = (input_shape[1], input_shape[0])

    capture = cv2.VideoCapture(video_path)
    decoded = []
    while capture.isOpened():
        ok, image = capture.read()
        if not ok:
            break
        # Pixel values are only cast to float32, not rescaled.
        decoded.append(cv2.resize(image, target_size).astype('float32'))
    capture.release()

    clip_count = len(decoded) // batch_size
    if clip_count == 0:
        return []

    usable = np.array(decoded)[:clip_count * batch_size]
    return np.split(usable, clip_count)

def run_inference(model, input_data):
    """Return the result of ``model.predict`` on ``input_data``."""
    return model.predict(input_data)

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute the binary-classification metrics reported by the notebook.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard predictions, compared against ``y_true``.
        y_probs: Scores passed to ``roc_curve``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``,
        where specificity is the recall computed with ``pos_label=0``.
    """
    threshold_scores = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Recall of class 0 is the true-negative rate.
        recall_score(y_true, y_pred, pos_label=0),
    )
    fpr, tpr, _ = roc_curve(y_true, y_probs)
    return (*threshold_scores, auc(fpr, tpr), fpr, tpr)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with the chance diagonal and show the figure.

    Args:
        fpr: False-positive rates (x axis).
        tpr: True-positive rates (y axis).
        roc_auc: Area under the curve, shown in the legend.
        title: Figure title.
    """
    curve_label = 'ROC curve (area = %0.4f)' % roc_auc
    diagonal = [0, 1]

    plt.figure()
    plt.plot(fpr, tpr, color='b', lw=2, label=curve_label)
    plt.plot(diagonal, diagonal, color='r', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4``/``.avi`` file in a folder.

    Args:
        folder_path: Directory listed with ``os.listdir``.
        label: Label repeated once for every clip found in this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        ``(clips, labels)``: two lists of equal length.
    """
    clips, clip_labels = [], []
    for entry in os.listdir(folder_path):
        if not entry.endswith((".mp4", ".avi")):
            continue
        new_clips = preprocess_video(os.path.join(folder_path, entry), input_shape, batch_size)
        clips += new_clips
        clip_labels += [label] * len(new_clips)
    return clips, clip_labels

def inferenceV3_ConvLSTM_NOquantizzata():
    """Evaluate the non-quantized Keras model on the inference videos.

    Loads the model, cuts every video of the Violence folder (label 1) and
    of the NonViolence folder (label 0) into 16-frame clips, runs one
    ``predict`` call per clip and prints accuracy, precision, recall,
    specificity, ROC AUC, the mean prediction time per clip, the
    classification report and the confusion matrix; finally plots the ROC
    curve.

    Raises:
        ValueError: If no clip could be extracted from the two folders.
    """
    model_path = '/content/gdrive/MyDrive/Modelli/MobileNet_V3_Small/ConvLSTM/final_model_fold_5.h5'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)
    batch_size = 16

    model = load_keras_model(model_path)

    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = np.array(violence_labels + nonviolence_labels)

    # Fail early with a clear message instead of an obscure error further down
    # (metrics on empty arrays / division by zero when averaging).
    if not all_videos:
        raise ValueError(f"No {batch_size}-frame clips found in {violence_path} or {nonviolence_path}")

    total_inference_time = 0.0
    y_probs = []
    for video in all_videos:
        # Add the leading batch axis clip by clip; stacking every clip into one
        # big array first would duplicate the whole dataset in memory.
        clip = np.expand_dims(video, axis=0)
        # perf_counter is monotonic, which makes it the right clock for intervals.
        start_time = time.perf_counter()
        output_data = run_inference(model, clip)
        total_inference_time += time.perf_counter() - start_time
        y_probs.append(output_data.ravel()[0])

    y_probs = np.array(y_probs)
    y_pred = np.round(y_probs)

    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time per batch
    average_inference_time_per_batch = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time per Batch: {average_inference_time_per_batch:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred, target_names=['NonViolent', 'Violent']))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for tf_model_mv3_convLStm')

if __name__ == '__main__':
    inferenceV3_ConvLSTM_NOquantizzata()


# Quantizzazione intera 8 bit con fallback a float (utilizzando input/output float predefinito)



*   Peso modello: circa 3.33 MB
*   Accuratezza modello: 87.8 %
*   Auc: circa 90.6 %
*   Tempo inferenza medio per batch: 0.33 s
*   Consumo energetico medio per 102 secondi di video: 0.002 kWh







In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os


# Size of the clips fed to the network
batch_size = 16
height = 224
width = 224
channels = 3  # not read by generate_video_batches

# Generator that extracts frames from videos and groups them into clips
def generate_video_batches(video_paths, seed=None):
    """Yield representative-dataset samples built from video frames.

    Every video is decoded frame by frame, each frame is resized to
    ``(width, height)`` and groups of ``batch_size`` consecutive frames are
    yielded as soon as they are complete. Frames left over at the end of a
    video are dropped.

    Args:
        video_paths: Iterable of video file paths.
        seed: Unused; kept so existing calls that pass it keep working.

    Yields:
        A one-element list holding a float32 array of ``batch_size`` frames.

    NOTE(review): the inference cells of this notebook add a leading batch
    axis (``np.expand_dims(video, axis=0)``) before feeding a clip, while the
    samples yielded here have no such axis - confirm this is the shape the
    converter expects for calibration.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                frames.append(cv2.resize(frame, (width, height)))

                if len(frames) == batch_size:
                    # Yield immediately instead of buffering every clip of
                    # every video before producing the first sample.
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            # Also runs when the consumer stops iterating early.
            cap.release()

# Set up the converter from the Keras model loaded earlier in the notebook
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_ConvLSTM)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Calibration samples come from a single training video; the seed argument is ignored by the generator
converter.representative_dataset = lambda: generate_video_batches(['/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/Violence/cam1/1.mp4'],
                                                                 seed=42)

# Enable Select TF ops and disable the experimental lowering of tensor-list ops
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter._experimental_lower_tensor_list_ops = False

tflite_quant_model = converter.convert()

# Write the converted model to the current working directory
with open('converted_model_ConvLSTM_Quantizzazione_Int_fallbackFloat.tflite', 'wb') as f:
    f.write(tflite_quant_model)


# Report the size of the saved file in MiB (bytes / 1024 / 1024)
model_quantized_size = os.path.getsize('converted_model_ConvLSTM_Quantizzazione_Int_fallbackFloat.tflite')
model_quantized_size_mb = model_quantized_size / (1024 * 1024)
print(f"Quantizzazione completata e modello salvato come 'converted_model_ConvLSTM_Quantizzazione_Int_fallbackFloat.tflite'  Peso modello: {model_quantized_size_mb} MB")



In [None]:
import tensorflow as tf

def tflite_model_summary(interpreter):
    """Print a Keras-like summary table of a TFLite interpreter's tensors.

    Tensors are grouped by the part of their name before the first ``/``.
    For every group the table lists the dtypes and shapes of its tensors and
    the total number of elements; the last line reports the number of
    elements over all tensors. Every tensor returned by
    ``get_tensor_details()`` is counted, so the "Param #" column is an
    element count rather than a count of trainable weights only.

    Args:
        interpreter: Object exposing ``allocate_tensors()`` and
            ``get_tensor_details()``.
    """
    # Allocate tensors
    interpreter.allocate_tensors()

    layers = {}
    total_params = 0
    # Single pass: group each tensor under its name prefix and count its elements
    for tensor_detail in interpreter.get_tensor_details():
        layer_name = tensor_detail['name'].split('/')[0]
        shape = tensor_detail['shape']

        # Multiply as Python ints so the count cannot overflow a fixed-width
        # numpy integer; an empty shape (scalar) counts as one element.
        element_count = 1
        for dim in shape:
            element_count *= int(dim)

        layer = layers.setdefault(layer_name, {
            'name': layer_name,
            'output_shape': [],
            'type': [],
            'param_count': 0
        })
        layer['output_shape'].append(shape)
        layer['type'].append(str(tensor_detail['dtype']))
        layer['param_count'] += element_count
        total_params += element_count

    # Print the summary
    print("_________________________________________________________________")
    print(" Layer (type)                Output Shape              Param #   ")
    print("=================================================================")
    for layer_name, layer_info in layers.items():
        output_shape_str = ' / '.join([str(shape) for shape in layer_info['output_shape']])
        dtype_str = ' / '.join(layer_info['type'])
        print(f" {layer_name} ({dtype_str})  {output_shape_str}     {layer_info['param_count']}")
    print("=================================================================")
    print(f"Total params: {total_params}")
    print("_______________________________________________________________")

# Open the int8/float-fallback model file written by the conversion cell and print its tensor summary
interpreter = tf.lite.Interpreter(model_path="converted_model_ConvLSTM_Quantizzazione_Int_fallbackFloat.tflite")

tflite_model_summary(interpreter)


In [None]:
# Funzioni di pre processamento dei video e inferenza

import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import time

def load_tflite_model(model_path):
    """Create a TFLite interpreter for ``model_path`` with its tensors allocated."""
    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    tflite_interpreter.allocate_tensors()
    return tflite_interpreter

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video into clips of ``batch_size`` resized float32 frames.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        input_shape: Pair whose second item is passed to ``cv2.resize`` as
            the width and whose first item is passed as the height.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of arrays, each holding ``batch_size`` consecutive frames.
        Trailing frames that do not fill a whole clip are dropped; the list
        is empty when fewer than ``batch_size`` frames were decoded.
    """
    # cv2.resize expects its target size as (width, height).
    target_size = (input_shape[1], input_shape[0])

    capture = cv2.VideoCapture(video_path)
    decoded = []
    while capture.isOpened():
        ok, image = capture.read()
        if not ok:
            break
        decoded.append(cv2.resize(image, target_size).astype('float32'))
    capture.release()

    clip_count = len(decoded) // batch_size
    if clip_count == 0:
        return []

    usable = np.array(decoded)[:clip_count * batch_size]
    return np.split(usable, clip_count)

def run_inference(interpreter, input_data):
    """Run one TFLite invocation and time it.

    Args:
        interpreter: Interpreter whose first input receives ``input_data``.
        input_data: Tensor written to the first input.

    Returns:
        ``(output_data, inference_time)``: the content of the first output
        tensor and the seconds spent inside ``interpreter.invoke()``.
    """
    in_info = interpreter.get_input_details()
    out_info = interpreter.get_output_details()

    interpreter.set_tensor(in_info[0]['index'], input_data)

    # Only the invoke() call is timed; feeding and reading tensors is excluded.
    began = time.time()
    interpreter.invoke()
    elapsed = time.time() - began

    return interpreter.get_tensor(out_info[0]['index']), elapsed

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute the binary-classification metrics reported by the notebook.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard predictions, compared against ``y_true``.
        y_probs: Scores passed to ``roc_curve``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``,
        where specificity is the recall computed with ``pos_label=0``.
    """
    threshold_scores = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Recall of class 0 is the true-negative rate.
        recall_score(y_true, y_pred, pos_label=0),
    )
    fpr, tpr, _ = roc_curve(y_true, y_probs)
    return (*threshold_scores, auc(fpr, tpr), fpr, tpr)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with the chance diagonal and show the figure.

    Args:
        fpr: False-positive rates (x axis).
        tpr: True-positive rates (y axis).
        roc_auc: Area under the curve, shown in the legend.
        title: Figure title.
    """
    curve_label = 'ROC curve (area = %0.4f)' % roc_auc
    diagonal = [0, 1]

    plt.figure()
    plt.plot(fpr, tpr, color='b', lw=2, label=curve_label)
    plt.plot(diagonal, diagonal, color='r', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4``/``.avi`` file in a folder.

    Args:
        folder_path: Directory listed with ``os.listdir``.
        label: Label repeated once for every clip found in this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        ``(clips, labels)``: two lists of equal length.
    """
    clips, clip_labels = [], []
    for entry in os.listdir(folder_path):
        if not entry.endswith((".mp4", ".avi")):
            continue
        new_clips = preprocess_video(os.path.join(folder_path, entry), input_shape, batch_size)
        clips += new_clips
        clip_labels += [label] * len(new_clips)
    return clips, clip_labels


from codecarbon import track_emissions
@track_emissions(project_name="MV3_Small_Inference_ConvLSTM_int8Bit_fallbackFLoat")
def inferenceV3_ConvLSTM_int8Bit_fallbackFloat():
    """Evaluate the int8-with-float-fallback TFLite model on the inference videos.

    Loads the TFLite model, cuts every video of the Violence folder (label 1)
    and of the NonViolence folder (label 0) into 16-frame clips, runs one
    interpreter invocation per clip and prints accuracy, precision, recall,
    specificity, ROC AUC, the mean invocation time, the classification
    report and the confusion matrix; finally plots the ROC curve. The whole
    run is wrapped by codecarbon's ``track_emissions`` decorator.

    Raises:
        ValueError: If no clip could be extracted from the two folders.
    """
    model_path = '/content/converted_model_ConvLSTM_Quantizzazione_Int_fallbackFloat.tflite'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)
    batch_size = 16

    interpreter = load_tflite_model(model_path)

    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = np.array(violence_labels + nonviolence_labels)

    # Fail early with a clear message instead of an obscure error further down
    # (metrics on empty arrays / division by zero when averaging).
    if not all_videos:
        raise ValueError(f"No {batch_size}-frame clips found in {violence_path} or {nonviolence_path}")

    total_inference_time = 0
    y_probs = []
    for video in all_videos:
        # Add the leading batch axis clip by clip; stacking every clip into one
        # big array first would duplicate the whole dataset in memory.
        output_data, inference_time = run_inference(interpreter, np.expand_dims(video, axis=0))
        total_inference_time += inference_time
        y_probs.append(output_data.ravel()[0])

    y_probs = np.array(y_probs)
    y_pred = np.round(y_probs)

    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time
    average_inference_time = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time: {average_inference_time:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for tf_model_mv3_convLStm_int8Bit_fallbackFLoat')

if __name__ == '__main__':
    inferenceV3_ConvLSTM_int8Bit_fallbackFloat()


# Quantizzazione intera 8 bit



*   Peso modello: circa  3.33 MB
*   Accuratezza modello:  51 %
*   Auc: circa 59 %
*   Tempo inferenza medio per batch: 0.32 s
*   Consumo energetico medio per 102 secondi di video: 0.0018 kWh

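**Attenzione**: prima della riconversione a float alcune probabilità risultano negative. Ciò potrebbe essere dovuto a problemi numerici di overflow o underflow in alcuni passaggi di quantizzazione e de-quantizzazione; si noti però che l'output int8 è con segno (da −128 a 127), quindi per ottenere una probabilità va de-quantizzato come (q − zero_point) · scale, usando i parametri di quantizzazione del tensore di output.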


Si ricorda che tale fenomeno non è presente, con la stessa tipologia di quantizzazione, nel BiLSTM.


In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# Size of the clips fed to the network
batch_size = 16
height = 224
width = 224
channels = 3  # not read by generate_video_batches

# Generator that extracts frames from videos and groups them into clips
def generate_video_batches(video_paths, seed=None):
    """Yield representative-dataset samples built from video frames.

    Every video is decoded frame by frame, each frame is resized to
    ``(width, height)`` and groups of ``batch_size`` consecutive frames are
    yielded as soon as they are complete. Frames left over at the end of a
    video are dropped.

    Args:
        video_paths: Iterable of video file paths.
        seed: Unused; kept so existing calls that pass it keep working.

    Yields:
        A one-element list holding a float32 array of ``batch_size`` frames.

    NOTE(review): the inference cells of this notebook add a leading batch
    axis (``np.expand_dims(video, axis=0)``) before feeding a clip, while the
    samples yielded here have no such axis - confirm this is the shape the
    converter expects for calibration.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, (width, height)))
                if len(frames) == batch_size:
                    # Yield immediately instead of buffering every clip of
                    # every video before producing the first sample.
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            # Also runs when the consumer stops iterating early.
            cap.release()

# Set up the converter from the Keras model loaded earlier in the notebook
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_ConvLSTM)

# Calibration samples come from a single training video; the seed argument is ignored by the generator
converter.representative_dataset = lambda: generate_video_batches(['/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/Violence/cam1/1.mp4'],
                                                                 seed=42)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Model input and output tensors are int8
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8

# Effective op set: built-ins plus Select TF ops, with the experimental
# lowering of tensor-list ops disabled. An earlier assignment of
# [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] used to precede this line but was
# overwritten by it before convert() ran, so it never took effect and has
# been dropped; the resulting configuration is unchanged.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter._experimental_lower_tensor_list_ops = False

tflite_quant_model = converter.convert()


# Write the converted model to the current working directory
with open('converted_model_ConvLSTM_Quantizzazione_Int.tflite', 'wb') as f:
    f.write(tflite_quant_model)

# Report the size of the saved file in MiB (bytes / 1024 / 1024)
model_quantized_size = os.path.getsize('converted_model_ConvLSTM_Quantizzazione_Int.tflite')
model_quantized_size_mb = model_quantized_size / (1024 * 1024)
print(f"Quantizzazione completata e modello salvato come 'converted_model_ConvLSTM_Quantizzazione_Int.tflite'  Peso modello: {model_quantized_size_mb} MB")


In [None]:
import tensorflow as tf

def tflite_model_summary(interpreter):
    """Print a Keras-like summary table of a TFLite interpreter's tensors.

    Tensors are grouped by the part of their name before the first ``/``.
    For every group the table lists the dtypes and shapes of its tensors and
    the total number of elements; the last line reports the number of
    elements over all tensors. Every tensor returned by
    ``get_tensor_details()`` is counted, so the "Param #" column is an
    element count rather than a count of trainable weights only.

    Args:
        interpreter: Object exposing ``allocate_tensors()`` and
            ``get_tensor_details()``.
    """
    # Allocate tensors
    interpreter.allocate_tensors()

    layers = {}
    total_params = 0
    # Single pass: group each tensor under its name prefix and count its elements
    for tensor_detail in interpreter.get_tensor_details():
        layer_name = tensor_detail['name'].split('/')[0]
        shape = tensor_detail['shape']

        # Multiply as Python ints so the count cannot overflow a fixed-width
        # numpy integer; an empty shape (scalar) counts as one element.
        element_count = 1
        for dim in shape:
            element_count *= int(dim)

        layer = layers.setdefault(layer_name, {
            'name': layer_name,
            'output_shape': [],
            'type': [],
            'param_count': 0
        })
        layer['output_shape'].append(shape)
        layer['type'].append(str(tensor_detail['dtype']))
        layer['param_count'] += element_count
        total_params += element_count

    # Print the summary
    print("_________________________________________________________________")
    print(" Layer (type)                Output Shape              Param #   ")
    print("=================================================================")
    for layer_name, layer_info in layers.items():
        output_shape_str = ' / '.join([str(shape) for shape in layer_info['output_shape']])
        dtype_str = ' / '.join(layer_info['type'])
        print(f" {layer_name} ({dtype_str})  {output_shape_str}     {layer_info['param_count']}")
    print("=================================================================")
    print(f"Total params: {total_params}")
    print("_______________________________________________________________")

# Open the int8 model file written by the conversion cell and print its tensor summary
interpreter = tf.lite.Interpreter(model_path="converted_model_ConvLSTM_Quantizzazione_Int.tflite")

tflite_model_summary(interpreter)


In [None]:
# Funzioni di pre processamento dei video e di inferenza

import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import time

def load_tflite_model(model_path):
    """Create a TFLite interpreter for ``model_path`` with its tensors allocated."""
    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    tflite_interpreter.allocate_tensors()
    return tflite_interpreter

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video into clips of ``batch_size`` resized int8 frames.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        input_shape: Pair whose second item is passed to ``cv2.resize`` as
            the width and whose first item is passed as the height.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of int8 arrays, each holding ``batch_size`` consecutive
        frames. Trailing frames that do not fill a whole clip are dropped;
        the list is empty when fewer than ``batch_size`` frames were decoded.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (input_shape[1], input_shape[0]))
        # A direct astype('int8') wraps pixel values above 127 to negative
        # numbers (200 -> -56), which scrambles brightness ordering. Shift the
        # 0..255 range down by 128 instead so it maps monotonically onto
        # -128..127.
        # NOTE(review): this assumes the model's int8 input uses scale ~= 1 and
        # zero_point = -128 (calibration on raw 0..255 frames) - confirm with
        # interpreter.get_input_details()[0]['quantization'].
        frame = (frame.astype('int16') - 128).astype('int8')
        frames.append(frame)

    cap.release()
    frames = np.array(frames)

    if len(frames) >= batch_size:
        num_batches = len(frames) // batch_size
        frames = frames[:num_batches * batch_size]
        batches = np.split(frames, num_batches)
    else:
        batches = []

    return batches

def run_inference(interpreter, input_data):
    """Run one TFLite invocation and time it.

    Args:
        interpreter: Interpreter whose first input receives ``input_data``.
        input_data: Tensor written to the first input.

    Returns:
        ``(output_data, inference_time)``: the content of the first output
        tensor and the seconds spent inside ``interpreter.invoke()``.
    """
    in_info = interpreter.get_input_details()
    out_info = interpreter.get_output_details()

    interpreter.set_tensor(in_info[0]['index'], input_data)

    # Only the invoke() call is timed; feeding and reading tensors is excluded.
    began = time.time()
    interpreter.invoke()
    elapsed = time.time() - began

    return interpreter.get_tensor(out_info[0]['index']), elapsed

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute the binary-classification metrics reported by the notebook.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard predictions, compared against ``y_true``.
        y_probs: Scores passed to ``roc_curve``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``,
        where specificity is the recall computed with ``pos_label=0``.
    """
    threshold_scores = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Recall of class 0 is the true-negative rate.
        recall_score(y_true, y_pred, pos_label=0),
    )
    fpr, tpr, _ = roc_curve(y_true, y_probs)
    return (*threshold_scores, auc(fpr, tpr), fpr, tpr)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with the chance diagonal and show the figure.

    Args:
        fpr: False-positive rates (x axis).
        tpr: True-positive rates (y axis).
        roc_auc: Area under the curve, shown in the legend.
        title: Figure title.
    """
    curve_label = 'ROC curve (area = %0.4f)' % roc_auc
    diagonal = [0, 1]

    plt.figure()
    plt.plot(fpr, tpr, color='b', lw=2, label=curve_label)
    plt.plot(diagonal, diagonal, color='r', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4``/``.avi`` file in a folder.

    Args:
        folder_path: Directory listed with ``os.listdir``.
        label: Label repeated once for every clip found in this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        ``(clips, labels)``: two lists of equal length.
    """
    clips, clip_labels = [], []
    for entry in os.listdir(folder_path):
        if not entry.endswith((".mp4", ".avi")):
            continue
        new_clips = preprocess_video(os.path.join(folder_path, entry), input_shape, batch_size)
        clips += new_clips
        clip_labels += [label] * len(new_clips)
    return clips, clip_labels


#from codecarbon import track_emissions
#@track_emissions(project_name="MV3_Small_Inference_ConvLSTM_int8Bit")
def inferenceV3_BiLSTM():
    """Evaluate the int8 TFLite ConvLSTM model on the inference videos.

    Despite its name, this function loads
    ``converted_model_ConvLSTM_Quantizzazione_Int.tflite``. It cuts every
    video of the Violence folder (label 1) and of the NonViolence folder
    (label 0) into 16-frame clips, runs one interpreter invocation per clip,
    dequantizes the first output value into a probability and prints the
    probabilities, accuracy, precision, recall, specificity, ROC AUC, the
    mean invocation time, the classification report and the confusion
    matrix; finally plots the ROC curve.

    Raises:
        ValueError: If no clip could be extracted from the two folders.
    """
    model_path = 'converted_model_ConvLSTM_Quantizzazione_Int.tflite'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)
    batch_size = 16

    interpreter = load_tflite_model(model_path)

    # Quantization parameters of the output tensor: real = (q - zero_point) * scale.
    # A scale of 0 means the tensor carries no quantization parameters.
    output_scale, output_zero_point = interpreter.get_output_details()[0]['quantization']

    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = np.array(violence_labels + nonviolence_labels)

    # Fail early with a clear message instead of an obscure error further down
    # (metrics on empty arrays / division by zero when averaging).
    if not all_videos:
        raise ValueError(f"No {batch_size}-frame clips found in {violence_path} or {nonviolence_path}")

    total_inference_time = 0
    y_probs = []
    for video in all_videos:
        # Add the leading batch axis clip by clip; stacking every clip into one
        # big array first would duplicate the whole dataset in memory.
        output_data, inference_time = run_inference(interpreter, np.expand_dims(video, axis=0))
        total_inference_time += inference_time

        # The int8 output is signed (-128..127), so dividing it by 255 yields
        # negative "probabilities". Dequantize with the tensor's own scale and
        # zero-point; float() first so the subtraction cannot overflow int8.
        raw_output = float(output_data.ravel()[0])
        if output_scale:
            y_probs.append((raw_output - output_zero_point) * output_scale)
        else:
            y_probs.append(raw_output)

    y_probs = np.array(y_probs)
    print(f"Probabilità {y_probs}")
    y_pred = np.round(y_probs)


    # Check the unique values
    print("Unique values in all_labels:", np.unique(all_labels))
    print("Unique values in y_pred:", np.unique(y_pred))

    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time
    average_inference_time = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time: {average_inference_time:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for tf_model_mv3_convLStm_int8Bit')

if __name__ == '__main__':
    inferenceV3_BiLSTM()





# Quantizzazione Float a 16 bit

**ATTENZIONE**: La quantizzazione avverrà solamente in presenza di GPU. In caso di utilizzo del modello su CPU, avverrà una de-quantizzazione automatica a float-32




*   Peso modello: circa 6 MB
*   Accuratezza modello: 95 %
*   Auc: 99 %
*   Tempo inferenza medio per batch: 0.40 s
*   Consumo energetico medio per 102 secondi di video: 0.0018 kWh




In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# Size of the clips fed to the network
batch_size = 16
height = 224
width = 224
channels = 3  # not read by generate_video_batches

# Generator that extracts frames from videos and groups them into clips
def generate_video_batches(video_paths, seed=None):
    """Yield samples of ``batch_size`` resized video frames.

    Every video is decoded frame by frame, each frame is resized to
    ``(width, height)`` and groups of ``batch_size`` consecutive frames are
    yielded as soon as they are complete. Frames left over at the end of a
    video are dropped.

    Args:
        video_paths: Iterable of video file paths.
        seed: Unused; kept so existing calls that pass it keep working.

    Yields:
        A one-element list holding a float32 array of ``batch_size`` frames.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, (width, height)))
                if len(frames) == batch_size:
                    # Yield immediately instead of buffering every clip of
                    # every video before producing the first sample.
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            # Also runs when the consumer stops iterating early.
            cap.release()

# Set up the converter from the Keras model loaded earlier in the notebook
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_ConvLSTM)

# Allow built-in ops plus Select TF ops
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]

# float16 quantization settings (no representative dataset is assigned in this cell)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

# Disable the experimental lowering of tensor-list ops before converting
converter._experimental_lower_tensor_list_ops = False
tflite_quant_model = converter.convert()

# Write the converted model to the current working directory
with open('model_quantized_float16_ConvLSTM.tflite', 'wb') as f:
    f.write(tflite_quant_model)


# Report the size of the saved file in MiB (bytes / 1024 / 1024)
model_quantized_size = os.path.getsize('model_quantized_float16_ConvLSTM.tflite')
model_quantized_size_mb = model_quantized_size  / (1024*1024)
print(f"Quantizzazione completata e modello salvato come 'model_quantized_float16_ConvLSTM.tflite'  , Peso modello: {model_quantized_size_mb} MB")


In [None]:
# Funzioni di pre processamento dei video e di inferenza

import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import time

def load_tflite_model(model_path):
    """Create a TFLite interpreter for ``model_path`` with its tensors allocated."""
    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    tflite_interpreter.allocate_tensors()
    return tflite_interpreter

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video into clips of ``batch_size`` resized float32 frames.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        input_shape: Pair whose second item is passed to ``cv2.resize`` as
            the width and whose first item is passed as the height.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of arrays, each holding ``batch_size`` consecutive frames.
        Trailing frames that do not fill a whole clip are dropped; the list
        is empty when fewer than ``batch_size`` frames were decoded.
    """
    # cv2.resize expects its target size as (width, height).
    target_size = (input_shape[1], input_shape[0])

    capture = cv2.VideoCapture(video_path)
    decoded = []
    while capture.isOpened():
        ok, image = capture.read()
        if not ok:
            break
        decoded.append(cv2.resize(image, target_size).astype('float32'))
    capture.release()

    clip_count = len(decoded) // batch_size
    if clip_count == 0:
        return []

    usable = np.array(decoded)[:clip_count * batch_size]
    return np.split(usable, clip_count)

def run_inference(interpreter, input_data):
    """Run one TFLite invocation and time it.

    Args:
        interpreter: Interpreter whose first input receives ``input_data``.
        input_data: Tensor written to the first input.

    Returns:
        ``(output_data, inference_time)``: the content of the first output
        tensor and the seconds spent inside ``interpreter.invoke()``.
    """
    in_info = interpreter.get_input_details()
    out_info = interpreter.get_output_details()

    interpreter.set_tensor(in_info[0]['index'], input_data)

    # Only the invoke() call is timed; feeding and reading tensors is excluded.
    began = time.time()
    interpreter.invoke()
    elapsed = time.time() - began

    return interpreter.get_tensor(out_info[0]['index']), elapsed

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute the binary-classification metrics reported by the notebook.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard predictions, compared against ``y_true``.
        y_probs: Scores passed to ``roc_curve``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``,
        where specificity is the recall computed with ``pos_label=0``.
    """
    threshold_scores = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Recall of class 0 is the true-negative rate.
        recall_score(y_true, y_pred, pos_label=0),
    )
    fpr, tpr, _ = roc_curve(y_true, y_probs)
    return (*threshold_scores, auc(fpr, tpr), fpr, tpr)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with the chance diagonal and show the figure.

    Args:
        fpr: False-positive rates (x axis).
        tpr: True-positive rates (y axis).
        roc_auc: Area under the curve, shown in the legend.
        title: Figure title.
    """
    curve_label = 'ROC curve (area = %0.4f)' % roc_auc
    diagonal = [0, 1]

    plt.figure()
    plt.plot(fpr, tpr, color='b', lw=2, label=curve_label)
    plt.plot(diagonal, diagonal, color='r', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4``/``.avi`` file in a folder.

    Args:
        folder_path: Directory listed with ``os.listdir``.
        label: Label repeated once for every clip found in this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        ``(clips, labels)``: two lists of equal length.
    """
    clips, clip_labels = [], []
    for entry in os.listdir(folder_path):
        if not entry.endswith((".mp4", ".avi")):
            continue
        new_clips = preprocess_video(os.path.join(folder_path, entry), input_shape, batch_size)
        clips += new_clips
        clip_labels += [label] * len(new_clips)
    return clips, clip_labels


from codecarbon import track_emissions
@track_emissions(project_name="MV3_Small_Inference_ConvLSTM_Float16")
def inferenceV3_ConvLSTM():
    """Evaluate the float16 TFLite model on the inference videos.

    Loads the TFLite model, cuts every video of the Violence folder (label 1)
    and of the NonViolence folder (label 0) into 16-frame clips, runs one
    interpreter invocation per clip and prints accuracy, precision, recall,
    specificity, ROC AUC, the mean invocation time, the classification
    report and the confusion matrix; finally plots the ROC curve. The whole
    run is wrapped by codecarbon's ``track_emissions`` decorator.

    Raises:
        ValueError: If no clip could be extracted from the two folders.
    """
    model_path = '/content/model_quantized_float16_ConvLSTM.tflite'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)
    batch_size = 16

    interpreter = load_tflite_model(model_path)

    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = np.array(violence_labels + nonviolence_labels)

    # Fail early with a clear message instead of an obscure error further down
    # (metrics on empty arrays / division by zero when averaging).
    if not all_videos:
        raise ValueError(f"No {batch_size}-frame clips found in {violence_path} or {nonviolence_path}")

    total_inference_time = 0
    y_probs = []
    for video in all_videos:
        # Add the leading batch axis clip by clip; stacking every clip into one
        # big array first would duplicate the whole dataset in memory.
        output_data, inference_time = run_inference(interpreter, np.expand_dims(video, axis=0))
        total_inference_time += inference_time
        y_probs.append(output_data.ravel()[0])

    y_probs = np.array(y_probs)
    y_pred = np.round(y_probs)

    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time
    average_inference_time = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time: {average_inference_time:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for tf_model_mv3_convLStm_Float16')
if __name__ == '__main__':
    inferenceV3_ConvLSTM()


# Quantizzazione con attivazioni a 16 bit e pesi a 8 bit (Funzione sperimentale)

Dà problemi, non funziona!!!

In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# Shape of the clips fed to the network
batch_size = 16
height = 224
width = 224
channels = 3


# Extracts frames from videos and groups them into fixed-length clips
def generate_video_batches(video_paths, seed=None):
    """Yield calibration samples for the TFLite converter, one clip at a time.

    Each video is decoded frame by frame; every frame is resized to
    ``(width, height)`` and every ``batch_size`` consecutive frames are
    yielded as ``[float32 array]``. Frames left over at the end of a video
    are dropped.

    Clips are yielded as soon as they are complete instead of buffering all
    videos in memory first, and the capture handle is released even if
    decoding fails or the consumer stops early.

    NOTE(review): each sample is the stacked frames without the extra leading
    axis that the inference cells of this notebook add via ``np.expand_dims``
    - confirm this matches the model input.

    Args:
        video_paths: Iterable of video file paths.
        seed: Accepted for call-site compatibility; currently unused.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, (width, height)))
                if len(frames) == batch_size:
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            cap.release()


# Build a converter from the ConvLSTM Keras model loaded earlier in the notebook.
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_ConvLSTM)  # Or load the model here if necessary
# Calibration samples come from one Violence clip and one NonViolence clip.
# `seed` is passed through but generate_video_batches does not use it.
converter.representative_dataset = lambda: generate_video_batches(
    ['/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/Violence/cam1/1.mp4',
     '/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/NonViolence/cam1/1.mp4'
     ],
    seed=42
)

# Optimization settings and supported op sets (built-in ops, TF select ops and
# the experimental 16-bit-activation / 8-bit-weight op set).
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
# NOTE(review): private converter flag; presumably needed so the recurrent
# layer's tensor-list ops are kept as TF select ops - confirm.
converter._experimental_lower_tensor_list_ops = False


# The conversion runs inside try/except so that a failure is reported as a
# printed message instead of interrupting the cell.
try:
    tflite_quant_model = converter.convert()
    with open('model_quantized_16BitActivation_8BitWeights.tflite', 'wb') as f:
        f.write(tflite_quant_model)

    # Report the size of the written file in MiB.
    model_quantized_size = os.path.getsize('model_quantized_16BitActivation_8BitWeights.tflite')
    model_quantized_size_mb = model_quantized_size / (1024 * 1024)
    print(f"Quantizzazione completata e modello salvato come 'model_quantized_16BitActivation_8BitWeights.tflite', Peso modello: {model_quantized_size_mb:.2f} MB")
except Exception as e:
    print("Errore nella conversione:", e)


# Test sui tempi di inferenza per l'ultimo split sul modello BiLSTM

In [None]:
# Load the trained MobileNet-V3 Small + BiLSTM model (fold 5) from Google Drive

from tensorflow.keras.models import load_model


# Bound at module level: the conversion cells below read this name.
MobileNetV3Small_BiLSTM= load_model('/content/gdrive/MyDrive/Modelli/MobileNet_V3_Small/BiLSTM/final_model_fold_5.h5')

# Print the layer summary
MobileNetV3Small_BiLSTM.summary()


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import time

def load_keras_model(model_path):
    """Return the Keras model stored at ``model_path`` (thin ``load_model`` wrapper)."""
    return load_model(model_path)

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video and cut it into fixed-length clips of float32 frames.

    Pixel values are left in their decoded range (no ``/ 255`` rescaling).

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        input_shape: ``(height, width)`` every frame is resized to.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of arrays, each stacking ``batch_size`` resized frames. Frames
        left over after the last full clip are dropped; a video with fewer
        than ``batch_size`` frames yields an empty list.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # cv2.resize takes (width, height), hence the swapped indices.
            frame = cv2.resize(frame, (input_shape[1], input_shape[0]))
            frames.append(frame.astype('float32'))
    finally:
        # Release the capture handle even if decoding or resizing fails.
        cap.release()

    num_batches = len(frames) // batch_size
    if num_batches == 0:
        # Fewer than batch_size frames: the video is ignored.
        return []

    # Truncate to a multiple of batch_size, then split into equal clips.
    stacked = np.array(frames[:num_batches * batch_size])
    return np.split(stacked, num_batches)

def run_inference(model, input_data):
    """Return ``model.predict(input_data)`` for a Keras-style model."""
    return model.predict(input_data)

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute the binary-classification metrics reported by the notebook.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard predictions, used for the threshold-dependent scores.
        y_probs: Scores passed to ``roc_curve``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``.
    """
    threshold_scores = [
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Specificity is the recall computed with label 0 as the positive class.
        recall_score(y_true, y_pred, pos_label=0),
    ]
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_probs)
    area_under_curve = auc(false_pos_rate, true_pos_rate)

    return (*threshold_scores, area_under_curve, false_pos_rate, true_pos_rate)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with its AUC in the legend and show the figure.

    Args:
        fpr: False positive rates (x axis).
        tpr: True positive rates (y axis).
        roc_auc: Area under the curve, printed with four decimals.
        title: Figure title.
    """
    legend_label = 'ROC curve (area = %0.4f)' % roc_auc

    plt.figure()
    # Model curve first, then the diagonal used as a visual reference.
    plt.plot(fpr, tpr, color='b', lw=2, label=legend_label)
    plt.plot([0, 1], [0, 1], color='r', linestyle='--')
    plt.title(title)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4``/``.avi`` file in a folder.

    Args:
        folder_path: Directory scanned (non-recursively) for video files.
        label: Label attached to every clip coming from this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        Tuple ``(videos, labels)``: the clips returned by ``preprocess_video``
        for each file, and one copy of ``label`` per clip.
    """
    videos = []
    labels = []
    # sorted() makes the clip order reproducible (os.listdir order is
    # arbitrary); lower() also accepts upper-case extensions such as ".MP4".
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith((".mp4", ".avi")):
            continue
        video_path = os.path.join(folder_path, filename)
        video_batches = preprocess_video(video_path, input_shape, batch_size)
        videos.extend(video_batches)
        labels.extend([label] * len(video_batches))
    return videos, labels

def inferenceV3_BiLstm_NOquantizzata():
    """Evaluate the non-quantized Keras BiLSTM model on the inference videos.

    Loads the ``.h5`` model, cuts every video of the two class folders into
    16-frame clips, predicts each clip separately and prints the metrics, the
    mean prediction time per clip, the classification report and the
    confusion matrix, then plots the ROC curve.
    """
    model_path = '/content/gdrive/MyDrive/Modelli/MobileNet_V3_Small/BiLSTM/final_model_fold_5.h5'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)  # (height, width) the frames are resized to
    batch_size = 16

    model = load_keras_model(model_path)

    # Label 1 is assigned to the Violence folder, label 0 to NonViolence.
    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = violence_labels + nonviolence_labels

    # Each clip gets a leading axis of size 1 before being fed to the model.
    all_videos = np.array([np.expand_dims(video, axis=0) for video in all_videos])
    all_labels = np.array(all_labels)

    total_inference_time = 0
    y_probs = []
    for video in all_videos:
        # perf_counter() is monotonic and high resolution, unlike time.time(),
        # so short per-clip timings are not affected by clock adjustments.
        start_time = time.perf_counter()
        output_data = run_inference(model, video)
        total_inference_time += time.perf_counter() - start_time
        # Only the first value of the prediction is used as the score.
        y_probs.append(output_data.ravel()[0])

    y_probs = np.array(y_probs)
    y_pred = np.round(y_probs)

    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time per batch
    average_inference_time_per_batch = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time per Batch: {average_inference_time_per_batch:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred, target_names=['NonViolent', 'Violent']))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve (the title names the BiLSTM model evaluated here)
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for tf_model_mv3_BiLSTM')


if __name__ == '__main__':
    inferenceV3_BiLstm_NOquantizzata()

# Quantizzazione intera-intera 8 bit

*   Peso modello: circa  28.9 MB
*   Accuratezza modello: 51 %
*   Auc: 74.6 %
*   Tempo inferenza medio per batch:  0.42 s   
*   Consumo energetico medio per 102 secondi di video: 0.0018 kWh




Per capire quali operazioni e/o layer possono essere quantizzati a 8 bit, si può far riferimento al codice sorgente presente al link: [quantizzazioni ammesse](https://github.com/tensorflow/model-optimization/blob/master/tensorflow_model_optimization/python/core/quantization/keras/default_8bit/default_8bit_quantize_registry.py)



In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# Shape of the clips fed to the network
batch_size = 16
height = 224
width = 224
channels = 3


# Extracts frames from videos and groups them into fixed-length clips
def generate_video_batches(video_paths, seed=None):
    """Yield calibration samples for the TFLite converter, one clip at a time.

    Each video is decoded frame by frame; every frame is resized to
    ``(width, height)`` and every ``batch_size`` consecutive frames are
    yielded as ``[float32 array]``. Frames left over at the end of a video
    are dropped.

    Clips are yielded as soon as they are complete instead of buffering all
    videos in memory first, and the capture handle is released even if
    decoding fails or the consumer stops early.

    Args:
        video_paths: Iterable of video file paths.
        seed: Accepted for call-site compatibility; currently unused.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, (width, height)))
                if len(frames) == batch_size:
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            cap.release()

# Build a converter from the BiLSTM Keras model loaded earlier in the notebook.
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_BiLSTM)

# Calibration samples come from a single Violence clip.
# `seed` is passed through but generate_video_batches does not use it.
converter.representative_dataset = lambda: generate_video_batches(['/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/Violence/cam1/1.mp4'],
                                                                 seed=42)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Integer (int8) tensors at the model boundary.
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# The op set is assigned exactly once. TFLITE_BUILTINS_INT8 is not listed:
# an earlier assignment of it was immediately overwritten by this list, so
# this is the configuration the model was actually converted with.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
# NOTE(review): private converter flag; presumably needed so the recurrent
# layer's tensor-list ops are kept as TF select ops - confirm.
converter._experimental_lower_tensor_list_ops = False

tflite_quant_model = converter.convert()

with open('converted_model_BiLSTM_Quantizzazione_Int.tflite', 'wb') as f:
    f.write(tflite_quant_model)

# Report the size of the written file in MiB.
model_quantized_size = os.path.getsize('converted_model_BiLSTM_Quantizzazione_Int.tflite')
model_quantized_size_mb = model_quantized_size / (1024 * 1024)
print(f"Quantizzazione completata e modello salvato come 'converted_model_BiLSTM_Quantizzazione_Int.tflite'  Peso modello: {model_quantized_size_mb} MB")


In [None]:
import tensorflow as tf

def tflite_model_summary(interpreter):
    """Print a Keras-like summary table of the tensors in a TFLite model.

    Tensors are grouped by the part of their name before the first ``/``.
    For each group the table lists the dtype and shape of every tensor and
    the total number of elements. Every tensor returned by
    ``get_tensor_details()`` is counted, so the "Param #" column is an
    element count over all of them.

    Args:
        interpreter: Object exposing ``allocate_tensors()`` and
            ``get_tensor_details()``; tensors are allocated by this call.
    """
    interpreter.allocate_tensors()

    layers = {}
    total_params = 0
    # Single pass: collect shapes/dtypes and accumulate element counts.
    for tensor_detail in interpreter.get_tensor_details():
        layer_name = tensor_detail['name'].split('/')[0]
        layer = layers.setdefault(layer_name, {
            'name': layer_name,
            'output_shape': [],
            'type': [],
            'param_count': 0
        })
        shape = tensor_detail['shape']
        layer['output_shape'].append(shape)
        layer['type'].append(str(tensor_detail['dtype']))

        # Multiply as Python ints: shape entries may be NumPy fixed-width
        # integers, whose products can silently overflow.
        element_count = 1
        for dim in shape:
            element_count *= int(dim)
        layer['param_count'] += element_count
        total_params += element_count

    # Print the summary
    print("_________________________________________________________________")
    print(" Layer (type)                Output Shape              Param #   ")
    print("=================================================================")
    for layer_name, layer_info in layers.items():
        output_shape_str = ' / '.join([str(shape) for shape in layer_info['output_shape']])
        dtype_str = ' / '.join(layer_info['type'])
        print(f" {layer_name} ({dtype_str})  {output_shape_str}     {layer_info['param_count']}")
    print("=================================================================")
    print(f"Total params: {total_params}")
    print("_______________________________________________________________")

# Open the int8 model file written by the conversion cell and print its
# tensor summary (tflite_model_summary allocates the tensors itself).
interpreter = tf.lite.Interpreter(model_path="converted_model_BiLSTM_Quantizzazione_Int.tflite")

tflite_model_summary(interpreter)


In [None]:
# Funzioni di pre processamento dei video e di inferenza

import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import time

def load_tflite_model(model_path):
    """Create a ``tf.lite.Interpreter`` for ``model_path`` with tensors allocated.

    Args:
        model_path: Path of the ``.tflite`` file.

    Returns:
        The interpreter, after ``allocate_tensors()`` has been called on it.
    """
    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    tflite_interpreter.allocate_tensors()
    return tflite_interpreter

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video and cut it into fixed-length clips of int8 frames.

    NOTE(review): frames decoded by OpenCV are normally uint8, so the direct
    cast to int8 wraps values of 128 and above to negative numbers - confirm
    that this matches the quantization parameters of the model input.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        input_shape: ``(height, width)`` every frame is resized to.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of arrays, each stacking ``batch_size`` resized frames. Frames
        left over after the last full clip are dropped; a video with fewer
        than ``batch_size`` frames yields an empty list.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # cv2.resize takes (width, height), hence the swapped indices.
            frame = cv2.resize(frame, (input_shape[1], input_shape[0]))
            frames.append(frame.astype('int8'))
    finally:
        # Release the capture handle even if decoding or resizing fails.
        cap.release()

    num_batches = len(frames) // batch_size
    if num_batches == 0:
        # Fewer than batch_size frames: the video is ignored.
        return []

    # Truncate to a multiple of batch_size, then split into equal clips.
    stacked = np.array(frames[:num_batches * batch_size])
    return np.split(stacked, num_batches)

def run_inference(interpreter, input_data):
    """Run one forward pass of a TFLite interpreter and time it.

    Args:
        interpreter: Object exposing the interpreter methods used here
            (``get_input_details``, ``get_output_details``, ``set_tensor``,
            ``invoke`` and ``get_tensor``).
        input_data: Array written to the model's first input tensor.

    Returns:
        Tuple ``(output_data, inference_time)``: the content of the first
        output tensor and the seconds spent inside ``invoke()``.
    """
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    interpreter.set_tensor(input_index, input_data)

    # perf_counter() is monotonic and high resolution, so the measurement
    # cannot be distorted by system clock adjustments as time.time() can.
    start_time = time.perf_counter()
    interpreter.invoke()
    inference_time = time.perf_counter() - start_time

    return interpreter.get_tensor(output_index), inference_time

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute the binary-classification metrics reported by the notebook.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard predictions, used for the threshold-dependent scores.
        y_probs: Scores passed to ``roc_curve``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``.
    """
    threshold_scores = [
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Specificity is the recall computed with label 0 as the positive class.
        recall_score(y_true, y_pred, pos_label=0),
    ]
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_probs)
    area_under_curve = auc(false_pos_rate, true_pos_rate)

    return (*threshold_scores, area_under_curve, false_pos_rate, true_pos_rate)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with its AUC in the legend and show the figure.

    Args:
        fpr: False positive rates (x axis).
        tpr: True positive rates (y axis).
        roc_auc: Area under the curve, printed with four decimals.
        title: Figure title.
    """
    legend_label = 'ROC curve (area = %0.4f)' % roc_auc

    plt.figure()
    # Model curve first, then the diagonal used as a visual reference.
    plt.plot(fpr, tpr, color='b', lw=2, label=legend_label)
    plt.plot([0, 1], [0, 1], color='r', linestyle='--')
    plt.title(title)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4``/``.avi`` file in a folder.

    Args:
        folder_path: Directory scanned (non-recursively) for video files.
        label: Label attached to every clip coming from this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        Tuple ``(videos, labels)``: the clips returned by ``preprocess_video``
        for each file, and one copy of ``label`` per clip.
    """
    videos = []
    labels = []
    # sorted() makes the clip order reproducible (os.listdir order is
    # arbitrary); lower() also accepts upper-case extensions such as ".MP4".
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith((".mp4", ".avi")):
            continue
        video_path = os.path.join(folder_path, filename)
        video_batches = preprocess_video(video_path, input_shape, batch_size)
        videos.extend(video_batches)
        labels.extend([label] * len(video_batches))
    return videos, labels



from codecarbon import track_emissions
def _quantize_clip(clip, tensor_details):
    """Map a clip of pixel intensities onto the integer grid of a quantized input tensor."""
    scale, zero_point = tensor_details['quantization']
    target_dtype = tensor_details['dtype']
    if scale == 0 or not np.issubdtype(target_dtype, np.integer):
        # The input tensor is not quantized: feed the clip unchanged.
        return clip
    # preprocess_video casts the decoded uint8 pixels straight to int8, which
    # wraps 128..255 to negative values; reinterpreting the bytes as uint8
    # restores the original 0..255 intensities before quantizing them.
    pixels = clip.view(np.uint8) if clip.dtype == np.int8 else clip
    limits = np.iinfo(target_dtype)
    quantized = np.round(pixels.astype(np.float32) / scale + zero_point)
    return np.clip(quantized, limits.min, limits.max).astype(target_dtype)


def _dequantize_output(raw_value, tensor_details):
    """Convert a raw output value back to a real-valued score."""
    scale, zero_point = tensor_details['quantization']
    if scale == 0:
        # The output tensor is not quantized: the value is already real.
        return float(raw_value)
    return (float(raw_value) - zero_point) * scale


@track_emissions(project_name="MV3_Small_Inference_BiLSTM_int8Bit")
def inferenceV3_BiLSTM():
    """Evaluate the int8-quantized TFLite BiLSTM model on the inference videos.

    Inputs are quantized and outputs dequantized with the scale and zero
    point reported by the interpreter. Dividing the raw int8 output by 255
    can never reach 0.5, so no clip could be predicted as positive.

    Prints the metrics, the mean ``invoke()`` time per clip, the
    classification report and the confusion matrix, then plots the ROC curve.
    The whole call is wrapped by codecarbon's ``track_emissions`` decorator.
    """
    model_path = 'converted_model_BiLSTM_Quantizzazione_Int.tflite'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)
    batch_size = 16

    interpreter = load_tflite_model(model_path)
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    # Label 1 is assigned to the Violence folder, label 0 to NonViolence.
    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = violence_labels + nonviolence_labels

    # Each clip gets a leading axis of size 1 before being fed to the model.
    all_videos = np.array([np.expand_dims(video, axis=0) for video in all_videos])
    all_labels = np.array(all_labels)

    total_inference_time = 0
    y_probs = []
    for video in all_videos:
        output_data, inference_time = run_inference(interpreter, _quantize_clip(video, input_details))
        total_inference_time += inference_time
        # Only the first value of the output tensor is used as the score.
        y_probs.append(_dequantize_output(output_data.ravel()[0], output_details))

    y_probs = np.array(y_probs)
    print(f"Probabilità {y_probs}")
    y_pred = np.round(y_probs)


    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time
    average_inference_time = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time: {average_inference_time:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for tf_model_mv3_BiLSTM_int8Bit')

if __name__ == '__main__':
    inferenceV3_BiLSTM()



# Quantizzazione intera-intera 8 bit con fallback a Float (Input ed Output sono Float)

*   Peso modello: circa   28.9 MB  
*   Accuratezza modello:  80 %
*   Auc:  83 %
*   Tempo inferenza medio per batch: 0.408  s
*   Consumo energetico medio per 102 secondi di video: 0.0018 kWh




In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os


# Shape of the clips fed to the network
batch_size = 16
height = 224
width = 224
channels = 3


# Extracts frames from videos and groups them into fixed-length clips
def generate_video_batches(video_paths, seed=None):
    """Yield calibration samples for the TFLite converter, one clip at a time.

    Each video is decoded frame by frame; every frame is resized to
    ``(width, height)`` and every ``batch_size`` consecutive frames are
    yielded as ``[float32 array]``. Frames left over at the end of a video
    are dropped.

    Clips are yielded as soon as they are complete instead of buffering all
    videos in memory first, and the capture handle is released even if
    decoding fails or the consumer stops early.

    Args:
        video_paths: Iterable of video file paths.
        seed: Accepted for call-site compatibility; currently unused.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, (width, height)))
                if len(frames) == batch_size:
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            cap.release()

# Build a converter from the BiLSTM Keras model loaded earlier in the notebook.
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_BiLSTM)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Calibration samples come from a single Violence clip.
# `seed` is passed through but generate_video_batches does not use it.
converter.representative_dataset = lambda: generate_video_batches(['/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/Violence/cam1/1.mp4'],
                                                                 seed=42)

# No inference_input_type / inference_output_type is set in this cell, so the
# model boundary keeps the converter's default types.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
# NOTE(review): private converter flag; presumably needed so the recurrent
# layer's tensor-list ops are kept as TF select ops - confirm.
converter._experimental_lower_tensor_list_ops = False

tflite_quant_model = converter.convert()

with open('model_quantized_BiLSTM_8bit_fallback_Float.tflite', 'wb') as f:
    f.write(tflite_quant_model)


# Report the size of the written file in MiB.
model_quantized_size = os.path.getsize('model_quantized_BiLSTM_8bit_fallback_Float.tflite')
model_quantized_size_mb = model_quantized_size / (1024 * 1024)
print(f"Quantizzazione completata e modello salvato come 'model_quantized_BiLSTM_8bit_fallback_Float.tflite'  ,   peso = {model_quantized_size_mb} MB")


In [None]:
# Funzioni di preprocessamento e di inferenza

import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import time

def load_tflite_model(model_path):
    """Create a ``tf.lite.Interpreter`` for ``model_path`` with tensors allocated.

    Args:
        model_path: Path of the ``.tflite`` file.

    Returns:
        The interpreter, after ``allocate_tensors()`` has been called on it.
    """
    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    tflite_interpreter.allocate_tensors()
    return tflite_interpreter

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video and cut it into fixed-length clips of float32 frames.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        input_shape: ``(height, width)`` every frame is resized to.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of arrays, each stacking ``batch_size`` resized frames. Frames
        left over after the last full clip are dropped; a video with fewer
        than ``batch_size`` frames yields an empty list.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # cv2.resize takes (width, height), hence the swapped indices.
            frame = cv2.resize(frame, (input_shape[1], input_shape[0]))
            frames.append(frame.astype('float32'))
    finally:
        # Release the capture handle even if decoding or resizing fails.
        cap.release()

    num_batches = len(frames) // batch_size
    if num_batches == 0:
        # Fewer than batch_size frames: the video is ignored.
        return []

    # Truncate to a multiple of batch_size, then split into equal clips.
    stacked = np.array(frames[:num_batches * batch_size])
    return np.split(stacked, num_batches)


def run_inference(interpreter, input_data):
    """Run one forward pass of a TFLite interpreter and time it.

    Args:
        interpreter: Object exposing the interpreter methods used here
            (``get_input_details``, ``get_output_details``, ``set_tensor``,
            ``invoke`` and ``get_tensor``).
        input_data: Array written to the model's first input tensor.

    Returns:
        Tuple ``(output_data, inference_time)``: the content of the first
        output tensor and the seconds spent inside ``invoke()``.
    """
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    interpreter.set_tensor(input_index, input_data)

    # perf_counter() is monotonic and high resolution, so the measurement
    # cannot be distorted by system clock adjustments as time.time() can.
    start_time = time.perf_counter()
    interpreter.invoke()
    inference_time = time.perf_counter() - start_time

    return interpreter.get_tensor(output_index), inference_time

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute the binary-classification metrics reported by the notebook.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard predictions, used for the threshold-dependent scores.
        y_probs: Scores passed to ``roc_curve``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``.
    """
    threshold_scores = [
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Specificity is the recall computed with label 0 as the positive class.
        recall_score(y_true, y_pred, pos_label=0),
    ]
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_probs)
    area_under_curve = auc(false_pos_rate, true_pos_rate)

    return (*threshold_scores, area_under_curve, false_pos_rate, true_pos_rate)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with its AUC in the legend and show the figure.

    Args:
        fpr: False positive rates (x axis).
        tpr: True positive rates (y axis).
        roc_auc: Area under the curve, printed with four decimals.
        title: Figure title.
    """
    legend_label = 'ROC curve (area = %0.4f)' % roc_auc

    plt.figure()
    # Model curve first, then the diagonal used as a visual reference.
    plt.plot(fpr, tpr, color='b', lw=2, label=legend_label)
    plt.plot([0, 1], [0, 1], color='r', linestyle='--')
    plt.title(title)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4``/``.avi`` file in a folder.

    Args:
        folder_path: Directory scanned (non-recursively) for video files.
        label: Label attached to every clip coming from this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        Tuple ``(videos, labels)``: the clips returned by ``preprocess_video``
        for each file, and one copy of ``label`` per clip.
    """
    videos = []
    labels = []
    # sorted() makes the clip order reproducible (os.listdir order is
    # arbitrary); lower() also accepts upper-case extensions such as ".MP4".
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith((".mp4", ".avi")):
            continue
        video_path = os.path.join(folder_path, filename)
        video_batches = preprocess_video(video_path, input_shape, batch_size)
        videos.extend(video_batches)
        labels.extend([label] * len(video_batches))
    return videos, labels


from codecarbon import track_emissions
# The project name identifies the BiLSTM run; it was previously labelled
# ConvLSTM, which mislabels the emissions recorded for this function.
@track_emissions(project_name="MV3_Small_Inference_BiLSTM_int8Bit_FallBackFloat")
def inferenceV3_BiLSTM():
    """Evaluate the 8-bit (float fallback) TFLite BiLSTM model on the inference videos.

    Loads the interpreter, cuts every video of the two class folders into
    16-frame clips, runs each clip through the model and prints the metrics,
    the mean ``invoke()`` time per clip, the classification report and the
    confusion matrix, then plots the ROC curve. The whole call is wrapped by
    codecarbon's ``track_emissions`` decorator.
    """
    model_path = '/content/model_quantized_BiLSTM_8bit_fallback_Float.tflite'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)
    batch_size = 16

    interpreter = load_tflite_model(model_path)

    # Label 1 is assigned to the Violence folder, label 0 to NonViolence.
    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = violence_labels + nonviolence_labels

    # Each clip gets a leading axis of size 1 before being fed to the model.
    all_videos = np.array([np.expand_dims(video, axis=0) for video in all_videos])
    all_labels = np.array(all_labels)

    total_inference_time = 0
    y_probs = []
    for video in all_videos:
        output_data, inference_time = run_inference(interpreter, video)
        total_inference_time += inference_time
        # Only the first value of the output tensor is used as the score.
        y_probs.append(output_data.ravel()[0])

    y_probs = np.array(y_probs)
    y_pred = np.round(y_probs)

    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time
    average_inference_time = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time: {average_inference_time:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for tf_model_mv3_BiLStm_int8Bit_fallbackFloat')

if __name__ == '__main__':
    inferenceV3_BiLSTM()


# Quantizzazione Float 16

**Attenzione**: Eseguire solo con la GPU, altrimenti dequantizzerà a Float-32


*   Peso modello: circa  57 MB
*   Accuratezza modello:  90 %
*   Auc:  97.7 %
*   Tempo inferenza medio per batch:  0.278 s
*   Consumo energetico medio per 102 secondi di video: 0.0017 kWh



In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# Shape of the clips fed to the network
batch_size = 16
height = 224
width = 224
channels = 3


# Extracts frames from videos and groups them into fixed-length clips
def generate_video_batches(video_paths, seed=None):
    """Yield calibration samples for the TFLite converter, one clip at a time.

    Each video is decoded frame by frame; every frame is resized to
    ``(width, height)`` and every ``batch_size`` consecutive frames are
    yielded as ``[float32 array]``. Frames left over at the end of a video
    are dropped.

    Clips are yielded as soon as they are complete instead of buffering all
    videos in memory first, and the capture handle is released even if
    decoding fails or the consumer stops early.

    Args:
        video_paths: Iterable of video file paths.
        seed: Accepted for call-site compatibility; currently unused.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, (width, height)))
                if len(frames) == batch_size:
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            cap.release()

# Build a converter from the BiLSTM Keras model loaded earlier in the notebook.
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_BiLSTM)

# NOTE(review): float16 post-training quantization is normally configured
# without a representative dataset - confirm whether this assignment is needed.
converter.representative_dataset = lambda: generate_video_batches(['/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/Violence/cam1/1.mp4'],
                                                                 seed=42)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Request float16 as the target type of the quantized model.
converter.target_spec.supported_types = [tf.float16]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
# NOTE(review): private converter flag; presumably needed so the recurrent
# layer's tensor-list ops are kept as TF select ops - confirm.
converter._experimental_lower_tensor_list_ops = False

tflite_quant_model = converter.convert()

with open('converted_model_BiLSTM_Quantizzazione_Float16.tflite', 'wb') as f:
    f.write(tflite_quant_model)

# Report the size of the written file in MiB.
model_quantized_size = os.path.getsize('converted_model_BiLSTM_Quantizzazione_Float16.tflite')
model_quantized_size_mb = model_quantized_size / (1024 * 1024)
print(f"Quantizzazione completata e modello salvato come 'converted_model_BiLSTM_Quantizzazione_Float16.tflite'  Peso modello: {model_quantized_size_mb} MB")


In [None]:
import tensorflow as tf

def tflite_model_summary(interpreter):
    """Print a Keras-like summary table of the tensors in a TFLite model.

    Tensors are grouped by the part of their name before the first ``/``.
    For each group the table lists the dtype and shape of every tensor and
    the total number of elements. Every tensor returned by
    ``get_tensor_details()`` is counted, so the "Param #" column is an
    element count over all of them.

    Args:
        interpreter: Object exposing ``allocate_tensors()`` and
            ``get_tensor_details()``; tensors are allocated by this call.
    """
    interpreter.allocate_tensors()

    layers = {}
    total_params = 0
    # Single pass: collect shapes/dtypes and accumulate element counts.
    for tensor_detail in interpreter.get_tensor_details():
        layer_name = tensor_detail['name'].split('/')[0]
        layer = layers.setdefault(layer_name, {
            'name': layer_name,
            'output_shape': [],
            'type': [],
            'param_count': 0
        })
        shape = tensor_detail['shape']
        layer['output_shape'].append(shape)
        layer['type'].append(str(tensor_detail['dtype']))

        # Multiply as Python ints: shape entries may be NumPy fixed-width
        # integers, whose products can silently overflow.
        element_count = 1
        for dim in shape:
            element_count *= int(dim)
        layer['param_count'] += element_count
        total_params += element_count

    # Print the summary
    print("_________________________________________________________________")
    print(" Layer (type)                Output Shape              Param #   ")
    print("=================================================================")
    for layer_name, layer_info in layers.items():
        output_shape_str = ' / '.join([str(shape) for shape in layer_info['output_shape']])
        dtype_str = ' / '.join(layer_info['type'])
        print(f" {layer_name} ({dtype_str})  {output_shape_str}     {layer_info['param_count']}")
    print("=================================================================")
    print(f"Total params: {total_params}")
    print("_______________________________________________________________")

# Open the float16 model file written by the conversion cell and print its
# tensor summary (tflite_model_summary allocates the tensors itself).
interpreter = tf.lite.Interpreter(model_path="converted_model_BiLSTM_Quantizzazione_Float16.tflite")

tflite_model_summary(interpreter)


In [None]:
# Funzioni di preprocessamento e di inferenza

import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import time

def load_tflite_model(model_path):
    """Create a ``tf.lite.Interpreter`` for ``model_path`` with tensors allocated.

    Args:
        model_path: Path of the ``.tflite`` file.

    Returns:
        The interpreter, after ``allocate_tensors()`` has been called on it.
    """
    tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
    tflite_interpreter.allocate_tensors()
    return tflite_interpreter

def preprocess_video(video_path, input_shape, batch_size):
    """Decode a video and cut it into fixed-length clips of float32 frames.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        input_shape: ``(height, width)`` every frame is resized to.
        batch_size: Number of consecutive frames per clip.

    Returns:
        A list of arrays, each stacking ``batch_size`` resized frames. Frames
        left over after the last full clip are dropped; a video with fewer
        than ``batch_size`` frames yields an empty list.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # cv2.resize takes (width, height), hence the swapped indices.
            frame = cv2.resize(frame, (input_shape[1], input_shape[0]))
            frames.append(frame.astype('float32'))
    finally:
        # Release the capture handle even if decoding or resizing fails.
        cap.release()

    num_batches = len(frames) // batch_size
    if num_batches == 0:
        # Fewer than batch_size frames: the video is ignored.
        return []

    # Truncate to a multiple of batch_size, then split into equal clips.
    stacked = np.array(frames[:num_batches * batch_size])
    return np.split(stacked, num_batches)

def run_inference(interpreter, input_data):
    """Run one forward pass of a TFLite interpreter and time it.

    Args:
        interpreter: Object exposing the interpreter methods used here
            (``get_input_details``, ``get_output_details``, ``set_tensor``,
            ``invoke`` and ``get_tensor``).
        input_data: Array written to the model's first input tensor.

    Returns:
        Tuple ``(output_data, inference_time)``: the content of the first
        output tensor and the seconds spent inside ``invoke()``.
    """
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    interpreter.set_tensor(input_index, input_data)

    # perf_counter() is monotonic and high resolution, so the measurement
    # cannot be distorted by system clock adjustments as time.time() can.
    start_time = time.perf_counter()
    interpreter.invoke()
    inference_time = time.perf_counter() - start_time

    return interpreter.get_tensor(output_index), inference_time

def calculate_metrics(y_true, y_pred, y_probs):
    """Compute label-based metrics plus the ROC curve and its area.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, compared against ``y_true``.
        y_probs: Scores handed to ``roc_curve`` together with ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, specificity, roc_auc, fpr, tpr)``.
    """
    label_metrics = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        # Specificity is computed as the recall of label 0.
        recall_score(y_true, y_pred, pos_label=0),
    )

    # The thresholds returned by roc_curve are not needed by callers.
    fpr, tpr, _ = roc_curve(y_true, y_probs)

    return (*label_metrics, auc(fpr, tpr), fpr, tpr)

def plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve'):
    """Draw a ROC curve with a dashed diagonal reference line and show it.

    Args:
        fpr: X values of the curve (false positive rates).
        tpr: Y values of the curve (true positive rates).
        roc_auc: Area under the curve, printed in the legend with 4 decimals.
        title: Figure title.
    """
    plt.figure()

    # Curve in blue with the area in its legend entry; diagonal in dashed red.
    curve_label = 'ROC curve (area = %0.4f)' % roc_auc
    plt.plot(fpr, tpr, color='b', lw=2, label=curve_label)
    diagonal_x, diagonal_y = [0, 1], [0, 1]
    plt.plot(diagonal_x, diagonal_y, color='r', linestyle='--')

    # The y axis extends slightly above 1.0.
    x_range, y_range = [0.0, 1.0], [0.0, 1.05]
    plt.xlim(x_range)
    plt.ylim(y_range)

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()

def load_videos_from_folder(folder_path, label, input_shape, batch_size):
    """Collect the clips of every ``.mp4`` / ``.avi`` video found in a folder.

    Files are visited in sorted name order, because ``os.listdir`` returns
    entries in arbitrary order and the clip order would otherwise differ
    between runs. The extension check ignores case so that files such as
    ``CLIP.MP4`` are not silently skipped.

    Args:
        folder_path: Directory whose direct entries are scanned.
        label: Label attached to every clip extracted from this folder.
        input_shape: Forwarded to ``preprocess_video``.
        batch_size: Forwarded to ``preprocess_video``.

    Returns:
        Tuple ``(videos, labels)``: the clips returned by ``preprocess_video``
        for all matching files, and a list repeating ``label`` once per clip.
    """
    video_extensions = (".mp4", ".avi")

    videos = []
    labels = []
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(video_extensions):
            continue
        video_path = os.path.join(folder_path, filename)
        video_batches = preprocess_video(video_path, input_shape, batch_size)
        videos.extend(video_batches)
        labels.extend([label] * len(video_batches))
    return videos, labels


from codecarbon import track_emissions
@track_emissions(project_name="MV3_Small_Inference_BiLSTM_Float16")
def inferenceV3_BiLSTM():
    """Evaluate the Float16 BiLSTM TFLite model on the inference video folders.

    Clips of 16 frames resized to 224x224 are extracted from the ``Violence``
    (label 1) and ``NonViolence`` (label 0) folders and fed one at a time to
    the interpreter. The function prints accuracy, precision, recall,
    specificity, ROC AUC, the average ``invoke`` time per clip, the
    classification report and the confusion matrix, then plots the ROC curve.
    The whole call is wrapped by codecarbon's ``track_emissions`` decorator.

    Raises:
        ValueError: If no complete clip could be extracted from the two
            folders (the metrics would be undefined).
    """
    model_path = '/content/converted_model_BiLSTM_Quantizzazione_Float16.tflite'
    violence_path = '/content/gdrive/MyDrive/VideoInferenza/Violence'
    nonviolence_path = '/content/gdrive/MyDrive/VideoInferenza/NonViolence'
    input_shape = (224, 224)
    batch_size = 16

    interpreter = load_tflite_model(model_path)

    violence_videos, violence_labels = load_videos_from_folder(violence_path, 1, input_shape, batch_size)
    nonviolence_videos, nonviolence_labels = load_videos_from_folder(nonviolence_path, 0, input_shape, batch_size)

    all_videos = violence_videos + nonviolence_videos
    all_labels = np.array(violence_labels + nonviolence_labels)

    if not all_videos:
        # Fail with a clear message instead of an obscure metric or
        # division-by-zero error further down.
        raise ValueError(
            f"No complete {batch_size}-frame clip found in "
            f"'{violence_path}' or '{nonviolence_path}'"
        )

    total_inference_time = 0.0
    y_probs = []
    for video in all_videos:
        # The leading batch axis is added per clip; np.expand_dims returns a
        # view, so the dataset is not copied into one large array up front.
        output_data, inference_time = run_inference(interpreter, np.expand_dims(video, axis=0))
        total_inference_time += inference_time
        y_probs.append(output_data.ravel()[0])

    y_probs = np.array(y_probs)
    y_pred = np.round(y_probs)

    # Calculate metrics
    accuracy, precision, recall, specificity, roc_auc, fpr, tpr = calculate_metrics(all_labels, y_pred, y_probs)

    # Calculate average inference time
    average_inference_time = total_inference_time / len(all_videos)

    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Specificity: {specificity:.4f}')
    print(f'ROC AUC: {roc_auc:.4f}')
    print(f'Average Inference Time: {average_inference_time:.4f} seconds')

    # Print classification report and confusion matrix
    print('Classification Report:')
    print(classification_report(all_labels, y_pred))

    print('Confusion Matrix:')
    print(confusion_matrix(all_labels, y_pred))

    # Plot ROC curve; the title names the model actually evaluated here (the
    # previous text referred to the ConvLSTM model).
    plot_roc_curve(fpr, tpr, roc_auc, title='ROC Curve for MobileNetV3-Small BiLSTM (Float16 TFLite)')

# Launch the Float16 BiLSTM evaluation only when this code runs as the main
# module, not when it is imported.
if __name__ == '__main__':
    inferenceV3_BiLSTM()


# Solo numeri interi: attivazioni a 16 bit con pesi a 8 bit (sperimentale)

**Attenzione**: questa conversione non funziona e dà problemi; probabilmente si tratta di un errore in qualche funzione di libreria, dato che questa modalità di quantizzazione è ancora sperimentale.


In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# Dimensions of the input batches fed to the networks
batch_size = 16  # number of frames grouped into one batch by generate_video_batches
height = 224  # frame height after resizing
width = 224  # frame width after resizing
channels = 3  # NOTE(review): presumably colour channels per frame; not referenced in the visible code of this cell — confirm

# Extract frames from videos and group them into fixed-size batches
def generate_video_batches(video_paths, seed=None):
    """Lazily yield batches of resized frames read from a list of videos.

    Each video is decoded with OpenCV and every frame is resized to the
    module-level ``(width, height)``. Whenever ``batch_size`` frames have been
    collected they are yielded immediately, so only one batch is held in
    memory at a time instead of all batches of all videos. Frames left over at
    the end of a video (fewer than ``batch_size``) are dropped and never mixed
    with the next video. The capture is released even if decoding fails or the
    generator is closed early.

    Args:
        video_paths: Iterable of video file paths, processed in order.
        seed: Unused; kept so existing callers passing ``seed=`` keep working.

    Yields:
        A one-element list holding a ``float32`` array that stacks
        ``batch_size`` resized frames along axis 0.
    """
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, (width, height)))
                if len(frames) == batch_size:
                    yield [np.array(frames, dtype=np.float32)]
                    frames = []
        finally:
            cap.release()

# Convert the Keras BiLSTM model using the experimental integer scheme with
# 16-bit activations and 8-bit weights.
converter = tf.lite.TFLiteConverter.from_keras_model(MobileNetV3Small_BiLSTM)

# Calibration samples are taken from a single dataset video.
# NOTE(review): generate_video_batches yields arrays shaped
# (batch_size, height, width, C), while the TFLite inference code in this
# notebook feeds clips with an extra leading axis of size 1 — confirm that the
# calibration samples match the model's input signature.
converter.representative_dataset = lambda: generate_video_batches(
    ['/content/gdrive/MyDrive/Dataset/AirtLab-Dataset/Violence/cam1/1.mp4'],
    seed=42,
)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
# The int16-activation/int8-weight op set is listed first; regular TFLite
# builtins and TensorFlow (select) ops are also allowed.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
# Private converter flag — presumably needed for the tensor-list ops produced
# by the recurrent layers; verify against the TensorFlow version in use.
converter._experimental_lower_tensor_list_ops = False

tflite_quant_model = converter.convert()

# NOTE(review): this is the same file name used for the Float16 model, so
# running this cell overwrites that file. The name is kept unchanged because
# other cells may load it — consider a dedicated name for the int16/int8 model.
quantized_model_path = 'converted_model_BiLSTM_Quantizzazione_Float16.tflite'

with open(quantized_model_path, 'wb') as f:
    f.write(tflite_quant_model)

model_quantized_size = os.path.getsize(quantized_model_path)
model_quantized_size_mb = model_quantized_size / (1024 * 1024)
# The message is built from the path so the reported file name cannot drift
# from the file actually written (it used to be misspelled).
print(f"Quantizzazione completata e modello salvato come '{quantized_model_path}'  Peso modello: {model_quantized_size_mb} MB")
