IMPORTS AND SETTINGS

In [1]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))
import matplotlib.pyplot as plt
import pandas as pd
import cv2 
import itertools
import numpy as np
from sklearn.model_selection import train_test_split
import random

# Ask TensorFlow for deterministic op implementations so that runs with the
# same seed can be repeated. The call is guarded because the function is
# missing (AttributeError) on TensorFlow versions that predate it.
try:
    tf.config.experimental.enable_op_determinism()
    print("✅ Op Determinism Abilitato!")
except AttributeError:
    # Older TensorFlow: continue without determinism, but warn the user.
    print("⚠️ Attenzione: La tua versione di TF è troppo vecchia per enable_op_determinism.")

def reset_seeds(seed=42):
    """Re-seed every random number generator used by the notebook.

    Args:
        seed: Value passed to TensorFlow's global seed, NumPy's legacy global
            generator and Python's ``random`` module. Its string form is also
            stored in the ``PYTHONHASHSEED`` environment variable.
    """
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = f"{seed}"


# One training run is performed for each of these seeds.
SEEDS = [555, 123, 42, 7, 999]

I0000 00:00:1765551576.621140 3441118 port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
I0000 00:00:1765551576.645494 3441118 cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
I0000 00:00:1765551577.209768 3441118 port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


2.21.0-dev20251210
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
✅ Op Determinism Abilitato!


W0000 00:00:1765551577.864879 3441118 gpu_device.cc:2456] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0a. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.


DATASET LOADING AND MODELLING

In [2]:
def load_dataset(folder='dataset/images', annotation_path='dataset/raw/bbx_annotations.csv'):
    """Load the images of ``folder`` and the annotation table.

    Files are read in sorted filename order with OpenCV. Entries that OpenCV
    cannot decode (``cv2.imread`` returns ``None`` instead of raising) are
    skipped and reported, so a stray non-image file no longer crashes the
    later ``.shape`` accesses. Images whose filename contains "upper"
    (case-insensitive) are downsampled to half their width and height.

    Args:
        folder: Directory containing the image files.
        annotation_path: CSV file with the annotations; the code reads its
            ``filename`` and ``class`` columns.

    Returns:
        tuple: ``(data, label)`` where ``data`` is a list of
        ``{'image': ndarray, 'filename': str}`` dicts and ``label`` is the
        annotation ``DataFrame`` as read from ``annotation_path``.

    Raises:
        ValueError: If no readable image is found in ``folder``.
    """
    data = []
    skipped = []
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        img = cv2.imread(img_path)  # OpenCV loads the channels in BGR order
        if img is None:
            # Not an image / unreadable file: remember it and move on.
            skipped.append(filename)
            continue
        data.append({
            'image': img,
            'filename': filename
        })

    if skipped:
        print('skipped', len(skipped), 'unreadable files, first one:', skipped[0])
    if not data:
        raise ValueError(f'no readable images found in {folder!r}')

    print(len(data),'images loaded')
    print('file name is: ',data[0]['filename'], 'shape of the image is:  ', data[0]['image'].shape )

    label = pd.read_csv(annotation_path)
    print(label.shape, label.iloc[0]['filename'])
    # The annotation rows are not in image order, and a single image can
    # appear in several rows with different classes.

    print('we have', len(label['class'].unique()), 'different classes')

    # Replace the bigger ("upper") images with half-sized versions.
    for item in data:
        if "upper" in item["filename"].lower():
            height, width = item['image'].shape[:2]
            item['image'] = cv2.resize(
                item['image'],
                (width // 2, height // 2),  # cv2.resize expects (width, height)
                interpolation=cv2.INTER_AREA
            )

    return data, label

FROM THE RAW DATASET TO THE TRAIN AND TEST DATA AND LABELS

In [3]:
def dataset_modelling(dataset,annotation):
    """Turn the loaded images and annotations into train/test arrays.

    Every image gets one multi-hot label vector with a 1 for each class that
    appears in its annotation rows (classes missing from ``label_map`` are
    ignored). Images and labels are joined on ``filename`` with an inner
    join, so images without any annotation row are dropped. The joined
    filename/label table is written to ``csv/temp/final_dataset.csv``; the
    pixel arrays are left out of that file because their text form is
    truncated and cannot be read back.

    Args:
        dataset: List of ``{'image': ndarray, 'filename': str}`` dicts.
            Assumes all images share one shape so they can be stacked.
        annotation: DataFrame with at least ``filename`` and ``class``.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)`` as float32 arrays; the
        image arrays are scaled by 1/255.

    Raises:
        ValueError: If no image has a matching annotation row.
    """
    label_map={'goalpost':0,
               'ball':1,
               'robot':2,
               'goalspot':3,
               'centerspot':4}

    def get_vector(classes_found):
        # Multi-hot encoding sized from label_map so both stay in sync.
        vec = np.zeros(len(label_map), dtype=int)
        for c in classes_found:
            if c in label_map:
                vec[label_map[c]] = 1
        return vec.tolist()

    def to_arrays(frame):
        images = np.array(frame['image'].tolist()).astype('float32') / 255.0
        labels = np.array(frame['label'].tolist()).astype('float32')
        return images, labels

    dataset_df = pd.DataFrame(dataset)

    # One row per file, holding the list of all classes annotated on it.
    grouped = annotation.groupby('filename')['class'].apply(list).reset_index()
    grouped['label'] = grouped['class'].apply(get_vector)
    final_annotation = grouped[['filename','label']]

    final_dataset = pd.merge(dataset_df, final_annotation, on='filename', how='inner')
    if final_dataset.empty:
        raise ValueError('no image filename matches an annotation row')

    # Make sure the output directory exists, and keep the pixel data out of
    # the CSV (only filename and label are useful there).
    os.makedirs('csv/temp', exist_ok=True)
    final_dataset.drop(columns=['image']).to_csv('csv/temp/final_dataset.csv')

    final_dataset = final_dataset.drop(columns=['filename'])
    df_train, df_test = train_test_split(final_dataset, test_size=0.2, random_state=42)

    x_train, y_train = to_arrays(df_train)
    x_test, y_test = to_arrays(df_test)
    return x_train, y_train,x_test,y_test
    

AUGMENTING THE TRAINING SET (doubled with 'flip' or 'noise', tripled with 'both')

In [4]:
def augment_train_set(x_train,y_train,aug_type):
    """Extend the training set with flipped and/or noisy copies, then shuffle.

    Args:
        x_train: Image array; axis 0 indexes the samples and axis 2 is the
            axis that gets mirrored. The noisy copy is clipped to [0, 1], so
            pixel values are expected in that range.
        y_train: Labels aligned with ``x_train`` along axis 0. Each augmented
            copy reuses the labels of its source image.
        aug_type: ``'flip'`` (original + mirrored copy), ``'noise'``
            (original + copy with Gaussian noise, std 0.05) or ``'both'``
            (original + mirrored + noisy).

    Returns:
        tuple: ``(x_train_aug, y_train_aug)`` shuffled with the same
        permutation. A fixed ``RandomState(42)`` drives both the noise and
        the shuffle, so the result is reproducible. For floating-point
        ``x_train`` the output keeps the input dtype.

    Raises:
        ValueError: If ``aug_type`` is not one of the supported values.
    """
    if aug_type not in ('flip', 'noise', 'both'):
        raise ValueError(
            f"unknown aug_type {aug_type!r}; expected 'flip', 'noise' or 'both'"
        )

    rng = np.random.RandomState(42)
    x_parts = [x_train]

    if aug_type in ('flip', 'both'):
        x_parts.append(np.flip(x_train, axis=2))

    if aug_type in ('noise', 'both'):
        noise = rng.normal(loc=0.0, scale=0.05, size=x_train.shape)
        x_noisy = np.clip(x_train + noise, 0., 1.)
        if np.issubdtype(x_train.dtype, np.floating):
            # rng.normal yields float64; without this cast the whole
            # concatenated set would be promoted to float64.
            x_noisy = x_noisy.astype(x_train.dtype, copy=False)
        x_parts.append(x_noisy)

    x_train_aug = np.concatenate(x_parts, axis=0)
    y_train_aug = np.concatenate([y_train] * len(x_parts), axis=0)

    # Shuffle so that a tail split (e.g. a validation split) does not end up
    # containing only augmented samples.
    indices = np.arange(x_train_aug.shape[0])
    rng.shuffle(indices)
    x_train_aug = x_train_aug[indices]
    y_train_aug = y_train_aug[indices]

    return x_train_aug, y_train_aug

MODEL BUILDING
Kernel size, pooling size, fully connected layer size, number of conv layers and learning rate are tried in different combinations.
Instead, I fixed:
pooling stride = 2
pooling type: average pooling
number of kernels per layer: 16, 32, 64, ...
last pooling: global average pooling

In [5]:
def build_model(input_shape=(240,320,3), n_conv_layers=4, kernel_size=7,
                pool_size=3, dense_units=(128,128), n_classes=5,
                noise_std=0.05, base_filters=16):
    """Build the (uncompiled) multi-label CNN.

    The network is: input -> random horizontal flip -> Gaussian noise ->
    ``n_conv_layers`` x (Conv2D + average pooling with stride 2) -> global
    average pooling -> fully connected ReLU layers -> sigmoid output with one
    unit per class. The number of filters doubles at every conv layer,
    starting from ``base_filters``. Called without arguments it reproduces
    the original fixed configuration.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        n_conv_layers: Number of Conv2D + AveragePooling2D pairs.
        kernel_size: Side length of the square convolution kernels.
        pool_size: Side length of the square pooling windows.
        dense_units: Sizes of the hidden fully connected layers, in order.
        n_classes: Number of sigmoid output units.
        noise_std: Standard deviation passed to the ``GaussianNoise`` layer.
        base_filters: Number of filters of the first conv layer.

    Returns:
        A ``Sequential`` Keras model; the caller is responsible for
        compiling it.
    """
    model = models.Sequential()
    model.add(tf.keras.Input(shape=input_shape))

    # Augmentation layers built into the model.
    model.add(layers.RandomFlip("horizontal"))
    model.add(layers.GaussianNoise(noise_std))

    for i in range(n_conv_layers):
        kernel_number = base_filters * (2 ** i)  # 16, 32, 64, ... by default
        model.add(layers.Conv2D(kernel_number, (kernel_size, kernel_size),
                                activation='relu', padding='same'))
        model.add(layers.AveragePooling2D((pool_size, pool_size),
                                          strides=2, padding='same'))

    model.add(layers.GlobalAveragePooling2D())
    for units in dense_units:
        model.add(layers.Dense(units, activation='relu'))
    # Sigmoid (not softmax): several classes can be present in one image.
    model.add(layers.Dense(n_classes, activation='sigmoid'))

    return model


MAIN

In [6]:
# Load the data once; every seed trains on the same train/test split.
dataset, annotation=load_dataset()
x_train,y_train, x_test, y_test=dataset_modelling(dataset, annotation)

target_names = ['goalpost','ball','robot','goalspot','centerspot']

# The per-seed reports are written here; create the folder up front so the
# first to_csv call cannot fail after a full training run.
os.makedirs('csv/report', exist_ok=True)

# Accumulated ACROSS seeds, so they must be initialised outside the loop.
all_results = []
bestf1 = 0
best_seed = None

for seed in SEEDS:
    print(f"\n ================== INIZIO CICLO CON SEED: {seed} ================== ")

    # Start every run from a clean Keras state and a freshly seeded RNG so
    # that weight initialisation depends only on `seed`.
    tf.keras.backend.clear_session()
    reset_seeds(seed)
    model=build_model()

    opt=tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt,
                loss='binary_crossentropy',
                metrics=[tf.keras.metrics.Precision(name='precision'),
                        tf.keras.metrics.Recall(name='recall'),
                        ])
    early_stop=EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    )

    # Re-seed right before training so batch shuffling and the random
    # augmentation layers do not depend on how many random numbers the model
    # construction consumed.
    reset_seeds(seed)
    history=model.fit(
        x_train,y_train,
        epochs=120,
        batch_size=16,
        validation_split=0.2,
        callbacks=[early_stop]
        )

    # Evaluate on the held-out test set with a 0.5 decision threshold.
    y_pred=model.predict(x_test)
    predictions_binary = (y_pred > 0.5).astype(int)
    report_dict = classification_report(
        y_test, predictions_binary,
        target_names=target_names,
        output_dict=True,
        zero_division=0,  # a class with no predictions scores 0 without a warning
    )
    f1_macro = report_dict['macro avg']['f1-score']

    # Keep the score of this run so the seeds can be compared afterwards.
    all_results.append({'seed': seed, 'f1_macro': f1_macro})
    if f1_macro > bestf1:
        bestf1 = f1_macro
        best_seed = seed

    df_report = pd.DataFrame(report_dict).transpose()
    df_report = df_report.round(2)
    df_report['support'] = df_report['support'].astype(int)
    csv_path = f'csv/report/report_7dyn_{seed}.csv'
    df_report.to_csv(csv_path)

# Summary over all seeds.
results_df = pd.DataFrame(all_results)
print(results_df.to_string(index=False))
print(f"macro F1 mean: {results_df['f1_macro'].mean():.4f}  std: {results_df['f1_macro'].std():.4f}")
print(f"best macro F1: {bestf1:.4f} (seed {best_seed})")




2452 images loaded
file name is:  lower_100056_jpg.rf.ec9852c66b4eee4a185317210a378f16.jpg shape of the image is:   (240, 320, 3)
(8125, 8) upper_604302_jpg.rf.6215ee30a829ec658154eb4d067dfdf5.jpg
we have 5 different classes



W0000 00:00:1765551579.893928 3441118 gpu_device.cc:2456] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0a. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.
I0000 00:00:1765551579.990758 3441118 gpu_device.cc:2040] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1348 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 5070, pci bus id: 0000:01:00.0, compute capability: 12.0a


Epoch 1/120


W0000 00:00:1765551591.623091 3441272 bfc_allocator.cc:502] Allocator (GPU_0_bfc) ran out of memory trying to allocate 14.06MiB (rounded to 14745600)requested by op SameWorkerRecvDone
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
I0000 00:00:1765551591.623152 3441272 bfc_allocator.cc:1049] BFCAllocator dump for GPU_0_bfc
I0000 00:00:1765551591.623153 3441272 bfc_allocator.cc:1056] Bin (256): 	Total Chunks: 47, Chunks in use: 47. 11.8KiB allocated for chunks. 11.8KiB in use in bin. 1.1KiB client-requested in use in bin.
I0000 00:00:1765551591.623157 3441272 bfc_allocator.cc:1056] Bin (512): 	Total Chunks: 8, Chunks in use: 8. 4.0KiB allocated for chunks. 4.0KiB in use in bin. 3.2KiB client-requested in use in bin.
I0000 00:00:1765551591.623159 3441272 bfc_allocator.cc:1056] Bin (1024): 	Total Chunks: 1, Chunks in use: 1. 1.2KiB a

KeyboardInterrupt: 