In [1]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.models import  Model
import numpy as np

from data.id_dataloader import load_cifar10, load_intel_image, load_mnist, load_cifar100
from data.classes import cifar10_classes, mnist_classes, intel_image_classes, cifar100_classes

from models.models import resnet50, wideresnet2810, vgg16, inceptionv3, efficientnetb2
from models.pretrained_models import pretrained_resnet50, pretrained_vgg16

from rscnn_functions.budgeting import train_embeddings, fit_gmm, ellipse, overlaps
from rscnn_functions.bf_encoding_gt import groundtruthmod
from rscnn_functions.belief_mass_betp import belief_to_mass, mass_coeff, final_betp
from rscnn_functions.rscnn_loss import BinaryCrossEntropy

from utils.train_utils import lr_schedule, train_val_split, data_generator, lr_callbacks, save_model_and_weights

2024-05-22 18:28:09.190145: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-22 18:28:09.229809: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Discover the GPUs TensorFlow can see on this machine.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    num_gpus = len(gpus)
    print(f"Number of GPUs available: {num_gpus}")
    # Set GPUs to use: limit TensorFlow to at most `max_gpus` devices
    max_gpus = 3
    try:
        tf.config.set_visible_devices(gpus[:max_gpus], 'GPU')
    except RuntimeError as err:
        # set_visible_devices raises RuntimeError once the runtime is already
        # initialized (e.g. this cell is re-run in a live kernel); keep the
        # current device set instead of aborting the notebook.
        print(f"Could not change visible GPUs: {err}")

# Create a MirroredStrategy for multi-GPU use
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

In [None]:
# Initializing parameters
# NOTE: batch_size and epochs are assigned again (same values) in the configuration cell further down.
k = 20  # number of non-singleton focal sets (passed to overlaps() in the budgeting cell)
batch_size = 128
epochs = 100

In [None]:
# Number of target classes for each dataset name.
# Note: "svhn", "fmnist" and "kmnist" have no entry in the loader / class-list
# registries below.
num_classes = {
    "cifar10": 10,
    "mnist": 10,
    "intel_image": 6,
    "cifar100": 100,
    "svhn": 10,
    "fmnist": 10,
    "kmnist": 10,
}

# Dataset name -> loader callable
dataset_loader = {
    "cifar10": load_cifar10,
    "mnist": load_mnist,
    "intel_image": load_intel_image,
    "cifar100": load_cifar100,
}

# Architecture name -> model builder
models = {
    "resnet50": resnet50,
    "wideresnet_28_10": wideresnet2810,
    "vgg16": vgg16,
    "inception_v3": inceptionv3,
    "efficientnet_b2": efficientnetb2,
}

# Architecture name -> pretrained model builder
pretrained_models = {
    "pretrained_resnet50": pretrained_resnet50,
    "pretrained_vgg16": pretrained_vgg16,
}

# Dataset name -> callable returning the list of class names
class_list_functions = {
    "cifar10": cifar10_classes,
    "mnist": mnist_classes,
    "intel_image": intel_image_classes,
    "cifar100": cifar100_classes,
}

In [None]:
# Define configurations
selected_dataset = "cifar10"  # Choose the dataset
selected_model = "resnet50"   # Choose the model
batch_size = 128
epochs = 100

# Class list
classes = class_list_functions[selected_dataset]()
print("Classes:", classes)

# Lookup tables between class name and integer index (both directions)
num_clusters = len(classes)
classes_dict = {name: idx for idx, name in enumerate(classes)}
classes_dict_inverse = dict(enumerate(classes))

# Load dataset based on selected_dataset
x_train, y_train, x_test_org, x_test, y_test = dataset_loader[selected_dataset]()

# Infer input_shape based on selected_dataset (drop the leading sample axis)
input_shape = x_train.shape[1:]

# Train-validation split
x_train, y_train, y_train_one_hot, x_val, y_val, y_val_one_hot = train_val_split(
    x_train,
    y_train,
    num_classes[selected_dataset],
    val_samples=-10000,
)

for split_name, split_data in (("x_train", x_train), ("x_test", x_test), ("x_val", x_val)):
    print(f"Shape of {split_name}:", split_data.shape)

# Learning rate scheduler
callbacks = lr_callbacks(lr_schedule)

# Data augmentation
datagen = data_generator(x_train)

Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


Shape of x_train: (40000, 32, 32, 3)
Shape of x_test: (10000, 32, 32, 3)
Shape of x_val: (10000, 32, 32, 3)


In [None]:
# Multi-GPU run: build and compile the baseline CNN under the distribution strategy
with strategy.scope():
    # Pick the builder for selected_model; pretrained variants are checked first
    build_fn = (
        pretrained_models[selected_model]
        if selected_model in pretrained_models
        else models[selected_model]
    )
    model = build_fn(
        input_shape=input_shape,
        num_classes=num_classes[selected_dataset],
        final_activation='softmax',
    )

    # Compile the model
    model.compile(
        loss='categorical_crossentropy',
        optimizer="adam",
        metrics=['accuracy'],
    )

model.summary()

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 up_sampling2d (UpSampling2  (None, 224, 224, 3)       0         
 D)                                                              
                             

In [None]:
# Train the baseline CNN on augmented batches.
# NOTE(review): this runs a single epoch and passes no `callbacks`, unlike the
# configured `epochs` value — confirm this is intentional.
train_flow = datagen.flow(x_train, y_train_one_hot, batch_size=batch_size)
history = model.fit(
    train_flow,
    validation_data=(x_val, y_val_one_hot),
    epochs=1,
    verbose=1,
    workers=2,
)



In [None]:
# # Save model and weights
# save_model_and_weights(model, selected_model, selected_dataset, model_type='CNN')

## BUDGETING

In [None]:
# Extracting features from the penultimate layer
# (aux_model maps the trained model's input to the output of its second-to-last layer)
aux_model = Model(model.input, model.layers[-2].output)

# 3D feature space representation of class embeddings
# NOTE(review): the variable name suggests a t-SNE projection inside train_embeddings — confirm in rscnn_functions.budgeting
train_embedded_tsne = train_embeddings(aux_model, x_train, batch_size)

# Fitting Gaussian Mixture Models (GMM) to individual classes
individual_gms = fit_gmm(classes, train_embedded_tsne, y_train)

# Calculating clusters for each class
regions, means, max_len = ellipse(individual_gms, num_classes[selected_dataset])

# Compute the overlap and choose the sets of classes with highest overlap
# (k is the number of non-singleton focal sets set in the parameters cell)
new_classes = overlaps(k, classes, num_clusters, classes_dict, regions, means, max_len)

# NOTE(review): saving is disabled here, yet the next cell loads 'new_classes.npy' from disk —
# confirm that file exists and matches this run, or re-enable the save.
# np.save('new_classes.npy', new_classes)
print(new_classes)



: 

In [None]:
# Load `new_classes` from 'new_classes.npy'.
# NOTE(review): this replaces any `new_classes` computed by the budgeting cell, whose
# np.save call is commented out — confirm the file on disk is the intended one.
# allow_pickle=True unpickles stored objects; only load files you created yourself.
new_classes = np.load('new_classes.npy', allow_pickle=True)

In [None]:
# Belief-encoding of the ground truth: apply the same encoding to the
# train, validation and test labels (in that order)
y_train_modified, y_val_modified, y_test_modified = (
    groundtruthmod(labels, classes, new_classes, classes_dict_inverse)
    for labels in (y_train, y_val, y_test)
)

In [None]:
# Multi-GPU run: build and compile the second model under the distribution strategy.
# It has one sigmoid output per entry of new_classes.
with strategy.scope():
    # Pick the builder for selected_model; pretrained variants are checked first
    new_build_fn = (
        pretrained_models[selected_model]
        if selected_model in pretrained_models
        else models[selected_model]
    )
    new_model = new_build_fn(
        input_shape=input_shape,
        num_classes=len(new_classes),
        final_activation='sigmoid',
    )

    # Compile the model
    new_model.compile(
        loss=BinaryCrossEntropy,
        optimizer="adam",
        metrics=['binary_accuracy'],
    )

new_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 up_sampling2d_1 (UpSamplin  (None, 224, 224, 3)       0         
 g2D)                                                            
                                                                 
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d_1  (None, 2048)              0         
  (GlobalAveragePooling2D)                                       
                                                                 
 flatten_1 (Flatten)         (None, 2048)              0         
                                                                 
 dense_2 (Dense)             (None, 1024)              2098

In [None]:
# Train new_model on augmented batches with the belief-encoded labels,
# using the configured epoch count and learning-rate callbacks.
new_train_flow = datagen.flow(x_train, y_train_modified, batch_size=batch_size)
history_new = new_model.fit(
    new_train_flow,
    validation_data=(x_val, y_val_modified),
    epochs=epochs,
    verbose=1,
    workers=2,
    callbacks=callbacks,
)

2023-11-26 03:53:54.567004: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:786] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorDataset/_1"
op: "TensorDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_INT32
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 1
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\017TensorDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_INT32
        }
      }
    }
  }
}



Learning rate:  0.001
Epoch 1/100
INFO:tensorflow:Collective all_reduce tensors: 218 all_reduces, num_devices = 8, group_size = 8, implementation = CommunicationImplementation.NCCL, num_packs = 1
INFO:tensorflow:Collective all_reduce tensors: 218 all_reduces, num_devices = 8, group_size = 8, implementation = CommunicationImplementation.NCCL, num_packs = 1


2023-11-26 03:55:52.780674: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-11-26 03:55:53.276082: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-11-26 03:55:53.809108: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600

2023-11-26 03:55:54.013763: W tensorflow/compiler/xla/stream_executor/gpu/asm_compiler.cc:231] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 7.5
2023-11-26 03:55:54.013773: W tensorflow/compiler/xla/stream_executor/gpu/asm_compiler.cc:234] Used ptxas at ptxas
2023-11-26 03:55:54.013818: W tensorflow/compiler/xla/stream_executor/gpu/redzone_allocator.cc:318] UNIMPLEMENTED: ptxas ptxas too old. Falling back to the driver to compile.
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2023-11-26 03:55:54.298428: I tensorflow/compiler

Learning rate:  0.001
Epoch 2/100
Learning rate:  0.001
Epoch 3/100
Learning rate:  0.001
Epoch 4/100
Learning rate:  0.001
Epoch 5/100
Learning rate:  0.001
Epoch 6/100
Learning rate:  0.001
Epoch 7/100
Learning rate:  0.001
Epoch 8/100
Learning rate:  0.001
Epoch 9/100
Learning rate:  0.001
Epoch 10/100
Learning rate:  0.001
Epoch 11/100
Learning rate:  0.001
Epoch 12/100
Learning rate:  0.001
Epoch 13/100
Learning rate:  0.001
Epoch 14/100
Learning rate:  0.001
Epoch 15/100
Learning rate:  0.001
Epoch 16/100
Learning rate:  0.001
Epoch 17/100
Learning rate:  0.001
Epoch 18/100
Learning rate:  0.001
Epoch 19/100
Learning rate:  0.001
Epoch 20/100
Learning rate:  0.001
Epoch 21/100
Learning rate:  0.001
Epoch 22/100
Learning rate:  0.001
Epoch 23/100
Learning rate:  0.001
Epoch 24/100
Learning rate:  0.001
Epoch 25/100
Learning rate:  0.001
Epoch 26/100
Learning rate:  0.001
Epoch 27/100
Learning rate:  0.001
Epoch 28/100
Learning rate:  0.001
Epoch 29/100
Learning rate:  0.001
Epoch 

In [None]:
# NOTE(review): this cell issues the same fit call as the previous training cell
# (same data, epochs and callbacks) and overwrites `history_new`. Presumably it
# continues training from new_model's current weights — confirm the second run
# is intentional, or remove the duplicate cell.
history_new = new_model.fit(datagen.flow(x_train, y_train_modified, batch_size=batch_size),
                    validation_data=(x_val, y_val_modified),
                    epochs=epochs, verbose=1, workers=2,
                   callbacks=callbacks)

2023-11-26 05:10:21.002969: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:786] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorDataset/_1"
op: "TensorDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_INT32
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 1
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\022TensorDataset:5754"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_INT32
        }
      }
    }
  }
}



Learning rate:  0.001
Epoch 1/100
Learning rate:  0.001
Epoch 2/100
Learning rate:  0.001
Epoch 3/100
Learning rate:  0.001
Epoch 4/100
Learning rate:  0.001
Epoch 5/100
Learning rate:  0.001
Epoch 6/100
Learning rate:  0.001
Epoch 7/100
Learning rate:  0.001
Epoch 8/100
Learning rate:  0.001
Epoch 9/100
Learning rate:  0.001
Epoch 10/100
Learning rate:  0.001
Epoch 11/100
Learning rate:  0.001
Epoch 12/100
Learning rate:  0.001
Epoch 13/100
Learning rate:  0.001
Epoch 14/100
Learning rate:  0.001
Epoch 15/100
Learning rate:  0.001
Epoch 16/100
Learning rate:  0.001
Epoch 17/100
Learning rate:  0.001
Epoch 18/100
Learning rate:  0.001
Epoch 19/100
Learning rate:  0.001
Epoch 20/100
Learning rate:  0.001
Epoch 21/100
Learning rate:  0.001
Epoch 22/100
Learning rate:  0.001
Epoch 23/100
Learning rate:  0.001
Epoch 24/100
Learning rate:  0.001
Epoch 25/100
Learning rate:  0.001
Epoch 26/100
Learning rate:  0.001
Epoch 27/100
Learning rate:  0.001
Epoch 28/100
Learning rate:  0.001
Epoch 2

In [None]:
# # Save model and weights
# save_model_and_weights(new_model, selected_model, selected_dataset, model_type='RSCNN')