# Create a Bagging Model in order to overcome memory space problems



In [None]:
!pip install open3d
import os
import open3d
from random import sample
import numpy as np
from keras.layers import Conv3D, MaxPooling3D, Flatten, Dense
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint

from tensorflow.keras.callbacks import EarlyStopping

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting open3d
  Using cached open3d-0.16.0-cp38-cp38-manylinux_2_27_x86_64.whl (422.5 MB)
Collecting configargparse
  Downloading ConfigArgParse-1.5.3-py3-none-any.whl (20 kB)
Collecting dash>=2.6.0
  Downloading dash-2.8.1-py3-none-any.whl (9.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
Collecting nbformat==5.5.0
  Using cached nbformat-5.5.0-py3-none-any.whl (75 kB)
Collecting addict
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting pyquaternion
  Downloading pyquaternion-0.9.9-py3-none-any.whl (14 kB)
Collecting pillow>=8.2.0
  Downloading Pillow-9.4.0-cp38-cp38-manylinux_2_28_x86_64.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0
  Downloading dash_html_components-2.0

# MODEL PREPARATION

In [None]:
def model_definition():
  """Build and compile the 3D CNN used for one member of the ensemble.

  The network expects inputs of shape (32, 32, 32, 1). It stacks two
  Conv3D + MaxPooling3D stages (16 and 32 filters, 3x3x3 kernels and pools),
  flattens the result and finishes with a 64-unit ReLU layer followed by a
  10-unit softmax layer. It is compiled with the Adam optimizer, categorical
  cross-entropy loss and accuracy as the reported metric.

  Returns:
    The compiled keras Sequential model.
  """
  # Layers listed in the exact order in which they are stacked
  stack = [
      # Convolutional feature extractor
      Conv3D(16, kernel_size=(3, 3, 3), input_shape=(32, 32, 32, 1), activation='relu'),
      MaxPooling3D(pool_size=(3, 3, 3)),
      Conv3D(32, kernel_size=(3, 3, 3), activation='relu'),
      MaxPooling3D(pool_size=(3, 3, 3)),
      # Classifier head
      Flatten(),
      Dense(64, activation='relu'),
      Dense(10, activation='softmax'),
  ]

  network = Sequential()
  for layer in stack:
    network.add(layer)

  network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  return network

# DATA PREPARATION:
Splitting the train set into 5 sets

In [None]:
def process_off_file(filepath):
    """Read the voxel grid stored at `filepath` with Open3D and return it.

    NOTE(review): despite the name, the callers in this notebook pass `.ply`
    paths, not `.off` files.
    """
    return open3d.io.read_voxel_grid(filepath)


In [None]:
def getKPartitionsFolderized(folderName, K, folderTest,
                             baseDir="/content/drive/MyDrive/", classNames=None):
    """Randomly split the `.ply` files of every class folder into K partitions.

    The files are looked up in `<baseDir>/<folderName>/<class>/<train|test>`.
    Every file is assigned to one of the K partitions uniformly at random
    (using the global `random` state), independently of the other files.

    Args:
        folderName: Name of the dataset folder inside `baseDir`.
        K: Number of partitions to create.
        folderTest: If true the `test` sub-folder of each class is read,
            otherwise the `train` sub-folder.
        baseDir: Directory that contains `folderName`.
        classNames: Names of the class folders to read. Defaults to the ten
            classes used throughout this notebook.

    Returns:
        A list of K dicts. Each dict maps a class name to the list of file
        names (without extension) assigned to that partition. A class that
        received no file in a partition has no key in that partition's dict.

    Raises:
        FileNotFoundError: If one of the class sub-folders does not exist.
    """
    INPUT_EXTENSION = ".ply"
    if classNames is None:
        classNames = ["bathtub", "bed", "chair", "desk", "dresser",
                      "monitor", "night_stand", "sofa", "table", "toilet"]

    rootModelsDirName = os.path.join(baseDir, folderName)
    splitFolder = "test" if folderTest else "train"

    # Collect, per class, the names (without extension) of all the .ply files
    allModels = {}
    for modelFolder in classNames:
        inputDIR = os.path.join(rootModelsDirName, modelFolder, splitFolder)
        # os.listdir returns entries in arbitrary order: sort them so that the
        # partitioning is reproducible when the random generator is seeded
        allModels[modelFolder] = [
            os.path.splitext(entry)[0]
            for entry in sorted(os.listdir(inputDIR))
            if entry.endswith(INPUT_EXTENSION)
            and os.path.isfile(os.path.join(inputDIR, entry))
        ]

    # One (initially empty) dict per partition
    bucketIndexes = list(range(K))
    k_sets = [{} for _ in bucketIndexes]
    for className, fileNames in allModels.items():
        for fileName in fileNames:
            # Pick the destination partition at random
            index = sample(bucketIndexes, 1)[0]
            k_sets[index].setdefault(className, []).append(fileName)

    return k_sets

# Prepare validation set

In [None]:
def prepare_data(partition, middlefolder, class_names=None,
                 data_root='/content/drive/MyDrive/Output_ROTATED_v7'):
  """Load one partition of the dataset as dense 32x32x32x1 occupancy grids.

  Every file is read with `process_off_file`, and each voxel of the returned
  grid sets the cell at its `grid_index` to 1 in an otherwise zero array.

  Labels are the position of the class name inside `class_names`, NOT its
  position among the keys of `partition`: a partition that happens to contain
  no sample of some class therefore still gets the same label for every other
  class as all the other partitions.

  Args:
    partition: Dict mapping a class name to a list of file names (without
      extension), as produced by `getKPartitionsFolderized`.
    middlefolder: Sub-folder of each class folder to read from
      (the callers pass "train" or "test").
    class_names: Ordered class names defining the integer labels. Defaults to
      the ten classes used in this notebook, in alphabetical order.
    data_root: Folder that contains one sub-folder per class.

  Returns:
    A tuple `(train, labels)`: `train` is a float array of shape
    (n_samples, 32, 32, 32, 1) and `labels` an integer array of shape
    (n_samples,). Samples are ordered by class name, then by their order in
    the partition lists.

  Raises:
    ValueError: If `partition` contains a class that is not in `class_names`.
    IndexError: If a voxel index falls outside the 32x32x32 grid.
  """
  if class_names is None:
    class_names = ["bathtub", "bed", "chair", "desk", "dresser",
                   "monitor", "night_stand", "sofa", "table", "toilet"]
  label_of = {name: position for position, name in enumerate(class_names)}

  unknown = sorted(set(partition) - set(label_of))
  if unknown:
    raise ValueError(f"Unknown class name(s) in partition: {unknown}")

  # Flatten the partition into (class, file) pairs, ordered by class name
  entries = [(key, value) for key in sorted(partition) for value in partition[key]]

  # Allocate the output once and fill it file by file, so that only one raw
  # voxel grid is kept in memory at any time
  train = np.zeros((len(entries), 32, 32, 32, 1))
  labels = np.array([label_of[key] for key, _ in entries], dtype=int)

  for position, (key, value) in enumerate(entries):
    filepath = f'{data_root}/{key}/{middlefolder}/{value}.ply'
    voxel_grid = process_off_file(filepath)
    # Mark every occupied cell of the grid
    for voxel in voxel_grid.get_voxels():
      gx, gy, gz = voxel.grid_index
      train[position, gx, gy, gz] = 1

  return train,labels

In [None]:
#Create a unique validation set for every model

# MODEL PREPARATION

In [None]:
# Create several models using the train/validation data

# Print the entries found in the dataset folder (one folder per class)
class_folders = os.listdir('/content/drive/MyDrive/Output_ROTATED_v7')
print(class_folders)

# Name of the dataset folder, as expected by getKPartitionsFolderized
folder = 'Output_ROTATED_v7'

# SPLIT THE TRAIN SET
# `sets` is a list of 5 elements, each one a random partition of the train set.
# Each partition is a dict:
# key: the name of the class, value: a list of file names (without extension)

sets = getKPartitionsFolderized(folder, 5, False)

# Split the test set into 2 random partitions: one is used as validation set, the other as test set
test = getKPartitionsFolderized(folder,2,True)

['bathtub', 'bed', 'chair', 'desk', 'dresser', 'monitor', 'night_stand', 'sofa', 'table', 'toilet']


In [None]:
from keras.utils import to_categorical
# The second partition of the test folder is used as validation set
X_validation, y_validation = prepare_data(test[1], "test")

# The first partition of the test folder is kept as test set
X_test, y_test = prepare_data(test[0], "test")

# One-hot encode the integer labels over the 10 classes
y_test = to_categorical(y_test, 10)
y_validation = to_categorical(y_validation, 10) 

In [None]:
def cutout_3d(volume, cutout_size=(8,8,8)):
    """Zero out a randomly placed box inside `volume` (CutOut augmentation).

    The box position is drawn uniformly among all the positions where the box
    fits entirely inside the first three axes of `volume`, borders included.

    Args:
        volume: Array with at least three axes (the callers pass 32x32x32x1
            grids). It is modified IN PLACE.
        cutout_size: Box size along the first three axes. A size larger than
            the corresponding axis is clamped to the axis length.

    Returns:
        The same `volume` object, after the box has been set to 0.
    """
    starts = []
    sizes = []
    for axis in range(3):
        extent = volume.shape[axis]
        size = min(cutout_size[axis], extent)
        # randint's upper bound is exclusive: the +1 lets the box touch the
        # far border of the volume
        starts.append(np.random.randint(0, extent - size + 1))
        sizes.append(size)

    (x, y, z), (dx, dy, dz) = starts, sizes
    volume[x:x+dx, y:y+dy, z:z+dz] = 0
    return volume

In [None]:
# Shuffle the test set, applying the same permutation to samples and labels
idt = np.random.permutation(len(X_test))
X_test, y_test = X_test[idt], y_test[idt]

# Shuffle the validation set in the same way
idt = np.random.permutation(len(X_validation))
X_validation, y_validation = X_validation[idt], y_validation[idt]

In [None]:
# Training history returned by fit() for each model of the ensemble
Histories = []
# Each model is trained for at most 30 epochs, with early stopping

# One slot per model of the ensemble
model_ = np.empty(5, dtype=object)
# Repeat 5 times: one model per partition of the train set
for i in range(5):
  # Prepare the data of this iteration:
  # load the samples and integer labels of partition i
  X_train, y_train = prepare_data(sets[i], "train") 
  # One-hot encode the labels over the 10 classes
  y_train = to_categorical(y_train, 10)
  # Shuffle data
  idx = np.random.permutation(len(X_train))
  X_train, y_train = X_train[idx], y_train[idx]
  # Apply CUTOUT to the first 25% of the (already shuffled) samples
  for j in range(int(len(X_train)*0.25)):
    X_train[j] = cutout_3d(X_train[j])
  # Shuffle again to mix the cut-out samples with the untouched ones
  idx = np.random.permutation(len(X_train))
  X_train, y_train = X_train[idx], y_train[idx]
  # Build and compile a fresh model
  model_[i] = model_definition()
  # Create a ModelCheckpoint callback that writes 'best_model.h5' only when val_loss improves.
  # It is re-created at every iteration so that the best model of each iteration is tracked separately.
  checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)
  # Early stop callback: stop after 10 epochs without val_loss improvement
  early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
  # Train on partition i, validating on the shared validation set
  history = model_[i].fit(X_train, y_train, epochs=30, validation_data=(X_validation, y_validation), callbacks=[checkpoint, early_stop])
  print("Early Stop called or End of Epochs reaches: Saving the model")
  # Rename the checkpoint file to Model<i>.h5 so the next iteration does not overwrite it
  model_name = "Model" + str(i) + ".h5"
  os.rename("best_model.h5", model_name)
  Histories.append(history)

Epoch 1/30
Epoch 1: val_loss improved from inf to 1.29753, saving model to best_model.h5
Epoch 2/30
Epoch 2: val_loss improved from 1.29753 to 1.04527, saving model to best_model.h5
Epoch 3/30
Epoch 3: val_loss improved from 1.04527 to 0.92732, saving model to best_model.h5
Epoch 4/30
Epoch 4: val_loss improved from 0.92732 to 0.81240, saving model to best_model.h5
Epoch 5/30
Epoch 5: val_loss did not improve from 0.81240
Epoch 6/30
Epoch 6: val_loss improved from 0.81240 to 0.76848, saving model to best_model.h5
Epoch 7/30
Epoch 7: val_loss did not improve from 0.76848
Epoch 8/30
Epoch 8: val_loss did not improve from 0.76848
Epoch 9/30
Epoch 9: val_loss improved from 0.76848 to 0.69778, saving model to best_model.h5
Epoch 10/30
Epoch 10: val_loss did not improve from 0.69778
Epoch 11/30
Epoch 11: val_loss did not improve from 0.69778
Epoch 12/30
Epoch 12: val_loss did not improve from 0.69778
Epoch 13/30
Epoch 13: val_loss did not improve from 0.69778
Epoch 14/30
Epoch 14: val_loss d

In [None]:
# Report the accuracy of every model of the ensemble on the validation set
for i, member in enumerate(model_):
  # evaluate() returns the loss followed by the compiled metrics (accuracy)
  score = member.evaluate(X_validation,y_validation)
  print(f"The score of Model {i} is")
  print(score[1])

The score of Model 0 is
0.7600864768028259
The score of Model 1 is
0.7687320113182068
The score of Model 2 is
0.7384726405143738
The score of Model 3 is
0.7327089309692383
The score of Model 4 is
0.7485590577125549


In [None]:
# Show the type of the model at index `i` (`i` is left over from an earlier loop)
print(type(model_[i]))

# MODEL PREDICTION
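Two possible ways to combine the predictions of the models:
 - Soft voting: pick the class with the highest sum of the probabilities predicted by all the models
 - Hard voting: pick the class predicted by the most models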

In [None]:
def prediction(models,value,true_label):
  """Classify one sample with the whole ensemble and compare with the truth.

  Every model predicts the sample; the per-class outputs are summed over the
  models and the class with the highest sum is the ensemble prediction.

  Args:
    models: Sequence of trained models (any length >= 1). Each one must
      expose `predict(batch, verbose=0)`.
    value: A single input sample, without the batch axis.
    true_label: One-hot (or score) vector of the true class; its argmax is
      taken as the true label.

  Returns:
    A tuple `(is_correct, unanimity)`: `is_correct` tells whether the ensemble
    prediction equals the true label, `unanimity` whether every model, taken
    individually, predicted the same class.

  Raises:
    ValueError: If `models` is empty.
  """
  if len(models) == 0:
    raise ValueError("prediction() needs at least one model")

  # The models expect a batch: wrap the single sample into a batch of size 1
  tensor = np.array([value])
  # Extract the label from the one-hot representation
  label = np.argmax(true_label)

  # Run the prediction on every model of the ensemble
  outputs = [model.predict(tensor, verbose = 0) for model in models]
  # Sum the outputs of all the models to obtain the final result
  results = np.sum(outputs, axis=0)

  # Check whether all the models individually agree on the predicted class
  votes = [np.argmax(output) for output in outputs]
  unanimity = all(vote == votes[0] for vote in votes)

  # True if the prediction was correct, and whether it was taken at unanimity
  return label == np.argmax(results), unanimity
  

In [None]:
# Quick check: run the ensemble prediction on the first sample of the test set
prediction(model_,X_test[0],y_test[0])




(True, True)

In [None]:
# Evaluate the bagging ensemble on the test set, counting the outcomes by
# (prediction is correct, models were unanimous)
tally = {(True, True): 0, (True, False): 0, (False, True): 0, (False, False): 0}
for i in range(len(X_test)):
  isCorrect, unanimity = prediction(model_,X_test[i],y_test[i])
  tally[(bool(isCorrect), bool(unanimity))] += 1

positive_unanimity = tally[(True, True)]
positive = tally[(True, False)]
negative_unanimity = tally[(False, True)]
negative = tally[(False, False)]

print("Length of test set: " + str(len(X_test)))
print("Positive at unanimity: " + str(positive_unanimity))
print("Positive NOT at unanimity: " + str(positive))
print("Negative at unanimity: " + str(negative_unanimity))
print("Negative NOT at unanimity: " + str(negative))


Length of test set: 1336
Positive at unanimity: 740
Positive NOT at unanimity: 334
Negative at unanimity: 48
Negative NOT at unanimity: 214


In [None]:
# Number of correct ensemble predictions, unanimous or not
total_positive = positive_unanimity + positive
print(total_positive)
# Fraction of the test set that was classified correctly
positive_percentual = total_positive/len(X_test)
print(positive_percentual)

1074
0.8038922155688623


In [None]:
from google.colab import files

# Download the saved file of every model of the ensemble, in order
for saved_model in ("Model0.h5", "Model1.h5", "Model2.h5", "Model3.h5", "Model4.h5"):
  files.download(saved_model)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>