# Create a Bagging Model in order to overcome memory space problems



In [None]:
!pip install open3d
import os
import open3d
from random import sample
import numpy as np
from keras.layers import Conv3D, MaxPooling3D, Flatten, Dense
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from keras.layers import Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting open3d
  Downloading open3d-0.17.0-cp39-cp39-manylinux_2_27_x86_64.whl (420.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m420.5/420.5 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting addict
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting ipywidgets>=8.0.4
  Downloading ipywidgets-8.0.5-py3-none-any.whl (138 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.3/138.3 KB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Collecting configargparse
  Downloading ConfigArgParse-1.5.3-py3-none-any.whl (20 kB)
Collecting pyquaternion
  Downloading pyquaternion-0.9.9-py3-none-any.whl (14 kB)
Collecting dash>=2.6.0
  Downloading dash-2.9.1-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m36.7 MB/s[0m eta [36m0:00:00[0m
Collecting pillow>=9.3.0
  Downloading Pillow

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf

# Ask TensorFlow for the attached GPU and stop immediately unless it is the first GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print(f'Found GPU at: {device_name}')
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


# MODEL PREPARATION

In [None]:
def model_definition():
  """Build and compile the Keras 3D CNN used as a single ensemble member.

  The network stacks three Conv3D -> Dropout -> MaxPooling3D stages
  (16, 32 and 64 filters) followed by a 64-unit dense layer and a 10-way
  softmax output.

  Returns:
    A compiled ``Sequential`` model that takes inputs of shape
    (32, 32, 32, 1) and is trained with categorical cross-entropy and Adam.
  """
  model = Sequential()

  # First stage: the only layer that declares the input shape.
  model.add(Conv3D(16, kernel_size=(3, 3, 3), padding='same', input_shape=(32, 32, 32, 1), activation='relu'))
  model.add(Dropout(0.2))
  model.add(MaxPooling3D(pool_size=(3, 3, 3)))

  # Remaining stages infer their input shape from the previous layer, so no
  # input_shape argument is passed here.
  for filters in (32, 64):
    model.add(Conv3D(filters, kernel_size=(3, 3, 3), padding='same', activation='relu'))
    model.add(Dropout(0.2))
    model.add(MaxPooling3D(pool_size=(3, 3, 3)))

  # Classification head.
  model.add(Flatten())
  model.add(Dense(64, activation='relu'))
  model.add(Dropout(0.2))
  model.add(Dense(10, activation='softmax'))

  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  return model

# DATA PREPARATION:
Split the training set into 5 partitions

In [None]:
#Load the voxel grid stored in a file
def process_off_file(filepath):
    """Read ``filepath`` with Open3D and return the resulting voxel grid."""
    return open3d.io.read_voxel_grid(filepath)


In [None]:
def getKPartitionsFolderized(folderName, K, folderTest, baseDIR="/content/drive/MyDrive/"):
    """Randomly distribute the ``.ply`` files of every class over K partitions.

    For each of the ten class folders, the files inside its ``train`` or
    ``test`` sub-folder are listed and each file is assigned to one of the K
    partitions chosen uniformly at random.

    Args:
        folderName: Name of the dataset folder inside ``baseDIR``.
        K: Number of partitions to create (must be at least 1).
        folderTest: When true read the ``test`` sub-folders, otherwise ``train``.
        baseDIR: Directory that contains ``folderName``.

    Returns:
        A list of K dictionaries mapping class name -> list of file names
        (without extension). Every dictionary contains every class, possibly
        with an empty list, so that label indices derived from the sorted keys
        are identical for all partitions.

    Raises:
        ValueError: If ``K`` is smaller than 1.
        FileNotFoundError: If a class folder is missing (raised by ``os.listdir``).
    """
    if K < 1:
        raise ValueError(f"K must be at least 1, got {K}")

    models = ["bathtub", "bed", "chair", "desk", "dresser",
              "monitor", "night_stand", "sofa", "table", "toilet"]
    INPUT_EXTENTION = ".ply"
    splitFolder = "test" if folderTest else "train"
    rootModelsDirName = os.path.join(baseDIR, folderName)

    # One dictionary per partition, pre-filled with every class so a class
    # that receives no file still keeps its position among the sorted keys.
    k_sets = [{modelFolder: [] for modelFolder in models} for _ in range(K)]
    k_sets_indexes = list(range(K))

    for modelFolder in models:
        inputDIR = os.path.join(rootModelsDirName, modelFolder, splitFolder)
        for path in os.listdir(inputDIR):
            # Keep regular files with the expected extension only.
            if not path.endswith(INPUT_EXTENTION):
                continue
            if not os.path.isfile(os.path.join(inputDIR, path)):
                continue
            # Pick the destination partition uniformly at random.
            index = sample(k_sets_indexes, 1)[0]
            k_sets[index][modelFolder].append(os.path.splitext(path)[0])

    return k_sets

# Prepare validation set

In [None]:
#From a partition create the dense voxel samples and their labels
def prepare_data(partition, middlefolder, class_names=None, root_dir='/content/drive/MyDrive/ModelNet10_Voxel'):
  """Load every file of a partition as a dense 32x32x32x1 occupancy grid.

  Args:
    partition: Dictionary mapping class name -> list of file names (without
      the ``.ply`` extension).
    middlefolder: Sub-folder of each class folder to read from
      (``"train"`` or ``"test"`` in this notebook).
    class_names: Optional ordered list of class names; a sample's label is
      the index of its class in this list. Defaults to the sorted keys of
      ``partition``.
    root_dir: Folder that contains one sub-folder per class.

  Returns:
    A tuple ``(train, labels)``: ``train`` has shape (n, 32, 32, 32, 1) with
    1 in every occupied voxel and 0 elsewhere, ``labels`` has shape (n,) and
    holds the integer class index of each sample.

  Raises:
    KeyError: If ``partition`` contains a class missing from ``class_names``.
  """
  grid_size = 32
  ordered_keys = sorted(partition.keys())
  if class_names is None:
    class_names = ordered_keys
  label_of = {name: index for index, name in enumerate(class_names)}

  # (file path, label) for every sample, classes visited in sorted order.
  samples = [
      (f'{root_dir}/{key}/{middlefolder}/{value}.ply', label_of[key])
      for key in ordered_keys
      for value in partition[key]
  ]

  # Allocate the result once and fill it file by file, so only one voxel grid
  # is alive at a time and the dense data is never copied.
  train = np.zeros((len(samples), grid_size, grid_size, grid_size, 1))
  labels = np.array([label for _, label in samples], dtype=int)

  for row, (filepath, _) in enumerate(samples):
    voxels = process_off_file(filepath).get_voxels()
    if len(voxels) == 0:
      # Nothing occupied: the sample stays all zeros.
      continue
    grid_indices = np.array([voxel.grid_index for voxel in voxels])
    # Mark all occupied voxels of this sample in a single indexed assignment.
    train[row, grid_indices[:, 0], grid_indices[:, 1], grid_indices[:, 2], 0] = 1

  return train, labels

In [None]:
#Create a unique validation set for every model

# MODEL PREPARATION

In [None]:
#Build the train / validation / test partitions used by the models

#Name of the dataset folder inside Google Drive
folder = 'ModelNet10_Voxel'

#Print the class folders found in the dataset
class_folders = os.listdir('/content/drive/MyDrive/ModelNet10_Voxel')
print(class_folders)

#SPLIT THE TRAIN SET
#Each partition is a map with
#Key: the name of the class, Value: the list of file names of that class
#Here the train set is kept as a single partition (K = 1)
sets = getKPartitionsFolderized(folderName=folder, K=1, folderTest=False)

#Split the test set in two halves: test[0] is the Test set, test[1] the Validation set
test = getKPartitionsFolderized(folderName=folder, K=2, folderTest=True)

['night_stand', 'bathtub', 'monitor', 'dresser', 'table', 'bed', 'chair', 'sofa', 'toilet', 'desk']


In [None]:
from keras.utils import to_categorical

# Load the validation half of the test split and one-hot encode its labels over the 10 classes.
X_validation, y_validation = prepare_data(partition=test[1], middlefolder="test")
y_validation = to_categorical(y_validation, num_classes=10)

In [None]:
def cutout_3d(volume, cutout_size=(8,8,8)):
    """Zero out one randomly placed box inside a voxel volume (cutout augmentation).

    Args:
        volume: Array whose first three axes are the spatial axes; any trailing
            axes (e.g. a channel axis) are zeroed along with the box. The array
            is modified in place.
        cutout_size: Extent of the box along each of the three spatial axes.

    Returns:
        The same ``volume`` object, with the box set to 0.

    Raises:
        ValueError: If the box is larger than the volume along any axis.
    """
    starts = []
    for extent, size in zip(volume.shape[:3], cutout_size):
        if size > extent:
            raise ValueError(f"cutout size {size} exceeds volume extent {extent}")
        # randint excludes its upper bound, hence the +1: without it the box
        # could never touch the far face of the volume.
        starts.append(np.random.randint(0, extent - size + 1))
    x, y, z = starts
    volume[x:x + cutout_size[0], y:y + cutout_size[1], z:z + cutout_size[2]] = 0
    return volume

In [None]:
# Shuffle samples and labels with one shared permutation so each pair stays aligned.
idt = np.random.permutation(len(X_validation))
X_validation = X_validation[idt]
y_validation = y_validation[idt]

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """3D CNN classifier for batches of 1x32x32x32 voxel grids.

    Three Conv3d -> ReLU -> MaxPool3d stages halve the spatial size each time
    (32 -> 16 -> 8 -> 4), followed by two fully connected layers. The output
    holds 10 raw class scores (logits); no softmax is applied because
    ``nn.CrossEntropyLoss`` expects logits.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool3d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv3d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool3d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv3d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool3d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        # After the third pooling the feature map is 64 channels x 4 x 4 x 4,
        # so the flattened vector has 4096 entries, not 64.
        self.fc1 = nn.Linear(64 * 4 * 4 * 4, 64)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Return logits of shape (batch, 10) for ``x`` of shape (batch, 1, 32, 32, 32)."""
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        x = self.flatten(x)
        x = self.relu4(self.fc1(x))
        return self.fc2(x)


In [None]:
#MODEL IN PYTORCH

import torch
import torch.nn as nn
import torch.nn.functional as F

class ModelDefinition(nn.Module):
    """PyTorch counterpart of the Keras model, with 32/64/128 filters.

    Three Conv3d -> ReLU -> Dropout -> MaxPool3d stages with pooling size 3
    reduce a 32x32x32 grid to 1x1x1 (32 -> 10 -> 3 -> 1), leaving 128
    features per sample for the two fully connected layers. The output holds
    10 raw class scores (logits).
    """

    def __init__(self):
        super(ModelDefinition, self).__init__()
        self.conv1 = nn.Conv3d(1, 32, kernel_size=(3, 3, 3), padding=1)
        self.dropout1 = nn.Dropout(0.2)
        self.pool1 = nn.MaxPool3d(kernel_size=(3, 3, 3))
        self.conv2 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=1)
        self.dropout2 = nn.Dropout(0.2)
        self.pool2 = nn.MaxPool3d(kernel_size=(3, 3, 3))
        self.conv3 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=1)
        self.dropout3 = nn.Dropout(0.2)
        self.pool3 = nn.MaxPool3d(kernel_size=(3, 3, 3))
        self.fc1 = nn.Linear(128, 128)
        self.dropout4 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return logits of shape (batch, 10) for ``x`` of shape (batch, 1, 32, 32, 32)."""
        x = F.relu(self.conv1(x))
        x = self.dropout1(x)
        x = self.pool1(x)

        x = F.relu(self.conv2(x))
        x = self.dropout2(x)
        x = self.pool2(x)

        x = F.relu(self.conv3(x))
        x = self.dropout3(x)
        x = self.pool3(x)

        # Keep the batch axis and merge everything else: (batch, 128, 1, 1, 1) -> (batch, 128).
        x = x.flatten(start_dim=1)

        x = F.relu(self.fc1(x))
        x = self.dropout4(x)
        x = self.fc2(x)

        return x

In [None]:
# Load the single training partition and one-hot encode its labels over the 10 classes.
X_train, y_train = prepare_data(partition=sets[0], middlefolder="train")
y_train = to_categorical(y_train, num_classes=10)

In [None]:
#Move the last axis to position 1: (N, 32, 32, 32, 1) -> (N, 1, 32, 32, 32)
X_train = X_train.transpose(0, 4, 1, 2, 3)

In [None]:
# Same axis reordering as the training data: the last axis moves to position 1.
X_validation = X_validation.transpose(0, 4, 1, 2, 3)

In [None]:
#Convert the NumPy arrays into float32 PyTorch tensors

X_train_tensor = torch.from_numpy(X_train).to(torch.float32)
y_train_tensor = torch.from_numpy(y_train).to(torch.float32)
num_samples = len(X_train_tensor)

X_validation_tensor = torch.from_numpy(X_validation).to(torch.float32)
y_validation_tensor = torch.from_numpy(y_validation).to(torch.float32)
num_validation = len(X_validation_tensor)
print(num_validation)

449


In [None]:
from torchsummary import summary
network = ModelDefinition()
# torchsummary builds its dummy input on "cuda" by default; the freshly created
# network lives on the CPU, so request a CPU summary to keep both on one device.
summary(network, (1,32,32,32), device="cpu")
print(network)

RuntimeError: ignored

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from types import SimpleNamespace
from torch.utils.data import DataLoader, TensorDataset

num_models = 1
num_samples = X_train_tensor.shape[0]
batch_size = 32
max_epochs = 30
patience = 10
# Train on the GPU when one is available; models and batches are moved to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Mini-batches give the network the leading batch axis it expects and keep the
# number of samples resident on the device small.
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(TensorDataset(X_validation_tensor, y_validation_tensor), batch_size=batch_size)


def _run_epoch(model, loader, criterion, device, optimizer=None):
  """Run one pass over ``loader`` and return (mean loss per sample, accuracy).

  The weights are updated only when ``optimizer`` is given; otherwise the
  model is put in evaluation mode and gradients are disabled. Accuracy is a
  fraction in [0, 1].
  """
  training = optimizer is not None
  model.train(training)
  loss_sum, correct, seen = 0.0, 0, 0
  with torch.set_grad_enabled(training):
    for inputs, one_hot in loader:
      inputs = inputs.to(device)
      # Labels are stored one-hot; the loss and the accuracy need class indices.
      targets = one_hot.argmax(dim=1).to(device)
      outputs = model(inputs)
      loss = criterion(outputs, targets)
      if training:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      # The criterion returns the batch mean: weight it by the batch size so
      # the epoch value is a true per-sample mean.
      loss_sum += loss.item() * inputs.size(0)
      correct += (outputs.argmax(dim=1) == targets).sum().item()
      seen += inputs.size(0)
  if seen == 0:
    return 0.0, 0.0
  return loss_sum / seen, correct / seen


Histories = []
model_ = np.empty(num_models, dtype=object)
for i in range(num_models):
  model_[i] = CNN().to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model_[i].parameters(), lr=0.01)

  # Keras-like history so the plotting cell can read history.history[metric].
  history = {'loss': [], 'accuracy': [], 'val_loss': [], 'val_accuracy': []}
  train_losses = history['loss']
  val_losses = history['val_loss']
  # Start below any reachable accuracy so the first epoch always writes a checkpoint.
  best_acc = -1.0
  counter = 0

  for epoch in range(max_epochs):
    train_loss, train_acc = _run_epoch(model_[i], train_loader, criterion, device, optimizer)
    val_loss, acc = _run_epoch(model_[i], validation_loader, criterion, device)
    history['loss'].append(train_loss)
    history['accuracy'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_accuracy'].append(acc)

    if acc > best_acc:
      # New best validation accuracy: checkpoint the weights and reset the patience counter.
      counter = 0
      best_acc = acc
      torch.save(model_[i].state_dict(), 'best_model.pth')
    else:
      counter = counter + 1
      if counter >= patience:
        print("Early stopping")
        break

  print("Early Stop called or End of Epochs reaches: Saving the model")
  # Give the best checkpoint of this model its final name.
  model_name = "Model" + str(i) + ".pth"
  os.replace("best_model.pth", model_name)
  Histories.append(SimpleNamespace(history=history))

ValueError: ignored

In [None]:
# Validation accuracy of every trained model (the models are PyTorch modules,
# so the accuracy is computed by hand instead of calling a Keras-style evaluate).
true_classes = y_validation_tensor.argmax(dim=1)
for i, model in enumerate(model_):
  model.eval()
  # Feed each batch on whatever device the model's weights live on.
  model_device = next(model.parameters()).device
  correct = 0
  with torch.no_grad():
    for start in range(0, num_validation, 64):
      batch = X_validation_tensor[start:start + 64].to(model_device)
      predicted = model(batch).argmax(dim=1).cpu()
      correct += (predicted == true_classes[start:start + 64]).sum().item()
  score = correct / max(num_validation, 1)
  print("The score of Model " + str(i) + " is")
  print(score)

In [None]:
import matplotlib.pyplot as plt

#Draw the training and validation curves of one metric on the current axes
def plot_graphs(history, metric):
    """Plot ``history.history[metric]`` together with its ``val_`` counterpart."""
    val_metric = 'val_' + metric
    curves = history.history
    plt.plot(curves[metric])
    plt.plot(curves[val_metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, val_metric])


# Accuracy on the left, loss on the right, both for the first trained model.
plt.figure(figsize=(16, 6))
for position, metric in enumerate(('accuracy', 'loss'), start=1):
    plt.subplot(1, 2, position)
    plot_graphs(Histories[0], metric)

# MODEL PREDICTION
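Two possible ways to combine the predictions of the models:
 - Highest summed probability: add up the class probabilities of all models and pick the largest
 - Majority vote: pick the class predicted by most models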

In [None]:
# Load the test half of the test split and one-hot encode its labels over the 10 classes.
X_test, y_test = prepare_data(partition=test[0], middlefolder="test")
y_test = to_categorical(y_test, num_classes=10)

# Shuffle samples and labels with one shared permutation so each pair stays aligned.
idt = np.random.permutation(len(X_test))
X_test = X_test[idt]
y_test = y_test[idt]

In [None]:
#This function takes the bagging models and one sample and makes the ensemble prediction
#It returns whether the ensemble was correct and whether the models were unanimous
def prediction(models,value,true_label):
  """Classify one sample with every model and combine the results.

  The class probabilities (softmax of each model's logits) are summed over
  all models and the class with the highest sum is the ensemble decision.

  Args:
    models: Sequence of PyTorch modules returning logits of shape (1, classes).
    value: One voxel grid, either channels-last (32, 32, 32, 1) or
      channels-first (1, 32, 32, 32).
    true_label: One-hot encoded true class of the sample.

  Returns:
    A tuple ``(isFinalDecisionCorrect, unanimity)``. When the ensemble is
    correct, ``unanimity`` is True if every model predicted the true class;
    when it is wrong, ``unanimity`` is True if no model predicted the true
    class.

  Raises:
    ValueError: If ``models`` is empty.
  """
  if len(models) == 0:
    raise ValueError("prediction needs at least one model")

  #Extrapolate the label from the one-hot representation
  label = int(np.argmax(true_label))

  sample = np.asarray(value, dtype=np.float32)
  #The models expect channels-first input: move a trailing channel axis to the front
  if sample.ndim == 4 and sample.shape[-1] == 1 and sample.shape[0] != 1:
    sample = np.moveaxis(sample, -1, 0)
  #Add the batch axis
  batch = torch.from_numpy(np.ascontiguousarray(sample)).unsqueeze(0)

  #Class probabilities of every model
  y = []
  with torch.no_grad():
    for model in models:
      model.eval()
      first_param = next(model.parameters(), None)
      #Run the sample on the device that holds the model's weights
      inputs = batch if first_param is None else batch.to(first_param.device)
      probabilities = torch.softmax(model(inputs), dim=1)
      y.append(probabilities.cpu().numpy())

  #Sum the results of all the models to obtain the final result
  results = np.sum(y,axis=0)
  isFinalDecisionCorrect = bool(label == np.argmax(results))

  #For each model: did it vote for the true class?
  votes_for_label = [int(np.argmax(single)) == label for single in y]
  if isFinalDecisionCorrect:
    #Correct at unanimity: every model voted for the true class
    unanimity = all(votes_for_label)
  else:
    #Wrong at unanimity: no model voted for the true class
    unanimity = not any(votes_for_label)

  return isFinalDecisionCorrect, unanimity
  

In [None]:
# Sanity check: run the ensemble prediction on the first test sample.
prediction(model_,X_test[0],y_test[0])


In [None]:
#Run the ensemble prediction on every test sample and count the four outcomes
positive_unanimity = positive = negative_unanimity = negative = 0
for sample_value, sample_label in zip(X_test, y_test):
  isCorrect, unanimity = prediction(model_, sample_value, sample_label)
  if isCorrect and unanimity:
    positive_unanimity += 1
  elif isCorrect:
    positive += 1
  elif unanimity:
    negative_unanimity += 1
  else:
    negative += 1

print(f"Length of test set: {len(X_test)}")
print(f"Positive at unanimity: {positive_unanimity}")
print(f"Positive NOT at unanimity: {positive}")
print(f"Negative at unanimity: {negative_unanimity}")
print(f"Negative NOT at unanimity: {negative}")


In [None]:
# Overall accuracy of the ensemble: correct predictions over the size of the test set.
total_positive = positive + positive_unanimity
print(total_positive)
positive_percentual = total_positive / len(X_test)
print(positive_percentual)

In [None]:
from google.colab import files

# Training names each saved model "Model<i>.pth", one file per trained model,
# so download exactly those files.
for model_index in range(num_models):
  files.download(f"Model{model_index}.pth")
