In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

print(tf.__version__)
print(len(tf.config.experimental.list_physical_devices('GPU')))
device_name = tf.test.gpu_device_name()
print(device_name)

2.7.0
0



In [2]:
#IMPORTAÇÃO E NORRMALIZAÇÃO
(x_train, y_train), (x_test,y_test) = keras.datasets.cifar10.load_data()
#x_train = x_train.reshape(-1, 28, 28, 1)
#x_test = x_test.reshape(-1, 28, 28, 1)

x_train = x_train.astype(float)/255
x_test = x_test.astype(float)/255

#CRIAR DATASET
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(50000).batch(64)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(64)

In [6]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
acc_metric = keras.metrics.SparseCategoricalAccuracy()
test_accuracy = tf.keras.metrics.Accuracy()
test_loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_accuracy = tf.keras.metrics.Accuracy()

In [7]:
model = keras.Sequential(
    [
        keras.layers.InputLayer(input_shape=(32, 32,3),batch_size=64),
     
        keras.layers.Conv2D(32, (3, 3), name="conv1", activation='relu',padding='same'),
        keras.layers.BatchNormalization(name="bn1"),  
        keras.layers.Conv2D(32, (3, 3),name="conv2", activation='relu', padding='same'),
        keras.layers.BatchNormalization(name="bn2"),
        keras.layers.MaxPooling2D(pool_size=(2, 2),name="mp1"),
        #keras.layers.Dropout(0.2),
        keras.layers.Conv2D(64, (3, 3),name="conv3", activation='relu', padding='same'),
        keras.layers.BatchNormalization(name="bn3"),
        keras.layers.Conv2D(64, (3, 3),name="conv4", activation='relu', padding='same'),
        keras.layers.BatchNormalization(name="bn4"),
        keras.layers.MaxPooling2D(pool_size=(2, 2),name="mp2"),
        #keras.layers.Dropout(0.2),
        keras.layers.Conv2D(128, (3, 3),name="conv5", activation='relu', padding='same'),
        keras.layers.BatchNormalization(name="bn5"),
        keras.layers.Conv2D(128, (3, 3),name="conv6", activation='relu', padding='same'),
        keras.layers.BatchNormalization(name="bn6"),
        keras.layers.MaxPooling2D(pool_size=(2, 2),name="mp3"),
        #keras.layers.Dropout(0.2),
     
        keras.layers.Flatten(name = "flatten"),
        keras.layers.Dropout(0.2, name="dp1"),
        keras.layers.Dense(1024, name="dense1", activation='relu'),
        keras.layers.Dropout(0.2),
        #keras.layers.Dense(256, name="dense2", activation='relu'),
        #keras.layers.Dropout(0.4),
        #keras.layers.Dense(128, name="dense3", activation='relu'),
        #keras.layers.BatchNormalization(name="bn7"),
        #keras.layers.Dropout(0.4),
        keras.layers.Dense(10, name="dense4", activation='softmax')
    ],
    name="modelo-0.0",
)

for layer in model.trainable_variables:
    #print(layer.name)
    if 'bias' in layer.name:
        new_bias = tf.cast(tf.where(tf.abs(layer) >= 0, 0.1, 0.1), tf.float32)
        layer.assign(new_bias)

model.summary()

Model: "modelo-0.0"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1 (Conv2D)              (64, 32, 32, 32)          896       
                                                                 
 bn1 (BatchNormalization)    (64, 32, 32, 32)          128       
                                                                 
 conv2 (Conv2D)              (64, 32, 32, 32)          9248      
                                                                 
 bn2 (BatchNormalization)    (64, 32, 32, 32)          128       
                                                                 
 mp1 (MaxPooling2D)          (64, 16, 16, 32)          0         
                                                                 
 conv3 (Conv2D)              (64, 16, 16, 64)          18496     
                                                                 
 bn3 (BatchNormalization)    (64, 16, 16, 64)          2

In [8]:
epochs = 300
alpha = 0.5
n_bits = 32

learning_rate = 0.03
momentum = 0.9

np.set_printoptions(threshold=np.inf)

model_sparsity = np.array([])
model_train_loss = np.array([])
model_train_acc = np.array([])
model_test_loss = np.array([])
model_test_acc = np.array([])
sparsity = 0

#metrics to save model
last_test_acc = 0
model_name = "alpha_05_bits_32_lr_003.h5"

#TODO: Increase batch_size and just quantize batch_norm layer
with tf.device('/device:GPU:0'):
    for epoch in range(epochs):
        # Iterate over the batches of the dataset.
        loss_batch = np.array([])
        loss_test_batch = np.array([])
        for step, (x_batch_train, y_batch_train) in enumerate(train_ds):
          n_zeros = 0
          size = 0
          #pruning
          if alpha > 0:
            bk = []
            for layer_weights in model.trainable_variables:
              if 'bn' in layer_weights.name:
                  bk.append(-1)
              else:
                  #flatten weights
                  f_weights = tf.reshape(layer_weights,[-1])
                  #get standard deviation of each layer
                  lim = alpha*tf.math.reduce_std(f_weights)
                  bk.append(lim)
                  #create a mask
                  mask = tf.cast(tf.where(tf.abs(layer_weights)>lim,1,0), tf.float32)
                  #assign pruned weights to the layer
                  layer_weights.assign(tf.math.multiply(layer_weights,mask))
                  #check sparsity
                  flat_array = np.array((tf.reshape(mask,[-1])))
                  n_zeros += np.count_nonzero(np.array(flat_array) == 0)
                  size += flat_array.shape[0]
                  sparsity = n_zeros*100/size
          else:
            bk = [0] * len(model.trainable_weights)

          #Cópia do modelo
          if n_bits > 0 and alpha > 0:
              model_copy = keras.models.clone_model(model)
              model_copy.set_weights(model.get_weights())
          
          #Quantização
          if n_bits > 0 and alpha > 0:
              for i, layer_weights in enumerate(model.trainable_variables):
                  if 'bn' in layer_weights.name:
                      pass
                  else:
                      qk_line = (tf.reduce_max(tf.math.abs(layer_weights)) - bk[i]) / (2 ** (n_bits - 1) - 1)
                      ck = tf.math.round(layer_weights / qk_line) * qk_line
                      layer_weights.assign(ck)

          with tf.GradientTape() as tape:
            pred = model(x_batch_train, training=True)
            loss = loss_fn(y_batch_train, pred)
            
          grads = tape.gradient(loss, model.trainable_weights)
          if step==0 and epoch==0:
              v = np.zeros_like(grads)
          if n_bits > 0 and alpha > 0:
              for i, (layer_weights, copied_weights) in enumerate(zip(model.trainable_variables, model_copy.trainable_variables)):
                  #TODO: Add momentum and velocity
                  grads[i] = grads[i] * learning_rate
                  #Get value of velocity
                  v[i] = tf.math.subtract(momentum*v[i], grads[i])
                  # WEIGHT UPDATE
                  layer_weights.assign(tf.math.add(copied_weights, v[i]))
          else:
              for i, layer_weights in enumerate(model.trainable_variables):
                  #TODO: Include momentum here
                  grads[i] = grads[i] * learning_rate
                  #WEIGHT UPDATE
                  layer_weights.assign(tf.math.subtract(layer_weights, grads[i]))
              #optimizer.apply_gradients(zip(grads, model.trainable_weights))
          
          #optimizer.apply_gradients(zip(grads, model.trainable_weights))

          predictions = tf.argmax(pred, axis=1, output_type=tf.int32)
          acc = train_accuracy(y_batch_train, predictions)

          #keep track of loss by batch
          loss_batch = np.append(loss_batch, loss)

        #mean of loss by epoch
        model_train_loss = np.append(model_train_loss, np.mean(loss_batch))
        #train accuracy by epoch
        model_train_acc = np.append(model_train_acc, acc*100)
        #sparsity by epoch
        model_sparsity = np.append(model_sparsity, sparsity)

        if alpha > 0:
            bk = []
            for layer_weights in model.trainable_variables:
                if 'bn' in layer_weights.name:
                    bk.append(-1)
                else:
                    #flatten weights
                    f_weights = tf.reshape(layer_weights,[-1])
                    #get standard deviation of each layer
                    lim = alpha*tf.math.reduce_std(f_weights)
                    bk.append(lim)
                    #create a mask
                    mask = tf.cast(tf.where(tf.abs(layer_weights)>lim,1,0), tf.float32)
                    #assign pruned weights to the layer
                    layer_weights.assign(tf.math.multiply(layer_weights,mask))
                    #check sparsity
                    flat_array = np.array((tf.reshape(mask,[-1])))
                    n_zeros += np.count_nonzero(np.array(flat_array) == 0)
                    size += flat_array.shape[0]
                    sparsity = n_zeros*100/size
        else:
            bk = [0] * len(model.trainable_weights)

        #Quantização
        if n_bits > 0 and alpha > 0:
            for i, layer_weights in enumerate(model.trainable_variables):
                if 'bn' in layer_weights.name:
                    pass
                else:
                    qk_line = (tf.reduce_max(tf.math.abs(layer_weights)) - bk[i]) / (2 ** (n_bits - 1) - 1)
                    ck = tf.math.round(layer_weights / qk_line) * qk_line
                    layer_weights.assign(ck)
        
        bk.clear()
        
        #Test
        for step, (x_batch_test, y_batch_test) in enumerate(test_ds):
          test_pred = model(x_batch_test, training=False)
          test_loss = loss_fn(y_batch_test,test_pred)
          test_prediction = tf.argmax(test_pred, axis=1, output_type=tf.int32)
          test_acc = test_accuracy(y_batch_test, test_prediction)
          #TODO: test accuracy by mean of the batch_valudation
          loss_test_batch = np.append(loss_batch,test_loss)

        model_test_acc = np.append(model_test_acc, test_acc*100)
        model_test_loss = np.append(model_test_loss,np.mean(loss_test_batch))
        
        print("Epoch {}/{} \t Loss = {:.3f} \t Train Acc = {:.3f}% \t Sparsity = {:.3f}% \t Test Acc = {:.3f}%".format(epoch+1,epochs,float(loss),float(acc*100),sparsity,float(test_acc*100)))

        #Save model with best test accuracy
        if test_acc > last_test_acc:
            print('New test is {:.3f}%, Model saved'.format(test_acc*100))
            last_test_acc = test_acc
            model.save(model_name)





Epoch 1/300 	 Loss = 1.147 	 Train Acc = 46.220% 	 Sparsity = 47.545% 	 Test Acc = 46.380%
New test is 46.380%, Model saved
Epoch 2/300 	 Loss = 1.128 	 Train Acc = 54.883% 	 Sparsity = 51.866% 	 Test Acc = 55.865%
New test is 55.865%, Model saved
Epoch 3/300 	 Loss = 0.918 	 Train Acc = 60.570% 	 Sparsity = 54.937% 	 Test Acc = 61.113%
New test is 61.113%, Model saved
Epoch 4/300 	 Loss = 1.036 	 Train Acc = 64.536% 	 Sparsity = 57.381% 	 Test Acc = 64.268%
New test is 64.268%, Model saved
Epoch 5/300 	 Loss = 0.408 	 Train Acc = 67.498% 	 Sparsity = 59.724% 	 Test Acc = 66.816%
New test is 66.816%, Model saved
Epoch 6/300 	 Loss = 0.476 	 Train Acc = 69.936% 	 Sparsity = 61.884% 	 Test Acc = 68.505%
New test is 68.505%, Model saved
Epoch 7/300 	 Loss = 0.716 	 Train Acc = 71.971% 	 Sparsity = 63.876% 	 Test Acc = 69.874%
New test is 69.874%, Model saved
Epoch 8/300 	 Loss = 0.192 	 Train Acc = 73.737% 	 Sparsity = 65.704% 	 Test Acc = 70.957%
New test is 70.957%, Model saved
Epoch 9/

In [None]:
l = len(model.trainable_weights)
for i in range(l):
  a = tf.reshape(model.trainable_weights[i],[-1])
  b = a.numpy()
  #print(a)
  #plt.ylim(0,300)
  plt.title(str(i))
  plt.hist(b,200)
  plt.show()

In [None]:
plt.title("test acc x train acc")
plt.plot(model_train_acc)
plt.plot(model_test_acc)
plt.show()

In [None]:
plt.title("train loss x test loss")
plt.plot(model_train_loss)
plt.plot(model_test_loss)
plt.show()

In [None]:
plt.title("Sparsity")
plt.plot(model_sparsity)
plt.show()

In [None]:
%matplotlib inline
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    figure(figsize=(10, 7), dpi=80)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(10)
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    #print(cm[:,0])

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, str(cm[i, j]*100/1000) + "%",
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Classe real')
    plt.xlabel('Classe predita')

In [None]:
test_accuracy = tf.keras.metrics.Accuracy()
logits = model(x_test, training=False)
prediction = tf.argmax(logits, axis=1, output_type=tf.int32)
test_accuracy(prediction, y_test)
print("Test set accuracy: {:.3%}".format(test_accuracy.result()))

In [None]:
classes = ["avião","carro","pássaro","gato","veado","cachorro","sapo","cavalo","navio","caminhão"]
cm = confusion_matrix(y_true=y_test, y_pred=prediction)
plot_confusion_matrix(cm=cm, classes=classes, title='Matriz de confusão')