# Curriculum Learning for instances

Before running the code, enable a GPU runtime: **Runtime -> Change runtime type -> GPU** (in the Portuguese UI: **Ambiente de execução -> Alterar o tipo de ambiente de execução -> GPU**).

In [1]:
import math

import numpy as np
from PIL import Image

from keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Reshape, UpSampling2D, Conv2DTranspose, Dense, AveragePooling2D
from tensorflow.keras.models import load_model
from keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD

import matplotlib.pyplot as plt

**Parameters definition:**

In [2]:
# Mini-batch size used by the bootstrap fit, the curriculum training loop and evaluation.
batchSize = 32
# Number of curriculum-training epochs; the scoring model is fit for int(numberEpochs*0.1) epochs.
numberEpochs = 100
# Width of the softmax output layer and number of classes the pacing function iterates over.
numberClasses = 10

The **lowSampleDataset(X, Y)** function keeps only a random 5% of the examples (images and their labels together) so that the notebook runs faster.

In [3]:
def lowSampleDataset(X, Y, fraction=5/100):
    """Return a random subset of the dataset, keeping each image paired with its label.

    Args:
        X: NumPy array of examples, indexed along axis 0.
        Y: NumPy array of labels with the same length as X along axis 0.
        fraction: Share of the examples to keep, in [0, 1]. Defaults to 5%,
            the value that was previously hard-coded.

    Returns:
        (Xsub, Ysub): the selected examples and their labels, in the same
        random order.

    Raises:
        ValueError: if X and Y have different lengths or fraction is outside [0, 1].
    """
    if X.shape[0] != Y.shape[0]:
        raise ValueError("X and Y must contain the same number of examples "
                         "(got %d and %d)" % (X.shape[0], Y.shape[0]))
    if not 0 <= fraction <= 1:
        raise ValueError("fraction must be in [0, 1], got %r" % (fraction,))

    keep = int(X.shape[0] * fraction)
    # A single index array is applied to both arrays so the pairing cannot drift.
    chosen = np.random.permutation(X.shape[0])[:keep]
    return X[chosen], Y[chosen]

**Pre-processing:**

1.   Load CIFAR10 dataset
2.   Optionally reduce the number of examples (the subsampling calls are commented out by default)




In [4]:
print("Loading CIFAR10 images ...")
(Xtrain, Ytrain), (Xtest, Ytest) = cifar10.load_data()

# Optional speed-up: uncomment to train/test on a random subset only.
#Xtrain, Ytrain = lowSampleDataset(Xtrain, Ytrain)
#Xtest, Ytest = lowSampleDataset(Xtest, Ytest)

# One-hot encode the labels. The width is pinned to numberClasses so it always
# matches the softmax layer, even if a subsample happens to miss the highest class.
Ytrain = to_categorical(Ytrain, num_classes=numberClasses)
Ytest = to_categorical(Ytest, num_classes=numberClasses)

print('\tTraining set shape: ', Xtrain.shape)
print('\tTesting set shape: ', Xtest.shape)

Loading CIFAR10 images ...
	Training set shape:  (50000, 32, 32, 3)
	Testing set shape:  (10000, 32, 32, 3)


**Preparing the CNN:**

1.   Define the architecture structure
2.   Define the loss function and optimizer


In [5]:
def setModel():
  """Build and compile the CNN classifier.

  The network takes 32x32x3 inputs and stacks three identical stages, each
  made of a 'same'-padded 3x3 convolution, an unpadded 3x3 convolution (both
  with 64 filters and ReLU) and a 2x2 average pooling. The result is
  flattened and passed through a 128-unit ReLU layer and a softmax layer
  with numberClasses outputs.

  Returns:
    A compiled Keras Model using categorical cross-entropy, SGD
    (learning_rate=0.01, momentum=0.0001) and the accuracy metric.
  """
  input_img = Input(shape=(32, 32, 3,))

  # Three convolutional stages, created in the same order as the layers are applied.
  features = input_img
  for _ in range(3):
    features = Conv2D(64, kernel_size=(3,3), padding='same', activation='relu')(features)
    features = Conv2D(64, kernel_size=(3,3), activation='relu')(features)
    features = AveragePooling2D(pool_size=(2, 2))(features)

  # Classification head.
  features = Flatten()(features)
  features = Dense(128, activation='relu')(features)
  outputs = Dense(numberClasses, activation='softmax')(features)

  classifier = Model(input_img, outputs)
  classifier.compile(
      loss='categorical_crossentropy',
      optimizer=SGD(learning_rate=0.01, momentum=0.0001),
      metrics=['accuracy'],
  )
  # Call classifier.summary() here to inspect the layer shapes.

  return classifier

**Scoring function:**

In [6]:
def scoringFunction(X, Y):
  """Order the dataset from easiest to hardest using a briefly trained bootstrap model.

  A fresh model from setModel() is fit on (X, Y) for 10% of numberEpochs
  (at least one epoch). The difficulty of each instance is
  1 - (probability the model assigns to its true class), so confidently
  classified instances come first.

  Args:
    X: NumPy array of images, indexed along axis 0.
    Y: NumPy array of one-hot labels, one row per image.

  Returns:
    (Xsorted, Ysorted): X and Y reordered by ascending difficulty. Both are
    reordered with the same index, so each image stays with its label, and
    the input dtypes are preserved.
  """
  print("\tScoring function...")
  bootstrapModel = setModel()
  # max(1, ...) keeps the bootstrap model from being left untrained when numberEpochs < 10.
  bootstrapModel.fit(x=X, y=Y, batch_size=batchSize, epochs=max(1, int(numberEpochs*0.1)), shuffle=True)

  print("\t\tDefining difficulty level...")
  pred = bootstrapModel.predict(X)
  del bootstrapModel

  # Probability assigned to the true class of every instance, picked in one vectorised step.
  trueClass = np.argmax(Y, axis=1)
  score = 1 - pred[np.arange(Y.shape[0]), trueClass]

  print("\t\tSorting the instances...")
  # One stable argsort shared by X and Y. Sorting zip(score, X) and zip(score, Y)
  # separately breaks ties on different keys and can misalign images and labels.
  order = np.argsort(score, kind="stable")

  return X[order], Y[order]

**Pacing function:**

In [7]:
def pacingFunction(X, Y, pacing, currentEpoch):
  """Select the portion of the difficulty-sorted dataset available at the current epoch.

  The pacing schedule turns the epoch into a total instance budget. The
  budget is split evenly across numberClasses, and for each class the first
  instances (in the order they appear in X) are taken. At least one instance
  per class is always returned.

  Args:
    X: NumPy array of images, expected to be already sorted by difficulty.
    Y: NumPy array of one-hot labels aligned with X.
    pacing: One of "linear", "log" or "ladder".
    currentEpoch: 1-based index of the current epoch (the caller passes e+1).

  Returns:
    (images, labels): the selected images grouped class by class, and their
    one-hot labels with numberClasses columns.

  Raises:
    ValueError: if pacing is not a supported schedule name.
  """
  total = X.shape[0]

  if (pacing == "linear"):
    size = int((total/numberEpochs) * currentEpoch)
  elif (pacing == "log"):
    # log base `total` needs total > 1 and a positive argument; otherwise fall
    # back to the minimum budget (one instance per class, see below).
    if total > 1 and currentEpoch > 0:
      size = int(math.log((currentEpoch/numberEpochs)*total, total) * total)
    else:
      size = 0
  elif (pacing == "ladder"):
    numberStepsLadder = 10
    # At least one epoch per step, so short runs do not divide by zero.
    sizeStep = max(1, int(numberEpochs/numberStepsLadder))
    # currentEpoch is 1-based: subtract 1 so every step spans sizeStep epochs,
    # and clamp so the last epoch does not request more than 100% of the data.
    currentStep = min(int((currentEpoch - 1)/sizeStep) + 1, numberStepsLadder)
    size = int(total*(currentStep/numberStepsLadder))
  else:
    raise ValueError("Unknown pacing function: %r (expected 'linear', 'log' or 'ladder')" % (pacing,))

  # Even per-class budget, never below one instance per class.
  perClass = max(int(size/numberClasses), 1)
  Ysearch = np.argmax(Y, axis=1)

  imageParts = []
  labelParts = []
  for i in range(0, numberClasses):
    # Positions of class i in their original (difficulty-sorted) order.
    classIdx = np.flatnonzero(Ysearch == i)[:perClass]
    imageParts.append(X[classIdx])
    labelParts.append(Ysearch[classIdx])

  imagePacing = np.concatenate(imageParts, axis=0)
  labelPacing = np.concatenate(labelParts, axis=0)

  # num_classes is pinned so the label width always matches the model output.
  return imagePacing, to_categorical(labelPacing, num_classes=numberClasses)

**Training the model:**

In [8]:
def trainingTestingModel(Xtrain, Ytrain, Xtest, Ytest, pacing):
  """Train a fresh model with the given pacing schedule and evaluate it after every epoch.

  For each of the numberEpochs epochs the training subset is chosen by
  pacingFunction (or is the whole training set when pacing is "constant"),
  shuffled, and fed to the model in mini-batches of batchSize. The model is
  then evaluated on the test set and a summary line is printed.

  The "Loss" and "Acc Train" values in that line are the ones returned by the
  last train_on_batch call of the epoch.

  Args:
    Xtrain, Ytrain: training images and one-hot labels.
    Xtest, Ytest: test images and one-hot labels.
    pacing: "constant" or a schedule name understood by pacingFunction.
  """
  print("\tTraining curriculum model with " + pacing + "...")
  model = setModel()

  for epoch in range(1, numberEpochs + 1):
    if pacing == "constant":
      Xcurriculum, Ycurriculum = Xtrain, Ytrain
    else:
      Xcurriculum, Ycurriculum = pacingFunction(Xtrain, Ytrain, pacing, epoch)

    nInstances = Xcurriculum.shape[0]
    order = np.random.permutation(nInstances)
    for start in range(0, nInstances, batchSize):
      # Slicing past the end simply yields the shorter final batch.
      chosen = order[start : start + batchSize]
      loss = model.train_on_batch(Xcurriculum[chosen], Ycurriculum[chosen])

    acc = model.evaluate(Xtest, Ytest, batch_size=batchSize)
    print("\t\tEpoch %i/%i: Instances (%i), Loss(%.15f), Acc Train(%.15f), Acc Test(%.15f)." % (epoch, numberEpochs, nInstances, loss[0], loss[1], acc[1]))


In [10]:
# Reorder the training set by ascending difficulty score. This rebinds
# Xtrain/Ytrain to the sorted arrays, so the original order is no longer available.
Xtrain, Ytrain = scoringFunction(Xtrain, Ytrain)
# Train and evaluate a fresh model using the "ladder" pacing schedule.
trainingTestingModel(Xtrain, Ytrain, Xtest, Ytest, "ladder")

	Scoring function...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
		Defining difficulty level...
		Sorting the instances...
	Training curriculum model with ladder...
		Epoch 1/100: Instances (5000), Loss(2.318812131881714), Acc Train(0.125000000000000), Acc Test(0.108900003135204).
		Epoch 2/100: Instances (5000), Loss(2.353803157806396), Acc Train(0.125000000000000), Acc Test(0.238600000739098).
		Epoch 3/100: Instances (5000), Loss(1.854172825813293), Acc Train(0.375000000000000), Acc Test(0.183899998664856).
		Epoch 4/100: Instances (5000), Loss(1.961445927619934), Acc Train(0.250000000000000), Acc Test(0.229100003838539).
		Epoch 5/100: Instances (5000), Loss(1.992015838623047), Acc Train(0.375000000000000), Acc Test(0.278800010681152).
		Epoch 6/100: Instances (5000), Loss(2.063083648681641), Acc Train(0.250000000000000), Acc Test(0.296499997377396).
		Epoch 7/100: Instances (5000), Loss(1.465039491653442), Acc Trai