Import libraries

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, save_model, load_model
from tensorflow.keras.layers import Dropout, Input, Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, save_img
from superintendent.class_labeller import ClassLabeller
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline

Define the directories needed

In [2]:
# Project root. Defaults to the original author's location; set the
# ML_PROJECT_DIR environment variable to run the notebook on another machine
# without editing this cell. Trailing separators are stripped so the
# concatenations below never produce a double slash.
main_folder = os.environ.get(
    'ML_PROJECT_DIR',
    'D:/IST/5 ano/1 Semestre/Machine Learning/Project/Part 2/Proj',
).rstrip('/\\')
# Labelled validation / test images read by flow_from_directory
valid_path = main_folder+'/train_data/valid'
test_path = main_folder+'/train_data/test'
# Directory listed by the active-learning cell to check for unlabelled frames
unlabelled_data_path = main_folder+'/unlabelled_frames'
# Parent directory handed to flow_from_directory when predicting on unlabelled images
unlabelled_batch_path = main_folder+'/unlabelled'

Downloading the model

In [4]:
# Build the EfficientNetB1 convolutional base without its classification head
# (include_top=False); the `weights` argument is left at the Keras default.
# NOTE(review): the input tensor is 224x224x3, while the generators in this
# notebook resize images to 240x240 — confirm which resolution the checkpoint
# loaded below was trained with.
efficientnet_model = tf.keras.applications.efficientnet.EfficientNetB1(
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
)

Setting up the model with pretrained weights

In [4]:
# Transfer learning -> adding the layers including dropout for classification
top_layers = efficientnet_model.output
top_layers = Flatten(name="flatten_top")(top_layers)
# "relu" is the canonical Keras activation identifier; the capitalised
# "ReLU" spelling is rejected by some Keras versions.
top_layers = Dense(1024, activation="relu", name="first_dense_top")(top_layers)
top_layers = Dropout(0.5, name="dropout_top")(top_layers)
# Softmax over the three output classes
top_layers = Dense(units=3, activation="softmax", name="linear_output")(top_layers)

# Put it together
model = Model(inputs=efficientnet_model.input, outputs=top_layers)

# Loading the weights
cp_dir = main_folder+'/B1_weights'
weights = tf.train.latest_checkpoint(cp_dir)
# latest_checkpoint returns None when the directory holds no checkpoint;
# fail here with a clear message instead of an obscure error in load_weights.
if weights is None:
    raise FileNotFoundError('No checkpoint found in ' + cp_dir)
model.load_weights(weights)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1384a549088>

In [3]:
# Generate validation and test batches.
# Both generators apply the EfficientNet preprocessing function, resize the
# images to 240x240 and use the same explicit class order.
valid_batches = (
    ImageDataGenerator(preprocessing_function=tf.keras.applications.efficientnet.preprocess_input)
    .flow_from_directory(
        directory=valid_path,
        target_size=(240, 240),
        classes=['Fissure', 'Racines_Extrusion', 'Normal'],
        batch_size=64,
    )
)

# shuffle=False keeps the test images in a fixed order
test_batches = (
    ImageDataGenerator(preprocessing_function=tf.keras.applications.efficientnet.preprocess_input)
    .flow_from_directory(
        directory=test_path,
        target_size=(240, 240),
        classes=['Fissure', 'Racines_Extrusion', 'Normal'],
        batch_size=64,
        shuffle=False,
    )
)


Found 700 images belonging to 3 classes.
Found 600 images belonging to 3 classes.


Prioritization score for Active Learning: Entropy

In [4]:
# Define function that organizes predictions according to the prioritization score
# Prioritization score: Entropy

def entropy_score(predictions):
    """Return the entropy of every row of `predictions`.

    Args:
        predictions: 2-D array with one row per sample and one column per
            class (presumably class probabilities — the caller in this
            notebook passes the output of `model.predict`).

    Returns:
        1-D float array of length `predictions.shape[0]` holding
        -sum_c p_c * log(p_c) for each row.
    """
    n_samples, _ = predictions.shape
    # Exact zeros are swapped for a tiny epsilon so that log() stays finite
    safe_probs = np.where(predictions != 0, predictions, 10**(-10))
    entropy = np.zeros(n_samples)
    # Accumulate the contribution of one class column at a time
    for class_probs in safe_probs.T:
        entropy -= class_probs * np.log(class_probs)
    return entropy



# Active Learning

Active Learning: cell 0

In [5]:
#version = 0
# Four candidate learning rates, log-spaced from 1e-4 to 1e-2; the second one is used
learning_rate = np.logspace(-4, -2, num=4)
lr = learning_rate[1]
# How many unlabelled images are selected for labelling in each round
num_batch2label = 100
# Options offered by the labelling widget: the three classes plus a discard choice
labels_list = [
    'Fissure',
    'Racines_Extrusion',
    'Normal',
    'Delete from dataset',
]

In [10]:
# Number used for the next saved model folder ('Models/Version<version>');
# active-learning cell 1 increments it after every save.
version = 7

Active Learning: cell 1

In [14]:
# Active-learning round, step 1: score every unlabelled image, snapshot the
# model, and hand the most uncertain images to the labelling widget.

# Create list of all the unlabelled frames
frames = os.listdir(unlabelled_data_path)
# os.listdir never returns None, so test for an empty directory instead
if frames:
    # Later cells use paths relative to the project root, so make it the
    # working directory once instead of hopping in and out of sub-folders.
    os.chdir(main_folder)

    # Unlabelled batch for predictions. shuffle=False keeps the rows of
    # `predictions` in the same order as `unlabelled_batches.filenames`.
    unlabelled_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.efficientnet.preprocess_input) \
        .flow_from_directory(directory=unlabelled_batch_path, target_size=(240,240), classes=None, batch_size=64, shuffle=False)
    if len(unlabelled_batches.filenames) == 0:
        raise ValueError('No images found under ' + unlabelled_batch_path)

    # Generating the predictions
    predictions = model.predict(x=unlabelled_batches, steps=len(unlabelled_batches), verbose=0)

    # Compile the model to be saved
    model.compile(optimizer=Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])

    # Save model to directory (the folder is created if it does not exist yet)
    model_path = main_folder+'/Models/Version'+str(version)
    os.makedirs(model_path, exist_ok=True)
    save_model(model, model_path)
    version += 1

    # Select images to be labelled according to the prioritization score.
    # Never request more images than are available, otherwise argpartition raises.
    scores = entropy_score(predictions)
    num_selected = min(num_batch2label, len(scores))
    ind = np.argpartition(scores, -num_selected)[-num_selected:]

    # Load exactly the files the model scored: the generator's own file list
    # is the only ordering guaranteed to match `predictions`. Only the
    # selected images are read into memory.
    batch2label = []
    for ii in ind:
        img_path = os.path.join(unlabelled_batch_path, unlabelled_batches.filenames[ii])
        img = load_img(img_path)
        # PIL reads lazily; force the pixels into memory so the file can be deleted
        img.load()
        batch2label.append(img)
        # Removing those images from the unlabelled directory
        os.remove(img_path)

    # Creating the widgets to easily label the data
    widgets = ClassLabeller(features = batch2label, options = labels_list)




Found 7606 images belonging to 1 classes.




INFO:tensorflow:Assets written to: D:/IST/5 ano/1 Semestre/Machine Learning/Project/Part 2/Proj/Models/Version8\assets


INFO:tensorflow:Assets written to: D:/IST/5 ano/1 Semestre/Machine Learning/Project/Part 2/Proj/Models/Version8\assets
  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


Active Learning: cell 2 

Label the selected images

In [15]:
# Display the labelling widget created in cell 1; label the images here, then run cell 3
widgets

ClassLabeller(children=(HBox(children=(FloatProgress(value=0.0, description='Progress:', max=1.0),)), Box(chil…

Active Learning: cell 3

Further train the model with the newly labelled data

In [16]:
# Create new training data from newly labelled data
class_names = ['Fissure', 'Racines_Extrusion', 'Normal']
new_train_path = main_folder+'/new_train_data'
for class_name in class_names:
    # Absolute paths + exist_ok: independent of the working directory, and a
    # class folder that went missing is recreated.
    os.makedirs(new_train_path+'/'+class_name, exist_ok=True)

# Tag file names with this round's snapshot folder name ('Version<N>'):
# `ind` indexes the current unlabelled set, so the same index can recur in a
# later round and would otherwise overwrite an earlier training image.
round_tag = os.path.basename(model_path)

# Pair every label with its own image and index. batch2label is not mutated,
# so indices stay aligned and the cell can be re-run safely.
for idx, image, label in zip(ind, batch2label, widgets.new_labels):
    # Skip anything that is not one of the training classes: the
    # 'Delete from dataset' choice and, presumably, None for images that were
    # left unlabelled (TODO confirm how ClassLabeller reports skipped items).
    if label not in class_names:
        continue
    name = new_train_path+'/'+label+'/'+label+'_'+round_tag+'_'+str(idx)+'.jpg'
    save_img(name, image)

new_train_batch = ImageDataGenerator(preprocessing_function=tf.keras.applications.efficientnet.preprocess_input) \
    .flow_from_directory(directory=new_train_path, target_size=(240,240), classes=class_names, batch_size=64)

# Load previously trained model
model = load_model(model_path)

# Further train the model with the newly labelled data
model.fit(new_train_batch,
          steps_per_epoch=len(new_train_batch),
          validation_data=valid_batches,
          validation_steps=len(valid_batches),
          epochs=5,
)

Found 1027 images belonging to 3 classes.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x22fe841e1c8>

You may now rerun cells 1, 2 and 3 (in order) to continue labelling and training the model