In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from tensorflow.keras.applications import ResNet50

In [4]:
from tensorflow.keras import Model, Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import load_model

In [5]:
import numpy as np
from sklearn import metrics

In [6]:
from experiments.data_gen import PascalVOCDataGenerator

Using TensorFlow backend.


In [7]:
from model.callbacks.metric_callbacks import MAPCallback
from model.callbacks.save_callback import SaveModel

In [8]:
from model.losses import BCE

In [9]:
from sklearn.metrics import average_precision_score

In [10]:
# Value forwarded as `prop=` to PascalVOCDataGenerator and passed to SaveModel
# (presumably the percentage of known labels -- TODO confirm).
prop = 100

## model

In [None]:
# From http://cedric.cnam.fr/vertigo/Cours/ml2/tpDeepLearning5.html

In [23]:
# Load ResNet50 architecture & its weights
# imagenet_model = ResNet50(include_top=True, weights='imagenet')
# model.layers.pop()
# Modify top layers
# x = model.layers[-1].output
# x = Dense(data_generator_train.nb_classes, activation='sigmoid', name='predictions')(x)
# model = Model(inputs=model.input, outputs=x)

In [38]:
# ImageNet-pretrained ResNet50 with its classification head, used as-is (no fine-tuning).
model = ResNet50(
    weights='imagenet',
    include_top=True,
)

In [40]:
# Re-wire the network so that it ends at its second-to-last layer.
# Note: `model` is rebound from itself, so re-running this cell alone cuts one more layer.
model = Model(
    inputs=model.input,
    outputs=model.layers[-2].output,
)

In [41]:
# Print the layer-by-layer description of the current `model`.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_6[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [42]:
# Compile with SGD + momentum and a per-label binary cross-entropy objective.
lr = 0.1
model.compile(
    loss='binary_crossentropy',
    optimizer=SGD(lr, momentum=0.9),
    metrics=['binary_accuracy'],
)

## feature extraction

In [43]:
# Root of the VOC2007 dataset and the trainval generator built with the configured `prop`.
data_dir = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
data_generator_train = PascalVOCDataGenerator(
    'trainval',
    data_dir,
    prop=prop,
)

loading dataset from /share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/Annotations/annotations_multilabel_trainval_partial_70_1.csv


In [44]:
# Rebinds `data_generator_train`, replacing the generator that was built with `prop=`.
# NOTE(review): `force_old=True` presumably selects the original, fully annotated
# label file -- confirm in PascalVOCDataGenerator.
data_generator_train = PascalVOCDataGenerator('trainval', data_dir, force_old=True)

In [45]:
# Batch iterator over the trainval generator; `batch_size` is also reused by the
# feature-extraction loops to index the output matrices.
batch_size = 32
generator = data_generator_train.flow(batch_size=batch_size)

In [46]:
# Allocate the matrices that will hold the deep features and the labels.
n_train = len(data_generator_train.images_ids_in_subset)
X_train = np.zeros((n_train, 2048))
Y_train = np.zeros((n_train, 20))

# Number of batches, as a ceiling division: `int(n / batch_size) + 1` schedules one
# batch too many whenever n is an exact multiple of batch_size.
nb_batches = -(-n_train // batch_size)

for i in range(nb_batches):
    # For each batch, fetch the input images X and the labels y.
    X, y = next(generator)
    # The deep features are obtained through a call to predict.
    y_pred = model.predict(X)
    # Clip the write to the rows that are still free, so a final batch that is
    # longer than the remaining space cannot trigger a shape mismatch.
    start = i * batch_size
    stop = min(start + len(y_pred), n_train)
    X_train[start:stop, :] = y_pred[:stop - start]
    Y_train[start:stop, :] = y[:stop - start]

In [49]:
# Same deep-feature extraction, this time on the test split.
data_generator_test = PascalVOCDataGenerator('test', data_dir)
generator_test = data_generator_test.flow(batch_size=batch_size)

n_test = len(data_generator_test.images_ids_in_subset)
X_test = np.zeros((n_test, 2048))
Y_test = np.zeros((n_test, 20))

# Ceiling division: `int(n / batch_size) + 1` schedules one batch too many
# whenever n is an exact multiple of batch_size.
nb_batches = -(-n_test // batch_size)

for i in range(nb_batches):
    X, y = next(generator_test)
    y_pred = model.predict(X)
    # Clip the write to the rows that are still free, so a final batch that is
    # longer than the remaining space cannot trigger a shape mismatch.
    start = i * batch_size
    stop = min(start + len(y_pred), n_test)
    X_test[start:stop, :] = y_pred[:stop - start]
    Y_test[start:stop, :] = y[:stop - start]

In [50]:
# Cache features and labels in one archive; np.savez appends the `.npz` extension
# to the file name when it is missing.
outfile = 'DF_ResNet50_VOC2007_test28'
np.savez(
    outfile,
    X_train=X_train,
    Y_train=Y_train,
    X_test=X_test,
    Y_test=Y_test,
)

## Train classifier only

In [51]:
# Reload the cached deep features and set the hyper-parameters of the classifier.
outfile = 'DF_ResNet50_VOC2007_test28.npz'
learning_rate = 0.1
nb_epoch = 20
batch_size = 32

# Read the archive inside a `with` block so that its file handle is closed once
# the four arrays have been extracted.
with np.load(outfile) as npzfile:
    X_train = npzfile['X_train']
    Y_train = npzfile['Y_train']

    X_test = npzfile['X_test']
    Y_test = npzfile['Y_test']

# The last label used to read "Y_train=" although it prints the shape of Y_test.
print("data \n X_train=", X_train.shape, "Y_train=", Y_train.shape, " X_test=", X_test.shape, "Y_test=", Y_test.shape)

data 
 X_train= (5011, 2048) Y_train= (5011, 20)  X_test= (4952, 2048) Y_train= (4952, 20)


In [54]:
# Linear classifier on the cached 2048-d features: one sigmoid output per class.
model = Sequential([
    Dense(20, input_dim=2048, name='fc1', activation='sigmoid'),
])
model.summary()

sgd = SGD(learning_rate)
model.compile(
    loss='binary_crossentropy',
    optimizer=sgd,
    metrics=['binary_accuracy'],
)

model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
fc1 (Dense)                  (None, 20)                40980     
Total params: 40,980
Trainable params: 40,980
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fa128fdff60>

In [55]:
# Report the loss and the first compiled metric on the test features,
# both scaled by 100 and printed with a percent sign.
scores = model.evaluate(X_test, Y_test, verbose=0)
for idx in (0, 1):
    print("%s TEST: %.2f%%" % (model.metrics_names[idx], scores[idx]*100))

loss TEST: 7.51%
binary_accuracy TEST: 97.39%


In [58]:
# Per-class average precision on both splits, then their means (MAP).
y_pred_test = model.predict(X_test)
y_pred_train = model.predict(X_train)

# Take the number of classes from the label matrix instead of hard-coding it.
n_classes = Y_train.shape[1]
AP_train = np.zeros(n_classes)
AP_test = np.zeros(n_classes)

for c in range(n_classes):
    AP_train[c] = average_precision_score(Y_train[:, c], y_pred_train[:, c])
    AP_test[c] = average_precision_score(Y_test[:, c], y_pred_test[:, c])

# Apply the format with `%`: passing the value as a second print() argument
# left the literal "%.2f" in the output.
print("MAP TRAIN =%.2f" % (AP_train.mean()*100))
print("MAP TEST =%.2f" % (AP_test.mean()*100))

MAP TRAIN =%.2f 91.8934972747674
MAP TEST =%.2f 82.51005642434744


## Eval

In [17]:
# Build the test-split generator; the trailing expression displays how many
# image ids the split contains.
data_generator_test = PascalVOCDataGenerator('test', data_dir)
len(data_generator_test.images_ids_in_subset)

loaded ids


4952

In [18]:
# Use the whole test split as a single batch.
# Note: this rebinds the notebook-wide `batch_size`; later cells that need
# mini-batches must set it again.
batch_size = len(data_generator_test.images_ids_in_subset)
generator_test = data_generator_test.flow(batch_size=batch_size)

In [19]:
# Pull one batch from the test generator (inputs and labels).
X_test, Y_test = next(generator_test)

In [20]:
# Sanity check on the shapes of the test inputs and labels.
print(X_test.shape, Y_test.shape)

(4952, 224, 224, 3) (4952, 20)


In [None]:
def eval_fn(model, X_test, Y_test):
    """Print and return the per-class average precision of `model` on a test set.

    Args:
        model: object exposing `predict(X)`, returning one score per sample and class.
        X_test: inputs, forwarded unchanged to `model.predict`.
        Y_test: ground-truth label matrix, one row per sample, one column per class.

    Returns:
        1-D numpy array holding the average precision of each class; its mean
        (times 100) is the MAP printed by this function.
    """
    y_true = np.asarray(Y_test)
    y_pred_test = np.asarray(model.predict(X_test))

    # Read the class count from the labels rather than assuming 20 classes.
    n_classes = y_true.shape[1]
    AP_test = np.zeros(n_classes)
    for c in range(n_classes):
        AP_test[c] = metrics.average_precision_score(y_true[:, c], y_pred_test[:, c])

    print("MAP TEST =", AP_test.mean()*100)
    print(AP_test)
    # Return the scores: AP_test is local, so callers cannot read it otherwise.
    return AP_test

In [None]:
# Evaluate the current `model` on the in-memory test batch.
eval_fn(model, X_test, Y_test)

In [None]:
# NOTE(review): this reads the module-level `AP_test`. The array computed inside
# eval_fn is local to that function, so the value shown here is whatever an
# earlier cell assigned to `AP_test`.
print(AP_test)

## Finetune

In [16]:
# Dataset root and trainval generator for fine-tuning, built with the configured `prop`.
data_dir = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
data_generator_train = PascalVOCDataGenerator('trainval', data_dir, prop=prop)

loaded ids
loading dataset from /share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/Annotations/annotations_multilabel_trainval_partial_100_1.csv


In [12]:
# vanilla from RCP209
model = ResNet50(include_top=True, weights='imagenet')
# Branch off the second-to-last layer instead of calling `model.layers.pop()`:
# in tf.keras the `layers` property hands back a new list, so pop() leaves the
# network untouched and the new head would be stacked on the 1000-way softmax.
x = model.layers[-2].output
# Replace the ImageNet head by one sigmoid unit per dataset class.
x = Dense(data_generator_train.nb_classes, activation='sigmoid', name='predictions')(x)
model = Model(inputs=model.input, outputs=x)

In [12]:
# Variant that bypasses the final layer of the pretrained network: the new
# sigmoid head is attached to the output of the second-to-last layer.
model = ResNet50(include_top=True, weights='imagenet')
x = Dense(
    data_generator_train.nb_classes,
    activation='sigmoid',
    name='predictions',
)(model.layers[-2].output)
model = Model(inputs=model.input, outputs=x)

In [13]:
# Laura way: headless ResNet50 used as a nested block, followed by
# Flatten and a sigmoid layer with one unit per class.
input_shape = (224, 224, 3)
resnet = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)

inp = Input(shape=input_shape, name='image_input')
x = Flatten()(resnet(inp))
output = Dense(data_generator_train.nb_classes, activation='sigmoid')(x)
model = Model(inputs=inp, outputs=output)



In [13]:
# Print the layer-by-layer description of whichever model variant was built last.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [14]:
# Fine-tuning setup: plain SGD with the project's BCE loss
# (pass loss='binary_crossentropy' instead to use the stock Keras loss).
lr = 0.1
loss = BCE()
model.compile(
    loss=loss,
    optimizer=SGD(lr=lr),
    metrics=['binary_accuracy'],
)

In [66]:
# Test data: load the whole test split in memory as a single batch.
data_generator_test = PascalVOCDataGenerator('test', data_dir)
# Bare expression in the middle of a cell: its value is not displayed.
len(data_generator_test.images_ids_in_subset)
# Rebinds the notebook-wide `batch_size` to the size of the test split.
batch_size = len(data_generator_test.images_ids_in_subset)
generator_test = data_generator_test.flow(batch_size=batch_size)
X_test, Y_test = next(generator_test)
print(X_test.shape, Y_test.shape)

(4952, 224, 224, 3) (4952, 20)


In [21]:
# callbacks
exp_folder = '/home/caleml/partial_experiments/exp_20190722_1108_TESTNB'

cb_list = list()
cb_list.append(SaveModel(exp_folder, prop))

map_cb = MAPCallback(X_test, Y_test, exp_folder)
cb_list.append(map_cb)

In [None]:
# Fine-tune the network on the trainval split.
batch_size = 32
nb_epochs = 20
data_generator_train = PascalVOCDataGenerator('trainval', data_dir, prop=prop)

# One epoch = ceil(n / batch_size) steps. `int(n / batch_size) + 1` adds a
# spurious extra step whenever n is an exact multiple of batch_size.
n_train = len(data_generator_train.id_to_label)
steps_per_epoch_train = -(-n_train // batch_size)

model.fit_generator(data_generator_train.flow(batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch_train,
                    epochs=nb_epochs,
                    callbacks=cb_list,
                    verbose=1)

loaded ids
loading dataset from /share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/Annotations/annotations_multilabel_trainval_partial_100_1.csv
Epoch 1/20


## Dataset comparison

In [None]:
# Trainval generator built with the default arguments (no `prop`), used as the
# first side of the label comparison.
data_dir = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
data_generator_train = PascalVOCDataGenerator('trainval', data_dir)

In [None]:
# Labels as exposed by the generator, indexed by image id; show the size and one sample entry.
nico_data = data_generator_train.id_to_label
print(len(nico_data))
print(nico_data['000131'])

In [None]:
# CSV file with the multilabel trainval annotations, parsed by hand below.
trainval_path = (
    '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
    'Annotations/annotations_multilabel_trainval.csv'
)

In [None]:
# Parse the annotation CSV: the first field of each line is the key,
# the remaining fields are converted to integers.
laura_data = {}
with open(trainval_path, 'r') as f_in:
    for row in f_in:
        img_key, *raw_labels = row.strip().split(',')
        laura_data[img_key] = list(map(int, raw_labels))

In [None]:
# Same size / sample-entry check as for `nico_data`, for a visual comparison.
print(len(laura_data))
print(laura_data['000131'])

In [None]:
# Check that both label sources agree once every value of the CSV labels
# other than 0 or 1 has been mapped to 0.
for id_img, labels in nico_data.items():
    binarised = [value if value in (0, 1) else 0 for value in laura_data[id_img]]
    matches = [binarised[pos] == labels[pos] for pos in range(len(labels))]
    assert all(matches)

## Launch.py surrogate