In [126]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import os

In [91]:
import tensorflow as tf

In [3]:
from tensorflow.keras.applications import ResNet50

In [81]:
from tensorflow.keras import Model, Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Dense, Flatten, Input, Concatenate
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import LearningRateScheduler

In [5]:
import numpy as np
from sklearn import metrics

In [8]:
from data.pascalvoc.pascalvoc import PascalVOC

Using TensorFlow backend.


In [127]:
from model.networks.baseline import Baseline
from model.networks.prior import PriorModel

In [30]:
from model.callbacks.metric_callbacks import MAPCallback
from model.callbacks.save_callback import SaveModel
from model.callbacks.scheduler import lr_scheduler

In [None]:
from model.losses import BCE, PartialBCE

In [None]:
from sklearn.metrics import average_precision_score

In [7]:
from config import config_utils

In [None]:
from config.config import cfg

In [10]:
prop=100

In [None]:
parse_options_file('/home/caleml/partial-labels/config/pv_baseline50_sgd_448lrs.yaml')

## model

In [None]:
# From http://cedric.cnam.fr/vertigo/Cours/ml2/tpDeepLearning5.html

In [None]:
# Load ResNet50 architecture & its weights
# imagenet_model = ResNet50(include_top=True, weights='imagenet')
# model.layers.pop()
# Modify top layers
# x = model.layers[-1].output
# x = Dense(data_generator_train.nb_classes, activation='sigmoid', name='predictions')(x)
# model = Model(inputs=model.input, outputs=x)

In [None]:
# imagenet only model (no finetuning)
model = ResNet50(include_top=True, weights='imagenet')

In [None]:
# model.layers.pop().pop()
model = Model(inputs=model.input,outputs=model.layers[-2].output)

In [None]:
model.summary()

In [None]:
lr = 0.1
model.compile(loss='binary_crossentropy', optimizer=SGD(lr, momentum=0.9), metrics=['binary_accuracy'])

## feature extraction

In [None]:
data_dir = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
data_generator_train = PascalVOCDataGenerator('trainval', data_dir, prop=prop)

In [None]:
data_generator_train = PascalVOCDataGenerator('trainval', data_dir, force_old=True)

In [None]:
batch_size = 32
generator = data_generator_train.flow(batch_size=batch_size)

In [None]:
# Initilisation des matrices contenant les Deep Features et les labels
X_train = np.zeros((len(data_generator_train.images_ids_in_subset),2048))
Y_train = np.zeros((len(data_generator_train.images_ids_in_subset),20))

# Calcul du nombre e batchs
nb_batches = int(len(data_generator_train.images_ids_in_subset) / batch_size) + 1

for i in range(nb_batches):
    # Pour chaque batch, on extrait les images d'entrée X et les labels y
    X, y = next(generator)
    # On récupère les Deep Feature par appel à predict
    y_pred = model.predict(X)
    X_train[i*batch_size:(i+1)*batch_size,:] = y_pred
    Y_train[i*batch_size:(i+1)*batch_size,:] = y

In [None]:
# same for test
data_generator_test = PascalVOCDataGenerator('test', data_dir)
generator_test = data_generator_test.flow(batch_size=batch_size)

X_test = np.zeros((len(data_generator_test.images_ids_in_subset),2048))
Y_test = np.zeros((len(data_generator_test.images_ids_in_subset),20))

nb_batches = int(len(data_generator_test.images_ids_in_subset) / batch_size) + 1

for i in range(nb_batches):
    X, y = next(generator_test)
    y_pred = model.predict(X)
    X_test[i*batch_size:(i+1)*batch_size,:] = y_pred
    Y_test[i*batch_size:(i+1)*batch_size,:] = y

In [None]:
outfile = 'DF_ResNet50_VOC2007_test28'
np.savez(outfile, X_train=X_train, Y_train=Y_train,X_test=X_test, Y_test=Y_test)

## Train classifier only

In [None]:
outfile = 'DF_ResNet50_VOC2007_test28.npz'
learning_rate = 0.1
nb_epoch = 20
batch_size = 32

npzfile = np.load(outfile)

X_train = npzfile['X_train']
Y_train = npzfile['Y_train']

X_test = npzfile['X_test']
Y_test = npzfile['Y_test']

print("data \n X_train=", X_train.shape, "Y_train=", Y_train.shape, " X_test=", X_test.shape, "Y_train=", Y_test.shape)

In [None]:
model = Sequential()
model.add(Dense(20, input_dim=2048, name='fc1', activation='sigmoid'))
model.summary()


sgd = SGD(learning_rate)
model.compile(loss='binary_crossentropy',optimizer=sgd,metrics=['binary_accuracy'])

model.fit(X_train, Y_train,batch_size=batch_size, epochs=nb_epoch,verbose=1)

In [None]:
scores = model.evaluate(X_test, Y_test, verbose=0)
print("%s TEST: %.2f%%" % (model.metrics_names[0], scores[0]*100))
print("%s TEST: %.2f%%" % (model.metrics_names[1], scores[1]*100))

In [None]:
y_pred_test = model.predict(X_test)
y_pred_train = model.predict(X_train)
AP_train = np.zeros(20)
AP_test = np.zeros(20)

for c in range(20):
    AP_train[c] = average_precision_score(Y_train[:, c], y_pred_train[:, c])
    AP_test[c] = average_precision_score(Y_test[:, c], y_pred_test[:, c])

print("MAP TRAIN =%.2f", AP_train.mean()*100)
print("MAP TEST =%.2f", AP_test.mean()*100)

## Eval

In [None]:
data_generator_test = PascalVOCDataGenerator('test', data_dir)
len(data_generator_test.images_ids_in_subset)

In [None]:
batch_size = len(data_generator_test.images_ids_in_subset)
generator_test = data_generator_test.flow(batch_size=batch_size)

In [None]:
X_test, Y_test = next(generator_test)

In [None]:
print(X_test.shape, Y_test.shape)

In [None]:
def eval_fn(model, X_test, Y_test):
    y_pred_test = model.predict(X_test)
    #y_pred_train = model.predict(X_train)

    #AP_train = np.zeros(20)
    AP_test = np.zeros(20)
    for c in range(20):
        #AP_train[c] = average_precision_score(Y_train[:, c], y_pred_train[:, c])
        AP_test[c] = metrics.average_precision_score(Y_test[:, c], y_pred_test[:, c])

    #print "MAP TRAIN =", AP_train.mean()*100
    print("MAP TEST =", AP_test.mean()*100)
    print(AP_test)

In [None]:
eval_fn(model, X_test, Y_test)

In [None]:
print(AP_test)

## Finetune

In [None]:
data_dir = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
data_generator_train = PascalVOCDataGenerator('trainval', data_dir, prop=prop)

In [None]:
# vanilla from RCP209
model = ResNet50(include_top=True, weights='imagenet')
model.layers.pop()
# Modify top layers
x = model.layers[-1].output
x = Dense(data_generator_train.nb_classes, activation='sigmoid', name='predictions')(x)
model = Model(inputs=model.input, outputs=x)

In [None]:
# dropping the Dense(1000)
model = ResNet50(include_top=True, weights='imagenet')
x = model.layers[-2].output
x = Dense(data_generator_train.nb_classes, activation='sigmoid', name='predictions')(x)
model = Model(inputs=model.input, outputs=x)

In [None]:
# Laura way
# Load ResNet50 architecture & its weights
input_shape = (224, 224, 3)
resnet = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)

inp = Input(shape=input_shape, name='image_input')
x = resnet(inp)
x = Flatten()(x)
output = Dense(data_generator_train.nb_classes, activation='sigmoid')(x)
model = Model(inputs=inp, outputs=output)

In [None]:
model.summary()

In [None]:
lr = 0.1
# model.compile(loss='binary_crossentropy', optimizer=SGD(lr=lr), metrics=['binary_accuracy'])
loss = BCE()
# loss = PartialBCE(prop / 100)
model.compile(loss=loss, optimizer=SGD(lr=0.01), metrics=['binary_accuracy'])

In [None]:
# test data
data_generator_test = PascalVOCDataGenerator('test', data_dir)
len(data_generator_test.images_ids_in_subset)
batch_size = len(data_generator_test.images_ids_in_subset)
generator_test = data_generator_test.flow(batch_size=batch_size)
X_test, Y_test = next(generator_test)
print(X_test.shape, Y_test.shape)

In [None]:
# callbacks
exp_folder = '/home/caleml/partial_experiments/exp_20190724_1659_TESTNB'
os.makedirs(exp_folder, exist_ok=True)

cb_list = list()
cb_list.append(SaveModel(exp_folder, prop))

data_generator_test = PascalVOCDataGenerator('test', data_dir)
map_cb = MAPCallback(X_test, Y_test, exp_folder, prop)
cb_list.append(map_cb)

# cb_list.append(LearningRateScheduler(lr_scheduler))

In [None]:
batch_size=32
nb_epochs=25

steps_per_epoch_train = int(len(data_generator_train.id_to_label) / batch_size) + 1
model.fit_generator(data_generator_train.flow(batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch_train,
                    epochs=nb_epochs,
                    callbacks=cb_list,
                    verbose=1)

## Dataset comparison

In [None]:
data_dir = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
data_generator_train = PascalVOCDataGenerator('trainval', data_dir)

In [None]:
nico_data = data_generator_train.id_to_label
print(len(nico_data))
print(nico_data['000131'])

In [None]:
trainval_path = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/Annotations/annotations_multilabel_trainval.csv'

In [None]:
laura_data = dict()
with open(trainval_path, 'r') as f_in:
    for line in f_in:
        parts = line.strip().split(',')
        laura_data[parts[0]] = [int(elt) for elt in parts[1:]]

In [None]:
print(len(laura_data))
print(laura_data['000131'])

In [None]:
for id_img, labels in nico_data.items():
    laura_labels = laura_data[id_img]
    converted = [l if l in [0, 1] else 0 for l in laura_labels]
    assert all([converted[i] == labels[i] for i in range(len(labels))])

## Test validation

## Test prior

In [None]:
from model.networks.prior import PriorModel
from model.callbacks.prior_callback import PriorCallback

import tensorflow as tf

In [None]:
exp_folder = '/home/caleml/partial_experiments/exp_20190812_1654_TESTNB'
os.makedirs(exp_folder, exist_ok=True)

In [None]:
# dataset
data_dir = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/'
dataset_train = PascalVOCDataGenerator('trainval', data_dir, prop=prop)

In [None]:
model = PriorModel(exp_folder, dataset_train.nb_classes, prop)
model.load_config('pv_prior50_sgd')
model.build()

In [None]:
# callbacks
prior_cb = PriorCallback()
fetches = [tf.assign(prior_cb.var_y_true, model.targets[0], validate_shape=False),
           tf.assign(prior_cb.var_y_pred, model.outputs[0], validate_shape=False)]
model._function_kwargs = {'fetches': fetches}


In [None]:
# train
steps_per_epoch = int(len(dataset_train.id_to_label) / cfg.BATCH_SIZE) + 1
model.train(dataset_train.flow(batch_size=cfg.BATCH_SIZE), steps_per_epoch=steps_per_epoch, cb_list=cb_list)

## Dataset test

In [None]:
import matplotlib.image as mpimg
from matplotlib import pyplot as plt

In [None]:
data_dir = '/home/caleml/datasets/pascalvoc/VOCdevkit/VOC2007/'

In [None]:
old_dataset = PascalVOCDataGenerator('trainval', data_dir, prop=prop)
new_dataset = PascalVOC(data_dir, 16, 'trainval', x_keys=['image'], y_keys=['multilabel'], p=prop)

In [None]:
# comparison function
def compare(d_old, d_new):
    
    batch1 = next(d_old)
    batch2 = d_new[0]
    print(type(batch1), type(batch2))
    print(len(batch1), len(batch2))
    x1, y1 = batch1
    x2, y2 = batch2
    print(type(x1), type(x2))
    print(type(y1), type(y2))
    print(x1.shape, np.array(x2).shape, len(x2))
    print(y1.shape, np.array(y2).shape, len(y2))
    
    fig = plt.figure()
    a = fig.add_subplot(1,3,1)
    imgplot = plt.imshow(x1[0])
    a = fig.add_subplot(1,3,2)
    imgplot = plt.imshow(x2[0][0])
    a = fig.add_subplot(1,3,3)
    first_img = '/share/DEEPLEARNING/datasets/pascalvoc/VOCdevkit/VOC2007/JPEGImages/000005.jpg'
    img = mpimg.imread(first_img)
    imgplot = plt.imshow(img)
    
    assert np.array_equal(x1, np.array(x2[0]))
    assert np.array_equal(y1, np.array(y2[0]))

In [None]:
compare(old_dataset.flow(batch_size=16), new_dataset)

In [None]:
a = [1, 3, 4, 7, 8, 9]
a[[1,2,4]]

In [None]:
a = np.zeros(6)
a[1] = 6
a[2] = 7
a[-1] = 12
a[0] = 3

b = a / sum(a)

In [None]:
a

In [None]:
print(b, sum(b))

## New finetune with prior

In [59]:
data_dir = '/home/caleml/datasets/pascalvoc/VOCdevkit/VOC2007/'
dataset_train = PascalVOC(data_dir, 16, 'trainval', x_keys=['image'], y_keys=['multilabel'], p=prop)

In [124]:
exp_folder = '/home/caleml/partial_experiments/exp_20190829_1434_TESTNB'
os.makedirs(exp_folder, exist_ok=True)

In [None]:
from model.networks.prior import PriorModel
model = PriorModel(exp_folder, dataset_train.nb_classes, prop)
model.load_config('pv_baseline50_sgd')
model.build()

Init input_shape (448, 448, 3)
Loading options

Loaded config

{'archi': {'classifier': 'resnet50', 'loss': 'bce', 'name': 'baseline'},
 'batch_size': 32,
 'dataset': {'name': 'pascalvoc',
             'path': '/home/caleml/datasets/pascalvoc/VOCdevkit/VOC2007/',
             'test': 'test',
             'train': 'trainval'},
 'training': {'n_epochs': 15, 'optimizer': 'sgd', 'start_lr': 0.1}}





In [25]:
a = dataset_train[1]

target batch [[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1  0 -1 -1 -1  1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1  1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1]
 [-1 -1 -1 -1 -1 -1 -1  1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1  1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1  1 -1 -1 -1  0 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1  0 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -

In [61]:
dataset_test = PascalVOC(data_dir, 4952, 'test', x_keys=['image'], y_keys=['multilabel'])
X_test, Y_test = dataset_test[0]

In [62]:
# callbacks
cb_list = list()

map_cb = MAPCallback(X_test, Y_test, exp_folder, prop)
cb_list.append(map_cb)

cb_list.append(SaveModel(exp_folder, prop))

In [63]:
# actual train
steps_per_epoch = len(dataset_train)
model.train(dataset_train, steps_per_epoch=steps_per_epoch, cb_list=cb_list)

Training with 2 callbacks
Epoch 1/15
interval evaluation - epoch: 0 - mAP score: 0.834503

Trying to save model @epoch=001 to /home/caleml/partial_experiments/exp_20190828_1645_TESTNB/model_100_001.h5
Couldn't save model, saving weights instead at /home/caleml/partial_experiments/exp_20190828_1645_TESTNB/weights_100_001.h5
Epoch 2/15
interval evaluation - epoch: 1 - mAP score: 0.876873

Trying to save model @epoch=002 to /home/caleml/partial_experiments/exp_20190828_1645_TESTNB/model_100_002.h5
Couldn't save model, saving weights instead at /home/caleml/partial_experiments/exp_20190828_1645_TESTNB/weights_100_002.h5
Epoch 3/15
interval evaluation - epoch: 2 - mAP score: 0.892885

Trying to save model @epoch=003 to /home/caleml/partial_experiments/exp_20190828_1645_TESTNB/model_100_003.h5
Couldn't save model, saving weights instead at /home/caleml/partial_experiments/exp_20190828_1645_TESTNB/weights_100_003.h5
Epoch 4/15
interval evaluation - epoch: 3 - mAP score: 0.899928

Trying to sa

## debug

In [49]:
x = np.arange(9).reshape(3,3)

In [50]:
print(x)
print(x.shape)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
(3, 3)


In [51]:
x1 = np.repeat(x[None,...],10,0)

In [52]:
x1

array([[[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]],

       [[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]]])

In [53]:
x1.shape

(10, 3, 3)

In [54]:
x1[0]

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [None]:
from tensorflow.keras import backend as K

In [74]:
def kullback_leibler_div(p, q):
    p = K.clip(p, K.epsilon(), 1)
    q = K.clip(q, K.epsilon(), 1)
    return K.sum(p * K.log(p / q), axis=-1)

In [108]:
def kullback_leibler_div2(c):
    p = c[0]
    q = c[1]
    p = K.clip(p, K.epsilon(), 1)
    q = K.clip(q, K.epsilon(), 1)
    return K.sum(p * K.log(p / q), axis=-1)

In [67]:
kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32')
K.eval(kvar)

array([[1., 2.],
       [3., 4.]], dtype=float32)

In [85]:
np_cooc = np.array([[0.06060606, 0.03030303, 0.75757576],
 [0.00456621, 0.00913242, 0.03652968],
 [0.11764706, 0.02941176, 0.05882353]])
np_logits = np.array([0.7, 0.15, 0.15])

fake_cooc = K.variable(np_cooc, dtype='float32')
fake_logits = K.variable(np_logits, dtype='float32')

In [117]:
fake_logits.shape

TensorShape([Dimension(3)])

In [83]:
K.eval([kullback_leibler_div(fake_cooc[i], fake_logits) for i in range(3)])

AttributeError: 'list' object has no attribute 'eval'

In [109]:
broad_logits = tf.tile(tf.expand_dims(fake_logits, 0), [3, 1])
broad_logits.shape
# out = tf.map_fn(kullback_leibler_div2, (fake_cooc, broad_logits), dtype=(tf.float32, tf.float32))

ValueError: The two structures don't have the same nested structure.

First structure: type=tuple str=(tf.float32, tf.float32)

Second structure: type=Tensor str=Tensor("map_4/while/Sum:0", shape=(), dtype=float32)

More specifically: Substructure "type=tuple str=(tf.float32, tf.float32)" is a sequence, while substructure "type=Tensor str=Tensor("map_4/while/Sum:0", shape=(), dtype=float32)" is not
Entire first structure:
(., .)
Entire second structure:
.

In [106]:
K.eval(out)

TypeError: kullback_leibler_div() missing 1 required positional argument: 'q'

In [101]:
K.eval(tf.concat([fake_cooc, broad_logits], axis=0))

array([[0.06060606, 0.03030303, 0.75757575],
       [0.00456621, 0.00913242, 0.03652968],
       [0.11764706, 0.02941176, 0.05882353],
       [0.7       , 0.15      , 0.15      ],
       [0.7       , 0.15      , 0.15      ],
       [0.7       , 0.15      , 0.15      ]], dtype=float32)

In [96]:
K.eval(tf.tile(tf.expand_dims(fake_logits, 0), [3, 1]))

array([[0.7 , 0.15, 0.15],
       [0.7 , 0.15, 0.15],
       [0.7 , 0.15, 0.15]], dtype=float32)

In [None]:
K.eval(out)

In [122]:
matrix = tf.keras.layers.Input(shape=(3,3))
logits = tf.keras.layers.Input(shape=(3,))
print(logits.shape)
print(matrix.shape)

# broad_logits = tf.tile(tf.expand_dims(logits, 0), [3, 1])
broad_logits = tf.tile(logits, [3, 1])
print(broad_logits.shape)
out = tf.map_fn(kullback_leibler_div2, (matrix, broad_logits), dtype=(tf.float32, tf.float32))

test_model = tensorflow.keras.models.Model(inputs=[input1, input2], outputs=out)

(?, 3)
(?, 3, 3)
(?, 3)


ValueError: The two structures don't have the same nested structure.

First structure: type=tuple str=(tf.float32, tf.float32)

Second structure: type=Tensor str=Tensor("map_7/while/Sum:0", shape=(3,), dtype=float32)

More specifically: Substructure "type=tuple str=(tf.float32, tf.float32)" is a sequence, while substructure "type=Tensor str=Tensor("map_7/while/Sum:0", shape=(3,), dtype=float32)" is not
Entire first structure:
(., .)
Entire second structure:
.