In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import PIL
import os
import sys
import h5py 

  from ._conv import register_converters as _register_converters


In [2]:
#dataset = h5py.File('data-train.h5', 'r')
#dataset_val = h5py.File('data-val.h5', 'r')

classes=['n02085620', # chiuaua 1075
         'n02099601', # golden retriever 967
         'n02165456', # ladybug 1574 
         'n02676566', # acoustic guitar 1083
         'n02701002', # ambulance 249
         'n02871525', # bookshop 1050 
         'n02927161', # butcher 1026
         'n03000134', # chainlink fence 1239
         'n03042490', # cliff dwelling 1335
         'n03089624', # confectionery 
        ]

In [12]:
def print_info(name, obj):
    print name 
#dataset.visititems(print_info)

In [13]:
def get_data(dataset, classes):
    data=[]
    labels=[]
    l=0
    for c in classes:
        data.append(np.asarray(dataset[c][:]))
        labels.append(np.asarray([l]* len(dataset[c])))
        #import pdb; pdb.set_trace()
        l+=1
    data = np.concatenate([d for d in data])
    labels = np.concatenate([l for l in labels])
    return data, labels

In [14]:
def get_imgnt_datasets(classes, path_to_train, path_to_val):
    print path_to_train, path_to_val
    dataset = h5py.File(path_to_train, 'r')
    dataset_val = h5py.File(path_to_val, 'r')
    #for c in classes:
        #print dataset[c], dataset_val[c]
    x_train, y_train = get_data(dataset, classes)
    x_val, y_val = get_data(dataset_val, classes)
    return (x_train, y_train), (x_val, y_val)

In [15]:
import keras

class ImageNet10Random():
    '''
    Params
    corrupted: float
      Default 0.0
    num_classes: int
      Default 10.
    '''
    
    def __init__(self, label_corrupt_p=0.0, gaussian_noise_f = 0.0, classes=[], path_to_train='', path_to_val='', **kwargs):
        #super(CIFAR10Random, self).__init__(**kwargs)
        #import pdb; pdb.set_trace()
        (self.x_train, self.y_train), (self.x_test, self.y_test) = get_imgnt_datasets(classes, path_to_train, path_to_val)
        self.num_classes = len(classes)
        # note: corruption is performed on the training set. 
        # you test on real data to check generalization
        if label_corrupt_p > 0.0:
            self.label_corrupt(label_corrupt_p)
        if gaussian_noise_f > 0.0:
            self.gaussian_noise(gaussian_noise_f)
    
    def label_corrupt(self, corrupted):
        # Corrupts the labels in the training set according to
        # the specified corruption probability
        labels=np.array(self.y_train)
        #labels = np.reshape(len(labels),1)
        np.random.seed(1)
        mask = np.random.rand(len(labels)) <= corrupted
        rnd_labels = np.random.choice(self.num_classes, mask.sum())
        #rnd_labels = np.reshape(rnd_labels, (len(rnd_labels),1))
        #rnd_labels = np.reshape(rnd_labels, (len(rnd_labels),1))
        labels[mask] = rnd_labels
        labels = [int(x) for x in labels]
        # corruption
        self.y_train = labels
        
    def gaussian_noise(self, gaussian_noise_f):
        # Adds Gaussian Noise to the images,
        # matching the real dataset's mean and variance
        data = np.array(self.x_train)
        mean = np.mean(data)
        var = np.std(data)
        sigma = var**0.5
        n_samples, row, col, ch = data.shape
        mask = np.random.rand(n_samples) <= gaussian_noise_f
        gaussian = np.random.normal(mean, sigma, (row, col, ch))
        gaussian = gaussian.reshape(row, col, ch)
        noisy_imgs = [x+gaussian for x in data[mask]]
        data[mask] = noisy_imgs
        self.x_train = data

In [17]:
imgnet04=ImageNet10Random(classes=classes,  label_corrupt_p=0.4, path_to_train='./data-train.h5', path_to_val='./data-val.h5')

./data-train.h5 ./data-val.h5


In [None]:
plt.imshow(imgnet04.x_train[10])

In [None]:
imgnet04.y_train[10]

In [16]:
imgnet=ImageNet10Random(classes=classes, path_to_train='./data-train.h5', path_to_val='./data-val.h5')

./data-train.h5 ./data-val.h5


In [17]:
imgnet.x_train.shape

(12775, 299, 299, 3)

In [18]:
### NOTE: modified to do Imagenet well
class CNN():
    '''
    Convolutional Neural Network for experiments on CIFAR
    input, crop(2,2), 
    conv(200,5,5), bn, relu, maxpool(3,3), 
    conv(200,5,5), bn, relu, maxpool(3,3),
    dense(384), bn, relu,
    dense(192), bn, relu,
    dense(n_classes), softmax
    
    Params
    deep: int (how many convolution blocks)
      Default 2
    wide: int (how many neurons in the first dense connection)
      Default 512
    optimizer: string
      Default SGD
    lr: float
      Default 1e-2
    epochs: int
      Default 10
    batch_size: int
      Default: 32
    input_shape: int
      Default 32
    n_classes: int
      Default 10
    '''
    
    def __init__(self, deep=2, wide=384, optimizer='SGD', lr=1e-2, epochs=9, 
                 batch_size=14, input_shape=299, n_classes=10, **kwargs):
        
        #mask_shape = np.ones((1,512))
        #mask = keras.backend.variable(mask_shape)

        cnn = keras.models.Sequential()
        cnn.add(keras.layers.Cropping2D(cropping=((36,36),(36,36)), input_shape=(299,299,3)))
        counter = 0
        while counter<deep:
            cnn.add(keras.layers.Conv2D(200, (5,5)))
            cnn.add((keras.layers.BatchNormalization()))
            cnn.add(keras.layers.Activation('relu'))
            if counter<2:
                cnn.add(keras.layers.MaxPool2D(pool_size=(3,3)))
            counter+=1
        cnn.add(keras.layers.GlobalAveragePooling2D())
        #cnn.add(keras.layers.Flatten())
        cnn.add(keras.layers.Dense(wide))
        cnn.add(keras.layers.BatchNormalization())
        cnn.add(keras.layers.Activation('relu'))
        cnn.add(keras.layers.Dense(wide/2))
        cnn.add(keras.layers.BatchNormalization())
        cnn.add(keras.layers.Activation('relu'))
        
        loss_function = 'categorical_crossentropy'
        activation = 'softmax'
        if n_classes == 2:
            loss_function = 'binary_crossentropy'
            activation = 'sigmoid'
        cnn.add(keras.layers.Dense(n_classes, activation=keras.layers.Activation(activation)))

        #masking_layer = keras.layers.Lambda(lambda x: x*mask)(bmlp.layers[-2].output)
        #if n_hidden_layers>1:
        #    while n_hidden_layers!=1:
        #        masking_layer= keras.layers.Dense(512, activation=keras.layers.Activation('sigmoid'))(masking_layer)
        #        n_hidden_layers-=1
        #decision_layer = keras.layers.Dense(10, activation=keras.layers.Activation('softmax'))(masking_layer)
        #masked_model = keras.models.Model(input= bmlp.input, output=decision_layer)
        model = keras.models.Model(input=cnn.input, output=cnn.output)
        model.compile(optimizer=optimizer,
                      loss=loss_function,
                      metrics=['accuracy'])
        self.model = model
        self.epochs = epochs
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.deep=deep
        
    
    def train(self, dataset):
        #import pdb; pdb.set_trace()
        x_train = dataset.x_train
        y_train = dataset.y_train
        x_train = x_train / 255.0
        x_train -= np.mean(x_train)
        np.random.seed(0)
        idxs_train = np.arange(len(x_train))
        np.random.shuffle(idxs_train)
        x_train = np.asarray(x_train[idxs_train])
        y_train = y_train[idxs_train]

        x_test = dataset.x_test
        y_test = dataset.y_test
        x_test = x_test / 255.0
        x_test -= np.mean(x_test)
        idxs_test = np.arange(len(x_test))
        np.random.shuffle(idxs_test)
        x_test = np.asarray(x_test[idxs_test])
        y_test = y_test[idxs_test]
        
        
        try:
            shape1, shape2 = y_train.shape()
        except:
            y_train = keras.utils.to_categorical(y_train, self.n_classes)
        try:
            shape1, shape2 = y_test.shape()
        except:
            y_test = keras.utils.to_categorical(y_test, self.n_classes)
        history=self.model.fit(x_train, y_train, epochs=self.epochs, batch_size=self.batch_size, validation_data=(x_test, y_test))
        self.training_history=history
    
    def train_and_compute_rcvs(self, dataset):
        #import pdb; pdb.set_trace()
        
        x_train = dataset.x_train/255.
        x_train -= np.mean(x_train)
        y_train = dataset.y_train
        np.random.seed(0)
        idxs_train = np.arange(len(x_train))
        np.random.shuffle(idxs_train)
        x_train = np.asarray(x_train[idxs_train])
        y_train= np.asarray(y_train)
        y_train = y_train[idxs_train]
        #if x_train
        #x_train = x_train / 255.0

        try:
            shape1, shape2 = y_train.shape()
        except:
            y_train = keras.utils.to_categorical(y_train)

        history=[]
        embeddings=[]
        batch_size=self.batch_size
        print self.model.summary()
        
        #import pdb; pdb.set_trace()
        #layers_of_interest = [layer.name for layer in self.model.layers[2:-1]]
        if self.deep==2:
            layer_idxs = [9,13,16]
        if self.deep==3:
             layer_idxs = [9,12,14]
        if self.deep==4:
             layer_idxs = [9,12,15,19,22]
        if self.deep==5:
             layer_idxs = [9,12,15,18,22,25]
            
        layers_of_interest = [self.model.layers[layer_idx].name for layer_idx in layer_idxs]
        print 'loi', layers_of_interest
        self.model.metrics_tensors += [layer.output for layer in self.model.layers if layer.name in layers_of_interest]
        epoch_number = 0

        
        n_batches = len(x_train)/self.batch_size
        remaining = len(x_train)-n_batches * self.batch_size
        #if epoch_number > 1:
        while epoch_number <= self.epochs:
            print epoch_number
            batch_number = 0
            embedding_=[]

            for l in layers_of_interest:
                print 'in layer ', l
                print 'output shape ', self.model.get_layer(l).output.shape
                print 'metrics tensors, ', self.model.metrics_tensors
                if len(self.model.get_layer(l).output.shape)<=2:
                    space = np.zeros((len(x_train), self.model.get_layer(l).output.shape[-1]))
                else:
                    x = self.model.get_layer(l).output.shape[-3]
                    y = self.model.get_layer(l).output.shape[-2]
                    z = self.model.get_layer(l).output.shape[-1]
                    space = np.zeros((len(x_train), x*y*z))

                embedding_.append(space)
            while batch_number <= n_batches:

                outs=self.model.train_on_batch(
                    x_train[batch_number*batch_size:batch_number*batch_size + batch_size], 
                    y_train[batch_number*batch_size:batch_number*batch_size + batch_size])
                #import pdb;pdb.set_trace()
                #print out[0]
                #import pdb; pdb.set_trace()
                embedding_[0][batch_number*batch_size: batch_number*batch_size+batch_size]= outs[2].reshape((min(batch_size,len(outs[2])),-1))
                embedding_[1][batch_number*batch_size: batch_number*batch_size+batch_size]=outs[3].reshape((len(outs[3]),-1))
                embedding_[2][batch_number*batch_size: batch_number*batch_size+batch_size]=outs[4].reshape((len(outs[4]),-1))
                #embedding_[3][batch_number*batch_size: batch_number*batch_size+batch_size]=outs[5].reshape((len(outs[5]),-1))

                #print outs, outs
                history.append(outs[0])
                batch_number+=1
            #import pdb; pdb.set_trace()
            source = '/mnt/nas2/results/IntermediateResults/Mara/probes/imagenet/2H_lcp0.4'
            c=0
            if True:
                for l in layers_of_interest:
                    if 'max_pooling' in l:
                        #import pdb; pdb.set_trace()
                        tosave_= np.mean(embedding_[c].reshape(12775, 23*23,200), axis=1)
                        np.save('{}/imagenet_training_emb_e{}_l{}'.format(source,epoch_number, l), tosave_)
                        #np.mean(embedding_[0].reshape(12775, 31*31,200), axis=1).shape
                    else:
                        np.save('{}/imagenet_training_emb_e{}_l{}'.format(source,epoch_number, l), embedding_[c])
                    c+=1
            del embedding_
            #embeddings.append(embedding_) 
            epoch_number +=1
        self.training_history=history
        self.embeddings = embeddings
    
        
    def save(self, name, folder):
        try:
            os.listdir(folder)
        except:
            os.mkdir(folder)
            
        #model_json = self.model.to_json()
        #with open(folder+"/"+name+".json", "w") as json_file:
        #    json_file.write(model_json) 
        
        # serialize weights to HDF5
        self.model.save_weights(folder+"/"+name+".h5")
        print("Saved model to disk")
        np.save(folder+'/'+name+'_history', self.training_history.history)

In [19]:
model=CNN(deep=2)

  identifier=identifier.__class__.__name__))


In [20]:
model.train(imgnet)
model.save('imgnet_model', './')

Train on 12775 samples, validate on 494 samples
Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
Saved model to disk


In [None]:
model.model.summary()

In [16]:
model.train_and_compute_rcvs(imgnet)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cropping2d_3_input (InputLay (None, 299, 299, 3)       0         
_________________________________________________________________
cropping2d_3 (Cropping2D)    (None, 227, 227, 3)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 223, 223, 200)     15200     
_________________________________________________________________
batch_normalization_9 (Batch (None, 223, 223, 200)     800       
_________________________________________________________________
activation_11 (Activation)   (None, 223, 223, 200)     0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 74, 74, 200)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 70, 70, 200)       1000200   
__________

6
in layer  max_pooling2d_6
output shape  (?, 23, 23, 200)
metrics tensors,  [<tf.Tensor 'metrics_2/acc/Mean:0' shape=() dtype=float32>, <tf.Tensor 'max_pooling2d_6/MaxPool:0' shape=(?, 23, 23, 200) dtype=float32>, <tf.Tensor 'activation_13/Relu:0' shape=(?, 384) dtype=float32>, <tf.Tensor 'activation_14/Relu:0' shape=(?, 192) dtype=float32>]
in layer  activation_13
output shape  (?, 384)
metrics tensors,  [<tf.Tensor 'metrics_2/acc/Mean:0' shape=() dtype=float32>, <tf.Tensor 'max_pooling2d_6/MaxPool:0' shape=(?, 23, 23, 200) dtype=float32>, <tf.Tensor 'activation_13/Relu:0' shape=(?, 384) dtype=float32>, <tf.Tensor 'activation_14/Relu:0' shape=(?, 192) dtype=float32>]
in layer  activation_14
output shape  (?, 192)
metrics tensors,  [<tf.Tensor 'metrics_2/acc/Mean:0' shape=() dtype=float32>, <tf.Tensor 'max_pooling2d_6/MaxPool:0' shape=(?, 23, 23, 200) dtype=float32>, <tf.Tensor 'activation_13/Relu:0' shape=(?, 384) dtype=float32>, <tf.Tensor 'activation_14/Relu:0' shape=(?, 192) dtype

In [21]:
model.train_and_compute_rcvs(imgnet04)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cropping2d_4_input (InputLay (None, 299, 299, 3)       0         
_________________________________________________________________
cropping2d_4 (Cropping2D)    (None, 227, 227, 3)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 223, 223, 200)     15200     
_________________________________________________________________
batch_normalization_13 (Batc (None, 223, 223, 200)     800       
_________________________________________________________________
activation_16 (Activation)   (None, 223, 223, 200)     0         
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 74, 74, 200)       0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 70, 70, 200)       1000200   
__________

4
in layer  max_pooling2d_8
output shape  (?, 23, 23, 200)
metrics tensors,  [<tf.Tensor 'metrics_3/acc/Mean:0' shape=() dtype=float32>, <tf.Tensor 'max_pooling2d_8/MaxPool:0' shape=(?, 23, 23, 200) dtype=float32>, <tf.Tensor 'activation_18/Relu:0' shape=(?, 384) dtype=float32>, <tf.Tensor 'activation_19/Relu:0' shape=(?, 192) dtype=float32>, <tf.Tensor 'max_pooling2d_8/MaxPool:0' shape=(?, 23, 23, 200) dtype=float32>, <tf.Tensor 'activation_18/Relu:0' shape=(?, 384) dtype=float32>, <tf.Tensor 'activation_19/Relu:0' shape=(?, 192) dtype=float32>]
in layer  activation_18
output shape  (?, 384)
metrics tensors,  [<tf.Tensor 'metrics_3/acc/Mean:0' shape=() dtype=float32>, <tf.Tensor 'max_pooling2d_8/MaxPool:0' shape=(?, 23, 23, 200) dtype=float32>, <tf.Tensor 'activation_18/Relu:0' shape=(?, 384) dtype=float32>, <tf.Tensor 'activation_19/Relu:0' shape=(?, 192) dtype=float32>, <tf.Tensor 'max_pooling2d_8/MaxPool:0' shape=(?, 23, 23, 200) dtype=float32>, <tf.Tensor 'activation_18/Relu:0' sh