In [1]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import roc_curve, auc, fbeta_score
from sklearn.model_selection import train_test_split
from opt import RAdam
import os

# Expose only CUDA device 7 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = '7'

# Select the non-interactive Agg backend BEFORE pyplot is imported: the
# backend is chosen when pyplot loads, so calling use() afterwards is
# ignored (with a warning) on older matplotlib releases.
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import efficientnet.keras as efn
from keras.models import Model
from keras.optimizers import Adam
from keras.layers import BatchNormalization, Input, Dense, MaxPooling2D, Conv2D, Flatten, Concatenate, Dropout, UpSampling2D, Reshape, Add
from keras.layers.core import Activation, Layer
from keras.callbacks import ModelCheckpoint
from keras.utils import multi_gpu_model
from keras.preprocessing.image import ImageDataGenerator

import keras.backend as K
# Install a TensorFlow session with allow_growth enabled as the Keras backend
# session, so GPU memory is allocated on demand instead of all at start-up.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
K.tensorflow_backend.set_session(tf.Session(config=config))

Using TensorFlow backend.


In [2]:
print("Importing Dataset")

# Directory that holds the pre-processed .npy arrays.
data_dir = '/home/dados4t/DataChallenge2/'

# Two image stacks, one per band. Later cells read axes 1..3 of each array to
# size the network inputs, so both must be 4-D
# (presumably (n_samples, height, width, channels) — TODO confirm).
images_y = np.load(os.path.join(data_dir,'test_images_y_normalized_resized.npy'))
images_vis = np.load(os.path.join(data_dir,'test_images_vis_normalized_resized.npy'))
# Disabled supervised path, kept for reference: no labels are loaded and no
# train/test split is made in this notebook.
#is_lens = np.load(os.path.join(data_dir,'Y.npy'))

#X_train_y, X_test_y, Y_train, Y_test = train_test_split(images_y, is_lens, test_size = 0.10, random_state = 7)
#X_train_vis, X_test_vis, Y_train, Y_test = train_test_split(images_vis, is_lens, test_size = 0.10, random_state = 7)
#del images_vis
#del images_y
#print(X_train_y.shape)
#print(X_train_vis.shape)

Importing Dataset


In [3]:
# Progress marker: the cells below define the network building blocks.
print("Building Model")

def Residual(filters, out, activation, skip):
    """Build one down-sampling residual stage on top of `skip`.

    The stage stacks three identical units (3x3 "same" convolution ->
    batch-norm -> activation -> batch-norm), adds the stage input back onto
    the result, halves the spatial size with 2x2 max-pooling and finally
    projects to `out` channels with a 1x1 convolution.

    Arguments:
        filters: number of filters of every 3x3 convolution.
        out: number of filters of the closing 1x1 convolution.
        activation: activation name/callable handed to `Activation`.
        skip: input tensor; it is also the shortcut branch of the addition.
    Return:
        the batch-normalised output tensor of the 1x1 projection.
    """
    x = skip

    # Three conv units; layers are created in the same order as a fully
    # unrolled version would create them.
    for _ in range(3):
        x = Conv2D(filters, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation(activation)(x)
        x = BatchNormalization()(x)

    # Shortcut connection followed by normalisation.
    x = Add()([skip, x])
    x = BatchNormalization()(x)

    # Spatial down-sampling by a factor of two.
    x = MaxPooling2D((2, 2))(x)
    x = BatchNormalization()(x)

    # Channel projection to the requested output width.
    x = Conv2D(out, (1, 1), padding="same")(x)
    x = BatchNormalization()(x)

    return x
    
def ResidualUp(filters, activation,skip):
    """Build one up-sampling residual stage on top of `skip`.

    The input is batch-normalised, projected to `filters` channels with a 1x1
    convolution, normalised again and up-sampled by 2x2. Three units of
    (batch-norm -> activation -> batch-norm -> 3x3 "same" convolution)
    follow. The output of the first batch-norm after the up-sampling is kept
    as the shortcut and added to the batch-normalised result of the last
    convolution.

    Arguments:
        filters: number of filters of the 1x1 and of every 3x3 convolution.
        activation: activation name/callable handed to `Activation`.
        skip: input tensor of the stage.
    Return:
        the tensor produced by the final addition.
    """
    # 1x1 projection, then 2x spatial up-sampling.
    x = BatchNormalization()(skip)
    x = Conv2D(filters, (1, 1), padding="same")(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((2, 2))(x)

    shortcut = None
    for _ in range(3):
        x = BatchNormalization()(x)
        if shortcut is None:
            # The first normalised tensor after up-sampling is the shortcut.
            shortcut = x
        x = Activation(activation)(x)
        x = BatchNormalization()(x)
        x = Conv2D(filters, (3, 3), padding="same")(x)

    x = BatchNormalization()(x)
    return Add()([shortcut, x])

Building Model


In [4]:
from keras.engine.topology import Layer, InputSpec
class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.

    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` which represents the initial cluster centers.
        alpha: degrees of freedom parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        # Accept the legacy `input_dim` keyword by translating it to `input_shape`.
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        """Create the `(n_clusters, n_features)` centroid matrix and apply initial weights, if any."""
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        # Pass name/shape by keyword: the first positional parameter of
        # add_weight is the weight name, so a positional shape only works
        # through Keras' legacy-argument shim.
        self.clusters = self.add_weight(name='clusters',
                                        shape=(self.n_clusters, input_dim),
                                        initializer='glorot_uniform')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
         Measure the similarity between embedded point z_i and centroid µ_j.
                 q_ij = 1/(1+dist(x_i, µ_j)^2), then normalize it.
                 q_ij can be interpreted as the probability of assigning sample i to cluster j.
                 (i.e., a soft assignment)
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        # Squared Euclidean distance of every sample to every centroid: (n_samples, n_clusters).
        sq_dist = K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2)
        q = 1.0 / (1.0 + sq_dist / self.alpha)
        q **= (self.alpha + 1.0) / 2.0
        # Normalise each row so the n_clusters values of a sample add up to 1.
        q = q / K.sum(q, axis=1, keepdims=True)
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        # Include alpha so a model rebuilt from its config keeps a non-default
        # value instead of silently falling back to 1.0.
        config = {'n_clusters': self.n_clusters, 'alpha': self.alpha}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [5]:
# Builds the two-branch convolutional autoencoder graph (VIS band and Y band).
# Layer creation order is left untouched because pretrained weights are loaded
# into this graph in a later cell.
activation = "elu"


# --- VIS encoder: input sized from the loaded array (axes 1..3). ---
inp_vis = Input((images_vis.shape[1], images_vis.shape[2], images_vis.shape[3]))

# Six Residual stages; each one ends with a 2x2 max-pool, so the spatial size
# is halved six times. The last stage projects to 20 channels.
res_vis = Residual(16, 32, activation, inp_vis)
res0_vis = Residual(32, 64, activation, res_vis)
res1_vis = Residual(64, 128, activation,res0_vis)
res2_vis = Residual(128, 256, activation,res1_vis)
res3_vis = Residual(256, 512,activation,res2_vis)
res4_vis = Residual(512, 20,activation,res3_vis)
flat_vis = Flatten()(res4_vis)

# --- Y encoder: same stack of stages applied to the Y-band input. ---
inp_y = Input((images_y.shape[1], images_y.shape[2], images_y.shape[3]))

res_y = Residual(16, 32, activation, inp_y)
res0_y = Residual(32, 64, activation, res_y)
res1_y = Residual(64, 128, activation,res0_y)
res2_y = Residual(128, 256, activation,res1_y)
res3_y = Residual(256, 512,activation,res2_y)
res4_y = Residual(512, 20,activation,res3_y)
flat_y = Flatten()(res4_y)

# Merge both flattened feature vectors into a single one.
concat = Concatenate()([flat_vis, flat_y])

dense1 = Dense(200, activation=activation)(concat)
bn_dense1 = BatchNormalization()(dense1)
dense2 = Dense(200, activation=activation)(bn_dense1)
bn_dense2 = BatchNormalization()(dense2)

# 180-dimensional linear latent vector; this tensor is the encoder output used
# for clustering in later cells.
lat_space = Dense(180, activation = "linear")(bn_dense2)

# Shared dense part of the decoder.
up_bn_dense2 = BatchNormalization()(lat_space)
up_dense2 = Dense(200, activation=activation)(up_bn_dense2)
up_bn_dense1 = BatchNormalization()(up_dense2)
up_dense1 = Dense(200, activation=activation)(up_bn_dense1)

#VIS
# 180 units = 3*3*20, reshaped into a 3x3 map with 20 channels.
dense_vis = Dense(180)(up_dense1)
reshape_vis = Reshape((3,3,20))(dense_vis)

# Six ResidualUp stages, each up-sampling by 2: 3 -> 192 pixels per side
# (so the VIS images are presumably 192x192 — TODO confirm).
res_up_vis4 = ResidualUp(512, activation, reshape_vis)
res_up_vis3 = ResidualUp(256, activation, res_up_vis4)
res_up_vis2 = ResidualUp(128, activation, res_up_vis3)
res_up_vis1 = ResidualUp(64, activation, res_up_vis2)
res_up_vis0 = ResidualUp(32, activation, res_up_vis1)
res_up_vis = ResidualUp(16, activation, res_up_vis0)

# Single-channel VIS reconstruction (1x1 convolution, no activation).
y_hat_bn_vis = BatchNormalization()(res_up_vis)
y_hat_vis = Conv2D(1, (1,1), padding="same")(y_hat_bn_vis)


#Y
# 20 units reshaped into a 1x1 map with 20 channels.
dense_y = Dense(20)(up_dense1)
reshape_y = Reshape((1,1,20))(dense_y)

# Six ResidualUp stages, each up-sampling by 2: 1 -> 64 pixels per side
# (so the Y images are presumably 64x64 — TODO confirm).
res_up_y4 = ResidualUp(512, activation, reshape_y)
res_up_y3 = ResidualUp(256, activation, res_up_y4)
res_up_y2 = ResidualUp(128, activation, res_up_y3)
res_up_y1 = ResidualUp(64, activation, res_up_y2)
res_up_y0 = ResidualUp(32, activation, res_up_y1)
res_up_y = ResidualUp(16, activation, res_up_y0)

# Single-channel Y reconstruction (1x1 convolution, no activation).
y_hat_bn_y = BatchNormalization()(res_up_y)
y_hat_y = Conv2D(1, (1,1), padding="same")(y_hat_bn_y)

# The Model objects are created in the next cell.
#model = Model([inp_vis, inp_y], [y_hat_vis, y_hat_y])















In [6]:
# Full autoencoder (both inputs -> both reconstructions); its weights are
# restored from a checkpoint file expected in the working directory.
autoencoder = Model([inp_vis, inp_y], [y_hat_vis, y_hat_y], name="AE")
autoencoder.load_weights("AUTOENCODER.hdf5")
# Encoder view on the same layers: both inputs -> latent vector.
encoder = Model([inp_vis, inp_y], lat_space, name= "encoder")
n_clusters = 2
# Soft cluster-assignment head on top of the latent vector.
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
# Joint model with three outputs, in this order: soft assignments,
# VIS reconstruction, Y reconstruction.
model = Model(inputs=encoder.input,
                           outputs=[clustering_layer, autoencoder.output[0], autoencoder.output[1]])

In [7]:
from sklearn.cluster import KMeans
# Initialise the clustering layer with k-means centroids computed on the
# encoder's latent vectors. A fixed random_state makes this initialisation
# (and therefore the starting assignments) reproducible across runs.
kmeans = KMeans(n_clusters=n_clusters, n_init=20, random_state=7)
y_pred = kmeans.fit_predict(encoder.predict([images_vis, images_y]))
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
# Baseline hard assignments, compared against later predictions by the
# training loop's stop criterion.
y_pred_last = np.copy(y_pred)


In [8]:
# One loss per model output, in output order: KL divergence for the soft
# cluster assignments (weight 0.1) and MSE for each reconstruction (weight 1).
model.compile(loss=['kld', 'mse', 'mse'], loss_weights=[0.1, 1, 1], optimizer=RAdam())





In [9]:
import metrics
from keras.initializers import VarianceScaling
from time import time
# Settings and mutable state consumed by the training loop cell.
maxiter = 8000  # upper bound on the number of train_on_batch steps
update_interval = 140  # steps between refreshes of the target distribution p
loss = 0  # last value returned by train_on_batch
index = 0  # position of the current mini-batch within index_array
index_array = np.arange(images_vis.shape[0])
tol = 0.001 # tolerance threshold to stop training
save_dir = "final_model/"  # checkpoint folder; concatenated directly with the file name
batch_size = 25
y = None  # no ground-truth labels, so the acc/nmi/ari blocks are skipped
# computing an auxiliary target distribution
def target_distribution(q):
    """Compute the auxiliary target distribution p from soft assignments q.

    Each entry is p_ij = (q_ij^2 / f_j) / sum_k (q_ik^2 / f_k), where
    f_j = sum_i q_ij is the total mass of cluster j.

    Arguments:
        q: 2-D array of soft assignments, shape (n_samples, n_clusters).
    Return:
        array of the same shape whose rows are the normalised targets.

    Denominators are clamped to the smallest positive normal float so that an
    empty cluster (f_j == 0) yields 0 for its column instead of 0/0 = NaN,
    which would otherwise spread to every entry of the affected rows.
    """
    q = np.asarray(q)
    float_dtype = q.dtype if np.issubdtype(q.dtype, np.floating) else np.float64
    tiny = np.finfo(float_dtype).tiny

    cluster_mass = np.maximum(q.sum(axis=0), tiny)
    weight = q ** 2 / cluster_mass
    row_total = np.maximum(weight.sum(axis=1, keepdims=True), tiny)
    return weight / row_total

In [11]:
# Joint clustering + reconstruction training.
# Every `update_interval` steps the soft assignments q are recomputed on the
# whole dataset, the target distribution p is refreshed, a checkpoint is
# written and the stop criterion (fraction of changed hard labels < tol) is
# evaluated. In between, one mini-batch is trained per step.

# Create the checkpoint folder up front so model.save cannot fail on a missing directory.
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
checkpoint_path = save_dir + 'conv_b_DEC_model_final_22.hdf5'
n_samples = images_vis.shape[0]

for ite in range(int(maxiter)):
    if ite % update_interval == 0:
        # Only the soft assignments are needed; the reconstructions are dropped right away.
        q = model.predict([images_vis, images_y], verbose=0)[0]
        # Stop before checkpointing if training diverged: NaN assignments
        # would otherwise give an all-zero y_pred, look like convergence and
        # overwrite the last good checkpoint.
        if not np.all(np.isfinite(q)):
            raise FloatingPointError(
                'Non-finite soft assignments at iteration %d; training diverged. '
                'The previous checkpoint was left untouched.' % ite)
        model.save(checkpoint_path)
        p = target_distribution(q)  # update the auxiliary target distribution p
        print(q.shape, q[:3])
        print(p.shape, p[:3])
        # evaluate the clustering performance
        y_pred = q.argmax(1)
        if y is not None:
            acc = np.round(metrics.acc(y, y_pred), 5)
            nmi = np.round(metrics.nmi(y, y_pred), 5)
            ari = np.round(metrics.ari(y, y_pred), 5)
            loss = np.round(loss, 5)
            print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)

        # check stop criterion
        delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
        y_pred_last = np.copy(y_pred)
        if ite > 0 and delta_label < tol:
            print('delta_label ', delta_label, '< tol ', tol)
            print('Reached tolerance threshold. Stopping training.')
            break
    idx = index_array[index * batch_size: min((index + 1) * batch_size, n_samples)]
    loss = model.train_on_batch(x=[images_vis[idx], images_y[idx]], y=[p[idx], images_vis[idx], images_y[idx]])
    # Wrap around once the batch just used reached the end of the data. With
    # `<=` the index advanced one step too far whenever n_samples is an exact
    # multiple of batch_size, producing an empty batch on the next step.
    index = index + 1 if (index + 1) * batch_size < n_samples else 0
    print(loss)
else:
    # maxiter was exhausted without hitting the tolerance: persist the weights
    # trained since the last periodic checkpoint.
    model.save(checkpoint_path)

(99991, 2) [[9.99902368e-01 9.76456722e-05]
 [9.99875307e-01 1.24714017e-04]
 [9.99886155e-01 1.13842136e-04]]
(99991, 2) [[9.9999982e-01 1.9648841e-07]
 [9.9999964e-01 3.2054197e-07]
 [9.9999976e-01 2.6708594e-07]]
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


[0.014661498, 0.034868937, 0.005899871, 0.0052747335]
[0.11431513, 1.0237184, 0.006178652, 0.0057646395]
[0.11007191, 0.98804194, 0.005892225, 0.0053754915]
[0.013492969, 0.0002099825, 0.006999173, 0.0064727976]
[0.017230924, 0.0467642, 0.00660673, 0.0059477747]
[0.07328075, 0.6023464, 0.006800492, 0.0062456103]
[0.041208595, 0.29142, 0.0060597397, 0.0060068523]
[0.054596793, 0.4338908, 0.006011777, 0.0051959385]
[0.035017874, 0.23446144, 0.006056134, 0.005515595]
[0.012462258, 0.0010711895, 0.0064229285, 0.0059322105]
[0.044180013, 0.3279167, 0.005988658, 0.0053996868]
[0.013336786, 0.023049247, 0.005777298, 0.005254564]
[0.043978292, 0.33228973, 0.005757879, 0.004991438]
[0.075

In [16]:
# Restore weights for evaluation.
# NOTE(review): this file name differs from the one the training loop writes
# ('conv_b_DEC_model_final_22.hdf5') — confirm which checkpoint is intended.
model.load_weights(save_dir + 'conv_b_DEC_model_final.hdf5')

In [18]:
# Eval.
# Only the soft assignments (first model output) are needed; the two
# reconstructions are not kept.
q = model.predict([images_vis, images_y], verbose=0)[0]
# argmax over NaN rows returns 0, so a diverged model would silently produce
# an all-zero y_pred; make that situation visible.
if not np.all(np.isfinite(q)):
    print('WARNING: q contains non-finite values; y_pred and p below are not meaningful.')
p = target_distribution(q)  # update the auxiliary target distribution p

# evaluate the clustering performance
y_pred = q.argmax(1)
if y is not None:
    acc = np.round(metrics.acc(y, y_pred), 5)
    nmi = np.round(metrics.nmi(y, y_pred), 5)
    ari = np.round(metrics.ari(y, y_pred), 5)
    loss = np.round(loss, 5)
    print('Acc = %.5f, nmi = %.5f, ari = %.5f' % (acc, nmi, ari), ' ; loss=', loss)

In [19]:
# Shape and first three rows of the soft assignments.
print(q.shape, q[:3])

(99991, 2) [[nan nan]
 [nan nan]
 [nan nan]]


In [20]:
# Shape and first three rows of the target distribution.
print(p.shape, p[:3])

(99991, 2) [[nan nan]
 [nan nan]
 [nan nan]]


In [21]:
# Shape and first three hard cluster labels (argmax of q).
print(y_pred.shape, y_pred[:3])

(99991,) [0 0 0]


In [None]:
# NOTE(review): this cell was left as an unfinished assignment (`pred = `),
# which is a SyntaxError. The submission cell reads column 1 of a 2-D array,
# so the soft assignments q are assumed to be the intended value — confirm.
pred = q

In [None]:
# Catalog array paired with the predictions in the submission table
# (presumably one entry per test image, in the same order — TODO confirm).
files = np.load(os.path.join(data_dir,'test_catalog.npy'))

In [None]:
import pandas as pd

# Submission table: catalog columns followed by column 1 of `pred`.
# The frames are joined side by side on their default row index, so both must
# have the same number of rows and the same ordering.
df_files = pd.DataFrame(data = files)
df_predict = pd.DataFrame(data = pred[:,1])
df = pd.concat([df_files, df_predict], axis=1)
df.head(5)

In [None]:
# Write the submission as comma-separated values without index or header row.
# The "Subs/" folder must already exist; it is not created here.
df.to_csv("Subs/Schubert_Unsupervised_Submission_1.csv", sep=",", index=False, header=False)