In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import keras
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
#config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)
sess = tf.Session(config=config)
set_session(sess) 

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
from __future__ import print_function

from keras.models import Sequential, Model
from keras.layers import Conv1D, Dense, Dropout, Flatten, BatchNormalization, Input, Concatenate, Add
from keras.callbacks import EarlyStopping, CSVLogger

from loader import load_20news
from custom_layer import MonteCarloLRF, SeparableMonteCarloLRF, SeparableMonteCarloMaxPoolingV2, RandomLRF

import numpy as np
import sklearn as sk
import pickle

In [4]:
top_words=10000
sparse=False
remove_short_documents=True
notebook = 'mcNet_top10k_temptative_42'

In [5]:
(input_shape, nb_classes), (X_train, X_test, Y_train, Y_test), graph_data = \
    load_20news(data_home='data', top_words=top_words, sparse=sparse,
                remove_short_documents=remove_short_documents, verbose=False)

In [6]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2])
num_classes = Y_train.shape[1]

In [7]:
%%time
if False:
  #Process next cell only once
  path = os.path.join('probabilities_' + 
                      'top' + str(top_words) +
                      '_sparse' + str(sparse) +
                      '_removeShorts' + str(remove_short_documents) +
                      '_tfidf_bis.pkl')
  if os.path.isfile(path):
    probabilities = pickle.load(open(path, "rb"), encoding='latin1')

CPU times: user 1 µs, sys: 2 µs, total: 3 µs
Wall time: 4.53 µs


In [8]:
%%time
#if not(os.path.isfile(path)):
if True:
  METRIC = 'cosine'#'euclidean'
  distances = sk.metrics.pairwise.pairwise_distances(graph_data, metric=METRIC, n_jobs=-2)

  # enforce exact zero
  for k in range(distances.shape[0]):
    distances[k,k] = 0.

  # max normalize
  #distances /= distances.max()
  distances /= distances.max(axis=1).reshape((distances.shape[0], 1))

  # use tricube kernel (becaause of flatness around 0)
  probabilities = (1. - np.abs(distances) ** 3) ** 3

  # remove auto connections (which are taken anyway in LRF)
  for k in range(probabilities.shape[0]):
    probabilities[k,k] = 0.

  # normalize proba
  probabilities /= np.sum(probabilities, axis=1).reshape((probabilities.shape[0], 1))
  
  # pickled for later use
  #pickle.dump(probabilities, open(path,"wb"))  

CPU times: user 12.2 s, sys: 1.17 s, total: 13.4 s
Wall time: 12.8 s


In [9]:
if False:
  probabilities = np.ones(probabilities.shape)

  # remove auto connections (which are taken anyway in LRF)
  for k in range(probabilities.shape[0]):
    probabilities[k,k] = 0.

  # renormalize proba
  probabilities /= np.sum(probabilities, axis=-1).reshape((probabilities.shape[0], 1))
  assert ((np.sum(probabilities, axis=-1) - 1) < 0.000001).all()

In [10]:
batch_size = 64
X = Input(shape=(X_train.shape[1], X_train.shape[2]))

H = SeparableMonteCarloLRF(probabilities, LRF_size=2, activation='relu')(X)
H = Conv1D(64, kernel_size=1, activation='relu', padding='same') (H)
H = Dropout(0.2)(H)
#H = Add()([H,X])

H2 = SeparableMonteCarloLRF(probabilities, LRF_size=2, activation='relu')(H)
H2 = Conv1D(64, kernel_size=1, activation='relu', padding='same') (H)
H2 = Dropout(0.2)(H2)

H2 = Conv1D(1, kernel_size=1, activation='relu', padding='same') (H2)
H2 = Add()([X,H2])


L = Flatten()(H2)
L = Dense(500, activation='relu')(L)
L = Dropout(0.2)(L)
Y = Dense(num_classes, activation='softmax')(L)
model = Model(inputs=X, outputs=Y)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 10000, 1)     0                                            
__________________________________________________________________________________________________
separable_monte_carlo_lrf_1 (Se (None, 10000, 1)     3           input_1[0][0]                    
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 10000, 64)    128         separable_monte_carlo_lrf_1[0][0]
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 10000, 64)    0           conv1d_1[0][0]                   
__________________________________________________________________________________________________
conv1d_2 (

In [11]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stopper = EarlyStopping(min_delta=0.001, patience=2)
csv = CSVLogger(notebook + '_log.csv')

history = model.fit(X_train, Y_train,
                    batch_size=batch_size,
                    epochs=10,
                    verbose=1,
                    #callbacks=[early_stopper, csv],
                    validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 10168 samples, validate on 7071 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.0315624627715003
Test accuracy: 0.7198416065873022
