In [1]:
# Import the libraries
import os
import numpy as np
from numpy import load
from scipy.sparse import load_npz
import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.layers import Input, Conv1D, GlobalMaxPooling1D, concatenate, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Precision, Recall
import matplotlib.pyplot as plt

In [2]:
# Check that a GPU is available
tf.config.experimental.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Not sure why, but I need this to get TF to work
# from: https://www.tensorflow.org/guide/gpu#limiting_gpu_memory_growth
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPUs


### Load in the data

In [4]:
# Define the load path
load_path = '../Datasets/AmazonCat-13K/processed/'

In [5]:
# Load the X_train
X_train_data = load('../Datasets/AmazonCat-13K/processed/' + 'X_train.npz')
X_train = X_train_data['arr_0']

In [6]:
# Load the X_test
X_test_data = load('../Datasets/AmazonCat-13K/processed/' + 'X_test.npz')
X_test = X_test_data['arr_0']

In [7]:
# Load y_train
y_train_sparse = load_npz(load_path + 'y_train_sparse.npz').astype('int64')

In [8]:
# Load y_test
y_test_sparse = load_npz(load_path + 'y_test_sparse.npz').astype('int64')

In [9]:
# Check the shape of the X data
print(f'X_train: {X_train.shape}')
print(f'X_test: {X_test.shape}')

X_train: (639360, 512)
X_test: (213120, 512)


In [10]:
# Check the shape of the X data
print(f'y_train: {y_train_sparse.shape}')
print(f'y_test: {y_test_sparse.shape}')

y_train: (639360, 13680)
y_test: (213120, 13680)


### Sparse Tensors

In [11]:
# Create function to convert sparse matrix to SparseTensor
def convert_sparse_matrix_to_sparse_tensor(sparse_matrix):
    coo = sparse_matrix.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensor(indices, coo.data, coo.shape)

In [12]:
# Create sparse tensor of y_train
y_train_sparse_tensor = convert_sparse_matrix_to_sparse_tensor(y_train_sparse)

In [13]:
# Create sparse tensor of y_test
y_test_sparse_tensor = convert_sparse_matrix_to_sparse_tensor(y_test_sparse)

### Dataset

In [14]:
# Create function to convert SparseTensor to dense tensor
def convert_sparse_tensor_to_dense_tensor(X, y_sparse):
    y_dense = tf.sparse.to_dense(y_sparse)
    return X, y_dense

In [15]:
tr_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train_sparse_tensor))
tr_dataset = tr_dataset.map(convert_sparse_tensor_to_dense_tensor)
tr_dataset = tr_dataset.batch(batch_size=1024)

In [16]:
val_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test_sparse_tensor))
val_dataset = val_dataset.map(convert_sparse_tensor_to_dense_tensor)
val_dataset = val_dataset.batch(batch_size=1024)

### Create the embedding layer

In [17]:
# Create the embedding layer definition
class PretrainedEmbedding(tf.keras.layers.Layer):
    """Non-trainable embedding layer."""

    def __init__(self, embeddings, dropout_rate=0.2, **kwargs):
        """"Instantiate the layer using a pre-defined embedding matrix."""
        super().__init__(**kwargs)
        self.embeddings = tf.constant(embeddings)
        # if you want to add some dropout (or normalization, etc.)
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)

    def call(self, inputs, training=None):
        """Embed some input tokens and optionally apply dropout."""
        output = tf.nn.embedding_lookup(self.embeddings, inputs)
        return self.dropout(output, training=training)

In [18]:
# Load in the embedding matrix
embedding_matrix = np.loadtxt(load_path + 'embedding_matrix.csv', delimiter=',')

In [19]:
# Check the embedding matrix size
embedding_matrix.shape

(200000, 200)

### Define the model layers

In [20]:
# Define the Input and Embedding layers
i = Input(shape=(X_train.shape[1]), dtype=tf.int32)
x = PretrainedEmbedding(embedding_matrix, dropout_rate=0)(i)

# Convolution with window size = 3  
x3 = Conv1D(filters=96, 
            kernel_size=3, 
            strides=1,
            padding='valid',
            activation='relu',
            use_bias=True
           )(x)
x3 = GlobalMaxPooling1D()(x3)

# Convolution with window size = 4
x4 = Conv1D(filters=96, 
            kernel_size=4, 
            strides=1,
            padding='valid',
            activation='relu',
            use_bias=True
           )(x)
x4 = GlobalMaxPooling1D()(x4)

# Convolution with window size = 5
x5 = Conv1D(filters=96, 
            kernel_size=5, 
            strides=1,
            padding='valid',
            activation='relu',
            use_bias=True
           )(x)
x5 = GlobalMaxPooling1D()(x5)

# Concatenated max-pooling layers with Dropout (CNN-Kim uses a dropout rate of 0.5)
concatenated = concatenate([x3, x4, x5])
x = Dropout(rate=0.5)(concatenated)

# Final, fully-connected Dense layer
x = Dense(y_train_sparse.shape[1], activation='sigmoid')(x)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



In [21]:
# Build the model
model = Model(i, x)

In [22]:
# Remove unused vars (to save memory)
del embedding_matrix

In [23]:
# Have a look at the model
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
pretrained_embedding (Pretraine (None, 512, 200)     0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 510, 96)      57696       pretrained_embedding[0][0]       
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 509, 96)      76896       pretrained_embedding[0][0]       
______________________________________________________________________________________________

### Define how the model should be trained

In [24]:
# Define how the model should be trained
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate = 0.025), # CNN-Kim uses SGD with Adelta update rule
              loss='binary_crossentropy',
              metrics=[Precision(), Recall()])

In [25]:
# Create learning rate decay function
def lr_scheduler(epoch, lr):
    learning_rate = lr * 0.9
    print(f'Learning Rate: {learning_rate}')
    return learning_rate

# define the learning rate scheduler
callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

### Train the model

In [26]:
# Train the model
result = model.fit(tr_dataset,
                   epochs=75,
                   validation_data=val_dataset,
                   callbacks=[callback]);

Learning Rate: 0.02250000033527613
Epoch 1/75
    340/Unknown - 174s 512ms/step - loss: 0.0125 - precision: 0.0163 - recall: 0.0677

KeyboardInterrupt: 

In [27]:
# Plot the precision metric at each itteration
plt.plot(result.history['precision'], label='train_precision');
plt.plot(result.history['val_precision'], label='test_precision');
plt.legend();

NameError: name 'result' is not defined

In [28]:
# Plot the recall metric at each itteration
plt.plot(result.history['recall'], label='train_recall');
plt.plot(result.history['val_recall'], label='test_recall');
plt.legend();

NameError: name 'result' is not defined