In [1]:
import numpy as np
my_data = np.load('small_set.npz', allow_pickle=True)
word_emb = np.load('word_emb.npz')['arr_0']
x = my_data['x'].tolist()
y = my_data['y']

In [2]:
import tensorflow as tf
import tensorflow.keras.layers as layers

In [17]:
import scipy()

In [31]:
#scipy.sparse.coo_matrix(([1]*len(x[0]), ([i]*len(x[0]), x[0])), shape=len(x[0])), word_emb.shape[0]))
row = [i] * len(x[0])
col = x[0]
data = [1]*len(x[0])
test = scipy.sparse.coo_matrix((data, (row, col)), shape=(len(x[0]), word_emb.shape[0]), dtype=np.int32)

In [9]:
class KMaxPooling(layers.Layer):
    def __init__(self, k=1, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = tf.layers.InputSpec(ndim=3)
        self.k = k
        
    def compute_output_shape(self, input_shape):
        return (input_shape[0], (input_shape[-1] * self.k))
    def call(self, inputs):
        
        # swap last two dimensions since top_k will be applied along the last dimension
        shifted_input = tf.transpose(inputs, [0, 2, 1])
        
        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
        
        # return flattened output
        return layers.Flatten()(top_k)

In [29]:
def cnn_model_fn(features, labels, emb):
    
    voc_size, emb_size = emb.shape
    label_size = labels.shape[-1]

    # input_layer
    inputs = tf.keras.Input(shape=(voc_size,), sparse=True)
    print(inputs.shape)
    
    #emb_layer
    emb_input = layers.Embedding(voc_size, emb_size, weights=emb)(inputs)
    print(emb_input.shape)
    
    # conv_layer, filter_size=2 & 3
    conv0_out = layers.Conv1D(1, 2, padding='same', activation=tf.nn.relu)(emb_input)
    conv1_out = layers.Conv1D(1, 3, padding='same', activation=tf.nn.relu)(emb_input)
    
    # stack the activation map together
    conv_out = layers.concatenate([conv0_out, conv1_out], axis=-1)
    print(conv_out.shape)
    
    # pooling
    pool_out = KMaxPooling(k=2)(conv_out)
    
    # bottleneck & predict
    #dense_out = layers.Dense(label_size/5, activation=tf.nn.relu)(pool_out_flat)
    prediction = layers.Dense(label_size, activation=tf.nn.sigmoid)(pool_out)
    
    # model
    model = tf.keras.Model(inputs=emb_input, outputs=prediction)
    model.compile(optimizer=tf.train.RMSPropOptimizer(0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
    
    # fit in model doc by doc
    for i, feats in enumerate(features):
        row = [i] * len(feats)
        data = [1]*len(feats)
        feat = scipy.sparse.coo_matrix((data, (row, feats)), shape=(len(feats), word_emb.shape[0]), dtype=np.int32)
        model.fit(feat, labels[i], batch_size=16, epochs=1)

In [30]:
cnn_model_fn(x, y, word_emb)

(?, ?)


TypeError: Failed to convert object of type <class 'tensorflow.python.framework.sparse_tensor.SparseTensor'> to Tensor. Contents: SparseTensor(indices=Tensor("input_4/indices:0", shape=(?, 2), dtype=int64), values=Tensor("embedding_3/Cast:0", shape=(?,), dtype=int32), dense_shape=Tensor("input_4/shape:0", shape=(2,), dtype=int64)). Consider casting elements to a supported type.

In [70]:
model = tf.keras.Model()

(1000, 4955)

In [9]:
features.shape

(1000, 9885, 50)

In [8]:
labels.shape

(1000, 4955)

In [19]:
features[1].shape

(255, 50)

In [21]:
labels.shape

(1000, 4955)