In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import nltk
import os
import json

from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout, Concatenate
from tensorflow.python.ops import array_ops
from nltk import sent_tokenize, word_tokenize

from gcdc_data import load, load_pandas, Source, TrainOrTest

def pythonize(history):
    return {
        key: [float(x) for x in scores]
        for key, scores in history.items()
    }

In [2]:
train_data = load_pandas(TrainOrTest.TRAIN, sources=[Source.CLINTON, Source.ENRON])
test_data = load_pandas(TrainOrTest.TEST, sources=[Source.CLINTON, Source.ENRON])

print(train_data['label'].unique())

train_data.head()

['2' '3' '1']


Unnamed: 0,text,label
0,Cheryl:\n\nAre we in a good place to begin pap...,2
1,"Our friend, General Joe Ballard owns The Raven...",2
2,Outstanding news! Miki Rakic called about 10 m...,3
3,Responding to separate emails from Uzra + Jeff...,1
4,Guy from Mexico is in NY and is cooperating. D...,1


In [3]:
t = tf.keras.preprocessing.text.Tokenizer(oov_token='unk')
t.fit_on_texts([
    word
    for doc in train_data['text']
    for sent in sent_tokenize(doc)
    for word in word_tokenize(sent)
])

vocab_size = len(t.word_index) + 1

vocab_size, t.word_index['unk']

(18832, 1)

In [4]:
EMBEDDING_DIM = 100

embeddings_index = {}
f = open(os.path.join('data', f'glove.6B.{EMBEDDING_DIM}d.txt'))
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Found {} word vectors.'.format(len(embeddings_index)))

embedding_matrix = np.zeros((vocab_size, EMBEDDING_DIM))
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word, embeddings_index['unk'])
    embedding_matrix[i] = embedding_vector
        
print(embedding_matrix[72])

Found 400000 word vectors.
[ 1.28830001e-01 -8.22090030e-01  2.74379998e-01 -6.90139979e-02
  1.79890007e-01  7.26050019e-01 -1.51120007e-01  8.55410006e-03
 -9.51219976e-01  7.72430003e-01 -2.83749998e-01  2.83289999e-01
  1.48249999e-01 -1.22300005e-02 -1.92670003e-02 -3.44600007e-02
  3.15059990e-01 -1.66390002e-01 -1.34349996e-02 -2.04590010e-03
  6.49050027e-02 -2.09889993e-01  1.25239998e-01  3.52299988e-01
  6.40399992e-01  5.95699996e-02 -8.03020000e-01 -8.16479981e-01
  6.61339998e-01  5.99699989e-02 -6.15210012e-02  8.49219978e-01
 -2.87330002e-02  2.76699990e-01 -1.00680006e+00  7.17580020e-01
 -3.72570008e-01  4.30640012e-01 -4.92439985e-01  3.86830002e-01
 -3.68279994e-01  2.79820003e-02  1.53460002e+00 -6.05329990e-01
 -3.44489992e-01 -1.70690000e-01  2.92879999e-01 -5.35809994e-01
  5.60350001e-01 -6.30129993e-01 -1.23080000e-01  9.36330035e-02
  5.93360007e-01  1.52139997e+00 -9.26290005e-02 -3.14080000e+00
  1.39310002e-01 -5.38200021e-01  1.17359996e+00  6.23179972e-0

In [5]:
len(set(t.word_index.keys() - embeddings_index.keys()))

1698

In [6]:
example_document = """Dear abby,

I'm writing to tell you you suck. Help me out of this mess.

Bye"""

In [7]:
def tokenize(text, tok=None):
    return [
        [
            tok.texts_to_sequences(nltk.word_tokenize(sent))
            if tok else
            nltk.word_tokenize(sent)
            for sent in nltk.sent_tokenize(para)
        ]
        for para in text.splitlines()
        if len(para) > 0
    ]

print(tokenize(example_document))

[[['Dear', 'abby', ',']], [['I', "'m", 'writing', 'to', 'tell', 'you', 'you', 'suck', '.'], ['Help', 'me', 'out', 'of', 'this', 'mess', '.']], [['Bye']]]


In [8]:
print(tokenize(example_document, t))

[[[[310], [1], []]], [[[6], [115], [997], [3], [379], [12], [12], [1], []], [[140], [37], [51], [5], [16], [5216], []]], [[[4542]]]]


In [9]:
train_data['tokenized'] = train_data['text'].map(lambda text: tokenize(text, t))
test_data['tokenized'] = test_data['text'].map(lambda text: tokenize(text, t))

train_data.head()

Unnamed: 0,text,label,tokenized
0,Cheryl:\n\nAre we in a good place to begin pap...,2,"[[[[239], []]], [[[22], [13], [8], [7], [106],..."
1,"Our friend, General Joe Ballard owns The Raven...",2,"[[[[30], [1104], [], [399], [1482], [5415], [2..."
2,Outstanding news! Miki Rakic called about 10 m...,3,"[[[[1727], [485], []], [[7862], [10654], [240]..."
3,Responding to separate emails from Uzra + Jeff...,1,"[[[[2476], [3], [1134], [2020], [28], [7867], ..."
4,Guy from Mexico is in NY and is cooperating. D...,1,"[[[[1106], [28], [783], [11], [8], [1012], [4]..."


In [10]:
MAX_DOC_LENGTH = 0
MAX_PARA_LENGTH = 0
MAX_SENT_LENGTH = 0

for doc in train_data['tokenized'].append(test_data['tokenized']):
    MAX_DOC_LENGTH = max(MAX_DOC_LENGTH, len(doc))
    for para in doc:
        MAX_PARA_LENGTH = max(MAX_PARA_LENGTH, len(para))
        for sent in para:
            MAX_SENT_LENGTH = max(MAX_SENT_LENGTH, len(sent))
            
MAX_DOC_LENGTH, MAX_PARA_LENGTH, MAX_SENT_LENGTH

(12, 32, 255)

In [11]:
def pad_to_dense(M, sent_len=MAX_SENT_LENGTH, para_len=MAX_PARA_LENGTH, doc_len=MAX_DOC_LENGTH):
    Z = np.zeros((len(M), doc_len, para_len, sent_len))
    for docidx, doc in enumerate(M):
        for paraidx, para in enumerate(doc):
            for sentidx, sent in enumerate(para):
                sentnp = np.hstack(np.array(sent))
                Z[docidx, paraidx, sentidx, :len(sentnp)] += sentnp
    return Z

print(train_data['tokenized'][0])
pad_to_dense(train_data['tokenized'][:1])

[[[[239], []]], [[[22], [13], [8], [7], [106], [225], [3], [606], [3809], [3], [484], [4721], [1653], [57], [118], [14], [10647], [8], [523], []]], [[[27], [43], [], [6], [26], [1054], [13], [318], [67], [3], [248], [14], [523], [1654], [4], [6], [19], [56], [2], [4722], [220], [3], [305], [2], [3809], []], [[69], [19], [17], [1943], [3], [53], [10], [1723], [161], [], [64], [19], [3222], [67], [3], [650], [1317], [10], [2797], [], [4723], [], [10648], [], [298], [], [4], [16], [1723], [121], [667], [19], [2624], [67], [138], [2], [176], [5], [2], [321], [14], [544], [459], [], [3223], []], [[16], [242], [26], [179], [37], [233], [161], [41], [43], [5], [4224], [131], [7], [4225], [1597], [10], [4724], [14], [537], [916], []]], [[[6], [66], [25], [1797], [14], [7860], [51], [3], [4724], [], [2], [2018], [13], [56], [2], [60], [46], [11], [3], [17], [211], [], [4], [13], [74], [25], [56], [2625], [], [4725], [51], [49], [131], [13], [2019], [], [25], [7], [10649], [10650], [1103], [], [

array([[[[239.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         ...,
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.]],

        [[ 22.,  13.,   8., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         ...,
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.]],

        [[ 27.,  43.,   6., ...,   0.,   0.,   0.],
         [ 69.,  19.,  17., ...,   0.,   0.,   0.],
         [ 16., 242.,  26., ...,   0.,   0.,   0.],
         ...,
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.]],

        ...,

  

In [12]:
def dense_mask(M, sent_len=MAX_SENT_LENGTH, para_len=MAX_PARA_LENGTH, doc_len=MAX_DOC_LENGTH):
    Z = np.zeros((len(M), doc_len, para_len, sent_len), dtype=bool)
    for docidx, doc in enumerate(M):
        for paraidx, para in enumerate(doc):
            for sentidx, sent in enumerate(para):
                for tokenidx, token in enumerate(sent):
                    Z[docidx, paraidx, sentidx, tokenidx] = True
    return Z

dense_mask(train_data['tokenized'][:1])

array([[[[ True,  True, False, ..., False, False, False],
         [False, False, False, ..., False, False, False],
         [False, False, False, ..., False, False, False],
         ...,
         [False, False, False, ..., False, False, False],
         [False, False, False, ..., False, False, False],
         [False, False, False, ..., False, False, False]],

        [[ True,  True,  True, ..., False, False, False],
         [False, False, False, ..., False, False, False],
         [False, False, False, ..., False, False, False],
         ...,
         [False, False, False, ..., False, False, False],
         [False, False, False, ..., False, False, False],
         [False, False, False, ..., False, False, False]],

        [[ True,  True,  True, ..., False, False, False],
         [ True,  True,  True, ..., False, False, False],
         [ True,  True,  True, ..., False, False, False],
         ...,
         [False, False, False, ..., False, False, False],
         [False, False, Fa

In [13]:
train_tensor = pad_to_dense(train_data['tokenized'])
test_tensor = pad_to_dense(test_data['tokenized'])

train_tensor.shape, test_tensor.shape

((2000, 12, 32, 255), (400, 12, 32, 255))

In [14]:
def categorical_labels(labels):
    eye = [
        [1.0, 0.0, 0.0],
        [0.0, 1.0, 0.0],
        [0.0, 0.0, 1.0]
    ]
    
    result = []
    for item in labels:
        result.append(eye[int(item) - 1])
        
    return np.array(result)

train_labels = categorical_labels(pd.to_numeric(train_data['label']))
test_labels = categorical_labels(pd.to_numeric(test_data['label']))

train_labels.shape, test_labels.shape

((2000, 3), (400, 3))

In [15]:
train_sent_mask = dense_mask(train_data['tokenized'])
test_sent_mask = dense_mask(test_data['tokenized'])

train_para_mask = np.apply_along_axis(any, 3, train_sent_mask)
test_para_mask = np.apply_along_axis(any, 3, test_sent_mask)

train_doc_mask = np.apply_along_axis(any, 2, train_para_mask)
test_doc_mask = np.apply_along_axis(any, 2, test_para_mask)

(train_sent_mask.shape, train_para_mask.shape, train_doc_mask.shape), (test_sent_mask.shape, test_para_mask.shape, test_doc_mask.shape)

(((2000, 12, 32, 255), (2000, 12, 32), (2000, 12)),
 ((400, 12, 32, 255), (400, 12, 32), (400, 12)))

In [16]:
BATCH_SIZE = 2

train_dataset = tf.data.Dataset.from_tensor_slices(
    ((train_tensor, train_sent_mask, train_para_mask, train_doc_mask), train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices(
    ((test_tensor, test_sent_mask, test_para_mask, test_doc_mask), test_labels))

train_dataset = train_dataset.shuffle(1000).batch(BATCH_SIZE)
test_dataset = test_dataset.shuffle(1000).batch(BATCH_SIZE)

(a, b, c, d), e = next(iter(train_dataset))
a.shape, b.shape, c.shape, d.shape, e.shape

(TensorShape([2, 12, 32, 255]),
 TensorShape([2, 12, 32, 255]),
 TensorShape([2, 12, 32]),
 TensorShape([2, 12]),
 TensorShape([2, 3]))

In [17]:
embedding = tf.keras.layers.Embedding(
    vocab_size,
    EMBEDDING_DIM,
    weights=[embedding_matrix],
    trainable=False)

embedded_example = embedding(train_tensor[:2])

print(embedded_example.shape)
embedded_example[0][0][0][0]

(2, 12, 32, 255, 100)


<tf.Tensor: id=73, shape=(100,), dtype=float32, numpy=
array([ 0.12616  , -0.2641   ,  0.071929 , -0.96047  , -0.086358 ,
       -0.032276 ,  0.33611  ,  0.55126  , -0.19631  , -0.66443  ,
       -0.16571  , -0.40401  , -0.13921  ,  0.44772  , -0.039273 ,
       -0.63014  , -0.2928   , -0.024373 , -0.81073  ,  0.70829  ,
       -0.47717  ,  0.39068  ,  0.44993  ,  0.25711  ,  0.68318  ,
        0.14003  , -0.013181 , -1.212    , -0.14414  ,  0.21759  ,
        0.30636  ,  0.7272   ,  0.82667  , -0.20531  , -0.68931  ,
       -0.047831 ,  0.3048   ,  0.20761  ,  0.33063  ,  0.33195  ,
       -0.23914  ,  0.046714 , -0.46688  ,  0.46208  ,  0.29071  ,
        0.60412  , -0.75673  , -0.34308  , -0.32161  , -0.17654  ,
        0.66982  ,  0.014476 , -0.12332  , -0.29709  ,  0.26196  ,
       -0.49916  , -0.65069  ,  0.3813   , -0.76894  , -0.2284   ,
       -0.25254  , -0.27246  ,  0.38411  ,  0.52052  ,  0.05651  ,
       -0.49209  ,  1.0191   ,  0.20061  , -0.33445  , -0.0094115,
       

In [18]:
# onions

class BahdanauAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super(BahdanauAttentionLayer, self).__init__(**kwargs)
        self.W = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        
    def call(self, values):
        # (batch_size, max_length, 1)
        scores = self.V(tf.nn.tanh(self.W(values)))
        
        # (batch_size, max_length, 1) normalized lulz
        attention_weights = tf.nn.softmax(scores, axis=1)
        
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        
        return context_vector, attention_weights


class AttentiveSequenceEncoder(tf.keras.layers.Layer):
    def __init__(self, lstm_units, attention_units, **kwargs):
        super().__init__(**kwargs)
        self.lstm = Bidirectional(LSTM(lstm_units, recurrent_dropout=0.0001, return_sequences=True))
        self.concat = Concatenate()
        self.attention = BahdanauAttentionLayer(attention_units)
        
    def call(self, inputs, mask):
        encoded = self.lstm(inputs, mask=mask)
        output, attention_weights = self.attention(encoded)
        
        return output, attention_weights

In [19]:
EPOCHS = 10

In [20]:
class AttentiveDocModel(tf.keras.Model):
    def __init__(self, lstm_units, hidden_units, attention_units, dropout, batch_size=BATCH_SIZE):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            EMBEDDING_DIM,
            weights=[embedding_matrix],
            trainable=False,
            input_shape=(MAX_DOC_LENGTH, MAX_PARA_LENGTH, MAX_SENT_LENGTH))
        
        self.sent_encoder = AttentiveSequenceEncoder(lstm_units, attention_units)
        self.para_encoder = AttentiveSequenceEncoder(lstm_units, attention_units)
        self.doc_encoder = AttentiveSequenceEncoder(lstm_units, attention_units)
        
        self.hidden = tf.keras.layers.Dense(hidden_units, activation='tanh')
        self.dropout = tf.keras.layers.Dropout(dropout)
        self.classifier = tf.keras.layers.Dense(3, activation='sigmoid')
        
        self.dropout.build((BATCH_SIZE, hidden_units))
    
    def call(self, inputs, training=False):
        (inputs, sent_mask, para_mask, doc_mask) = inputs
        
        embedded = self.embedding(inputs)
        embedded = array_ops.reshape(
            embedded, (BATCH_SIZE * MAX_DOC_LENGTH * MAX_PARA_LENGTH, MAX_SENT_LENGTH, -1))
        sent_mask = array_ops.reshape(
            sent_mask, (BATCH_SIZE * MAX_DOC_LENGTH * MAX_PARA_LENGTH, MAX_SENT_LENGTH))

        sent_embedded, sent_weights = self.sent_encoder(embedded, mask=sent_mask)
        sent_embedded = array_ops.reshape(
            sent_embedded, (BATCH_SIZE * MAX_DOC_LENGTH, MAX_PARA_LENGTH, -1))
        para_mask = array_ops.reshape(
            para_mask, (BATCH_SIZE * MAX_DOC_LENGTH, MAX_PARA_LENGTH))
        
        para_embedded, para_weights = self.para_encoder(sent_embedded, mask=para_mask)
        para_embedded = array_ops.reshape(
            para_embedded, (BATCH_SIZE, MAX_DOC_LENGTH, -1))
        
        x, doc_weights = self.doc_encoder(para_embedded, mask=doc_mask)
        x = self.hidden(x)
        x = self.dropout(x)
        
        if not training:
            self.sent_weights = sent_weights
            self.para_weights = para_weights
            self.doc_weights = doc_weights
        
        return self.classifier(x)

In [31]:
best_attentive_doc_model = None
best_units = None
best_val_score = 0.0

for attention_units in (100, 200, 300, 400):
    attentive_doc_model = AttentiveDocModel(150, 300, attention_units, 0.5)
    attentive_doc_model.compile(optimizer='adam', metrics=['accuracy'https://manjaro.org/, 'categorical_accuracy'],
                                loss=tf.keras.losses.CategoricalCrossentropy())
    attentive_doc_model(next(iter(train_dataset))[0])
    attentive_doc_model_hist = attentive_doc_model.fit(
        train_dataset,
        validation_data=test_dataset,
        epochs=5)
    
    with open(f'attentive_doc_model_history_{attention_units}.json', 'w') as f:
        json.dump(pythonize(attentive_doc_model_hist.history), f)
        
    val_accuracy = max(attentive_doc_model_hist.history['val_accuracy'])
    if val_accuracy > best_val_score:
        best_attentive_doc_model = attentive_doc_model
        best_units = attention_units
        best_val_score = val_accuracy

best_attentive_doc_model.summary()
best_units, best_val_score



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor

(200, 0.565)

In [21]:
attentive_doc_model = AttentiveDocModel(150, 300, 200, 0.5)
attentive_doc_model.compile(optimizer='adam', metrics=['accuracy', 'categorical_accuracy'],
                            loss=tf.keras.losses.CategoricalCrossentropy())
attentive_doc_model(next(iter(train_dataset))[0])
attentive_doc_model_hist = attentive_doc_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=EPOCHS)
    
with open(f'best_att_doc_model_history.json', 'w') as f:
    json.dump(pythonize(attentive_doc_model_hist.history), f)

attentive_doc_model.save_weights('./best_att_doc_model/ckpt')



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
class SmallAttentiveDocModel(tf.keras.Model):
    def __init__(self, lstm_units, hidden_units, attention_units, dropout, batch_size=BATCH_SIZE):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            EMBEDDING_DIM,
            weights=[embedding_matrix],
            trainable=False,
            input_shape=(MAX_DOC_LENGTH, MAX_PARA_LENGTH, MAX_SENT_LENGTH))
        
        self.sent_encoder = AttentiveSequenceEncoder(lstm_units, attention_units)
        self.doc_encoder = AttentiveSequenceEncoder(lstm_units, attention_units)
        
        self.hidden = tf.keras.layers.Dense(hidden_units, activation='tanh')
        self.dropout = tf.keras.layers.Dropout(dropout)
        self.classifier = tf.keras.layers.Dense(3, activation='sigmoid')
        
        self.dropout.build((BATCH_SIZE, hidden_units))
    
    def call(self, inputs, training=False):
        (inputs, sent_mask, para_mask, doc_mask) = inputs
        
        embedded = self.embedding(inputs)
        embedded = array_ops.reshape(
            embedded, (BATCH_SIZE * MAX_DOC_LENGTH * MAX_PARA_LENGTH, MAX_SENT_LENGTH, -1))
        sent_mask = array_ops.reshape(
            sent_mask, (BATCH_SIZE * MAX_DOC_LENGTH * MAX_PARA_LENGTH, MAX_SENT_LENGTH))

        sent_embedded, sent_weights = self.sent_encoder(embedded, mask=sent_mask)
        sent_embedded = array_ops.reshape(
            sent_embedded, (BATCH_SIZE, MAX_DOC_LENGTH * MAX_PARA_LENGTH, -1))
        doc_mask = array_ops.reshape(
            para_mask, (BATCH_SIZE, MAX_DOC_LENGTH * MAX_PARA_LENGTH))
        
        x, doc_weights = self.doc_encoder(sent_embedded, mask=doc_mask)
        x = self.hidden(x)
        x = self.dropout(x)
        
        if not training:
            self.sent_weights = sent_weights
            self.doc_weights = doc_weights
        
        return self.classifier(x)

In [22]:
best_small_att_doc_model = None
best_small_units = None
best_small_val_score = 0.0

for attention_units in (100, 200, 300, 400):
    small_att_doc_model = SmallAttentiveDocModel(150, 300, attention_units, 0.5)
    small_att_doc_model.compile(optimizer='adam', metrics=['accuracy'],
                                loss=tf.keras.losses.CategoricalCrossentropy())
    small_att_doc_model(next(iter(train_dataset))[0])
    small_att_doc_model_hist = small_att_doc_model.fit(
        train_dataset,
        validation_data=test_dataset,
        epochs=5)
    
    with open(f'small_att_doc_model_history_{attention_units}.json', 'w') as f:
        json.dump(pythonize(small_att_doc_model_hist.history), f)
        
    val_accuracy = max(small_att_doc_model_hist.history['val_accuracy'])
    if val_accuracy > best_small_val_score:
        best_small_att_doc_model = small_att_doc_model
        best_small_units = attention_units
        best_small_val_score = val_accuracy

best_small_att_doc_model.summary()
best_small_units, best_small_val_score



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor

(100, 0.515)

In [23]:
small_att_doc_model = SmallAttentiveDocModel(150, 300, 100, 0.5)
small_att_doc_model.compile(optimizer='adam', metrics=['accuracy'],
                            loss=tf.keras.losses.CategoricalCrossentropy())
small_att_doc_model(next(iter(train_dataset))[0])
small_att_doc_model_hist = small_att_doc_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=EPOCHS)
    
with open(f'best_small_att_doc_model_history.json', 'w') as f:
    json.dump(pythonize(small_att_doc_model_hist.history), f)

small_att_doc_model.save_weights('./small_att_doc_model/ckpt')



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
