## Autoencoders (AE)

Neural networks designed for unsupervised learning tasks. They aim to learn a compressed, encoded representation of data, typically for dimensionality reduction or feature extraction. Autoencoders find widespread applications in data denoising, generation, and anomaly detection; they can also be applied to NLP tasks.

In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, RepeatVector, Dense

# Define a small dataset
sentences = [
    "The cat sat on the mat",
    "Dogs are amazing pets",
    "The sun is bright today",
    "I love reading books",
    "Data science is fascinating"
]

# Preprocess the data
def preprocess_sentences(sentences):
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(sentences)
    sequences = tokenizer.texts_to_sequences(sentences)
    max_sequence_length = max(len(s) for s in sequences)
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')
    return padded_sequences, tokenizer, max_sequence_length

padded_sequences, tokenizer, max_sequence_length = preprocess_sentences(sentences)
vocab_size = len(tokenizer.word_index) + 1  # Adding 1 because of reserved 0 index

# Build the autoencoder model
embedding_dim = 8

input_seq = Input(shape=(max_sequence_length,))
encoded = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length)(input_seq)
encoded = LSTM(16)(encoded)
decoded = RepeatVector(max_sequence_length)(encoded)
decoded = LSTM(embedding_dim, return_sequences=True)(decoded)
decoded = Dense(vocab_size, activation='softmax')(decoded)

autoencoder = Model(input_seq, decoded)
autoencoder.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
autoencoder.summary()

# Prepare the data for training
target_data = np.expand_dims(padded_sequences, -1)

# Train the autoencoder
autoencoder.fit(padded_sequences, target_data, epochs=100, batch_size=32)

# Function to encode and decode sentences
def encode_decode_sentence(sentence, tokenizer, autoencoder, max_sequence_length):
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')
    prediction = autoencoder.predict(padded_sequence)
    decoded_sentence = ' '.join(tokenizer.index_word.get(np.argmax(word), '') for word in prediction[0])
    return decoded_sentence.strip()

# Test the autoencoder with a new sentence
test_sentence = "The sun is bright"
decoded_sentence = encode_decode_sentence(test_sentence, tokenizer, autoencoder, max_sequence_length)

print(f"Original sentence: {test_sentence}")
print(f"Decoded sentence: {decoded_sentence}")



Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 3.0452
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 3.0438
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 3.0425
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 3.0411
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 3.0398
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 3.0384
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 3.0370
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 3.0356
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 3.0341
Epoch 10/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 3.0326
Epoch 11/10

## Neural Turing Machines (NTMs)

Classe de modelos de redes neurais que ampliam as redes neurais tradicionais com capacidades de memória, permitindo-lhes resolver tarefas complexas que requerem manipulação e armazenamento de dados ao longo do tempo. A arquitetura NTM combina um controlador de rede neural com um banco de memória externo, muito parecido com o modo como uma máquina de Turing convencional opera com um cabeçote de leitura/gravação em uma fita. O controlador interage com a memória por meio de operações de leitura e gravação, que são diferenciáveis, permitindo que todo o sistema seja treinado ponta a ponta com gradiente descendente.

A implementação de um NTM em Python requer a construção do controlador e dos componentes de memória, juntamente com os mecanismos para leitura e gravação na memória. Aqui está um exemplo conceitual simplificado usando o TensorFlow:

In [2]:
# Example is conceptual and focuses on the architecture components
import tensorflow as tf

class NTMCell(tf.keras.layers.Layer):
    def __init__(self, controller_units, memory_size, memory_vector_dim):
        super(NTMCell, self).__init__()
        self.controller = tf.keras.layers.LSTMCell(controller_units)
        self.memory_size = memory_size
        self.memory_vector_dim = memory_vector_dim
        self.read_head = ReadHead(memory_size, memory_vector_dim)
        self.write_head = WriteHead(memory_size, memory_vector_dim)

    def call(self, x, states):
        # Controller operations
        controller_output, controller_state = self.controller(x, states['controller_state'])
        # Memory operations
        read_vector = self.read_head(controller_output, states['memory'])
        write_vector = self.write_head(controller_output, states['memory'])
        # Update memory
        new_memory = self.update_memory(states['memory'], read_vector, write_vector)
        return controller_output, new_memory, controller_state

# NTMCell would be used as part of a larger model, integrated with input and output processing

# The ReadHead and WriteHead classes, along with the memory update mechanisms, are not detailed here
# but involve implementing the addressing and manipulation logic of the NTM's memory matrix.

## Neural Topic Modeling

Integra redes neurais à modelagem tradicional de tópicos, aprimorando a capacidade de descobrir tópicos abstratos de uma coleção de documentos. Esta abordagem aproveita a aprendizagem representacional e as capacidades de generalização das redes neurais, oferecendo melhorias em relação aos métodos clássicos como a alocação latente de Dirichlet em termos de flexibilidade e desempenho.

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load dataset
max_words = 10000  # Vocabulary size
max_len = 500  # Maximum length of a document
(x_train, _), (x_test, _) = reuters.load_data(num_words=max_words, maxlen=max_len)
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

# Convert to one-hot encoding
def one_hot_sequences(sequences, dimension=max_words):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results

x_train = one_hot_sequences(x_train)
x_test = one_hot_sequences(x_test)

# Model parameters
embedding_dim = 64  # Size of the document embedding
topic_dim = 10  # Number of topics to learn

# Neural topic model
input_doc = layers.Input(shape=(max_words,))
encoded = layers.Dense(embedding_dim, activation='relu')(input_doc)
topic_distribution = layers.Dense(topic_dim, activation='softmax', name='topic_distribution')(encoded)
decoded = layers.Dense(max_words, activation='sigmoid')(topic_distribution)

model = Model(input_doc, decoded)
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Train the model
model.fit(x_train, x_train, epochs=10, batch_size=64, validation_data=(x_test, x_test))

# Extract topic distribution for new documents
topic_model = Model(input_doc, topic_distribution)

# Example: Get topic distribution for the first document in the test set
topic_distribution_output = topic_model.predict(x_test[:1])
print("Topic distribution for the first document:", topic_distribution_output)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - loss: 679.3219 - val_loss: 677.7847
Epoch 2/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 663.3959 - val_loss: 668.7076
Epoch 3/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 660.3478 - val_loss: 660.0817
Epoch 4/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 655.2888 - val_loss: 651.8496
Epoch 5/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 638.0151 - val_loss: 643.9755
Epoch 6/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 638.8223 - val_loss: 636.4519
Epoch 7/10
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s