In [None]:
import tensorflow as tf
import numpy as np

# Toy corpus: in every sentence the last word is the one the model learns to predict.
sentences = ["i like dog", "i love coffee", "i hate milk"]

# Vocabulary: unique words in sorted order. Iterating a set of strings directly
# yields an order that can change between interpreter runs, which would make the
# word <-> index mapping irreproducible after a kernel restart.
word_list = sorted(set(" ".join(sentences).split()))
word_dict = {w: i for i, w in enumerate(word_list)}    # word  -> index
number_dict = {i: w for i, w in enumerate(word_list)}  # index -> word
n_class = len(word_dict)  # vocabulary size

# NNLM Parameters
n_step = 2  # number of context words fed to the model: ['i like', 'i love', 'i hate']
n_hidden = 2  # number of hidden units

def make_batch(sentences, vocab=None):
    """Build one-hot encoded (context, target) pairs from whitespace-separated sentences.

    For each sentence every word except the last is a context word and the last
    word is the prediction target.

    Args:
        sentences: iterable of strings whose words are separated by whitespace.
        vocab: optional mapping from word to integer index, expected to use the
            indices ``0 .. len(vocab) - 1``. Defaults to the module-level
            ``word_dict``.

    Returns:
        A tuple ``(input_batch, target_batch)`` of lists with one entry per
        sentence: ``input_batch[k]`` is a 2-D one-hot array with one row per
        context word, ``target_batch[k]`` is the 1-D one-hot vector of the last
        word.

    Raises:
        KeyError: if a word is not present in the vocabulary.
        IndexError: if a sentence contains no words.
    """
    if vocab is None:
        vocab = word_dict

    # Row i of the identity matrix is the one-hot vector for index i; build it once
    # instead of twice per sentence.
    one_hot = np.eye(len(vocab))

    input_batch = []
    target_batch = []
    for sen in sentences:
        words = sen.split()
        context_ids = [vocab[w] for w in words[:-1]]
        target_id = vocab[words[-1]]

        # Indexing with a list copies the selected rows.
        input_batch.append(one_hot[context_ids])
        # Indexing with a single int returns a view, so copy it to keep every
        # target independent of the shared matrix.
        target_batch.append(one_hot[target_id].copy())

    return input_batch, target_batch

# Custom Model
class NNLM(tf.keras.Model):
    """Feed-forward language model: one tanh hidden layer followed by a linear output layer.

    All layer sizes are taken from the module-level ``n_step``, ``n_class`` and
    ``n_hidden``.
    """

    def __init__(self):
        super().__init__()
        flat_dim = n_step * n_class
        # Hidden layer: weights H and bias d
        self.H = tf.Variable(tf.random.normal([flat_dim, n_hidden]))
        self.d = tf.Variable(tf.random.normal([n_hidden]))
        # Output layer: weights U and bias b
        self.U = tf.Variable(tf.random.normal([n_hidden, n_class]))
        self.b = tf.Variable(tf.random.normal([n_class]))

    def call(self, inputs):
        """Return unnormalised class scores (logits) with ``n_class`` columns per example."""
        # Flatten each example's context into a single row of n_step * n_class values.
        flat = tf.reshape(inputs, shape=[-1, n_step * n_class])
        hidden = tf.nn.tanh(tf.matmul(flat, self.H) + self.d)
        return tf.matmul(hidden, self.U) + self.b

model = NNLM()

# Loss function
def loss_fn(model, inputs, targets):
    """Return the mean softmax cross-entropy between ``model(inputs)`` and ``targets``."""
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=targets,
        logits=model(inputs),
    )
    return tf.reduce_mean(per_example_loss)

# Training data: one-hot contexts and targets built from the toy corpus
input_batch, target_batch = make_batch(sentences)

# Optimizer: Adam with a learning rate of 0.001
optimizer = tf.optimizers.Adam(0.001)

# Training: 5000 full-batch gradient steps, reporting the loss every 1000 epochs
for epoch in range(5000):
    # Record the forward pass so the loss can be differentiated afterwards.
    with tf.GradientTape() as tape:
        loss = loss_fn(model, input_batch, target_batch)

    trainable = model.trainable_variables
    gradients = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))

    epoch_number = epoch + 1  # report epochs 1-based
    if epoch_number % 1000 == 0:
        print('Epoch:', '%04d' % epoch_number, 'cost =', '{:.6f}'.format(loss))

# Predict: index of the highest-scoring class for every training context
logits = model(input_batch)
predict = tf.argmax(logits, 1).numpy()

# Test: print each context next to the word the model predicts for it.
# The contexts get their own name so the built-in `input()` is not overwritten
# for the rest of the kernel session, and the list is computed only once.
# n_step is the number of context words the model consumes per example.
contexts = [sen.split()[:n_step] for sen in sentences]
print(contexts, '->', [number_dict[n] for n in predict])


Epoch: 1000 cost = 0.477011
Epoch: 2000 cost = 0.095235
Epoch: 3000 cost = 0.038383
Epoch: 4000 cost = 0.019316
Epoch: 5000 cost = 0.010693
[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']


In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Text-CNN Parameters
num_classes = 2            # two labels: good / not good
max_sequence_length = 10   # length every tokenised sentence is brought to
embedding_dim = 50         # size of each learned word vector
num_filters = 128          # filters in each Conv1D layer
filter_sizes = [2, 3, 2]   # kernel size of each Conv1D layer, in stacking order
dropout_rate = 0.7         # rate passed to the Dropout layer
batch_size = 64
epochs = 10

# Toy corpus with one sentiment label per sentence (1 is good, 0 is not good)
sentences = [
    "i love",
    "i loves",
    "i love you",
    "he loves me",
    "she likes baseball",
    "i hate you",
    "sorry for that",
    "this is awful",
    "he is wrost",
    "this is bad",
]
labels = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

# Fit the tokenizer on the corpus, then turn each sentence into a sequence of
# word indices brought to length max_sequence_length
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
sequences = pad_sequences(
    tokenizer.texts_to_sequences(sentences),
    maxlen=max_sequence_length,
)

# Replace the integer labels with their one-hot encoding
labels = tf.keras.utils.to_categorical(labels, num_classes=num_classes)

# Define the Text CNN model layer by layer:
# embedding -> stacked Conv1D layers -> global max pooling -> dense classifier head
model = tf.keras.Sequential()
# NOTE(review): the + 1 presumably leaves room for index 0 (padding) — confirm
# against the Tokenizer documentation.
model.add(tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1,
                                    output_dim=embedding_dim,
                                    input_length=max_sequence_length))
# One convolution per entry of filter_sizes, each applied to the previous layer's output
for filter_size in filter_sizes:
    model.add(tf.keras.layers.Conv1D(filters=num_filters, kernel_size=filter_size, activation='relu'))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(dropout_rate))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# Compile the model: one-hot labels go with categorical cross-entropy
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model
model.fit(sequences, labels, epochs=epochs, batch_size=batch_size)

# Test the model with new text, encoded with the same tokenizer and length as the training data
test_text = ["i love coding", "he loves me", "he is wrost","she likes baseball","this is a bad movie"]
test_sequences = pad_sequences(
    tokenizer.texts_to_sequences(test_text),
    maxlen=max_sequence_length,
)

# For each sentence pick the class with the highest predicted score
predictions = model.predict(test_sequences)
predicted_labels = np.argmax(predictions, axis=1)

# Label 1 is reported as good, anything else as not good
for text, label in zip(test_text, predicted_labels):
    print(f"'{text}' is good." if label == 1 else f"'{text}' is not good.")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
'i love coding' is good.
'he loves me' is good.
'he is wrost' is not good.
'she likes baseball' is good.
'this is a bad movie' is not good.


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the IMDB dataset with a restricted vocabulary. `num_words` limits which
# word indices are kept, not how many reviews are loaded.
# The same value sizes the Embedding table below, so it is defined once here;
# changing only one of the two would let the data contain indices the
# embedding cannot look up.
vocab_size = 1000  # Use only the top 1000 most frequent words
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

# Preprocess the data: bring every review to the same length
max_sequence_length = 200
train_data = pad_sequences(train_data, maxlen=max_sequence_length)
test_data = pad_sequences(test_data, maxlen=max_sequence_length)

# Build a log-linear text classification model:
# embedding -> flatten -> single sigmoid unit
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=16, input_length=max_sequence_length),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model: a single sigmoid output goes with binary cross-entropy
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model, holding out 20% of the training data for validation
history = model.fit(
    train_data,
    train_labels,
    batch_size=512,
    epochs=10,
    validation_split=0.2,
)

# Evaluate the model on the test data; the result holds the loss followed by the accuracy
results = model.evaluate(test_data, test_labels)
test_loss, test_accuracy = results
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.3360079228878021
Test accuracy: 0.8542400002479553
