In [None]:
import tensorflow as tf
# Download the Shakespeare corpus (cached by Keras after the first call) and
# load it into memory as one string.
shakspeare_url = 'https://homl.info/shakespeare'
filepath = tf.keras.utils.get_file('shakespeare.txt',shakspeare_url)
# Read with an explicit encoding so the result does not depend on the
# platform's default locale encoding.
with open(filepath, encoding = 'utf-8') as f:
  shakepeare_text = f.read()


Downloading data from https://homl.info/shakespeare
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
print(shakepeare_text[:80])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.


In [None]:
# Character-level tokenizer: every character becomes one token and the text is
# only lower-cased (punctuation is kept, as the vocabulary listing below shows).
text_vec_layer = tf.keras.layers.TextVectorization(split = 'character',
                                                   standardize = 'lower')
# Build the character vocabulary from the whole corpus.
text_vec_layer.adapt([shakepeare_text])
# The layer is called on a one-element batch; [0] selects that single row.
encoded = text_vec_layer([shakepeare_text])[0]
encoded[:10]

<tf.Tensor: shape=(10,), dtype=int64, numpy=array([21,  7, 10,  9,  4,  2, 20,  7,  4,  7])>

In [None]:
shakepeare_text[:10]

'First Citi'

In [None]:
text_vec_layer.get_vocabulary()

['',
 '[UNK]',
 ' ',
 'e',
 't',
 'o',
 'a',
 'i',
 'h',
 's',
 'r',
 'n',
 '\n',
 'l',
 'd',
 'u',
 'm',
 'y',
 'w',
 ',',
 'c',
 'f',
 'g',
 'b',
 'p',
 ':',
 'k',
 'v',
 '.',
 "'",
 ';',
 '?',
 '!',
 '-',
 'j',
 'q',
 'x',
 'z',
 '3',
 '&',
 '$']

In [None]:
# Vocabulary entries 0 ('' padding) and 1 ('[UNK]') are not needed here, so every
# id is shifted down by 2 and the token count is reduced accordingly.
# NOTE(review): this rebinds `encoded`; re-running the cell shifts the ids again.
encoded -=2 # drop the leading padding and unknown tokens
n_tokens = text_vec_layer.vocabulary_size() - 2 # number of distinct characters kept
dataset_size = len(encoded) # total number of characters in the corpus

In [None]:
text_vec_layer.vocabulary_size()

41

In [None]:
dataset_size

1115394

In [None]:
def to_dataset(sequence,length,shuffle = False, seed = None, batch_size = 32):
  """Build a batched dataset of (input, target) windows from a token sequence.

  Every window holds `length + 1` consecutive tokens and consecutive windows
  are shifted by one token. For each window the input is all tokens but the
  last and the target is all tokens but the first, i.e. the input shifted one
  step ahead.

  Args:
    sequence: token ids to slice into windows.
    length: number of tokens in each input/target sequence.
    shuffle: when true, shuffle the windows (buffer of 100,000) before batching.
    seed: seed forwarded to the shuffle.
    batch_size: number of windows per batch.

  Returns:
    A `tf.data.Dataset` yielding `(inputs, targets)` batches, with one batch
    prefetched.
  """
  window_size = length + 1
  windows = (
      tf.data.Dataset.from_tensor_slices(sequence)
      .window(window_size, shift = 1, drop_remainder = True)
      # window() yields nested datasets; batching each one turns it into a tensor
      .flat_map(lambda chunk: chunk.batch(window_size))
  )
  if shuffle:
    windows = windows.shuffle(100_000, seed = seed)

  def split_inputs_targets(batch):
    # inputs drop the last token, targets drop the first
    return batch[:, :-1], batch[:, 1:]

  # prefetch(1): the next batch is prepared while the current one is consumed
  return windows.batch(batch_size).map(split_inputs_targets).prefetch(1)

In [None]:
list(to_dataset(text_vec_layer(["To be"])[0],length = 4))

[(<tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[ 4,  5,  2, 23]])>,
  <tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[ 5,  2, 23,  3]])>)]

In [None]:
length = 100 # number of characters in each input window
tf.random.set_seed(42)
# Split by position: the first 1,000,000 characters for training (shuffled),
# the next 60,000 for validation and the remainder for testing.
train_set =to_dataset(encoded[:1_000_000],length = length,shuffle = True,seed = 42)
valid_set =to_dataset(encoded[1_000_000:1_060_000],length = length)
test_set =to_dataset(encoded[1_060_000:],length = length)

# Building and Training the Char-RNN Model


In [None]:
tf.random.set_seed(42)
# Char-RNN: embed each character id, run a GRU that emits an output at every
# time step, and predict a distribution over the n_tokens characters per step.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim = n_tokens, output_dim= 16),
    tf.keras.layers.GRU(128,return_sequences=True),
    tf.keras.layers.Dense(n_tokens,activation = 'softmax')
])
# Targets are integer character ids, hence the sparse cross-entropy loss.
model.compile(loss = 'sparse_categorical_crossentropy',optimizer = 'nadam',
              metrics= ['accuracy'])
# Keep only the checkpoint with the best validation accuracy seen so far.
model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    'my_shakespeare_model.keras',monitor = 'val_accuracy',save_best_only =True)
# NOTE(review): the recorded run below was interrupted during the first epoch.
history = model.fit(train_set,validation_data = valid_set,epochs = 2,
                    callbacks = [model_ckpt])


Epoch 1/2
   7259/Unknown [1m116s[0m 13ms/step - accuracy: 0.4762 - loss: 1.7762

KeyboardInterrupt: 

In [None]:
# Wrap the char-RNN so it accepts raw strings: vectorise the text, shift the ids
# down by 2 (the same shift applied to `encoded` before training), then run `model`.
shakespeare_model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Lambda(lambda X: X-2),
    model
])

In [None]:
y_proba = shakespeare_model.predict(tf.constant(['To be or not to b']))[0,-1]
y_pred = tf.argmax(y_proba)



In [None]:
log_probs = tf.math.log([[0.5,0.4,0.1]])
tf.random.set_seed(42)
tf.random.categorical(log_probs,num_samples=8)

In [None]:
def next_char(text,temperature = 1):
  """Sample the character that follows `text` from the char-RNN.

  Args:
    text: the prompt string fed to `shakespeare_model`.
    temperature: positive scaling factor applied to the log-probabilities
      before sampling. Values below 1 favour the most likely characters,
      values above 1 flatten the distribution; 1 samples from the model's
      distribution unchanged.

  Returns:
    The sampled character, looked up in `text_vec_layer`'s vocabulary.
  """
  # probabilities predicted for the last time step of the single input
  y_proba = shakespeare_model.predict([text])[0,-1,:]
  # The temperature used to be ignored; dividing the log-probabilities by it is
  # what actually rescales the distribution.
  rescaled_logits = tf.math.log(y_proba) / temperature
  char_id = tf.random.categorical(rescaled_logits[None,:],num_samples = 1)[0,0]
  # +2 undoes the id shift applied inside shakespeare_model
  return text_vec_layer.get_vocabulary()[char_id + 2]


In [None]:
tf.random.set_seed(42)

In [None]:
# print(extend_text(["To be or not t"],temperature = 1))

# Sentiment Analysis

In [None]:
import tensorflow_datasets as tfds

# Load the IMDB reviews as (text, label) pairs: the first 90% of the official
# training split for training, the last 10% for validation, plus the test split.
raw_train_set,raw_valid_set,raw_test_set = tfds.load(
    name = 'imdb_reviews',
    split = ['train[:90%]','train[90%:]','test'],
    as_supervised=True
)
tf.random.set_seed(42)
# NOTE(review): these rebind train_set / valid_set / test_set, which previously
# held the Shakespeare datasets.
train_set = raw_train_set.shuffle(5000,seed =42).batch(32).prefetch(1)
valid_set = raw_valid_set.batch(32).prefetch(1)
test_set = raw_test_set.batch(32).prefetch(1)

In [None]:
# Peek at the first four raw (review, label) pairs.
for review,label in raw_train_set.take(4):
  # the review is decoded from UTF-8 bytes; only its first 200 characters are shown
  print(review.numpy().decode('utf-8')[:200],"...")
  print('Label',label.numpy())

In [None]:
raw_train_set.take(4)

In [None]:
vocab_size = 1000 # maximum vocabulary size passed to the vectorisation layer
# NOTE(review): this rebinds `text_vec_layer`; `next_char` looks the name up at
# call time, so after this cell it would read this word-level vocabulary.
text_vec_layer = tf.keras.layers.TextVectorization(max_tokens = vocab_size)
# Adapt on the review texts only (labels are dropped).
text_vec_layer.adapt(train_set.map(lambda reviews, labels: reviews))

In [None]:
embed_size = 128 # dimensionality of the word embeddings
tf.random.set_seed(42)
# Baseline sentiment classifier: raw text -> token ids -> embeddings -> GRU
# (final output only) -> single sigmoid unit. No masking is configured here.
model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Embedding(vocab_size,embed_size),
    tf.keras.layers.GRU(128),
    tf.keras.layers.Dense(1, activation = 'sigmoid')

])

model.compile(loss = 'binary_crossentropy',
              optimizer = 'nadam',
              metrics = ['accuracy'])
history = model.fit(train_set, validation_data = valid_set,epochs = 2)

In [None]:
#Masking

In [None]:
embed_size = 128
tf.random.set_seed(42)
# Same classifier as before, except the embedding layer is created with
# mask_zero = True so that token id 0 (padding) is masked for downstream layers.
model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Embedding(vocab_size,embed_size,mask_zero = True),
    tf.keras.layers.GRU(128),
    tf.keras.layers.Dense(1, activation = 'sigmoid')

])

model.compile(loss = 'binary_crossentropy',
              optimizer = 'nadam',
              metrics = ['accuracy'])
history = model.fit(train_set, validation_data = valid_set,epochs = 5)

In [None]:
tf.random.set_seed(42)
# Functional-API version of the classifier with the padding mask computed
# explicitly and handed to the GRU.
# NOTE(review): `input` shadows the Python builtin of the same name.
input = tf.keras.layers.Input(shape = [],dtype = tf.string)
token_ids = text_vec_layer(input)

class MyLayer(tf.keras.Layer):
  """Compute a boolean mask that is True wherever the token id is not 0."""
  def call(self,token_ids):
    return tf.math.not_equal(token_ids,0)
mask = MyLayer()(token_ids)
Z = tf.keras.layers.Embedding(vocab_size, embed_size)(token_ids)
Z = tf.keras.layers.GRU(128,dropout = 0.2)(Z, mask = mask)
outputs = tf.keras.layers.Dense(1,activation = 'sigmoid')(Z)
# Bind the result to `model` (it was misspelled `maodel`), so that the
# compile/fit cell that follows trains this model rather than the previous one.
model = tf.keras.Model(inputs = [input],outputs = [outputs])



In [None]:
# Compile and train whatever object the global `model` currently refers to.
# NOTE(review): confirm that `model` is the functional model built in the
# previous cell before relying on these results.
model.compile(loss = 'binary_crossentropy',
              optimizer = 'nadam',
              metrics = ['accuracy'])
history = model.fit(train_set, validation_data = valid_set,epochs = 5)

# Ragged tensor

In [None]:
# Same vectoriser configuration but with ragged = True, adapted on the same
# review texts; compare its output with the non-ragged layer in the next cell.
text_vec_layer_ragged = tf.keras.layers.TextVectorization(
    max_tokens = vocab_size,ragged = True)
text_vec_layer_ragged.adapt(train_set.map(lambda reviews,labels:reviews))
text_vec_layer_ragged(['Hi gghg fpff','thissss Dic'])

In [None]:
text_vec_layer(['hello','thissss Dic'])

In [None]:
import os
import tensorflow_hub as hub
import tensorflow as tf
os.environ['TFHUB_CACHE_DIR'] = 'my_tfhub_cache'
tf.random.set_seed(42)
# model = tf.keras.Sequential([
#     hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4",
#                    trainable=True,dtype = tf.string,input_shape = []),
#     tf.keras.layers.Dense(64,activation = 'relu'),
#     tf.keras.layers.Dense(1,activation = 'sigmoid')
# ])


# model.compile(loss = 'binary_crossentropy',
#               optimizer = 'nadam',
#               metrics = ['accuracy'])
# history = model.fit(train_set, validation_data = valid_set,epochs = 10)

In [None]:
from pathlib import Path

In [None]:
# Download and extract the English-Spanish sentence-pair archive, then load the
# pair file into memory as one string.
url = "https://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip"
path = tf.keras.utils.get_file("spa-eng.zip", origin = url, cache_dir = "datasets",
                               extract = True)
# The corpus contains accented characters, so decode it explicitly as UTF-8
# instead of relying on the platform's default encoding.
text = (Path(path).with_name("spa-eng") / "spa.txt").read_text(encoding = "utf-8")



Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip
[1m2638744/2638744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
text[-100:]

'ica el mismo fraseo una y otra vez hasta que lo puedan tocar correctamente y en el tiempo esperado.\n'

In [None]:
import numpy as np

# Remove the Spanish inverted punctuation marks.
text = text.replace("¡", "").replace("¿", "")
# Each line holds one tab-separated sentence pair.
pairs  = [line.split("\t") for line in text.splitlines()]
# Seeded in-place shuffle so the ordering (and the later split) is repeatable.
np.random.seed(42)
np.random.shuffle(pairs)
# Transpose the list of pairs into two parallel tuples (English, Spanish).
sentences_en, sentences_es = zip(*pairs)

In [None]:
for i in range(3):
    print(sentences_en[i], "=>", sentences_es[i])

How boring! => Qué aburrimiento!
I love sports. => Adoro el deporte.
Would you like to swap jobs? => Te gustaría que intercambiemos los trabajos?


In [None]:
vocab_size = 1000 # passed positionally as the first argument of TextVectorization
max_length = 50 # every vectorised sentence is padded/truncated to this many tokens
# One vectoriser per language.
text_vec_layer_en = tf.keras.layers.TextVectorization(
    vocab_size, output_sequence_length=max_length)
text_vec_layer_es = tf.keras.layers.TextVectorization(
    vocab_size, output_sequence_length = max_length)
text_vec_layer_en.adapt(sentences_en)
# The Spanish side is adapted with the start/end markers included so both
# markers become vocabulary entries.
text_vec_layer_es.adapt([f"startofseq {s} endofseq" for s in sentences_es])


In [None]:
text_vec_layer_en.get_vocabulary()[:10]

['', '[UNK]', 'the', 'i', 'to', 'you', 'tom', 'a', 'is', 'he']

In [None]:
text_vec_layer_es.get_vocabulary()[:100]

['',
 '[UNK]',
 'startofseq',
 'endofseq',
 'de',
 'que',
 'a',
 'no',
 'tom',
 'la',
 'el',
 'en',
 'es',
 'un',
 'me',
 'se',
 'por',
 'lo',
 'una',
 'su',
 'Él',
 'los',
 'está',
 'mi',
 'con',
 'qué',
 'le',
 'ella',
 'te',
 'para',
 'mary',
 'y',
 'las',
 'más',
 'al',
 'yo',
 'tu',
 'estoy',
 'muy',
 'eso',
 'tiene',
 'este',
 'esta',
 'del',
 'él',
 'estaba',
 'quiero',
 'tengo',
 'fue',
 'si',
 'aquí',
 'casa',
 'hacer',
 'como',
 'puedo',
 'todo',
 'algo',
 'esto',
 'hay',
 'tiempo',
 'ha',
 'gusta',
 'son',
 'tan',
 'todos',
 'favor',
 'ir',
 'nada',
 'era',
 'cuando',
 'vez',
 'puede',
 'he',
 'bien',
 'mucho',
 'ellos',
 'nos',
 'sé',
 'ser',
 'solo',
 'nunca',
 'mañana',
 'dos',
 'sus',
 'trabajo',
 'ya',
 'estás',
 'creo',
 'dónde',
 'ahora',
 'cómo',
 'tienes',
 'dijo',
 'dinero',
 'quién',
 'pero',
 'tomás',
 'hablar',
 'están',
 'soy']

In [None]:
# First 100,000 pairs for training, the rest for validation.
# Encoder inputs: raw English sentences.
X_train = tf.constant(sentences_en[:100_000])
X_valid = tf.constant(sentences_en[100_000:])
# Decoder inputs: Spanish sentences prefixed with the start marker.
X_train_dec = tf.constant([f"startofseq {s}" for s in sentences_es[:100_000]])
X_valid_dec = tf.constant([f"startofseq {s}" for s in sentences_es[100_000:]])
# Targets: Spanish token ids followed by the end marker. The space before
# "endofseq" is required: without it the marker is glued to the last word of the
# sentence, the fused word is out of vocabulary, and the model never sees
# "endofseq" as a target.
Y_train = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[:100_000]])
Y_valid = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[100_000:]])

In [None]:
tf.random.set_seed(42)
# Two string inputs: the English sentence for the encoder and the
# (start-marker-prefixed) Spanish sentence for the decoder.
encoder_inputs = tf.keras.layers.Input(shape = [], dtype = tf.string)
decoder_inputs = tf.keras.layers.Input(shape = [], dtype = tf.string)

In [None]:
embed_size = 128 # dimensionality of both embedding spaces
# Vectorise each input with its own language's layer.
encoder_input_ids = text_vec_layer_en(encoder_inputs)
decoder_input_ids = text_vec_layer_es(decoder_inputs)
# Separate embedding tables for the two languages; mask_zero = True marks
# token id 0 (padding) as masked.
encoder_embedding_layer = tf.keras.layers.Embedding(vocab_size, embed_size,
                                                    mask_zero = True)
decoder_embedding_layer = tf.keras.layers.Embedding(vocab_size, embed_size,
                                                    mask_zero = True)
encoder_embeddings = encoder_embedding_layer(encoder_input_ids)
decoder_embeddings = decoder_embedding_layer(decoder_input_ids)

In [None]:
# return_state = True makes the layer return its final states after its output;
# the starred assignment collects those states into the list `encoder_state`.
encoder = tf.keras.layers.LSTM(512, return_state = True)
encoder_outputs, *encoder_state = encoder(encoder_embeddings)


In [None]:
# The decoder emits an output at every time step and starts from the encoder's
# final states.
decoder = tf.keras.layers.LSTM(512, return_sequences = True)
decoder_outputs = decoder(decoder_embeddings, initial_state = encoder_state)

In [None]:
# Softmax over vocab_size units, applied to every decoder output.
output_layer = tf.keras.layers.Dense(vocab_size, activation = "softmax")
Y_proba = output_layer(decoder_outputs)

In [None]:
X_train.shape, X_train_dec.shape

(TensorShape([100000]), TensorShape([100000]))

In [None]:
# Encoder-decoder model: two string inputs, per-step word probabilities out.
model = tf.keras.Model(inputs = [encoder_inputs, decoder_inputs],
                       outputs = [Y_proba])
# Targets are integer token ids, hence the sparse cross-entropy loss.
model.compile(loss = "sparse_categorical_crossentropy" ,optimizer = "nadam",
              metrics = ["accuracy"])
model.fit((X_train, X_train_dec), Y_train, epochs = 10,
          validation_data=((X_valid, X_valid_dec), Y_valid))



Epoch 1/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 23ms/step - accuracy: 0.8924 - loss: 3.0858 - val_accuracy: 0.9401 - val_loss: 1.8863
Epoch 2/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 24ms/step - accuracy: 0.9430 - loss: 1.7559 - val_accuracy: 0.9497 - val_loss: 1.4996
Epoch 3/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 23ms/step - accuracy: 0.9523 - loss: 1.3772 - val_accuracy: 0.9539 - val_loss: 1.3445
Epoch 4/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 27ms/step - accuracy: 0.9582 - loss: 1.1528 - val_accuracy: 0.9556 - val_loss: 1.2859
Epoch 5/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 28ms/step - accuracy: 0.9532 - loss: 0.9857 - val_accuracy: 0.9545 - val_loss: 1.2817
Epoch 6/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 26ms/step - accuracy: 0.9375 - loss: 0.8485 - val_accuracy: 0.9549 - val_loss: 1.3061
Epo

<keras.src.callbacks.history.History at 0x7c6524cbd3c0>

In [None]:
def translate(sentence_en):
    """Greedily translate an English sentence with the global `model`.

    One word is produced per step: the model is run on the English sentence
    plus the translation so far, and the most probable word at the current
    position is appended. Decoding stops when "endofseq" is predicted or after
    `max_length` steps.

    Args:
        sentence_en: the English sentence to translate.

    Returns:
        The predicted words joined by single spaces, with surrounding
        whitespace stripped.
    """
    # Loop-invariant work hoisted out of the decoding loop: the vocabulary list
    # and the encoder input are the same at every step.
    vocabulary = text_vec_layer_es.get_vocabulary()
    X = tf.constant([sentence_en])
    translation = ""
    for word_idx in range(max_length):
        # `translation` is either empty or starts with a space, so the start
        # marker stays a separate token.
        X_dec = tf.constant(["startofseq" + translation])
        y_proba = model.predict((X, X_dec))[0, word_idx]
        predicted_word = vocabulary[tf.argmax(y_proba)]
        if predicted_word == "endofseq":
            break
        translation += " " + predicted_word

    return translation.strip()




In [None]:
# from nltk.translate.bleu_score import sentence_bleu

# reference = [["this", "is", "a", "test"]]
# candidate = ["this", "is", "test"]
# score = sentence_bleu(reference, candidate)
# print("BLEU score:", score)

In [None]:
# import tensorflow as tf
# from nltk.translate.bleu_score import corpus_bleu

# references = [["this is a test".split(), "this is an example".split()]]  # Referans çeviriler
# candidates = ["this is a test".split()]  # Modelin çevirisi

# # BLEU skoru hesaplama
# bleu_score = corpus_bleu(references, candidates)
# print("BLEU score:", bleu_score)

In [None]:
# def translate_sentence(sentence, model, text_vec_layer_en, text_vec_layer_es, max_length=50):
#     # Encoder girişini oluştur
#     input_vector = text_vec_layer_en([sentence])  # İngilizce cümle

#     # Decoder başlangıç girdisini oluştur (start token ile başlar)
#     start_token = "startofseq"
#     decoder_input = text_vec_layer_es([start_token])  # İlk token

#     # Çeviriyi oluşturmak için döngü
#     translated_tokens = []
#     for _ in range(max_length):  # Maksimum uzunluk kadar döngü
#         # Modeli tahmin için çağır
#         predictions = model.predict([input_vector, decoder_input])

#         # En olası kelimeyi seç (örneğin, argmax ile)
#         predicted_token_id = tf.argmax(predictions[0, -1]).numpy()
#         predicted_token = text_vec_layer_es.get_vocabulary()[predicted_token_id]

#         # "endofseq" geldiğinde dur
#         if predicted_token == "endofseq":
#             break

#         # Çıktıya ekle
#         translated_tokens.append(predicted_token)

#         # Decoder girişini güncelle
#         decoder_input = tf.concat([decoder_input, [[predicted_token_id]]], axis=-1)

#     # Çeviriyi birleştir ve döndür
#     return " ".join(translated_tokens)

# # Örnek kullanım
# translated = translate_sentence("Good morning", model, text_vec_layer_en, text_vec_layer_es)
# print("Translated Sentence:", translated)


In [None]:
translate("I like soccer and eating")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 959ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

'me gusta el pan y [UNK]'

# Bidirectional RNNs

In [None]:
tf.random.set_seed(42)
# Bidirectional encoder: an LSTM of 256 units wrapped in Bidirectional, with the
# final states returned alongside the output.
encoder = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(256, return_state = True))

In [None]:
# The starred target collects every state returned by the bidirectional encoder.
encoder_outputs, *encoder_state = encoder(encoder_embeddings)

class ConcatenateStates(tf.keras.layers.Layer):
    """Merge a list of states into two tensors by concatenating alternating entries.

    Even-indexed entries are concatenated into the first result and odd-indexed
    entries into the second, both along the last axis.
    """
    def call(self, encoder_state):
        # assumes the list is ordered [forward h, forward c, backward h, backward c],
        # so this pairs the two h states and the two c states — TODO confirm
        return [tf.concat(encoder_state[::2], axis = -1),
                tf.concat(encoder_state[1::2], axis = -1)]



concatenate_layer = ConcatenateStates()

# Replace the state list by the two concatenated states.
encoder_state = concatenate_layer(encoder_state)

In [None]:
# Rebuild the decoder, output layer and model on top of the bidirectional
# encoder's concatenated states, then train as before.
decoder = tf.keras.layers.LSTM(512, return_sequences = True)
decoder_outputs = decoder(decoder_embeddings, initial_state = encoder_state)
output_layer = tf.keras.layers.Dense(vocab_size, activation = "softmax")
Y_proba = output_layer(decoder_outputs)
model = tf.keras.Model(inputs=[encoder_inputs, decoder_inputs],
                       outputs=[Y_proba])

model.compile(loss = "sparse_categorical_crossentropy", optimizer = "nadam",
              metrics = ['accuracy'])

model.fit((X_train, X_train_dec), Y_train, epochs=10,
          validation_data = ((X_valid, X_valid_dec), Y_valid))

Epoch 1/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 30ms/step - accuracy: 0.9124 - loss: 2.4927 - val_accuracy: 0.9518 - val_loss: 1.4075
Epoch 2/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 26ms/step - accuracy: 0.9543 - loss: 1.3000 - val_accuracy: 0.9567 - val_loss: 1.2248
Epoch 3/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 23ms/step - accuracy: 0.9611 - loss: 1.0551 - val_accuracy: 0.9581 - val_loss: 1.1764
Epoch 4/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 26ms/step - accuracy: 0.9659 - loss: 0.8936 - val_accuracy: 0.9584 - val_loss: 1.1772
Epoch 5/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 23ms/step - accuracy: 0.9659 - loss: 0.7649 - val_accuracy: 0.9579 - val_loss: 1.2020
Epoch 6/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 23ms/step - accuracy: 0.9730 - loss: 0.6591 - val_accuracy: 0.9571 - val_loss: 1.2475
Epoc

<keras.src.callbacks.history.History at 0x7c6524069630>

In [None]:
translate("I like soccer")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 423ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

'me gusta el [UNK]'

# Beam Search


In [None]:
def beam_search(sentence_en, beam_width, verbose = False):
    """Translate an English sentence with beam search over the global `model`.

    The `beam_width` most probable partial translations are kept at every step.
    Each unfinished one is extended with every vocabulary word, the candidates
    are ranked by summed log-probability, and the best `beam_width` survive.
    The search stops once every kept translation ends with "endofseq" or after
    `max_length` positions.

    Args:
        sentence_en: the English sentence to translate.
        beam_width: number of partial translations kept at each step.
        verbose: when true, print the kept translations after every step.

    Returns:
        The highest-scoring translation with the "endofseq" marker removed and
        surrounding whitespace stripped. If `max_length` is reached before all
        beams finish, the best translation found so far is returned.
    """
    # The vocabulary list does not change during the search, so build it once
    # instead of once per candidate word.
    vocabulary = text_vec_layer_es.get_vocabulary()
    X = tf.constant([sentence_en])  # encoder input, identical at every step

    # First position: seed the beam with the beam_width most probable words.
    X_dec = tf.constant(["startofseq"])
    y_proba = model.predict((X, X_dec))[0, 0]
    top_k = tf.math.top_k(y_proba, k=beam_width)
    top_translations = [
        (np.log(word_proba), vocabulary[word_id])
        for word_proba, word_id in zip(top_k.values, top_k.indices)
    ]

    if verbose:
        print("Top first words:", top_translations)

    for idx in range(1, max_length):
        candidates = []
        for log_proba, translation in top_translations:
            if translation.endswith("endofseq"):
                # finished translations are carried over unchanged
                candidates.append((log_proba, translation))
                continue

            # The space after the start marker keeps it a separate token;
            # without it the marker is fused with the first translated word.
            X_dec = tf.constant(["startofseq " + translation])
            y_proba = model.predict((X, X_dec))[0, idx]
            for word_id, word_proba in enumerate(y_proba):
                candidates.append((log_proba + np.log(word_proba),
                                   f"{translation} {vocabulary[word_id]}"))

        top_translations = sorted(candidates, reverse=True)[:beam_width]

        if verbose:
            print("Top translations so far:", top_translations)

        if all(tr.endswith("endofseq") for _, tr in top_translations):
            break

    # Also reached when max_length runs out, so the caller never gets None.
    return top_translations[0][1].replace("endofseq", "").strip()

In [None]:
sentence_en = "I love cats and dogs"
translate(sentence_en)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20

'me [UNK] los gatos y [UNK]'

# Attention Mechanisms

In [None]:
tf.random.set_seed(42)
# Bidirectional encoder that now also returns its output at every time step
# (return_sequences = True) in addition to its final states.
encoder = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(256, return_sequences = True, return_state = True))

In [None]:
# NOTE(review): this class is also defined, identically, in the bidirectional
# section of this notebook; this definition replaces that one.
class ConcatenateStates(tf.keras.layers.Layer):
    """Merge a list of states into two tensors by concatenating alternating entries."""
    def call(self, encoder_state):
        # even-indexed entries form the first tensor, odd-indexed the second
        return [tf.concat(encoder_state[::2], axis = -1),
                tf.concat(encoder_state[1::2], axis = -1)]



# Re-run the encoder, merge its states and rebuild the decoder on top of them.
encoder_outputs, *encoder_state = encoder(encoder_embeddings)
concat = ConcatenateStates()
encoder_state = concat(encoder_state)
decoder = tf.keras.layers.LSTM(512, return_sequences = True)
decoder_outputs = decoder(decoder_embeddings, initial_state = encoder_state)



In [None]:
class AttentionLayer(tf.keras.layers.Layer):
    """Thin wrapper that applies a `tf.keras.layers.Attention` layer to two tensors.

    The wrapped layer is created once in the constructor instead of on every
    call, so the same sub-layer is reused each time this layer is invoked.

    NOTE(review): the parameter names of `call` do not describe what the cell
    below passes in. It calls this layer as `(decoder_outputs, encoder_outputs)`,
    so the first argument (named `encoder_state`) receives the decoder outputs
    and becomes the first element of the list given to `Attention`.

    NOTE(review): the recorded training run of this notebook failed inside
    `Attention.call` with a "required broadcastable shapes" error while both
    inputs carried masks; moving the sub-layer to `__init__` does not address
    that. Check it against the installed Keras version.
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.attention = tf.keras.layers.Attention()

    def call(self, encoder_state, decoder_outputs):
        # the two tensors are forwarded in the order they were received
        return self.attention([encoder_state, decoder_outputs])



attention_layer = AttentionLayer()
# The decoder outputs are passed first and the encoder outputs second; the
# wrapper forwards them to Attention in that same order.
attention_outputs = attention_layer(decoder_outputs, encoder_outputs)
# Word probabilities are now computed from the attention outputs.
output_layer = tf.keras.layers.Dense(vocab_size, activation = "softmax")
Y_proba = output_layer(attention_outputs)



NameError: name 'embedding_dim' is not defined

NameError: name 'embedding_dim' is not defined

In [None]:
# Assemble and train the attention-based translation model.
# NOTE(review): the recorded run below stopped in the first epoch with an
# InvalidArgumentError raised from Attention.call.
model = tf.keras.Model(inputs=[encoder_inputs, decoder_inputs],
                       outputs=[Y_proba])


model.compile(loss="sparse_categorical_crossentropy", optimizer = "nadam",
              metrics=["accuracy"])
model.fit((X_train, X_train_dec), Y_train,epochs=10,
          validation_data=((X_valid, X_valid_dec), Y_valid))

Epoch 1/10




InvalidArgumentError: Exception encountered when calling Attention.call().

[1m{{function_node __wrapped__Sub_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:Sub] name: [0m

Arguments received by Attention.call():
  • inputs=['tf.Tensor(shape=(32, 50, 512), dtype=float32)', 'tf.Tensor(shape=(32, 50, 512), dtype=float32)']
  • mask=['tf.Tensor(shape=(32, 50), dtype=bool)', 'tf.Tensor(shape=(32, 50), dtype=bool)']
  • training=True
  • return_attention_scores=False
  • use_causal_mask=False

In [None]:
translate("I like soccer and also going to the beach")


In [None]:
beam_search("I like soccer and also going to the beach", beam_width=3,
            verbose = True)

In [None]:
decoder_outputs.shape, encoder_outputs.shape, attention_outputs.shape, Y_proba.shape

((None, 50, 512), (None, 50, 512), (None, 50, 512), (None, 50, 1000))

# Hugging Face

In [None]:
import sys

In [None]:
if "google.colab" in sys.modules:
    %pip install -q -U transformers
    %pip install -q -U datasets

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/480.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/179.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/134.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from transformers import pipeline

# No model name is passed, so the pipeline falls back to its default checkpoint
# (see the warning in the recorded output).
classifier = pipeline("sentiment-analysis")
result = classifier("The actor were very convincing.")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
result

[{'label': 'POSITIVE', 'score': 0.9997859597206116}]

In [None]:
classifier(["I am from Syria.", "I am from Iraq"])

[{'label': 'NEGATIVE', 'score': 0.5523207783699036},
 {'label': 'NEGATIVE', 'score': 0.9706069231033325}]

In [None]:
classifier('it was hard but im almost glad it happened but im not')

[{'label': 'NEGATIVE', 'score': 0.9644176363945007}]

In [None]:
# Text-classification pipeline built from an explicitly named checkpoint.
model_name = "huggingface/distilbert-base-uncased-finetuned-mnli"
classifier_mnli = pipeline("text-classification", model=model_name)
# The two sentences are given as one string joined by a literal " [SEP] ".
classifier_mnli("She loves me. [SEP] she loves me not.")

config.json:   0%|          | 0.00/729 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/58.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0


[{'label': 'contradiction', 'score': 0.9790192246437073}]

In [None]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Load the tokenizer and the TensorFlow model for the same checkpoint directly,
# without going through a pipeline.
# NOTE(review): this rebinds `model`, which previously held the translation model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [None]:
# classifier_mnli("She loves me. [SEP] Because she loves me .")

[{'label': 'entailment', 'score': 0.9903890490531921}]

In [None]:
# classifier_mnli("She loves me. [SEP] she loves me a lot.")

[{'label': 'neutral', 'score': 0.6143980026245117}]

In [None]:
# Tokenise two sentence pairs, each written as one string with a literal
# " [SEP] " between the sentences. padding=True pads the shorter row (see the
# trailing zeros and the attention_mask in the output below).
token_ids = tokenizer(["I like soccer. [SEP] We all love soccer!",
                       "Joe lived for a very long time. [SEP] Joe is old."],
                      padding=True, return_tensors="tf")
token_ids

{'input_ids': <tf.Tensor: shape=(2, 15), dtype=int32, numpy=
array([[ 101, 1045, 2066, 4715, 1012,  102, 2057, 2035, 2293, 4715,  999,
         102,    0,    0,    0],
       [ 101, 3533, 2973, 2005, 1037, 2200, 2146, 2051, 1012,  102, 3533,
        2003, 2214, 1012,  102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(2, 15), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}

In [None]:
outputs = model(token_ids)
outputs

TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-2.1123817 ,  1.17868   ,  1.4100995 ],
       [-0.01478346,  1.0962477 , -0.99199575]], dtype=float32)>, hidden_states=None, attentions=None)

In [None]:
Y_probas = tf.keras.activations.softmax(outputs.logits)
Y_probas

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.01619703, 0.43523633, 0.54856664],
       [0.2265597 , 0.6881726 , 0.08526774]], dtype=float32)>

In [None]:
Y_pred = tf.argmax(Y_probas, axis = 1)
Y_pred

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([2, 1])>

In [None]:
# Tiny fine-tuning example on two sentence pairs given as tuples.
sentences= [("Sky is blue", "Sky is red"), ("I love her", "She loves me")]
# NOTE(review): this rebinds X_train, which previously held the translation inputs.
X_train = tokenizer(sentences, padding=True, return_tensors="tf").data
# presumably 0 = contradiction and 2 = neutral in this checkpoint's label map —
# verify against model.config.id2label
y_train = tf.constant([0, 2])
# The model returns raw logits (see the `logits` field in the earlier output),
# hence from_logits=True.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer = "nadam", metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs = 2)


Epoch 1/2
Epoch 2/2
