<a href="https://colab.research.google.com/github/jmfn098/transformer/blob/master/2heads_50batch_trueshuffle_dataset1_9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Translation con Redes Transformer

![Red Transformer](https://drive.google.com/uc?export=view&id=1nHAZ3WDsXYZuSPYximLx-hLa2S1zv9Cg)

In [1]:
pip install keras-transformer


Collecting keras-transformer
  Downloading https://files.pythonhosted.org/packages/8a/35/6b079e920fe09a9349028bc2f209447e5636d90e29c5cf060bcc3177803a/keras-transformer-0.39.0.tar.gz
Collecting keras-pos-embd>=0.12.0
  Downloading https://files.pythonhosted.org/packages/d8/d2/1cc072ea68b573f366e08936177a33e237e66fa7d5338289d4bee64696cf/keras-pos-embd-0.12.0.tar.gz
Collecting keras-multi-head>=0.28.0
  Downloading https://files.pythonhosted.org/packages/a5/e6/a83f26b2e1582de237b125f595874d808e40698f31d44d5903e872d5b64d/keras-multi-head-0.28.0.tar.gz
Collecting keras-layer-normalization>=0.15.0
  Downloading https://files.pythonhosted.org/packages/33/e1/0da586d544a0940a56a2f4aa704b7dbd95eaa8ceda6168b48f5ac95e6608/keras-layer-normalization-0.15.0.tar.gz
Collecting keras-position-wise-feed-forward>=0.7.0
  Downloading https://files.pythonhosted.org/packages/58/02/cd3e7e51cf45d3825818384a2f7d9c340b60c9bf55a5682b7318e1c16eab/keras-position-wise-feed-forward-0.7.0.tar.gz
Collecting keras-embed

In [2]:
import numpy as np
import os
os.environ['TF_KERAS'] = '1'
from keras_transformer import get_model, decode
from pickle import load
from google.colab import drive
np.random.seed(0)

In [None]:
#os.listdir('/drive/MyDrive/Colab Notebooks')

In [3]:
# Leer set de entrenamiento
drive.mount('/content/drive')
filename = '/content/drive/My Drive/Colab Notebooks/dataset.npy'

dataset = np.load(filename)
print(dataset[120000,0])
print(dataset[120000,1])

Mounted at /content/drive
litre
litro


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Crear "tokens"
source_tokens = []
for sentence in dataset[:,0]:
  source_tokens.append(sentence.split(' '))
print(source_tokens[120000])

target_tokens = []
for sentence in dataset[:,1]:
  target_tokens.append(sentence.split(' '))
print(target_tokens[120000])

['litre']
['litro']


In [5]:
def build_token_dict(token_list):
  token_dict = {
      '<PAD>': 0,
      '<START>': 1,
      '<END>': 2
  }
  for tokens in token_list:
    for token in tokens:
      if token not in token_dict:
        token_dict[token] = len(token_dict)
  return token_dict

In [11]:
source_token_dict = build_token_dict(source_tokens)
target_token_dict = build_token_dict(target_tokens)
target_token_dict_inv = {v:k for k,v in target_token_dict.items()}

print(len(source_token_dict))
print(len(target_token_dict))
print(len(target_token_dict_inv))

95532
110263
110263


In [9]:
len(source_token_dict)

95532

In [12]:
# Agregar start, end y pad a cada frase del set de entrenamiento
encoder_tokens = [['<START>'] + tokens + ['<END>'] for tokens in source_tokens]
decoder_tokens = [['<START>'] + tokens + ['<END>'] for tokens in target_tokens]
output_tokens = [tokens + ['<END>'] for tokens in target_tokens]

source_max_len = max(map(len, encoder_tokens))
target_max_len = max(map(len, decoder_tokens))

encoder_tokens = [tokens + ['<PAD>']*(source_max_len-len(tokens)) for tokens in encoder_tokens]
decoder_tokens = [tokens + ['<PAD>']*(target_max_len-len(tokens)) for tokens in decoder_tokens]
output_tokens = [tokens + ['<PAD>']*(target_max_len-len(tokens)) for tokens in output_tokens ]

In [13]:
print(encoder_tokens[120000])

['<START>', 'litre', '<END>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']


In [14]:
encoder_input = [list(map(lambda x: source_token_dict[x], tokens)) for tokens in encoder_tokens]
decoder_input = [list(map(lambda x: target_token_dict[x], tokens)) for tokens in decoder_tokens]
output_decoded = [list(map(lambda x: [target_token_dict[x]], tokens)) for tokens in output_tokens]

print(encoder_input[120000])

[1, 53982, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [15]:
# Crear la red transformer
model = get_model(
    token_num = max(len(source_token_dict),len(target_token_dict)),
    embed_dim = 32,
    encoder_num = 2,
    decoder_num = 2,
    head_num = 2,
    hidden_dim = 128,
    dropout_rate = 0.05,
    use_same_embed = False,
)
model.compile('adam', 'sparse_categorical_crossentropy')
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Encoder-Input (InputLayer)      [(None, None)]       0                                            
__________________________________________________________________________________________________
Encoder-Token-Embedding (Embedd [(None, None, 32), ( 3528416     Encoder-Input[0][0]              
__________________________________________________________________________________________________
Encoder-Embedding (TrigPosEmbed (None, None, 32)     0           Encoder-Token-Embedding[0][0]    
__________________________________________________________________________________________________
Encoder-1-MultiHeadSelfAttentio (None, None, 32)     4224        Encoder-Embedding[0][0]          
______________________________________________________________________________________________

In [None]:
#from keras.callbacks import ModelCheckpoint
#from sklearn.model_selection import train_test_split

In [None]:
#    Entrenamiento
#x = np.hstack((np.array(encoder_input), np.array(decoder_input)))
#y = np.array(output_decoded)
#x_trainNP,x_testNP,y_train,y_test=train_test_split(x,y,test_size=0.2)
#x_train=[x_trainNP[:,0:49],x_trainNP[:,49:]]
#X_test=[x_testNP[:,0:49],x_testNP[:,49:]]
x = [np.array(encoder_input), np.array(decoder_input)]
y = np.array(output_decoded)
DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/tp'
output_path = os.path.join(DATA_PATH,'output2')
if not os.path.exists(output_path):
    os.makedirs(output_path)
filename = os.path.join(output_path)
#checkpoint = ModelCheckpoint(filepath=filename, monitor='loss', verbose=1,
#    save_best_only=True, mode='auto', save_freq=1)

history=model.fit(x,y, epochs=15, batch_size=50,validation_split=0.1)

#filename = '/content/drive/My Drive/videos/2020-07-06/translator.h5'
#model.load_weights(filename)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

In [None]:
import matplotlib.pyplot as plt
print(history.history.keys())
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
def translate(sentence):
  sentence_tokens = [tokens + ['<END>', '<PAD>'] for tokens in [sentence.split(' ')]]
  tr_input = [list(map(lambda x: source_token_dict[x], tokens)) for tokens in sentence_tokens][0]
  decoded = decode(
      model, 
      tr_input, 
      start_token = target_token_dict['<START>'],
      end_token = target_token_dict['<END>'],
      pad_token = target_token_dict['<PAD>']
  )

  print('Frase original: {}'.format(sentence))
  print('Traducción: {}'.format(' '.join(map(lambda x: target_token_dict_inv[x], decoded[1:-1]))))

In [None]:
translate('the day is warm and sunny')

Frase original: the day is warm and sunny
Traducción: el dia es caliente y sol


In [None]:
translate('i am tired of college')

Frase original: i am tired of college
Traducción: estoy una sola


In [None]:
a='why this net doesnt work Right'
a=a.lower()
translate(a)
model.save("2heads_50batch_trueshuffle_dataset1.9")

Frase original: why this net doesnt work right
Traducción: por que la vez no el trabajo




INFO:tensorflow:Assets written to: OP/assets


INFO:tensorflow:Assets written to: OP/assets
