<a href="https://colab.research.google.com/github/rb58853/ML-RSI-Images/blob/main/code/consult_model/position_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dense, Reshape, Flatten,LSTM
from keras.models import Model,Sequential
import tensorflow as tf

import numpy as np

class MyModel():
  def __init__(self, data_path, max_input_len = 50, max_output_len = 50):
    self.max_input_len = max_input_len
    self.max_output_len = max_output_len

    self.data = self.get_data(data_path)
    self.model = self.get_model()

  def get_data(self,data_path):
    return {
    'input':[
        'a dog to the left of a cat',
        'a cat to the left of a lion',
        'a bed to the left of a table',
        'a sit to the left of a mantis',
        'a object to the left of another object',
        'a rock to the left of the water',
        'a sea to the left of a shark'
        ],
    'output':[
        ['a cat', 'a dog', '','',''],
        ['a lion', 'a cat', '','',''],
        ['a table', 'a bed', '','',''],
        ['a mantis', 'a sit', '','',''],
        ['another object', 'a object', '','',''],
        ['the water', 'a rock', '','',''],
        ['a shark', 'a sea', '','',''],
        ],
    'types':
     [
        [1,5,1,1,10,1,1,5],
        [1,5,1,1,10,1,1,5],
        [1,5,1,1,10,1,1,5],
        [1,5,1,1,10,1,1,5],
        [1,5,1,1,10,1,5,5],
        [1,5,1,1,10,1,1,5],
        [1,5,1,1,10,1,1,5],
        ]
      }

  def get_train(self):
    # Crea un objeto Tokenizer
    tokenizer = Tokenizer()
    # Ajusta el tokenizer a tus datos
    types =  self.data['types']
    output = self.data ['output']
    input =  self.data ['input']

    tokenizer.fit_on_texts(input+[':'])
    vocab_size = len(tokenizer.word_index) + 1
    sequences_input = tokenizer.texts_to_sequences(input)

    sequences_output = []
    for item in output:
      texts = [text for text in item]
      sequences_output.append(tokenizer.texts_to_sequences(texts))

    # train_x =  [[date_input, date_type] for date_input, date_type in zip(sequences_input, types)]
    train_x =  {'values': types,
                'querys': sequences_input}

    train_y = [item for item in sequences_output]

    for key in train_x:
      for item in train_x[key]:
        while len(item) < self.max_input_len:
          item.append(0)

    for case_ in train_y:
      for item in case_:
        while len(item) < self.max_output_len:
          item.append(0)

    return  {'train_x':train_x, 'train_y':train_y}

  def get_model(self):
    # Variable-length int sequences.
    # query_input = tf.keras.Input(shape=(None,self.max_input_len), dtype='int32')
    # value_input = tf.keras.Input(shape=(None,self.max_input_len), dtype='int32')

    query_input = tf.keras.Input(shape=(self.max_input_len), dtype='int64')
    value_input = tf.keras.Input(shape=(self.max_input_len), dtype='int64')

    # Embedding lookup.
    token_embedding = tf.keras.layers.Embedding(input_dim=self.max_input_len, output_dim=64)
    # Query embeddings of shape [batch_size, Tq, dimension].
    query_embeddings = token_embedding(query_input)
    # Value embeddings of shape [batch_size, Tv, dimension].
    value_embeddings = token_embedding(value_input)

    # CNN layer.
    cnn_layer = tf.keras.layers.Conv1D(
        filters=100,
        kernel_size=4,
        # Use 'same' padding so outputs have the same shape as inputs.
        padding='same')
    # Query encoding of shape [batch_size, Tq, filters].
    query_seq_encoding = cnn_layer(query_embeddings)
    # Value encoding of shape [batch_size, Tv, filters].
    value_seq_encoding = cnn_layer(value_embeddings)

    # Query-value attention of shape [batch_size, Tq, filters].
    query_value_attention_seq = tf.keras.layers.Attention()(
        [query_seq_encoding, value_seq_encoding])

    # Reduce over the sequence axis to produce encodings of shape
    # [batch_size, filters].
    query_encoding = tf.keras.layers.GlobalAveragePooling1D()(
        query_seq_encoding)
    query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(
        query_value_attention_seq)

    # Concatenate query and document encodings to produce a DNN input layer.
    input_layer = tf.keras.layers.Concatenate()(
        [query_encoding, query_value_attention])

    # Add DNN layers, and create Model.
    # ...
    len_output = self.max_output_len * 5

    layer = Dense(64, activation='relu')(input_layer)
    layer = Dense(len_output, activation='relu')(layer)
    output_layer = Reshape((5, self.max_output_len))(layer)

    model = tf.keras.Model(inputs=(query_input, value_input), outputs=output_layer)
    # model = tf.keras.Model(input_layer, outputs=output_layer)
    return model

  def fit(self,epochs, batch_size=32):
    self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    train = self.get_train()
    train_x = [train['train_x']['querys'],train['train_x']['values']]
    train_x = (train['train_x']['querys'],train['train_x']['values'])
    train_y = train['train_y']

    self.model.fit(train_x, train_y, epochs=epochs, batch_size=32)
    # Convierte tus datos en secuencias de números

  def old_model(self):
    total_len_output = self.max_output_len * 5

    model = Sequential()
    model.add(Input(shape=(2, self.max_input_len)))
    model.add(LSTM(256, activation='relu', return_sequences=True, input_shape=(100, self.max_input_len)))
    model.add(LSTM(256, activation='relu', return_sequences=True, input_shape=(100, self.max_input_len)))
    model.add(LSTM(256, activation='relu', return_sequences=True, input_shape=(100, self.max_input_len)))
    model.add(LSTM(256, activation='relu'))
    # model.add(Dense(128, activation='relu'))
    # model.add(Dense(64, activation='relu'))
    model.add(Flatten(input_shape=(0,64)))
    model.add(Dense(total_len_output, activation='relu'))
    model.add(Reshape((5, self.max_output_len)))
    return model



In [44]:
model = MyModel("data_path")
train = model.get_train()
train_x = [train['train_x']['querys'],train['train_x']['values']]
for item in train_x:
  print(item)
model.fit(100)

[[1, 8, 3, 2, 4, 5, 1, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 6, 3, 2, 4, 5, 1, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 10, 3, 2, 4, 5, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 12, 3, 2, 4, 5, 1, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 7, 3, 2, 4, 5, 14, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 15, 3, 2, 4, 5, 2, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 17, 3, 2, 4, 5, 1, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

ValueError: ignored