In [None]:
import ast

import numpy as np
import pandas as pd

# Load the raw action log from CSV
df = pd.read_csv(filepath_or_buffer='../../data/accions.csv')

In [None]:
# Collapse the rows of each 'Sessio' into a single row whose 'Accio', 'Tramit',
# 'Usuari' and 'Data' columns each hold the list of that session's values
grouped_df = (
    df.groupby('Sessio')
    .agg(dict.fromkeys(['Accio', 'Tramit', 'Usuari', 'Data'], list))
    .reset_index()
)

# Persist the grouped frame as a new CSV file
grouped_df.to_csv(path_or_buf='../../data/grouped_accions.csv', index=False)

In [None]:
def _last_or_same(value):
    """Return the last element of a non-empty list; any other value is returned unchanged."""
    if not isinstance(value, list):
        return value
    if not value:
        return value
    return value[-1]


# Reduce each session's 'Usuari' list to its final entry
grouped_df['Usuari'] = grouped_df['Usuari'].apply(_last_or_same)

In [None]:
# Save the grouped frame (with the reduced 'Usuari' column) to its own CSV file
grouped_df.to_csv(path_or_buf='../../data/grouped_accions_usuari.csv', index=False)

In [None]:
# Number of sessions whose 'Tramit' list holds exactly one entry
grouped_df['Tramit'].apply(len).eq(1).sum()

In [None]:
# Drop only the sessions that have BOTH a missing 'Usuari' and a single-entry
# 'Tramit' list; i.e. keep a row when it has a user or its list length is not 1
filtered_df = grouped_df.loc[
    grouped_df['Usuari'].notna() | grouped_df['Tramit'].apply(len).ne(1)
]

In [None]:
# Save the filtered sessions to a new CSV file
filtered_df.to_csv(path_or_buf='../../data/grouped_accions_filtered.csv', index=False)

In [None]:
# Reload the filtered sessions from disk (list columns come back as strings)
filtered_df = pd.read_csv(filepath_or_buffer='../../data/grouped_accions_filtered.csv')

In [None]:
# Keep the sessions whose 'Tramit' list does not contain exactly one element.
# The frame was read back from CSV, so every 'Tramit' cell is the text form of
# a list; ast.literal_eval parses that literal without executing arbitrary
# code, which eval() on file contents would allow.
tramit_counts = filtered_df['Tramit'].apply(lambda cell: len(ast.literal_eval(cell)))
final_df = filtered_df[tramit_counts != 1]

In [None]:
# Discard the columns that are no longer needed
final_df = final_df.drop(['Usuari', 'Accio', 'Data', 'Sessio'], axis='columns')

In [None]:
# Save the reduced frame to CSV
final_df.to_csv(path_or_buf='../../data/grouped_accions_final.csv', index=False)

In [None]:
# Reload the reduced frame from disk
final_df = pd.read_csv(filepath_or_buffer='../../data/grouped_accions_final.csv')

#### 


In [None]:
def _last_tramit(cell):
    """Return the final element of the list literal stored in ``cell``.

    ``cell`` is the text form of a list as read back from CSV. It is parsed
    once with ast.literal_eval (the original evaluated it three times with
    eval). If the parsed value is not a non-empty list, ``cell`` is returned
    unchanged.
    """
    parsed = ast.literal_eval(cell)
    if isinstance(parsed, list) and parsed:
        return parsed[-1]
    return cell


# New column holding the last element of each 'Tramit' sequence
final_df['Last_Tramit'] = final_df['Tramit'].apply(_last_tramit)

In [None]:
def _all_but_last(cell):
    """Return the text form of the list literal in ``cell`` without its last element.

    ``cell`` is parsed once with ast.literal_eval (the original evaluated it
    three times with eval). If the parsed value is not a non-empty list,
    ``cell`` is returned unchanged.
    """
    parsed = ast.literal_eval(cell)
    if isinstance(parsed, list) and parsed:
        return str(parsed[:-1])
    return cell


# Replace each 'Tramit' sequence by itself minus its last element.
# NOTE: this overwrites the column in place, so running the cell again strips
# one more element from every sequence.
final_df['Tramit'] = final_df['Tramit'].apply(_all_but_last)

In [None]:
# Save the sequence / last-element frame to CSV
final_df.to_csv(path_or_buf='../../data/final_dataset.csv', index=False)

In [None]:
# Read the saved dataset back into its own frame
train_final_df = pd.read_csv(filepath_or_buffer='../../data/final_dataset.csv')

In [None]:
# Inputs are the 'Tramit' column, targets the 'Last_Tramit' column
X_train, y_train = train_final_df['Tramit'], train_final_df['Last_Tramit']



In [None]:
# Draw a 50-row sample of the dataset (fixed random_state for repeatability)
sample_df = train_final_df.sample(
    n=50,
    random_state=42,
)

# Store the sample next to the other data files
sample_df.to_csv(path_or_buf='../../data/sample_train_final_df.csv', index=False)


In [None]:
# Read sample_tramits.csv and remove its 'Vigent' column in one chain
sample_tramits_df = (
    pd.read_csv(filepath_or_buffer='../../data/sample_tramits.csv')
    .drop('Vigent', axis='columns')
)

# Show the first rows as a quick check of the result
print(sample_tramits_df.head())

# Save the trimmed frame under a new file name
sample_tramits_df.to_csv(path_or_buf='../../data/sample_tramits_df.csv', index=False)

In [None]:

# Load the tramit catalogue
tramits_df = pd.read_csv('../../data/tramits.csv')

# Give every row a consecutive integer index starting at 0. The length is
# taken from the frame itself: a hard-coded range(502) raises as soon as the
# file does not contain exactly 502 rows.
tramits_df['Sequence'] = range(len(tramits_df))

# NOTE: this writes back to the same file that was just read.
tramits_df.to_csv('../../data/tramits.csv', index=False)

In [None]:
# Lookup table from catalogue 'Id' to its integer 'Sequence' index
id_to_sequence = dict(zip(tramits_df['Id'], tramits_df['Sequence']))

# Each 'Tramit' cell is the text form of a list; parse it with
# ast.literal_eval (safe for literals, unlike eval) and map every id.
# Ids missing from the lookup table are kept unchanged.
final_df['Tramit'] = final_df['Tramit'].apply(
    lambda x: [id_to_sequence.get(tramit, tramit) for tramit in ast.literal_eval(x)]
)
final_df['Last_Tramit'] = final_df['Last_Tramit'].apply(lambda x: id_to_sequence.get(x, x))

final_df.to_csv('../../data/final_dataset_mapped.csv', index=False)



In [None]:

final_df = pd.read_csv('../../data/final_dataset_mapped.csv')

# After the CSV round trip every 'Tramit' cell is a string such as "[1, 2, 3]",
# so len() on the raw cell would count characters. Parse the literal first so
# the filter keeps sequences of at most 30 elements.
tramit_lengths = final_df['Tramit'].apply(lambda cell: len(ast.literal_eval(cell)))
final_df = final_df[tramit_lengths <= 30]



In [None]:
# Longest 'Tramit' sequence, measured in elements. Cells that are still the
# text form of a list (as read from CSV) are parsed first; otherwise len()
# would report the number of characters.
final_df['Tramit'].apply(
    lambda cell: len(ast.literal_eval(cell)) if isinstance(cell, str) else len(cell)
).max()

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense, LayerNormalization, Dropout
from tensorflow.keras.models import Model
import numpy as np

# Model hyperparameters
max_seq_len = 30  # Maximum length of a tramit sequence
vocab_size = 502  # Total number of possible (encoded) tramits
embed_dim = 64  # Embedding dimension
num_heads = 4  # Number of attention heads
ff_dim = 128  # Width of the feed-forward network inside each Transformer block
num_transformer_blocks = 2  # Number of Transformer blocks
dropout_rate = 0.1  # Dropout rate

# Synthetic example data (X_train holds the sequences, y_train the target tramit).
# NOTE: these assignments overwrite any X_train / y_train defined in earlier
# cells; replace them with the real data for actual training.
# A seeded Generator makes the example reproducible after a kernel restart;
# values are drawn from [1, vocab_size), exactly as before.
num_samples = 5000
synthetic_rng = np.random.default_rng(42)
X_train = synthetic_rng.integers(1, vocab_size, size=(num_samples, max_seq_len))
y_train = synthetic_rng.integers(1, vocab_size, size=(num_samples,))

# --------------- Building the Transformer model -------------------

class TransformerBlock(tf.keras.layers.Layer):
    """Self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization.

    Args:
        embed_dim: Size of the last axis of the input; also used as the
            attention ``key_dim`` and as the output width of the feed-forward
            network.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden layer of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            Accepting them lets Keras rebuild the layer from a saved config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim)]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        ``training`` is forwarded to the dropout layers; it defaults to None
        so the layer can also be called without the argument.
        """
        # Self-attention: the same tensor is used as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so a saved model can recreate the layer."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config


class TokenAndPositionEmbedding(tf.keras.layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows of the position-embedding table.
        vocab_size: Number of rows of the token-embedding table.
        embed_dim: Output dimension of both embeddings.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            Accepting them lets Keras rebuild the layer from a saved config.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed the tokens in ``x`` and add the embedding of each position."""
        # Positions 0..seq_len-1 are derived from the size of the last axis of x.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so a saved model can recreate the layer."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config


# Build the complete model
def create_model(vocab_size, max_seq_len, embed_dim, num_heads, ff_dim, num_transformer_blocks, dropout_rate):
    """Assemble the Keras model: embeddings, Transformer blocks and a softmax head.

    The input has shape ``(max_seq_len,)``. It goes through a
    TokenAndPositionEmbedding, ``num_transformer_blocks`` TransformerBlock
    layers, global average pooling, and a Dropout / Dense(128, relu) / Dropout
    stack, ending in a Dense softmax layer with ``vocab_size`` units.

    Returns:
        The uncompiled ``Model``.
    """
    sequence_input = Input(shape=(max_seq_len,))
    hidden = TokenAndPositionEmbedding(max_seq_len, vocab_size, embed_dim)(sequence_input)

    # Stack of Transformer blocks
    for _ in range(num_transformer_blocks):
        block = TransformerBlock(embed_dim, num_heads, ff_dim, rate=dropout_rate)
        hidden = block(hidden)

    # Pooling and classification head, applied in order
    head_layers = [
        tf.keras.layers.GlobalAveragePooling1D(),
        Dropout(dropout_rate),
        Dense(128, activation="relu"),
        Dropout(dropout_rate),
        Dense(vocab_size, activation="softmax"),
    ]
    for layer in head_layers:
        hidden = layer(hidden)

    return Model(inputs=sequence_input, outputs=hidden)


# Callback that announces each epoch as it begins
class PrintEpochCallback(tf.keras.callbacks.Callback):
    """Keras callback that prints a message whenever an epoch starts."""

    def on_epoch_begin(self, epoch, logs=None):
        """Print the start message; the number shown is ``epoch + 1``."""
        announcement = f"Epoch {epoch + 1} starting..."
        print(announcement)

# Instantiate the model from the hyperparameters defined above
model = create_model(
    vocab_size=vocab_size,
    max_seq_len=max_seq_len,
    embed_dim=embed_dim,
    num_heads=num_heads,
    ff_dim=ff_dim,
    num_transformer_blocks=num_transformer_blocks,
    dropout_rate=dropout_rate,
)

# Compile with Adam, sparse categorical cross-entropy and accuracy
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Print the model summary
model.summary()

# Train, holding out 20% of the samples for validation
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[PrintEpochCallback()],
)


In [None]:
# --------------------- Run predictions ----------------------
# Example prediction for one new random sequence
new_sequence = np.random.randint(low=1, high=vocab_size, size=(1, max_seq_len))
predictions = model.predict(new_sequence)

# Output: array of probabilities, one per possible tramit
print("Predicciones:", predictions)

# Index of the highest probability (the batch holds a single row)
recommended_tramit_index = np.argmax(predictions)
print("Trámite recomendado:", recommended_tramit_index)
print("Probabilidad del trámite recomendado:", predictions[0, recommended_tramit_index])

In [None]:
# Save the trained model to an HDF5 file and report the file name
model_name = "modelo_transformer_tramites.h5"
model.save(filepath=model_name)
print(f"Modelo guardado como {model_name}")

In [None]:
from tensorflow.keras.models import load_model

# Load the saved model. The architecture contains the custom layers
# TransformerBlock and TokenAndPositionEmbedding, so both classes are passed
# via custom_objects; otherwise Keras cannot resolve them while rebuilding the
# model from the file.
loaded_model = load_model(
    "modelo_transformer_tramites.h5",
    custom_objects={
        "TransformerBlock": TransformerBlock,
        "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
    },
)
print("Modelo cargado correctamente.")

# Use the loaded model for a prediction on a random sequence
new_sequence = np.random.randint(1, vocab_size, (1, max_seq_len))
predictions = loaded_model.predict(new_sequence)

print("Predicciones del modelo cargado:", predictions)
