In [None]:
import pandas as pd
import sqlite3

# Open the SQLite database that holds the labelled domains.
conn = sqlite3.connect('argenconsql.db')
try:
    # Select only the first two columns and rename them; use
    # "SELECT * FROM argencon" instead to load the whole table.
    query = "SELECT field1 AS Dominios, field2 AS Clases FROM argencon"

    # Load the query result into a DataFrame.
    df = pd.read_sql_query(query, conn)
finally:
    # Always release the connection, even when the query raises.
    conn.close()

# Show the first rows of the DataFrame.
print(df.head())

In [None]:
# Number of non-null entries in the 'Dominios' column.
count_dominios = df['Dominios'].notna().sum()
print(f"Cantidad de elementos no nulos en la columna 'Dominios': {count_dominios}")

In [None]:
def asignar_dga(row):
    """Return the row's domain when its class label marks it as DGA.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing
            the keys 'Clases' and 'Dominios'.

    Returns:
        ``row['Dominios']`` when 'dga' occurs, case-insensitively, inside
        ``row['Clases']``; otherwise ``None``. A label that is not a string
        (e.g. ``None`` or NaN) is treated as "no match".
    """
    label = row['Clases']
    # Non-string labels have no .lower(); report no match instead of raising.
    if isinstance(label, str) and 'dga' in label.lower():
        return row['Dominios']
    return None
    
def asignar_normal(row):
    """Return the row's domain when its class label marks it as normal.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing
            the keys 'Clases' and 'Dominios'.

    Returns:
        ``row['Dominios']`` when 'normal' occurs, case-insensitively, inside
        ``row['Clases']``; otherwise ``None``. A label that is not a string
        (e.g. ``None`` or NaN) is treated as "no match".
    """
    label = row['Clases']
    # Non-string labels have no .lower(); report no match instead of raising.
    if isinstance(label, str) and 'normal' in label.lower():
        return row['Dominios']
    return None

# Collect the domains of each class in row order. The helpers return None
# for rows of the other class, and falsy results are skipped.
dga_domains = []
normal_domains = []
for _, row in df.iterrows():
    valuedga = asignar_dga(row)
    valuenormal = asignar_normal(row)
    if valuedga:
        dga_domains.append(valuedga)
    if valuenormal:
        normal_domains.append(valuenormal)

# Store each class compacted at the top of its own column, starting at the
# FIRST row (the previous counters started at 1 and always left row 0 blank),
# and fill the remaining rows with empty strings. Assigning whole columns by
# position also avoids accidentally appending rows to the DataFrame.
df['DGA'] = dga_domains + [''] * (len(df) - len(dga_domains))
df['Normal'] = normal_domains + [''] * (len(df) - len(normal_domains))
          
# Mostrar el DataFrame resultante
#print(df)

In [None]:
#dga_column = df['DGA']
#for i, domain in enumerate(dga_column):
#    if len(domain) < 45:
#        dga_column[i] = domain + ' ' * (45 - len(domain))
#df['DGA_Leng45'] = dga_column

# Right-pad every 'Normal' entry with spaces to a fixed width of 45
# characters; entries that are already 45 characters or longer are kept
# as they are. The result goes into a new column, so df['Normal'] is no
# longer modified as a side effect of element-wise chained assignment.
df['Normal_Leng45'] = df['Normal'].str.ljust(45)

In [None]:
#position = 0  # Posición deseada
#domain0 = df.loc[position, 'DGA_Leng45']
#length = len(domain0)
#print("Longitud en la posición", position, ":", length)

# Sanity check: report the length of the 'Normal_Leng45' entry at one row label.
position = 0  # row label to inspect
domain0 = df.loc[position, 'Normal_Leng45']
length = len(domain0)
print(f"Longitud en la posición {position} : {length}")

In [None]:
#text = ' '.join(df['DGA_Leng45'])
# Build the training corpus: the first 215336 entries of 'Normal_Leng45',
# joined with a single space between consecutive entries.
normal_padded = df['Normal_Leng45'].iloc[:215336]
text = ' '.join(normal_padded)

In [None]:
# Report the total number of characters in the corpus.
print(f"Longitud de text: {len(text)}")

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print(f"Total chars: {len(chars)}")

In [None]:
# Two-way lookup between each vocabulary character and its position in `chars`.
char_indices = {symbol: position for position, symbol in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [None]:
# Slice the corpus into overlapping windows of `maxlen` characters, taken
# every `step` characters; the character right after each window is the
# prediction target for that window.
maxlen = 45
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start : start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print("Number of sequences:", len(sentences))

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np
import random  # Módulo para generar números aleatorios 
import io 

# One-hot encode the training data:
#   x[i, t, k] is True when the t-th character of window i is chars[k];
#   y[i, k]    is True when the character following window i is chars[k].
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [None]:
# Preview only the first few windows; displaying the complete list would
# flood the cell output.
sentences[:5]

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten

# Next-character model: a 1-D convolution over the one-hot windows, max
# pooling to shorten the sequence, an LSTM, and a softmax over the vocabulary.
vocab_size = len(chars)
model = keras.Sequential(
    [
        keras.Input(shape=(maxlen, vocab_size)),
        layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.LSTM(units=128),
        layers.Dense(units=vocab_size, activation='softmax'),
    ]
)

# Compile with the Adam optimizer and categorical cross-entropy loss.
optimizer = Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [None]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    Args:
        preds: Array-like of probabilities.
        temperature: Values below 1 sharpen the distribution and values
            above 1 flatten it. A non-positive temperature selects the most
            probable index deterministically.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype("float64")
    if temperature <= 0:
        # Limit of temperature -> 0 is greedy selection; this also avoids
        # dividing by zero below.
        return np.argmax(preds)
    # log(0) = -inf is the intended result for zero-probability entries
    # (they stay at probability 0), so only that warning is silenced.
    with np.errstate(divide="ignore"):
        scaled = np.log(preds) / temperature
    # Shift by the maximum before exponentiating: without it, a small
    # temperature makes every exp() underflow to 0 and the normalisation
    # produces NaN. The shift cancels out in the normalisation.
    exp_preds = np.exp(scaled - np.max(scaled))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


In [None]:
epochs = 20
batch_size = 128

for epoch in range(epochs):
    # Train one epoch at a time so a text sample can be generated in between.
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print(f"Generating text after epoch: {epoch}")

    # Start of a random maxlen-character window of the corpus, used as seed.
    start_index = random.randint(0, len(text) - maxlen - 1)
    # Other diversity values that can be tried here: 0.2, 1.0, 1.2
    for diversity in (0.5,):
        print("...Diversity:", diversity)

        sentence = text[start_index : start_index + maxlen]
        print(f'...Generating with seed: "{sentence}"')

        generated_chars = []
        for _ in range(400):
            # One-hot encode the current window as a batch of size 1.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, symbol in enumerate(sentence):
                x_pred[0, pos, char_indices[symbol]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            # Slide the window: drop its first character, append the new one.
            sentence = sentence[1:] + next_char
            generated_chars.append(next_char)
        generated = "".join(generated_chars)

        print("...Generated: ", generated)
        print()


In [None]:
# Persist the model held in the variable `model` to disk.
model.save(filepath='modelo_entrenado.h5')