In [None]:
def lcg_str(seed, length):
    """Yield an endless stream of hex strings from a 32-bit linear congruential generator.

    Every step updates the state as
    ``state = (1664525 * state + 1013904223) % 2**32``, starting from ``seed``.
    The seed itself is never emitted; the first value yielded is the state
    after one update.

    Args:
        seed: Integer initial state.
        length: Minimum width of each yielded string. Shorter values are
            left-padded with zeros; longer values are not truncated.

    Yields:
        The lowercase hexadecimal digits (no ``0x`` prefix) of each state.
    """
    multiplier, increment, modulus = 1664525, 1013904223, 2**32
    state = seed
    while True:
        state = (multiplier * state + increment) % modulus
        hex_digits = hex(state)[2:]  # drop the "0x" prefix
        yield hex_digits.zfill(length)

# Write 1,000,000 pseudo-random hex tokens (zero-padded to 8 characters),
# one per line, to "tokens.txt".
rand = lcg_str(12345, 8)
with open("tokens.txt", "w") as f:
    f.writelines(f"{next(rand)}\n" for i in range(1000000))

# Restart the generator from the same seed and print its first 10 tokens for
# visual inspection. Note: nothing below actually verifies that tokens never
# repeat or that the period is 2^32; only ten values are printed.
rand = lcg_str(12345, 8)
for i in range(10):
    print(next(rand))

05391c44
043c7ad3
8b0c4216
a289127d
e8f7b1b8
1cca49b7
7ef29baa
8c609701
989a046c
c89034db


In [None]:
# Download the cookie dump. -O makes wget write to cookies.txt even when that
# file already exists; without it wget keeps the old cookies.txt and saves the
# new copy as cookies.txt.1, so the commands below would read stale data.
!wget -O cookies.txt https://surf.bilard.app/reports/testing2/cookies.txt
!wc -l cookies.txt
!tail -10 cookies.txt
# Overwrite tokens.txt with the downloaded cookies.
!cp cookies.txt tokens.txt

--2023-04-25 20:44:36--  https://surf.bilard.app/reports/testing2/cookies.txt
Resolving surf.bilard.app (surf.bilard.app)... 104.26.9.28, 104.26.8.28, 172.67.72.181, ...
Connecting to surf.bilard.app (surf.bilard.app)|104.26.9.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: ‘cookies.txt.1’

cookies.txt.1           [ <=>                ] 637.53K  3.67MB/s    in 0.2s    

2023-04-25 20:44:37 (3.67 MB/s) - ‘cookies.txt.1’ saved [652827]

31087 cookies.txt
6a3f7d756779737c7773
777d757c6f7735397776
77777b6a733773677075
79777f396e7766737e37
6b726f7d677567757976
6b397f666a767670766f
7f677f756f7577757f75
7977777d757966737e7d
6b7379796e6f66727e7e
77777e397f7577773976


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape, Flatten, Input, Activation
from tensorflow.keras.models import Sequential, Model
from sklearn.model_selection import train_test_split
import tensorflow.keras.backend as K
from collections import Counter

def one_hot_encode(cookies, max_cookie_length, unique_chars):
    """Convert integer-encoded cookies into a one-hot array.

    Args:
        cookies: Sequence of cookies, each a sequence of integer character
            indices.
        max_cookie_length: Size of the position axis of the result.
        unique_chars: Vocabulary; only its length is used (size of the last
            axis).

    Returns:
        A float array of shape
        ``(len(cookies), max_cookie_length, len(unique_chars))`` holding 1 at
        ``[i, j, cookies[i][j]]`` and 0 everywhere else.
    """
    encoded = np.zeros((len(cookies), max_cookie_length, len(unique_chars)))
    # Lazily enumerate every (cookie, position, character index) triple.
    hot_positions = (
        (row, column, char_index)
        for row, cookie in enumerate(cookies)
        for column, char_index in enumerate(cookie)
    )
    for position in hot_positions:
        encoded[position] = 1
    return encoded

def create_custom_activation(char_frequencies):
    """Build the generator's output activation: a frequency-weighted softmax per position.

    The returned function reads the notebook globals ``max_cookie_length``,
    ``unique_chars`` and ``index_to_char`` each time it is called.

    Args:
        char_frequencies: Mapping from character to its weight. It must contain
            an entry for every character in ``unique_chars``.

    Returns:
        A function that takes a tensor reshapeable to
        ``(-1, max_cookie_length, len(unique_chars))`` and returns a tensor of
        shape ``(-1, max_cookie_length * len(unique_chars))`` in which the
        values belonging to each position sum to 1.
    """
    def custom_activation(x):
        # View the flat logits as one row of character scores per position.
        x = K.reshape(x, (-1, max_cookie_length, len(unique_chars)))
        # Shift by the per-position maximum before exponentiating so large
        # logits cannot overflow; the shift cancels in the normalisation below.
        x = K.exp(x - K.max(x, axis=-1, keepdims=True))
        repeated_frequencies = K.constant([char_frequencies[index_to_char[i]] for i in range(len(unique_chars))])
        repeated_frequencies = K.reshape(repeated_frequencies, (1, 1, -1))
        x = x * repeated_frequencies  # Weight by character frequencies
        # Normalise over the character axis so that every position carries its
        # own probability distribution, matching the one-hot rows of the real
        # data. Normalising over the whole flattened vector would instead make
        # all positions together sum to 1.
        x = x / K.sum(x, axis=-1, keepdims=True)
        return K.reshape(x, (-1, max_cookie_length * len(unique_chars)))

    return custom_activation



def build_generator(char_frequencies):
    """Assemble the generator network.

    The model maps a 100-dimensional input through three
    Dense -> LeakyReLU(0.2) -> BatchNormalization(momentum=0.8) stages
    (256, 512 and 1024 units), then a Dense layer with
    ``max_cookie_length * len(unique_chars)`` units, the custom activation, and
    a reshape to ``(max_cookie_length, len(unique_chars))``.

    Reads the notebook globals ``max_cookie_length`` and ``unique_chars``.

    Args:
        char_frequencies: Passed to ``create_custom_activation``.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    output_activation = create_custom_activation(char_frequencies)
    cookie_shape = (max_cookie_length, len(unique_chars))

    model = Sequential()
    for stage, units in enumerate((256, 512, 1024)):
        if stage == 0:
            # Only the first layer declares the 100-dimensional input.
            model.add(Dense(units, input_dim=100))
        else:
            model.add(Dense(units))
        model.add(LeakyReLU(0.2))
        model.add(BatchNormalization(momentum=0.8))

    model.add(Dense(np.prod(cookie_shape)))
    model.add(Activation(output_activation))
    model.add(Reshape(cookie_shape))
    return model

def build_discriminator():
    """Assemble the discriminator network.

    The model flattens an input of shape
    ``(max_cookie_length, len(unique_chars))`` (both read from notebook
    globals) and applies Dense(512) and Dense(256), each followed by
    LeakyReLU(0.2), ending in a single sigmoid unit.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    cookie_shape = (max_cookie_length, len(unique_chars))
    layer_stack = [
        Flatten(input_shape=cookie_shape),
        Dense(512),
        LeakyReLU(0.2),
        Dense(256),
        LeakyReLU(0.2),
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    return model


def evaluate_generator_on_validation_set(generator, val_cookies_one_hot):
    """Measure how many generated cookies exactly match a validation cookie.

    One cookie is generated per validation cookie, from 100-dimensional
    standard-normal noise. Generated and validation cookies are both decoded
    to strings with the notebook global ``index_to_char`` by taking the argmax
    over the last axis.

    Args:
        generator: Object whose ``predict`` method maps a noise array to an
            array with one row of character scores per cookie position.
        val_cookies_one_hot: One-hot encoded validation cookies.

    Returns:
        The fraction of generated cookies found in the validation set, or 0.0
        when the validation set is empty.
    """
    sample_count = len(val_cookies_one_hot)
    if sample_count == 0:
        # Nothing to compare against; also avoids dividing by zero below.
        return 0.0

    # Generate synthetic cookies
    noise = np.random.normal(0, 1, (sample_count, 100))
    generated_indices = np.argmax(generator.predict(noise), axis=-1)
    generated_cookies = ["".join(index_to_char[index] for index in cookie) for cookie in generated_indices]

    # A set makes each membership test below constant-time instead of a scan
    # over the whole validation list.
    val_cookies = {
        "".join(index_to_char[index] for index in np.argmax(one_hot_cookie, axis=-1))
        for one_hot_cookie in val_cookies_one_hot
    }

    valid_count = 0
    for generated_cookie in generated_cookies:
        if generated_cookie in val_cookies:
            print(f"Valid token found! : {generated_cookie}")
            valid_count += 1

    return valid_count / len(generated_cookies)

# Load the cookies from the text file, one per line. Blank lines are skipped:
# an empty cookie would consist purely of padding.
with open('tokens.txt', 'r') as file:
    stripped_lines = (line.strip() for line in file)
    cookies = [cookie for cookie in stripped_lines if cookie]

if not cookies:
    raise ValueError("tokens.txt contains no cookies")

# Right-pad every cookie with spaces to the length of the longest one.
max_cookie_length = max(len(cookie) for cookie in cookies)
padded_strings = [cookie.ljust(max_cookie_length, ' ') for cookie in cookies]

# Prepare the data: encode the cookies as sequences of integers.
# The vocabulary is built from the padded strings so that the padding
# character has an index whenever padding is actually applied. When all
# cookies already have the same length, this equals the unpadded vocabulary.
unique_chars = sorted(set("".join(padded_strings)))
char_to_index = {char: index for index, char in enumerate(unique_chars)}
index_to_char = {index: char for char, index in char_to_index.items()}

padded_cookies = [[char_to_index[char] for char in cookie] for cookie in padded_strings]

# Split the cookies into training (60%), test (20%) and validation (20%) sets
train_cookies, test_cookies = train_test_split(padded_cookies, test_size=0.4, random_state=42)
test_cookies, val_cookies = train_test_split(test_cookies, test_size=0.5, random_state=42)

# Convert every split to one-hot encoding
train_cookies_one_hot = one_hot_encode(train_cookies, max_cookie_length, unique_chars)
test_cookies_one_hot = one_hot_encode(test_cookies, max_cookie_length, unique_chars)
val_cookies_one_hot = one_hot_encode(val_cookies, max_cookie_length, unique_chars)


# Compute the relative frequency of every character. Counting over the padded
# strings guarantees an entry for each character in unique_chars.
all_chars = "".join(padded_strings)
char_counts = Counter(all_chars)
total_chars = len(all_chars)

char_frequencies = {char: count / total_chars for char, count in char_counts.items()}

# Build and compile the discriminator. It is compiled here, before its
# `trainable` flag is switched off for the combined model below.
discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(0.0002, 0.5), metrics=['accuracy'])

# Build the generator using the character frequencies. It has to exist before
# the combined model is assembled, otherwise `generator(z)` raises NameError
# on a fresh kernel.
generator = build_generator(char_frequencies)

# Create the combined GAN: noise -> generator -> discriminator. The
# discriminator is marked non-trainable before the combined model is compiled.
z = Input(shape=(100,))
cookie = generator(z)
discriminator.trainable = False
validity = discriminator(cookie)
combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(0.0002, 0.5))

# Training parameters
batch_size = 32
half_batch = batch_size // 2
epochs = 2000

# Training loop. Each "epoch" is one discriminator update on a real and a
# generated half-batch, followed by one generator update on a full batch.
for epoch in range(epochs):
    # --- Discriminator step ---
    batch_indices = np.random.randint(0, len(train_cookies_one_hot), half_batch)
    real_cookies = train_cookies_one_hot[batch_indices]
    noise = np.random.normal(0, 1, (half_batch, 100))
    generated_cookies = generator.predict(noise)

    real_labels = np.ones((half_batch, 1))
    fake_labels = np.zeros((half_batch, 1))

    d_loss_real = discriminator.train_on_batch(real_cookies, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_cookies, fake_labels)
    # Average the [loss, accuracy] pairs of the two half-batches.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # --- Generator step: try to make the discriminator answer "real" ---
    noise = np.random.normal(0, 1, (batch_size, 100))
    g_loss = combined.train_on_batch(noise, np.ones((batch_size, 1)))

    # Report and evaluate only every 100th iteration.
    if epoch % 100 != 0:
        continue

    print(f"Epoch: {epoch}, D loss: {d_loss[0]}, D accuracy: {100 * d_loss[1]}, G loss: {g_loss}")

    # Save a few generated cookies for inspection
    sample_scores = generator.predict(np.random.normal(0, 1, (5, 100)))
    sample_indices = np.argmax(sample_scores, axis=-1)
    generated_cookies_sample = ["".join([index_to_char[index] for index in cookie]) for cookie in sample_indices]

    with open(f"generated_cookies_epoch_{epoch}.txt", "w") as file:
        for cookie in generated_cookies_sample:
            file.write(cookie + "\n")

    # Evaluate the generator on the validation set
    success_metric = evaluate_generator_on_validation_set(generator, val_cookies_one_hot)
    print(f"Éxito en el conjunto de validación (época {epoch}): {success_metric}")





Epoch: 0, D loss: 0.7097789645195007, D accuracy: 25.0, G loss: 0.688662052154541
Éxito en el conjunto de validación (época 0): 0.0
Epoch: 100, D loss: 0.07308631902560592, D accuracy: 100.0, G loss: 2.102672815322876
Éxito en el conjunto de validación (época 100): 0.0
Epoch: 200, D loss: 0.0062886158702895045, D accuracy: 100.0, G loss: 4.524209022521973
Éxito en el conjunto de validación (época 200): 0.0
Epoch: 300, D loss: 0.001787187298759818, D accuracy: 100.0, G loss: 5.729187965393066
Éxito en el conjunto de validación (época 300): 0.0
Epoch: 400, D loss: 0.0008883282789611258, D accuracy: 100.0, G loss: 6.3819804191589355
Éxito en el conjunto de validación (época 400): 0.0
Epoch: 500, D loss: 0.000574221347051207, D accuracy: 100.0, G loss: 6.971172332763672
Éxito en el conjunto de validación (época 500): 0.0
Epoch: 600, D loss: 0.00039595289126737043, D accuracy: 100.0, G loss: 7.240387439727783
Éxito en el conjunto de validación (época 600): 0.0
Epoch: 700, D loss: 0.00024639

In [None]:
def evaluate_large_sample(generator, val_cookies_one_hot, num_generated_cookies):
    """Count generated cookies that exactly match a validation cookie.

    Cookies are decoded to strings with the notebook global ``index_to_char``
    by taking the argmax over the last axis. Every match is printed.

    Args:
        generator: Object whose ``predict`` method maps a noise array to an
            array with one row of character scores per cookie position.
        val_cookies_one_hot: One-hot encoded validation cookies.
        num_generated_cookies: Number of cookies to generate from
            100-dimensional standard-normal noise.

    Returns:
        The number of generated cookies present in the validation set.
    """
    def decode(index_rows):
        # Map each row of character indices back to its string.
        return ["".join(index_to_char[index] for index in row) for row in index_rows]

    # Generate synthetic cookies
    latent_noise = np.random.normal(0, 1, (num_generated_cookies, 100))
    generated_cookies = decode(np.argmax(generator.predict(latent_noise), axis=-1))

    # Validation cookies as a set of strings for fast membership tests.
    val_index_rows = [np.argmax(cookie, axis=-1).tolist() for cookie in val_cookies_one_hot]
    val_cookies = set(decode(val_index_rows))

    valid_count = 0
    for candidate in generated_cookies:
        if candidate not in val_cookies:
            continue
        print(candidate)
        valid_count += 1

    return valid_count



# Generate 1,000,000 cookies with the generator and count how many of them
# match a cookie in the validation set.
num_generated_cookies = 1_000_000
valid_count = evaluate_large_sample(
    generator,
    val_cookies_one_hot,
    num_generated_cookies,
)
print(f"{valid_count} de {num_generated_cookies} valid tokens!")


0 de 1000000 valid tokens!
