In [1]:
# Used to create a generator/discriminiator to verify passwords are being one_hot_encoded correctly
G_OPTIMIZE_LEARNING_RATE = 0.0001
D_OPTIMIZE_LEARNING_RATE = 0.00001

# Model monitor for storoing genreated passwords, not needed, can be taken out in future
FOLDER_PATH = "training_sessions"
TRAINING_SESSION = None
TRAINING_SESSION_PATH = None

In [2]:
import os

# Get the current working directory
current_dir = os.getcwd()
print(f"Current working directory: {current_dir}")

# Check if the last directory in the current path is 'notebooks'
if 'notebooks' in current_dir.split(os.sep):
    # Change the current working directory two levels up
    os.chdir('../../')
    print(f"Changed current working directory to: {os.getcwd()}\n")
else:
    print("Current directory is not inside 'notebooks', no change needed.")


Current working directory: /Users/bambrick/DevCenter/Juypter/PasswordGAN/notebooks/demo
Changed current working directory to: /Users/bambrick/DevCenter/Juypter/PasswordGAN



In [3]:
import re

# Function to preprocess passwords
def preprocess_password(password):
    # Remove non-numeric characters
    password = re.sub(r'\D', '', password)
    # Ensure password is 12 characters long
    return password if len(password) == 12 else None


In [4]:
def one_hot_encode(password):
    # Define the one-hot encoding for each digit
    encoding = []
    for char in password:
        one_hot = [0]*10
        one_hot[int(char)] = 1
        encoding.extend(one_hot)
    return encoding

In [5]:
import tensorflow as tf

class GeneratorLSTMv2(tf.keras.Model):
    def __init__(self, noise_dim=100, **kwargs):
        super(GeneratorLSTMv2, self).__init__(**kwargs)
        
        # Initial dense layer
        self.noise_dim = noise_dim
        self.dense_1 = tf.keras.layers.Dense(256, activation='relu', input_dim=noise_dim,)
        self.batch_norm_1 = tf.keras.layers.BatchNormalization()
        self.dropout_1 = tf.keras.layers.Dropout(0.5)
        
        # LSTM layers for sequence data
        self.reshape_1 = tf.keras.layers.Reshape((1, 256))  # Reshape input for LSTM
        self.lstm_1 = tf.keras.layers.LSTM(128, return_sequences=True)
        self.lstm_2 = tf.keras.layers.LSTM(128)
        self.batch_norm_2 = tf.keras.layers.BatchNormalization()
        self.dropout_2 = tf.keras.layers.Dropout(0.5)
        
        # Output layers
        self.dense_out = tf.keras.layers.Dense(12 * 10, activation='softmax')  # Adjusted to 12 characters, 10 classes each (digits 0-9)
        self.reshape_out = tf.keras.layers.Reshape((12, 10))  # Reshape for output
    
    def call(self, inputs, training=False):
        x = self.dense_1(inputs)
        x = self.batch_norm_1(x, training=training)
        x = self.dropout_1(x, training=training)
        
        x = self.reshape_1(x)
        x = self.lstm_1(x)
        x = self.lstm_2(x)
        x = self.batch_norm_2(x, training=training)
        x = self.dropout_2(x, training=training)
        
        x = self.dense_out(x)
        x = self.reshape_out(x)
        return x

    def get_config(self):
        config = super().get_config()
        config.update({
            'noise_dim': self.noise_dim
        })
        return config
    
    @classmethod
    def from_config(cls, config):
        return cls(**config)

In [6]:
import tensorflow as tf

class DiscriminatorLSTMv2(tf.keras.Model):

    def __init__(self, input_shape=(12, 10), **kwargs):
        super(DiscriminatorLSTMv2, self).__init__(**kwargs)
        self._input_shape = input_shape  # Save input_shape as an attribute    
        self.flatten = tf.keras.layers.Flatten(input_shape=input_shape)
        
        # Layer 1
        self.dense_1 = tf.keras.layers.Dense(256)
        self.leaky_relu_1 = tf.keras.layers.LeakyReLU(alpha=0.2)
        self.dropout_1 = tf.keras.layers.Dropout(0.5)
        self.batch_norm_1 = tf.keras.layers.BatchNormalization()
        
        # Layer 2
        self.dense_2 = tf.keras.layers.Dense(128)
        self.leaky_relu_2 = tf.keras.layers.LeakyReLU(alpha=0.2)
        self.dropout_2 = tf.keras.layers.Dropout(0.5)
        self.batch_norm_2 = tf.keras.layers.BatchNormalization()
        
        # Output layer
        self.dense_out = tf.keras.layers.Dense(1, activation='sigmoid')
        
    def call(self, inputs, training=False):
        x = self.flatten(inputs)
        
        x = self.dense_1(x)
        x = self.leaky_relu_1(x)
        x = self.dropout_1(x, training=training)
        x = self.batch_norm_1(x, training=training)
        
        x = self.dense_2(x)
        x = self.leaky_relu_2(x)
        x = self.dropout_2(x, training=training)
        x = self.batch_norm_2(x, training=training)
        
        x = self.dense_out(x)
        return x
    
    def get_config(self):
        config = super().get_config()
        config.update({
            'input_shape': self._input_shape  # Directly use the input_shape passed during initialization
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)


In [7]:
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.losses import BinaryCrossentropy

def configure_losses_optimizers():
    gen_opt = Adam(learning_rate=G_OPTIMIZE_LEARNING_RATE)
    gen_loss = BinaryCrossentropy()
    
    dis_opt = Adam(learning_rate=D_OPTIMIZE_LEARNING_RATE)
    dis_loss = BinaryCrossentropy()

    return gen_opt, gen_loss, dis_opt, dis_loss

In [8]:
class RanPassGAN(tf.keras.Model): #(Model):
    def __init__(self, generator, discriminator, *args, **kwargs):
        # Pass through args and kwargs to base class 
        super().__init__(*args, **kwargs)
        
        # Create attributes for gen and disc
        self.generator = generator 
        self.discriminator = discriminator 

    def get_config(self):
        return {
            "generator_config": self.generator.get_config(),
            "discriminator_config": self.discriminator.get_config()
        }

    @classmethod
    def from_config(cls, config):
        # You might need to modify this depending on how your generator and discriminator are initialized
        generator = GeneratorLSTMv2.from_config(config['generator_config'])
        discriminator = DiscriminatorLSTMv2.from_config(config['discriminator_config'])
        
        return cls(generator=generator, discriminator=discriminator)

    def call(self, inputs, training=False):
        generated_passwords = self.generator(inputs, training=training)
        return generated_passwords
        
    def compile(self, g_opt, d_opt, g_loss, d_loss, *args, **kwargs): 
        # Compile with base class
        super().compile(*args, **kwargs)
        
        # Create attributes for losses and optimizers
        self.g_opt = g_opt
        self.d_opt = d_opt
        self.g_loss = g_loss
        self.d_loss = d_loss 
        
    def train_step(self, batch):
        # Get the data 
        real_passwords = batch
        batch_size = tf.shape(real_passwords)[0]  # Dynamically get the batch size

        # Generate noise for the generator
        noise = tf.random.normal([batch_size, 100])

        # Generate fake passwords using the generator
        fake_passwords = self.generator(noise, training=True)
        
        # Train the discriminator
        with tf.GradientTape() as d_tape: 
            # Pass the real and fake passwords to the discriminator model
            yhat_real = self.discriminator(real_passwords, training=True) 
            yhat_fake = self.discriminator(fake_passwords, training=True)
            yhat_realfake = tf.concat([yhat_real, yhat_fake], axis=0)

            # Create labels for real and fakes passwords
            y_realfake = tf.concat([tf.zeros_like(yhat_real), tf.ones_like(yhat_fake)], axis=0)
            
            # Calculate loss - BINARYCROSS 
            total_d_loss = self.d_loss(y_realfake, yhat_realfake)
            
        # Apply backpropagation - nn learn 
        dgrad = d_tape.gradient(total_d_loss, self.discriminator.trainable_variables) 
        self.d_opt.apply_gradients(zip(dgrad, self.discriminator.trainable_variables))
        
        # Train the generator 
        with tf.GradientTape() as g_tape: 
            # Generate some new passwords
            gen_passwords = self.generator(tf.random.normal((128, 100)), training=True)
                                        
            # Create the predicted labels
            predicted_labels = self.discriminator(gen_passwords, training=False)
                                        
            # Calculate loss - trick to training to fake out the discriminator
            total_g_loss = self.g_loss(tf.zeros_like(predicted_labels), predicted_labels) 
            
        # Apply backprop
        ggrad = g_tape.gradient(total_g_loss, self.generator.trainable_variables)
        self.g_opt.apply_gradients(zip(ggrad, self.generator.trainable_variables))
        
        return {"d_loss":total_d_loss, "g_loss":total_g_loss}

In [9]:
import os

import tensorflow as tf
import numpy as np

class ModelMonitor(tf.keras.callbacks.Callback):
    def __init__(self, num_passwords=10, latent_dim=100):
        self.num_passwords = num_passwords
        self.latent_dim = latent_dim
    
    def on_epoch_end(self, epoch, logs=None):
        random_latent_vectors = tf.random.uniform((self.num_passwords, self.latent_dim))
        generated_outputs = self.model.generator(random_latent_vectors)

        # Convert the generated softmax outputs into digits
        generated_passwords = [self.softmax_to_digit(output) for output in generated_outputs]

        results_file_path = os.path.join(TRAINING_SESSION_PATH, 'epoch_training_results.md')

        # Save to a file
        with open(results_file_path, 'a') as file:
            file.write(f"\n\n## Epoch {epoch} Results\n")
            for idx, password in enumerate(generated_passwords):
                password_str = ''.join(map(str, password))
                file.write(f"- Generated password {idx}: {password_str}\n")
                print(f"Epoch {epoch}: Generated password {idx}: {password_str}")

    def softmax_to_digit(self, softmax_output):
        return np.argmax(softmax_output, axis=-1)


In [10]:
import numpy as np

def softmax_to_digit(softmax_output):
    return np.argmax(softmax_output, axis=-1)

In [20]:
import os
import tensorflow as tf
from tensorflow.keras.models import load_model


# Number of passwords to generate
num_passwords = 100

# Noise dimension that your generator model expects
latent_dim = 100  # Example value

# Get the loss optimizers
gen_opt, gen_loss, dis_opt, dis_loss = configure_losses_optimizers()

# Load model with custom objects
custom_objects = {
    'RanPassGAN': RanPassGAN,
    'GeneratorLSTMv2': GeneratorLSTMv2,
    'DiscriminatorLSTMv2': DiscriminatorLSTMv2
}

# Load the generator model
model_path = 'resources/models/numeric_only/metal_12digit_pattern4_full_dataset_8_epochs/models/best_model_epoch_0008'
loaded_model = load_model(model_path, custom_objects=custom_objects, compile=False)
loaded_model.compile(gen_opt, dis_opt, gen_loss, dis_loss)

random_latent_vectors = tf.random.uniform((num_passwords, latent_dim))
generated_outputs = loaded_model.generator(random_latent_vectors)

# Convert the generated softmax outputs into digits
generated_passwords = [softmax_to_digit(output) for output in generated_outputs]

password_set = set()
for password in generated_passwords:
    password_str = ''.join(map(str, password))
    
# Print the generated passwords
for i, password in enumerate(generated_passwords):
    password_str = ''.join(map(str, password))
    password_set.add(password_str)

first_ten_passwords = list(password_set)[:10]
for i, password in enumerate(first_ten_passwords):
    print(f'Password {i + 1}: {password}')

print(f"\nTotal passwords generated: 100")
print(f"Total nunique passwords generated: {len(password_set)}")


Password 1: 745016455611
Password 2: 742011486611
Password 3: 985601450661
Password 4: 748615157611
Password 5: 788611657691
Password 6: 748605456611
Password 7: 748611657611
Password 8: 788601657611
Password 9: 742030455611
Password 10: 744018450611

Total passwords generated: 100
Total nunique passwords generated: 75


In [12]:
# import csv

# # The path to the CSV file where passwords will be saved
# csv_file_path = 'generated_passwords.csv'

# # Write the passwords to a CSV file
# with open(csv_file_path, mode='w', newline='') as file:
#     writer = csv.writer(file)
    
#     # If you want a header
#     writer.writerow(['Password'])
    
#     # Write passwords to the CSV file
#     for password in password_set:
#         writer.writerow([password])

# print(f"Passwords have been written to {csv_file_path}")


In [13]:
# import re

# pattern = re.compile(r'^.{3}1.{3}2.{3}3$')
# matches = []

# # Check each password in the set
# for password in password_set:
#     if pattern.fullmatch(password):
#         print(f"Password '{password}' matches the pattern.")

# if len(matches) == 0:
#     print("No matches found")

In [14]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Assuming your discriminator model path is correct and the model is properly loaded
discriminator = loaded_model.discriminator

# The real password to test
real_password = '481180120293'

# Preprocess the real password using one-hot encoding
real_password_encoded = one_hot_encode(real_password)

generated_password = ''.join(map(str, softmax_to_digit(generated_outputs[0])))
generated_password_encoded = one_hot_encode(generated_password)

# Convert the encoded passwords to tensors and add a batch dimension
real_password_tensor = tf.convert_to_tensor([real_password_encoded])
generated_password_tensor = tf.convert_to_tensor([generated_password_encoded])

# Use the discriminator to predict the probability of the password being real
real_password_pred = discriminator.predict(real_password_tensor)
generated_password_pred = discriminator.predict(generated_password_tensor)

# Output the predictions
print(f"Real password: {real_password} - {real_password_pred[0][0]}")
print(f"Generated password: {generated_password} - {generated_password_pred[0][0]}")


Real password: 481180120293 - 0.49539658427238464
Generated password: 748611450661 - 0.4849720895290375


2023-12-03 21:41:59.425930: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


In [15]:
def one_hot_decode(encoded_password):
    # Split the encoded password into chunks of 10 (since there are 10 digits)
    digits = [encoded_password[i:i + 10] for i in range(0, len(encoded_password), 10)]
    # Find the index of the 1 in each chunk, which corresponds to the digit
    decoded_password = ''.join(str(digit.index(1)) for digit in digits)
    return decoded_password

In [16]:
import csv
import tensorflow as tf
from tensorflow.keras.models import load_model

# Assuming your discriminator model path is correct and the model is properly loaded
discriminator = loaded_model.discriminator

# Read in a csv file containing passwords, skip the header
csv_file_path = 'notebooks/demo/sample_digit12_patttern4.csv'
sample_passwords = []

with open(csv_file_path, mode='r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip the header
    for row in reader:
        # remove any whitespace
        sample_passwords.append(row[0].strip())

encoded_passwords = []
for password in sample_passwords:
    encoded_passwords.append(one_hot_encode(password))

results_encoded_passwords = []
for encoded_password in encoded_passwords:
    # Convert the encoded passwords to tensors and add a batch dimension
    password_tensor = tf.convert_to_tensor([encoded_password])
    
    # Use the discriminator to predict the probability of the password being real
    password_pred = discriminator.predict(password_tensor)

    # Output the predictions
    decoded_password = one_hot_decode(encoded_password)
    results_encoded_passwords.append((decoded_password, password_pred[0][0]))

print(f"Results: Pattern 4 Passwords:\n")
# limit to 10 results
for result in results_encoded_passwords[:10]:
    print(f"{result[0]} - {result[1]}")

average_probability = sum([result[1] for result in results_encoded_passwords]) / len(results_encoded_passwords)
print(f"\nPattern 4 Average: {average_probability}")

Results: Pattern 4 Passwords:

473142221343 - 0.48973944783210754
244169826023 - 0.49124255776405334
842167722903 - 0.47002866864204407
804127929213 - 0.5008504390716553
863180024843 - 0.4716152250766754
978178023473 - 0.49830108880996704
993112527413 - 0.5067145824432373
398198328603 - 0.48522236943244934
222163226293 - 0.4824694097042084
484119926733 - 0.4944850206375122

Pattern 4 Average: 0.4890901690721512


In [17]:
import csv
import tensorflow as tf
from tensorflow.keras.models import load_model

# Assuming your discriminator model path is correct and the model is properly loaded
discriminator = loaded_model.discriminator

# Read in a csv file containing passwords, skip the header
csv_file_path = 'notebooks/demo/sample_digit12_no_pattern.csv'
sample_passwords = []

with open(csv_file_path, mode='r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip the header
    for row in reader:
        # remove any whitespace
        sample_passwords.append(row[0].strip())

encoded_passwords = []
for password in sample_passwords:
    encoded_passwords.append(one_hot_encode(password))

results_encoded_passwords = []
for encoded_password in encoded_passwords:
    # Convert the encoded passwords to tensors and add a batch dimension
    password_tensor = tf.convert_to_tensor([encoded_password])
    
    # Use the discriminator to predict the probability of the password being real
    password_pred = discriminator.predict(password_tensor)

    # Output the predictions
    decoded_password = one_hot_decode(encoded_password)
    results_encoded_passwords.append((decoded_password, password_pred[0][0]))

print(f"Results: No Pattern Passwords:\n")
# limit to 10 results
for result in results_encoded_passwords[:10]:
    print(f"{result[0]} - {result[1]}")

average_probability = sum([result[1] for result in results_encoded_passwords]) / len(results_encoded_passwords)
print(f"\nNo Pattern: Average: {average_probability}")

Results: No Pattern Passwords:

524438557904 - 0.5114355087280273
130001030904 - 0.5134267210960388
367446784959 - 0.48637089133262634
858332515423 - 0.5034065246582031
791824174636 - 0.4870300889015198
377985523425 - 0.5030651092529297
044808903469 - 0.5099761486053467
817084116821 - 0.48423677682876587
660298373003 - 0.480099081993103
752893529378 - 0.49188610911369324

No Pattern: Average: 0.4956339582800865


In [18]:
print(f"Results: Pattern 4 Passwords:\n")
# limit to 10 results
for result in results_encoded_passwords[:10]:
    print(f"{result[0]} - {result[1]}")

average_probability = sum([result[1] for result in results_encoded_passwords]) / len(results_encoded_passwords)
print(f"\nPattern 4: Average Probability: {average_probability}")

print(f"\n")
print(f"Results: No Pattern Passwords:\n")
# limit to 10 results
for result in results_encoded_passwords[:10]:
    print(f"{result[0]} - {result[1]}")

average_probability = sum([result[1] for result in results_encoded_passwords]) / len(results_encoded_passwords)
print(f"\nNo Pattern: Average Probability: {average_probability}")

Results: Pattern 4 Passwords:

524438557904 - 0.5114355087280273
130001030904 - 0.5134267210960388
367446784959 - 0.48637089133262634
858332515423 - 0.5034065246582031
791824174636 - 0.4870300889015198
377985523425 - 0.5030651092529297
044808903469 - 0.5099761486053467
817084116821 - 0.48423677682876587
660298373003 - 0.480099081993103
752893529378 - 0.49188610911369324

Pattern 4: Average Probability: 0.4956339582800865


Results: No Pattern Passwords:

524438557904 - 0.5114355087280273
130001030904 - 0.5134267210960388
367446784959 - 0.48637089133262634
858332515423 - 0.5034065246582031
791824174636 - 0.4870300889015198
377985523425 - 0.5030651092529297
044808903469 - 0.5099761486053467
817084116821 - 0.48423677682876587
660298373003 - 0.480099081993103
752893529378 - 0.49188610911369324

No Pattern: Average Probability: 0.4956339582800865
