In [1]:
import itertools
import os
import tensorflow as tf
import os
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import TextVectorization, Input, Embedding, LSTM, Dense, Concatenate, Attention
from tensorflow.keras.models import Model
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt

# Load the parallel corpus: one AAVE sentence and its SAE counterpart per row.
dataframe = pd.read_csv('pro_corpus.csv')
missing_columns = {'AAVE', 'SAE'} - set(dataframe.columns)
if missing_columns:
    # Raise explicitly instead of using `assert`, which is stripped under `python -O`.
    raise ValueError(f"pro_corpus.csv is missing required column(s): {sorted(missing_columns)}")

# A row with either side missing is not a usable sentence pair, and NaN floats
# cannot be mixed into the string tensors built below.
dataframe = dataframe.dropna(subset=['AAVE', 'SAE'])

# Preparing the dataset
# The target side is wrapped in start/end markers so the decoder can be fed the
# sequence shifted by one token. Plain alphabetic markers are used because the
# default TextVectorization standardization strips punctuation such as "[...]".
START_TOKEN = 'startseq'
END_TOKEN = 'endseq'
aave_texts = dataframe['AAVE'].astype(str).str.lower().tolist()
sae_texts = [
    f'{START_TOKEN} {text} {END_TOKEN}'
    for text in dataframe['SAE'].astype(str).str.lower()
]

# Split the data into train and test sets
aave_train, aave_test, sae_train, sae_test = train_test_split(
    aave_texts, sae_texts, test_size=0.2, random_state=21)

# Raw (string) datasets; they are vectorized after batching further below.
raw_train_dataset = tf.data.Dataset.from_tensor_slices({
    'aave': aave_train,
    'sae': sae_train
})
raw_test_dataset = tf.data.Dataset.from_tensor_slices({
    'aave': aave_test,
    'sae': sae_test
})

BUFFER_SIZE = len(aave_train)  # Use the size of the train dataset

# Adjust batch sizes
train_batch_size = 16
test_batch_size = 4

# Text Vectorization
MAX_SEQUENCE_LENGTH = 100
aave_vectorization = TextVectorization(
    output_mode='int', output_sequence_length=MAX_SEQUENCE_LENGTH)
# One extra position on the target side: after shifting by one token, both the
# decoder input and the target are MAX_SEQUENCE_LENGTH tokens long.
sae_vectorization = TextVectorization(
    output_mode='int', output_sequence_length=MAX_SEQUENCE_LENGTH + 1)

# Vocabularies are learned from the training split only.
aave_texts = raw_train_dataset.map(lambda x: x['aave']).batch(train_batch_size)
sae_texts = raw_train_dataset.map(lambda x: x['sae']).batch(train_batch_size)

aave_vectorization.adapt(aave_texts)
sae_vectorization.adapt(sae_texts)

aave_vocab_size = len(aave_vectorization.get_vocabulary())
sae_vocab_size = len(sae_vectorization.get_vocabulary())


def _vectorize_batch(batch):
    """Turn a batch of raw sentence pairs into ``(model_inputs, target)``.

    ``model_inputs`` is keyed by the names of the model's Input layers
    ('encoder_input', 'decoder_input'). The decoder input is the target
    sequence without its last token and the target is the same sequence
    without its first token, i.e. the usual one-step shift.
    """
    encoder_tokens = aave_vectorization(batch['aave'])
    sae_tokens = sae_vectorization(batch['sae'])
    model_inputs = {
        'encoder_input': encoder_tokens,
        'decoder_input': sae_tokens[:, :-1],
    }
    return model_inputs, sae_tokens[:, 1:]


# Shuffle, batch and vectorize the train dataset
train_dataset = (
    raw_train_dataset
    .shuffle(BUFFER_SIZE)
    .batch(train_batch_size, drop_remainder=True)
    .map(_vectorize_batch)
    .prefetch(tf.data.AUTOTUNE)
)

# Batch and vectorize the test dataset
test_dataset = (
    raw_test_dataset
    .batch(test_batch_size, drop_remainder=True)
    .map(_vectorize_batch)
    .prefetch(tf.data.AUTOTUNE)
)

# Model Parameters
embedding_dim = 256

# Define the ranges for each hyperparameter
patience_range = [5, 10, 15]
units_range = [512, 1024, 2048]
learning_rate_range = [0.0001, 0.001, 0.01]
activation_functions = ['relu', 'tanh']
dropout_rates = [0.2, 0.5, 0.7]
regularizers = [None, 'l2']
optimizers = ['adam', 'rmsprop', 'sgd']
# Add more hyperparameters as needed

# Create all combinations of hyperparameters
hyperparameter_combinations = list(itertools.product(
    patience_range, units_range, learning_rate_range, activation_functions, dropout_rates, regularizers, optimizers))

def build_model(units, activation, dropout, regularizer, learning_rate, optimizer):
    """Build and compile the LSTM encoder-decoder with attention.

    Args:
        units: Number of units in the encoder and decoder LSTMs.
        activation: Activation of the projection layer applied to the
            concatenated decoder/attention features. If falsy, the projection
            layer is skipped.
        dropout: Rate used for both input and recurrent dropout in the LSTMs.
        regularizer: ``'l2'`` to apply an L2(0.01) kernel regularizer to both
            LSTMs; any other value disables regularization.
        learning_rate: Learning rate passed to the optimizer.
        optimizer: One of ``'adam'``, ``'rmsprop'`` or ``'sgd'``.

    Returns:
        A compiled ``Model`` taking ``[encoder_input, decoder_input]`` and
        producing a softmax distribution over the SAE vocabulary per timestep.

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'rmsprop': tf.keras.optimizers.RMSprop,
        'sgd': tf.keras.optimizers.SGD,
    }
    # Validate up front: an unknown name used to surface later as an
    # UnboundLocalError on `opt`, after the whole graph had been built.
    if optimizer not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {optimizer!r}; expected one of {sorted(optimizer_classes)}")

    # Apply regularizer if specified
    lstm_regularizer = tf.keras.regularizers.l2(0.01) if regularizer == 'l2' else None

    # Encoder
    encoder_input = Input(shape=(None,), dtype='int64', name='encoder_input')
    encoder_embedding = Embedding(input_dim=aave_vocab_size, output_dim=embedding_dim)(encoder_input)
    encoder_lstm = LSTM(
        units, return_state=True, return_sequences=True,
        dropout=dropout, recurrent_dropout=dropout, kernel_regularizer=lstm_regularizer)
    encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
    encoder_state = [state_h, state_c]

    # Decoder, initialised with the encoder's final state
    decoder_input = Input(shape=(None,), dtype='int64', name='decoder_input')
    decoder_embedding_layer = Embedding(input_dim=sae_vocab_size, output_dim=embedding_dim)
    decoder_embedding = decoder_embedding_layer(decoder_input)
    decoder_lstm = LSTM(
        units, return_sequences=True, return_state=True,
        dropout=dropout, recurrent_dropout=dropout, kernel_regularizer=lstm_regularizer)
    decoder_lstm_output, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_state)

    # Decoder outputs are the queries, encoder outputs the values.
    attention_layer = Attention(use_scale=True)
    attention_output = attention_layer([decoder_lstm_output, encoder_outputs])

    decoder_features = Concatenate(axis=-1)([decoder_lstm_output, attention_output])

    # The tuned activation belongs on a hidden projection, not on the output
    # layer: the loss below expects a probability distribution, which relu/tanh
    # outputs are not.
    if activation:
        decoder_features = Dense(units, activation=activation)(decoder_features)

    decoder_output = Dense(sae_vocab_size, activation='softmax')(decoder_features)

    model = Model([encoder_input, decoder_input], decoder_output)

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

def calculate_bleu_score(model, dataset):
    """Return the mean smoothed sentence-level BLEU of ``model`` over ``dataset``.

    Args:
        model: Model whose ``predict`` output has the vocabulary on the last
            axis; the arg-max along that axis is taken as the predicted token.
        dataset: Iterable of ``(input_data, target_data)`` batches, where
            ``target_data`` holds SAE token ids.

    Returns:
        The average BLEU score over all sentence pairs in which both the
        prediction and the reference contain at least one non-padding token,
        or ``0.0`` if there is no such pair.

    NOTE(review): predictions come from a single forward pass over the inputs
    supplied by ``dataset``. If those inputs include the reference decoder
    tokens, this is a teacher-forced score rather than free-running decoding
    -- confirm this is the intended metric.
    """
    bleu_scores = []
    smoothie = SmoothingFunction().method4

    # get_vocabulary() builds a fresh list on every call, so fetch it once
    # instead of once per token.
    sae_vocabulary = sae_vectorization.get_vocabulary()

    for batch in dataset:
        input_data, target_data = batch
        predictions = np.argmax(model.predict(input_data), axis=-1)

        for pred, actual in zip(predictions, target_data):
            # Token id 0 is padding and is dropped from both sides.
            pred_sentence = [sae_vocabulary[i] for i in pred if i != 0]
            actual_sentence = [sae_vocabulary[i] for i in actual.numpy() if i != 0]

            if not pred_sentence or not actual_sentence:
                continue

            bleu_score = sentence_bleu([actual_sentence], pred_sentence, smoothing_function=smoothie)
            bleu_scores.append(bleu_score)

    average_bleu_score = np.mean(bleu_scores) if bleu_scores else 0.0
    return average_bleu_score



def train_and_evaluate_model(patience, units, learning_rate, activation, dropout, regularizer, optimizer):
    """Train one hyperparameter configuration and score it with BLEU.

    Args:
        patience: Number of epochs without ``val_loss`` improvement before
            early stopping ends training.
        units, learning_rate, activation, dropout, regularizer, optimizer:
            Forwarded to ``build_model``.

    Returns:
        A 4-tuple ``(bleu_score, model, stopped_epoch, history)`` where
        ``stopped_epoch`` is the zero-based index of the last epoch that ran
        and ``history`` is the object returned by ``model.fit``.
    """
    # Build the model with the specified hyperparameters
    model = build_model(units, activation, dropout, regularizer, learning_rate, optimizer)

    # Early Stopping Callback
    early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=1)

    # Train the model with a fixed number of epochs (100) but use early stopping
    history = model.fit(train_dataset, epochs=100, validation_data=test_dataset, callbacks=[early_stopping_callback])

    # The callback leaves `stopped_epoch` at 0 when it never triggers; fall back
    # to the index of the last epoch that actually ran so a full run is not
    # reported as having stopped at epoch 0.
    stopped_epoch = early_stopping_callback.stopped_epoch or len(history.epoch) - 1

    # Evaluate the model on the held-out split
    bleu_score = calculate_bleu_score(model, test_dataset)

    return bleu_score, model, stopped_epoch, history


results = []
results_csv_path = 'hyperparameter_tuning_results.csv'

# Iterate over all combinations
for combination in hyperparameter_combinations:
    patience, units, learning_rate, activation, dropout, regularizer, optimizer = combination
    # train_and_evaluate_model returns four values; `history` is needed below
    # for the loss curve.
    bleu_score, trained_model, stopped_epoch, history = train_and_evaluate_model(
        patience, units, learning_rate, activation, dropout, regularizer, optimizer)

    # Save the model
    model_dir_name = f'model_patience{patience}_units{units}_lr{learning_rate}_act{activation}_dropout{dropout}_reg{regularizer}_opt{optimizer}'
    os.makedirs(model_dir_name, exist_ok=True)
    model_path = os.path.join(model_dir_name, 'model.h5')
    trained_model.save(model_path)

    # Save the loss curve
    plt.figure()
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    loss_curve_path = os.path.join(model_dir_name, 'loss_curve.png')
    plt.savefig(loss_curve_path)
    plt.close()

    # Store results
    results.append({
        'patience': patience, 'units': units, 'learning_rate': learning_rate, 'activation': activation,
        'dropout': dropout, 'regularizer': regularizer, 'optimizer': optimizer, 'stopped_epoch': stopped_epoch,
        'bleu_score': bleu_score
    })

    # Rewrite the CSV after every run so an interrupted or crashed sweep keeps
    # the results of all configurations completed so far.
    pd.DataFrame(results).to_csv(results_csv_path, index=False)

    # Drop this run's model and reset Keras' global state so memory does not
    # grow across the sweep.
    del trained_model
    tf.keras.backend.clear_session()

# Save results to CSV
results_df = pd.DataFrame(results)
results_df.to_csv(results_csv_path, index=False)

print("Hyperparameter tuning completed. Results saved to 'hyperparameter_tuning_results.csv'.")


2024-01-30 02:10:41.803257: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /N/soft/sles15/python/gnu/3.10.5/lib:/opt/cray/pe/gcc/11.2.0/snos/lib64:/opt/cray/pe/papi/6.0.0.17/lib64:/opt/cray/libfabric/1.11.0.4.111/lib64
2024-01-30 02:10:41.803301: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


KeyboardInterrupt: 