##### Imports

In [None]:
import sys
from pathlib import Path
import warnings

import warnings
import pandas as pd
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)

import sys
# Custom library paths
sys.path.extend(['../', './scr'])

from scr.utils import set_seed
from scr.utils import read_words
from pathlib import Path
import random
from collections import Counter, defaultdict
import pickle
from tqdm import tqdm
from torch.utils.data import Dataset

from scr.utils import read_words, save_words_to_file

import pickle
from pathlib import Path
from scr.dataset import *
from scr.utils import *
# # For inference
from scr.feature_engineering import *
from scr.encoder import *
from scr.trainer_ import *
from scr.decoder import SimpleLSTM

import gc

set_seed(42)

import torch
import torch.nn as nn
from pathlib import Path
import random

from scr.utils import print_scenarios
torch.set_float32_matmul_precision('medium')
from pathlib import Path

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # Read and Shuffle Word List
word_list = read_words('data/words_250000_train.txt') # , limit=10000)
# word_list = read_words('data/250k.txt', limit=10000)

random.shuffle(word_list)

# Calculate Frequencies and Max Word Length
word_frequencies = calculate_word_frequencies(word_list)
char_frequency = calculate_char_frequencies(word_list)
max_word_length = max(len(word) for word in word_list)

##### Data Dir

In [None]:
NUM_STRATIFIED_SAMPLES = 5_000 # This will be overwritten by Papermill

NUM_WORD_SAMPLE = 1_000 # words for testing

FAST_DEV_RUN = False

MAX_EPOCH = 250

In [None]:
from pathlib import Path
from scr.custom_sampler import *

# Define the base directory and the paths for training and validation parquet files
base_dataset_dir = Path("/media/sayem/510B93E12554BBD1/dataset/")
stratified_samples_dir = base_dataset_dir / str(NUM_STRATIFIED_SAMPLES)
parquet_path = stratified_samples_dir / 'parquets'

# Create directories for train and validation parquets if they don't exist
parquet_path.mkdir(parents=True, exist_ok=True)
# parquet_valid_path.mkdir(parents=True, exist_ok=True)

# Define and create the directory for models
models_dir = Path("/home/sayem/Desktop/Hangman/models")
models_dir.mkdir(parents=True, exist_ok=True)

# Define your output directory
# Define your output directory and logger directory
output_dir = Path("/home/sayem/Desktop/Hangman/training_outputs")
logger_dir = output_dir / "lightning_logs"

# Create the output and logger directories if they don't exist
output_dir.mkdir(parents=True, exist_ok=True)
logger_dir.mkdir(parents=True, exist_ok=True)

# Define the file path for saving the testing words
testing_words_file_path = stratified_samples_dir / "testing_words.txt"

try:
    testing_word_list = read_words(testing_words_file_path)
    print(f"Length of the testing word list: {len(testing_word_list)}")
    sampled_test_words = stratified_sample_by_length_and_uniqueness(testing_word_list, NUM_WORD_SAMPLE)
    print(f"Sampled {len(sampled_test_words)} unique words for testing.")
except FileNotFoundError:
    print(f"File not found: {testing_words_file_path}")

print(len(sampled_test_words))

##### Dataset Loading and train test split

In [None]:
# Create datasets directly from the saved parquet files
hangman_dataset = HangmanDataset(parquet_path)
# valid_dataset = HangmanDataset(parquet_valid_path)

from scr.utils import *

# Split the dataset into training and validation sets
train_dataset, valid_dataset = hangman_dataset.split(test_size=0.2)

In [None]:
def check_seq_len_distribution(dataset):
    distribution = {}
    for seq_len, indices in dataset.seq_len_index.items():
        distribution[seq_len] = len(indices)
    return distribution

def check_seq_len_distribution(dataset):
    distribution = {}
    for seq_len, indices in dataset.seq_len_index.items():
        distribution[seq_len] = len(indices)
    return distribution

# Check proportions
print(f"Total samples in original dataset: {len(hangman_dataset)}")
print(f"Samples in training dataset: {len(train_dataset)}")
print(f"Samples in validation dataset: {len(valid_dataset)}")

# Verify that the sum of train and validation samples equals the total samples
assert len(train_dataset) + len(valid_dataset) == len(hangman_dataset), "Mismatch in total sample count"

# Check sequence length distribution in each dataset
train_distribution = check_seq_len_distribution(train_dataset)
valid_distribution = check_seq_len_distribution(valid_dataset)

print("Training dataset sequence length distribution:", train_distribution)
print("Validation dataset sequence length distribution:", valid_distribution)

# Ensure each sequence length is represented in both datasets
assert set(train_distribution.keys()) == set(valid_distribution.keys()), "Mismatch in sequence length representation"

In [None]:
def count_unique_seq_lens(dataset):
    # Count the unique sequence lengths in the dataset
    unique_seq_lens = len(dataset.seq_len_index)
    return unique_seq_lens

train_unique_seq_lens = count_unique_seq_lens(train_dataset)
valid_unique_seq_lens = count_unique_seq_lens(valid_dataset)

print(f"Unique sequence lengths in training dataset: {train_unique_seq_lens}")
print(f"Unique sequence lengths in validation dataset: {valid_unique_seq_lens}")

##### Model Initialization

In [None]:
# Fixed parameters
max_word_length = 29  # Maximum word length
num_embeddings = 28   # Vocabulary size (fixed, based on unique characters in the game)
num_features = 159     # Number of features per character
missed_char_dim = 28  # Additional dimension for missed characters
output_dim = 28       # Output dimension for the model
char_feature_dim = 5  # Features per character
additional_state_features = num_features \
    - max_word_length * char_feature_dim   # Additional features per state


# Tunable hyperparameters
LEARNING_RATE = 0.0009279463391755308
HIDDEN_DIM = 32 ### 32
NUM_LAYERS = 1
EMBEDDING_DIM = 50
DROPOUT_PROB = 0.33

L1_FACTOR = 0.0005236754641745983
L2_FACTOR = 0.004375162823431166

OPTIMIZER = 'RMSprop'

print(f"Addition state fetatures: {additional_state_features}")
# Initialize the Encoder

encoder = Encoder(num_embeddings, EMBEDDING_DIM, max_word_length, \
    char_feature_dim, additional_state_features)

input_dim = max_word_length * EMBEDDING_DIM + additional_state_features
# Initialize the SimpleLSTM decoder
decoder = SimpleLSTM(input_dim=input_dim, 
                     hidden_dim=HIDDEN_DIM, 
                     output_dim=output_dim, 
                     num_layers=NUM_LAYERS,
                     missed_char_dim=missed_char_dim,
                     dropout_prob=DROPOUT_PROB)


# Initialize the HangmanModel
lightning_model = HangmanModel(encoder, decoder, 
                LEARNING_RATE, char_frequency, 
                max_word_length, optimizer_type=OPTIMIZER, \
                l1_factor=L1_FACTOR,
                l2_factor=L2_FACTOR, test_words=sampled_test_words)

# Save the model's state dictionary
untrained_model_file = models_dir / f"{NUM_STRATIFIED_SAMPLES}_untrained_model.pth"
torch.save(lightning_model, untrained_model_file)
print(f"Model saved at {untrained_model_file}")

##### Data Module

In [None]:
from scr.data_module import *

# Initialize Data Module
initial_batch_size = 128 # Set your initial batch size

# Initialize Data Module with the required arguments
data_module = HangmanDataModule(train_dataset, valid_dataset, 
                                initial_batch_size, 
                                custom_collate_fn)
                                # performance_metrics=None)

##### Tuning the Model

In [None]:
## callbacks
from pytorch_lightning.callbacks import Callback, EarlyStopping
from scr.custom_callbacks import *

# Setup EarlyStopping to monitor the test_win_rate
early_stop_callback = EarlyStopping(
    monitor='win_rate',
    min_delta=0.00,
    patience=15,
    verbose=True,
    mode='max'  # Maximize the win rate
)

step_level_early_stopping = StepLevelEarlyStopping(
    monitor='val_miss_penalty', # Metric to monitor
    min_delta=0.0,              # Minimum change to qualify as an improvement
    patience=5                  # Number of steps with no improvement after which training will be stopped
)

In [10]:
import optuna
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from pathlib import Path
import shutil  # For directory cleanup

# Assuming necessary imports from your custom modules and PyTorch Lightning are done here

# Clear previous runs
base_dir = Path("/home/sayem/Desktop/Hangman")
checkpoints_dir = base_dir / "checkpoints"

# Clear out the old checkpoints and logs before starting a new run
if checkpoints_dir.exists():
    shutil.rmtree(checkpoints_dir)  # Remove the directory and all its contents

# Recreate the checkpoints directory after clearing it
checkpoints_dir.mkdir(parents=True, exist_ok=True)

# Now, create the tb_logs_dir inside the newly created checkpoints_dir
tb_logs_dir = checkpoints_dir / "tb_logs" / "HangmanModel"
tb_logs_dir.mkdir(parents=True, exist_ok=True)  # Ensure this directory is created after the checkpoints_dir


# Define a callback function for Optuna
def trial_callback(study, trial):
    # This function will be called at the end of each trial
    print(f"Trial {trial.number} finished with value: {trial.value} and parameters: {trial.params}.")
    print(f" Best trial so far: Trial {study.best_trial.number}")
    print(f" Best win rate so far: {study.best_trial.value}")

def objective(trial):
    # Hyperparameters to be tuned by Optuna
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    batch_size = trial.suggest_categorical('batch_size', [128, 256, 512, 1024])
    hidden_dim = trial.suggest_categorical('hidden_dim', [16, 32, 64, 128])
    num_layers = trial.suggest_int('num_layers', 1, 5)
    embedding_dim = trial.suggest_categorical('embedding_dim', [20, 30, 40, 50])
    optimizer_type = trial.suggest_categorical('optimizer_type', ['Adam', 'AdamW', 'SGD', 'RMSprop'])
    dropout_prob = trial.suggest_float('dropout_prob', 0.0, 0.5)  # Suggesting dropout probability
    l1_factor = trial.suggest_float('l1_factor', 1e-5, 1e-1, log=True)  # Suggesting L1 regularization factor
    l2_factor = trial.suggest_float('l2_factor', 1e-5, 1e-1, log=True)  # Suggesting L2 regularization factor

    print(f"Starting trial with parameters:\n"
          f"  Learning Rate: {learning_rate}\n"
          f"  Batch Size: {batch_size}\n"
          f"  Hidden Dim: {hidden_dim}\n"
          f"  Num Layers: {num_layers}\n"
          f"  Embedding Dim: {embedding_dim}\n"
          f"  Optimizer Type: {optimizer_type}\n"
          f"  Dropout Prob: {dropout_prob}\n"
          f"  L1 Factor: {l1_factor}\n"
          f"  L2 Factor: {l2_factor}")  # Printing all tuned parameters

    # Update the batch size in the data module
    data_module.batch_size = batch_size

    # Re-instantiate the encoder and decoder with the new hyperparameters
    encoder = Encoder(num_embeddings, embedding_dim, max_word_length, char_feature_dim, additional_state_features)
    
    input_dim = max_word_length * embedding_dim + additional_state_features
    
    decoder = SimpleLSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim,
                        num_layers=num_layers, missed_char_dim=missed_char_dim, \
                        dropout_prob=dropout_prob)  # Using the dropout probability

    # Re-instantiate the model with the new encoder, decoder, learning rate, optimizer type, and regularization factors
    model = HangmanModel(encoder, decoder, learning_rate, char_frequency, max_word_length, 
                         optimizer_type=optimizer_type, l1_factor=l1_factor, l2_factor=l2_factor, 
                         test_words=sampled_test_words)

    # Set up PyTorch Lightning Trainer with Optuna integration
    logger = TensorBoardLogger(checkpoints_dir / "tb_logs", name="HangmanModel")
    
    checkpoint_callback = ModelCheckpoint(dirpath=checkpoints_dir, filename="best-checkpoint", 
                                          monitor="win_rate", mode="max", save_top_k=1)

    trainer = pl.Trainer(
        default_root_dir=checkpoints_dir,
        logger=logger,
        callbacks=[checkpoint_callback, early_stop_callback],
        max_epochs=MAX_EPOCH,
        log_every_n_steps=10,
        enable_progress_bar=True,
        fast_dev_run=False,
        num_sanity_val_steps=0,
        reload_dataloaders_every_n_epochs=1
    )

    # Fit the model
    trainer.fit(model, datamodule=data_module)

    # Return the best win rate recorded during training
    best_win_rate = checkpoint_callback.best_model_score.item() \
        if checkpoint_callback.best_model_score is not None else 0

    return best_win_rate
   
# Create an Optuna study with a name and optimize the objective function
study_name = f"NUM_STRATIFIED_SAMPLES_{NUM_STRATIFIED_SAMPLES}_HangmanModelTuning"

study = optuna.create_study(direction='maximize', study_name=study_name)
study.optimize(objective, n_trials=200, callbacks=[trial_callback])

# Output the best trial
best_trial = study.best_trial
print(f"Best trial: LR={best_trial.params['learning_rate']}, Batch Size={best_trial.params['batch_size']}")
print(f"Best win rate: {best_trial.value}")

# Save the study results for further analysis
# Construct the filename with the NUM_STRATIFIED_SAMPLES prefix
filename = f"NUM_STRATIFIED_SAMPLES_{NUM_STRATIFIED_SAMPLES}_optuna_study_results.csv"

# Save the study results to the specified file
study.trials_dataframe().to_csv(checkpoints_dir / filename)

# # Load the best model - This needs to be done after the study has concluded
# best_model_path = checkpoint_callback.best_model_path
# best_model = HangmanModel.load_from_checkpoint(checkpoint_path=best_model_path)

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 156 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-32-05.png


[I 2024-02-02 06:32:06,214] Trial 141 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.006961665862633315, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.13256795857046738, 'l1_factor': 2.518264388598221e-05, 'l2_factor': 4.93459768183141e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 560   
1 | decoder | SimpleLSTM | 1.6 M 
---------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.216     Total estimated model params size (MB)


Trial 141 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.006961665862633315, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.13256795857046738, 'l1_factor': 2.518264388598221e-05, 'l2_factor': 4.93459768183141e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.009861870777627738
  Batch Size: 1024
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 20
  Optimizer Type: RMSprop
  Dropout Prob: 0.11606118560956237
  L1 Factor: 4.444968331262646e-05
  L2 Factor: 1.9395885665362768e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 157 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-34-40.png


[I 2024-02-02 06:34:40,915] Trial 142 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.009861870777627738, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.11606118560956237, 'l1_factor': 4.444968331262646e-05, 'l2_factor': 1.9395885665362768e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 560   
1 | decoder | SimpleLSTM | 763 K 
---------------------------------------
763 K     Trainable params
0         Non-trainable params
763 K     Total params
3.054     Total estimated model params size (MB)


Trial 142 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.009861870777627738, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.11606118560956237, 'l1_factor': 4.444968331262646e-05, 'l2_factor': 1.9395885665362768e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.016433788579504584
  Batch Size: 1024
  Hidden Dim: 128
  Num Layers: 1
  Embedding Dim: 20
  Optimizer Type: RMSprop
  Dropout Prob: 0.09650337803453213
  L1 Factor: 3.8544596680492025e-05
  L2 Factor: 8.996839502384515e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 158 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-37-16.png


[I 2024-02-02 06:37:16,404] Trial 143 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.016433788579504584, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.09650337803453213, 'l1_factor': 3.8544596680492025e-05, 'l2_factor': 8.996839502384515e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 560   
1 | decoder | SimpleLSTM | 763 K 
---------------------------------------
763 K     Trainable params
0         Non-trainable params
763 K     Total params
3.054     Total estimated model params size (MB)


Trial 143 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.016433788579504584, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.09650337803453213, 'l1_factor': 3.8544596680492025e-05, 'l2_factor': 8.996839502384515e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.002243884800599902
  Batch Size: 1024
  Hidden Dim: 128
  Num Layers: 1
  Embedding Dim: 20
  Optimizer Type: RMSprop
  Dropout Prob: 0.15136252557107513
  L1 Factor: 2.9418741020350796e-05
  L2 Factor: 6.1126158639485e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 159 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-39-50.png


[I 2024-02-02 06:39:50,835] Trial 144 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.002243884800599902, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.15136252557107513, 'l1_factor': 2.9418741020350796e-05, 'l2_factor': 6.1126158639485e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 2.4 M 
---------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.783     Total estimated model params size (MB)


Trial 144 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.002243884800599902, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.15136252557107513, 'l1_factor': 2.9418741020350796e-05, 'l2_factor': 6.1126158639485e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.013496819460804391
  Batch Size: 1024
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 50
  Optimizer Type: RMSprop
  Dropout Prob: 0.43162515637414445
  L1 Factor: 1.73905666322033e-05
  L2 Factor: 0.00018467231429204943


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 160 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-42-28.png


[I 2024-02-02 06:42:28,737] Trial 145 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.013496819460804391, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.43162515637414445, 'l1_factor': 1.73905666322033e-05, 'l2_factor': 0.00018467231429204943}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.1 K 
1 | decoder | SimpleLSTM | 645 K 
---------------------------------------
646 K     Trainable params
0         Non-trainable params
646 K     Total params
2.587     Total estimated model params size (MB)


Trial 145 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.013496819460804391, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.43162515637414445, 'l1_factor': 1.73905666322033e-05, 'l2_factor': 0.00018467231429204943}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.00813777659610658
  Batch Size: 256
  Hidden Dim: 64
  Num Layers: 1
  Embedding Dim: 40
  Optimizer Type: RMSprop
  Dropout Prob: 0.3526467562686977
  L1 Factor: 8.196313363213585e-05
  L2 Factor: 0.00024212342805287248


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 161 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-44-56.png


[I 2024-02-02 06:44:56,448] Trial 146 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.00813777659610658, 'batch_size': 256, 'hidden_dim': 64, 'num_layers': 1, 'embedding_dim': 40, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.3526467562686977, 'l1_factor': 8.196313363213585e-05, 'l2_factor': 0.00024212342805287248}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 560   
1 | decoder | SimpleLSTM | 1.6 M 
---------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.216     Total estimated model params size (MB)


Trial 146 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.00813777659610658, 'batch_size': 256, 'hidden_dim': 64, 'num_layers': 1, 'embedding_dim': 40, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.3526467562686977, 'l1_factor': 8.196313363213585e-05, 'l2_factor': 0.00024212342805287248}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.005727872353789356
  Batch Size: 128
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 20
  Optimizer Type: SGD
  Dropout Prob: 0.3908173510998916
  L1 Factor: 4.997095303930643e-05
  L2 Factor: 0.0006949562898050915


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 162 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-47-25.png


[I 2024-02-02 06:47:26,282] Trial 147 finished with value: 0.8294931054115295 and parameters: {'learning_rate': 0.005727872353789356, 'batch_size': 128, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 20, 'optimizer_type': 'SGD', 'dropout_prob': 0.3908173510998916, 'l1_factor': 4.997095303930643e-05, 'l2_factor': 0.0006949562898050915}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 439 K 
---------------------------------------
440 K     Trainable params
0         Non-trainable params
440 K     Total params
1.762     Total estimated model params size (MB)


Trial 147 finished with value: 0.8294931054115295 and parameters: {'learning_rate': 0.005727872353789356, 'batch_size': 128, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 20, 'optimizer_type': 'SGD', 'dropout_prob': 0.3908173510998916, 'l1_factor': 4.997095303930643e-05, 'l2_factor': 0.0006949562898050915}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.0029038128093195927
  Batch Size: 256
  Hidden Dim: 32
  Num Layers: 3
  Embedding Dim: 50
  Optimizer Type: Adam
  Dropout Prob: 0.07798726822833671
  L1 Factor: 1.2142904689905577e-05
  L2 Factor: 3.828907906429955e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 163 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-50-00.png


[I 2024-02-02 06:50:00,385] Trial 148 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.0029038128093195927, 'batch_size': 256, 'hidden_dim': 32, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.07798726822833671, 'l1_factor': 1.2142904689905577e-05, 'l2_factor': 3.828907906429955e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.1 K 
1 | decoder | SimpleLSTM | 168 K 
---------------------------------------
169 K     Trainable params
0         Non-trainable params
169 K     Total params
0.677     Total estimated model params size (MB)


Trial 148 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.0029038128093195927, 'batch_size': 256, 'hidden_dim': 32, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.07798726822833671, 'l1_factor': 1.2142904689905577e-05, 'l2_factor': 3.828907906429955e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.012120767613563633
  Batch Size: 256
  Hidden Dim: 16
  Num Layers: 3
  Embedding Dim: 40
  Optimizer Type: RMSprop
  Dropout Prob: 0.10538622870274797
  L1 Factor: 2.07135930628135e-05
  L2 Factor: 0.0003711937797578068


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 164 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-54-02.png


[I 2024-02-02 06:54:02,769] Trial 149 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.012120767613563633, 'batch_size': 256, 'hidden_dim': 16, 'num_layers': 3, 'embedding_dim': 40, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.10538622870274797, 'l1_factor': 2.07135930628135e-05, 'l2_factor': 0.0003711937797578068}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 560   
1 | decoder | SimpleLSTM | 348 K 
---------------------------------------
349 K     Trainable params
0         Non-trainable params
349 K     Total params
1.397     Total estimated model params size (MB)


Trial 149 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.012120767613563633, 'batch_size': 256, 'hidden_dim': 16, 'num_layers': 3, 'embedding_dim': 40, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.10538622870274797, 'l1_factor': 2.07135930628135e-05, 'l2_factor': 0.0003711937797578068}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.023967637614203337
  Batch Size: 1024
  Hidden Dim: 64
  Num Layers: 1
  Embedding Dim: 20
  Optimizer Type: AdamW
  Dropout Prob: 0.12435262540885553
  L1 Factor: 6.603664641188004e-05
  L2 Factor: 0.00011200492593637298


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 165 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_06-57-56.png


[I 2024-02-02 06:57:57,129] Trial 150 finished with value: 0.921658992767334 and parameters: {'learning_rate': 0.023967637614203337, 'batch_size': 1024, 'hidden_dim': 64, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'AdamW', 'dropout_prob': 0.12435262540885553, 'l1_factor': 6.603664641188004e-05, 'l2_factor': 0.00011200492593637298}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 2.4 M 
---------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.783     Total estimated model params size (MB)


Trial 150 finished with value: 0.921658992767334 and parameters: {'learning_rate': 0.023967637614203337, 'batch_size': 1024, 'hidden_dim': 64, 'num_layers': 1, 'embedding_dim': 20, 'optimizer_type': 'AdamW', 'dropout_prob': 0.12435262540885553, 'l1_factor': 6.603664641188004e-05, 'l2_factor': 0.00011200492593637298}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.002514129320410215
  Batch Size: 256
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 50
  Optimizer Type: Adam
  Dropout Prob: 0.4100768937629882
  L1 Factor: 1.580519424157579e-05
  L2 Factor: 1.6503427150818986e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 166 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-00-25.png


[I 2024-02-02 07:00:26,337] Trial 151 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.002514129320410215, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.4100768937629882, 'l1_factor': 1.580519424157579e-05, 'l2_factor': 1.6503427150818986e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 2.4 M 
---------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.783     Total estimated model params size (MB)


Trial 151 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.002514129320410215, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.4100768937629882, 'l1_factor': 1.580519424157579e-05, 'l2_factor': 1.6503427150818986e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.0014305257429964551
  Batch Size: 256
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 50
  Optimizer Type: Adam
  Dropout Prob: 0.1591596742151849
  L1 Factor: 2.1246305294430046e-05
  L2 Factor: 3.21089830578222e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 167 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-02-54.png


[I 2024-02-02 07:02:54,811] Trial 152 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.0014305257429964551, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.1591596742151849, 'l1_factor': 2.1246305294430046e-05, 'l2_factor': 3.21089830578222e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 2.4 M 
---------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.783     Total estimated model params size (MB)


Trial 152 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.0014305257429964551, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.1591596742151849, 'l1_factor': 2.1246305294430046e-05, 'l2_factor': 3.21089830578222e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.00354626128196008
  Batch Size: 256
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 50
  Optimizer Type: Adam
  Dropout Prob: 0.37633392594915166
  L1 Factor: 2.784984110143471e-05
  L2 Factor: 4.2956830825391964e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 168 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-05-22.png


[I 2024-02-02 07:05:23,228] Trial 153 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.00354626128196008, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.37633392594915166, 'l1_factor': 2.784984110143471e-05, 'l2_factor': 4.2956830825391964e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 2.4 M 
---------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.783     Total estimated model params size (MB)


Trial 153 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.00354626128196008, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.37633392594915166, 'l1_factor': 2.784984110143471e-05, 'l2_factor': 4.2956830825391964e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.004678119915923383
  Batch Size: 256
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 50
  Optimizer Type: Adam
  Dropout Prob: 0.14882522088090702
  L1 Factor: 3.309097748209895e-05
  L2 Factor: 2.970936233245522e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 169 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-07-52.png


[I 2024-02-02 07:07:53,647] Trial 154 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.004678119915923383, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.14882522088090702, 'l1_factor': 3.309097748209895e-05, 'l2_factor': 2.970936233245522e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 2.4 M 
---------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.783     Total estimated model params size (MB)


Trial 154 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.004678119915923383, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'Adam', 'dropout_prob': 0.14882522088090702, 'l1_factor': 3.309097748209895e-05, 'l2_factor': 2.970936233245522e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.001969731500073775
  Batch Size: 256
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 50
  Optimizer Type: RMSprop
  Dropout Prob: 0.1774886555971204
  L1 Factor: 9.512580518373897e-05
  L2 Factor: 0.0001559182182350699


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 170 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-10-25.png


[I 2024-02-02 07:10:26,264] Trial 155 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.001969731500073775, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.1774886555971204, 'l1_factor': 9.512580518373897e-05, 'l2_factor': 0.0001559182182350699}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.1 K 
1 | decoder | SimpleLSTM | 389 K 
---------------------------------------
391 K     Trainable params
0         Non-trainable params
391 K     Total params
1.564     Total estimated model params size (MB)


Trial 155 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.001969731500073775, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 50, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.1774886555971204, 'l1_factor': 9.512580518373897e-05, 'l2_factor': 0.0001559182182350699}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.003172585540835048
  Batch Size: 128
  Hidden Dim: 32
  Num Layers: 4
  Embedding Dim: 40
  Optimizer Type: Adam
  Dropout Prob: 0.13693105273458522
  L1 Factor: 5.617854515264804e-05
  L2 Factor: 2.1644183461633463e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 171 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-12-56.png


[I 2024-02-02 07:12:57,171] Trial 156 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.003172585540835048, 'batch_size': 128, 'hidden_dim': 32, 'num_layers': 4, 'embedding_dim': 40, 'optimizer_type': 'Adam', 'dropout_prob': 0.13693105273458522, 'l1_factor': 5.617854515264804e-05, 'l2_factor': 2.1644183461633463e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.4 K 
1 | decoder | SimpleLSTM | 1.1 M 
---------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.374     Total estimated model params size (MB)


Trial 156 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.003172585540835048, 'batch_size': 128, 'hidden_dim': 32, 'num_layers': 4, 'embedding_dim': 40, 'optimizer_type': 'Adam', 'dropout_prob': 0.13693105273458522, 'l1_factor': 5.617854515264804e-05, 'l2_factor': 2.1644183461633463e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.006308802996640763
  Batch Size: 512
  Hidden Dim: 64
  Num Layers: 4
  Embedding Dim: 50
  Optimizer Type: RMSprop
  Dropout Prob: 0.45496297036116434
  L1 Factor: 3.6743857646055286e-05
  L2 Factor: 1.0811148898415757e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 172 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-15-31.png


[I 2024-02-02 07:15:31,780] Trial 157 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.006308802996640763, 'batch_size': 512, 'hidden_dim': 64, 'num_layers': 4, 'embedding_dim': 50, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.45496297036116434, 'l1_factor': 3.6743857646055286e-05, 'l2_factor': 1.0811148898415757e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 1.1 K 
1 | decoder | SimpleLSTM | 2.1 M 
---------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.594     Total estimated model params size (MB)


Trial 157 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.006308802996640763, 'batch_size': 512, 'hidden_dim': 64, 'num_layers': 4, 'embedding_dim': 50, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.45496297036116434, 'l1_factor': 3.6743857646055286e-05, 'l2_factor': 1.0811148898415757e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.004009330376139733
  Batch Size: 256
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 40
  Optimizer Type: RMSprop
  Dropout Prob: 0.1878832959645087
  L1 Factor: 1.0181800909269853e-05
  L2 Factor: 0.00021203628596276004


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 173 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-18-01.png


[I 2024-02-02 07:18:02,162] Trial 158 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.004009330376139733, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 40, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.1878832959645087, 'l1_factor': 1.0181800909269853e-05, 'l2_factor': 0.00021203628596276004}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 840   
1 | decoder | SimpleLSTM | 1.9 M 
---------------------------------------
1.9 M     Trainable params
0         Non-trainable params
1.9 M     Total params
7.405     Total estimated model params size (MB)


Trial 158 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.004009330376139733, 'batch_size': 256, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 40, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.1878832959645087, 'l1_factor': 1.0181800909269853e-05, 'l2_factor': 0.00021203628596276004}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.019865407341374063
  Batch Size: 1024
  Hidden Dim: 128
  Num Layers: 3
  Embedding Dim: 30
  Optimizer Type: Adam
  Dropout Prob: 0.3212800524717899
  L1 Factor: 7.899445155744692e-05
  L2 Factor: 1.4133709403945858e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 174 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-20-35.png


[I 2024-02-02 07:20:36,432] Trial 159 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.019865407341374063, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 30, 'optimizer_type': 'Adam', 'dropout_prob': 0.3212800524717899, 'l1_factor': 7.899445155744692e-05, 'l2_factor': 1.4133709403945858e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 560   
1 | decoder | SimpleLSTM | 746 K 
---------------------------------------
746 K     Trainable params
0         Non-trainable params
746 K     Total params
2.987     Total estimated model params size (MB)


Trial 159 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.019865407341374063, 'batch_size': 1024, 'hidden_dim': 128, 'num_layers': 3, 'embedding_dim': 30, 'optimizer_type': 'Adam', 'dropout_prob': 0.3212800524717899, 'l1_factor': 7.899445155744692e-05, 'l2_factor': 1.4133709403945858e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.008868173101356784
  Batch Size: 256
  Hidden Dim: 64
  Num Layers: 5
  Embedding Dim: 20
  Optimizer Type: RMSprop
  Dropout Prob: 0.3930979177879184
  L1 Factor: 4.590635769329991e-05
  L2 Factor: 2.6975362858689393e-05


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Processing words:   0%|          | 0/1085 [00:00<?, ?word/s]

Monitored metric win_rate did not improve in the last 175 records. Best score: 16.774. Signaling Trainer to stop.


Plot saved to /home/sayem/Desktop/Hangman/plots/win_rates_plot_epoch_0_2024-02-02_07-23-08.png


[I 2024-02-02 07:23:08,716] Trial 160 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.008868173101356784, 'batch_size': 256, 'hidden_dim': 64, 'num_layers': 5, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.3930979177879184, 'l1_factor': 4.590635769329991e-05, 'l2_factor': 2.6975362858689393e-05}. Best is trial 0 with value: 16.774192810058594.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Encoder    | 560   
1 | decoder | SimpleLSTM | 348 K 
---------------------------------------
349 K     Trainable params
0         Non-trainable params
349 K     Total params
1.397     Total estimated model params size (MB)


Trial 160 finished with value: 16.774192810058594 and parameters: {'learning_rate': 0.008868173101356784, 'batch_size': 256, 'hidden_dim': 64, 'num_layers': 5, 'embedding_dim': 20, 'optimizer_type': 'RMSprop', 'dropout_prob': 0.3930979177879184, 'l1_factor': 4.590635769329991e-05, 'l2_factor': 2.6975362858689393e-05}.
 Best trial so far: Trial 0
 Best win rate so far: 16.774192810058594
Starting trial with parameters:
  Learning Rate: 0.08544684954294562
  Batch Size: 1024
  Hidden Dim: 64
  Num Layers: 1
  Embedding Dim: 20
  Optimizer Type: RMSprop
  Dropout Prob: 0.04702646986418184
  L1 Factor: 2.4462965514827083e-05
  L2 Factor: 1.9355622975918024e-05


Training: |          | 0/? [00:00<?, ?it/s]

In [None]:
STOP

In [None]:
from pytorch_lightning.profilers import SimpleProfiler
import pytorch_lightning as pl
from scr.custom_callbacks import *
from scr.dataset import *
from scr.data_module import *
from scr.trainer_ import *

torch.cuda.empty_cache()

# # Create Callbacks
# loss_logging_callback = LossLoggingCallback()

# , SchedulerSetupCallback()] # , loss_logging_callback]
callbacks = [early_stop_callback, step_level_early_stopping] 
FAST_DEV_RUN = False
# # Calculate the minimum percentage of validation batches
# min_val_batches = 1 / len(data_module.val_dataloader())

# # Create Trainer with Callbacks
trainer = pl.Trainer(
    default_root_dir=output_dir,
    fast_dev_run=FAST_DEV_RUN, 
    max_epochs=MAX_EPOCH, 
    callbacks=callbacks,
    num_sanity_val_steps=0,
    reload_dataloaders_every_n_epochs=1,
    enable_progress_bar=True 
    # val_check_interval=0.5
    # limit_train_batches=2,  # Limit the number of training batches to 2
    # limit_val_batches=2     # Limit the number of validation batches to 2
)

# # # # # print(f"Running for {NUM_STRATIFIED_SAMPLES} samples...")
# # # print()
# # # # # # # Assuming combined_eval_metrics is a list of dictionaries
# combined_eval_metrics = trainer.validate(model=lightning_model, datamodule=data_module)

# # performance_metrics_dict = {k: v for d in combined_eval_metrics for k, v in d.items()}

# # # # # # print("Converted Performance Metrics Dictb ionary:", performance_metrics_dict)

# # trainer.datamodule.update_performance_metrics(combined_eval_metrics)

# data_module.update_performance_metrics(performance_metrics_dict)


# # Define the objective function for Optuna
# def objective(trial):
#     # Hyperparameters to be tuned by Optuna
#     learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
#     batch_size = trial.suggest_categorical('batch_size', [128, 256, 512, 1024])
#     hidden_dim = trial.suggest_categorical('hidden_dim', [16, 32, 64, 128])
#     num_layers = trial.suggest_int('num_layers', 1, 5)
#     embedding_dim = trial.suggest_categorical('embedding_dim', [20, 30, 40, 50])

#     print(f"Starting trial with parameters:\n"
#         f"  Learning Rate: {learning_rate}\n"
#         f"  Batch Size: {batch_size}\n"
#         f"  Hidden Dim: {hidden_dim}\n"
#         f"  Num Layers: {num_layers}\n"
#         f"  Embedding Dim: {embedding_dim}")

#     # Update the batch size in the data module
#     data_module.batch_size = batch_size

#     # Re-instantiate the encoder and decoder with the new hyperparameters
#     encoder = Encoder(num_embeddings, embedding_dim, \
#         max_word_length, char_feature_dim, additional_state_features)

#     input_dim = max_word_length * embedding_dim + additional_state_features

#     decoder = SimpleLSTM(input_dim=input_dim, hidden_dim=hidden_dim, \
#             output_dim=output_dim, num_layers=num_layers, \
#             missed_char_dim=missed_char_dim)

#     # Re-instantiate the model with the new encoder, decoder, and learning rate
#     model = HangmanModel(encoder, decoder, learning_rate, char_frequency, \
#         max_word_length, test_words=sampled_test_words)

#     # Set up PyTorch Lightning Trainer with Optuna integration
#     logger = TensorBoardLogger(checkpoints_dir / "tb_logs", name="HangmanModel")
#     checkpoint_callback = ModelCheckpoint(dirpath=checkpoints_dir, \
#         filename="best-checkpoint", monitor="win_rate", mode="max", save_top_k=1)

#     trainer = pl.Trainer(
#         default_root_dir=checkpoints_dir,
#         logger=logger,
#         callbacks=[checkpoint_callback],
#         max_epochs=MAX_EPOCH,
#         log_every_n_steps=10,
#         enable_progress_bar=True,
#         fast_dev_run=False,
#         num_sanity_val_steps=0,
#         reload_dataloaders_every_n_epochs=1
#     )

#     # Fit the model
#     trainer.fit(model, datamodule=data_module)

#     # Return the best win rate recorded during training
#     best_win_rate = checkpoint_callback.best_model_score.item() \
#         if checkpoint_callback.best_model_score is not None else 0

#     return best_win_rate


##### Tuning: lr

In [None]:
# from pytorch_lightning.tuner.tuning import Tuner

# # Assuming lightning_model, train_loader, and val_loader are already defined
# # Initialize the tuner with your trainer
# tuner = Tuner(trainer)

# # Run the learning rate finder using the data module
# lr_finder = tuner.lr_find(model=lightning_model, 
#                         datamodule=data_module)

# # Plot the learning rate finder results
# fig = lr_finder.plot(suggest=True)
# fig.show()

# # Get the suggested learning rate
# new_lr = lr_finder.suggestion()
# print(f"Suggested Learning Rate: {new_lr}")

# # Update model's learning rate
# lightning_model.learning_rate = 0.017378008287493765 # new_lr

# # Optionally, you can view the results of the LR finder
# print(lr_finder.results)

In [None]:
# Update model's learning rate
lightning_model.learning_rate = 0.017378008287493765 # new_lr

##### Tuning: Batch

In [None]:
# # Assuming lightning_model is already defined
# new_batch_size = tuner.scale_batch_size(
#     model=lightning_model,
#     datamodule=data_module,
#     mode='power',  # or 'binsearch'
#     steps_per_trial=10,
#     init_val=64,
#     max_trials=4
# )

# Update the batch size in the data module
data_module.batch_size = 1024 # new_batch_size

# print(f"Tune Batch size: ", new_batch_size)

##### Training

In [None]:
# # # # # Validate the model (if needed)
# trainer.validate(model=lightning_model, datamodule=data_module)
print(f"Training Begin for {NUM_STRATIFIED_SAMPLES} words: {len(train_dataset)} Games")
# # # # # # Fit the model
trainer.fit(lightning_model, data_module)

# # Optionally print the profiler summary
# # print(profiler.summary())

# # Save the entire model
# trained_model_file = models_dir / f"{NUM_STRATIFIED_SAMPLES}_trained_model.pth"
# torch.save(lightning_model, trained_model_file)
# print(f"Model saved at {trained_model_file}")

In [None]:
# Save the entire model
trained_model_file = models_dir / f"{NUM_STRATIFIED_SAMPLES}_trained_model.pth"
torch.save(lightning_model, trained_model_file)
print(f"Model saved at {trained_model_file}")

##### Testing

In [None]:
# Finding the maximum word length in the list
max_word_length_in_list = max(len(word) for word in sampled_test_words)

max_word_length_in_list

In [None]:
# # Load the entire LSTM model object
# untrained_model_file_path = models_dir / f"{NUM_STRATIFIED_SAMPLES}_trained_model.pth"

untrained_model = torch.load(untrained_model_file)

# Example usage
result = play_games_and_calculate_stats(untrained_model, \
    sampled_test_words, char_frequency, max_word_length)

print(f"Untrained model performence: {result['overall_win_rate']} % win rate")

performance_metrics = result['length_wise_stats']

# print(performance_metrics)

from scr.utils import *

plot_hangman_stats(performance_metrics)

for length, data in result["length_wise_stats"].items():
    print(f"Length {length}: Win Rate: {data['win_rate']}%, Average Attempts: {data['average_attempts_used']}")

In [None]:
# trained_model_file_path = models_dir / f"{NUM_STRATIFIED_SAMPLES}_trained_model.pth"
# trained_model = torch.load(trained_model_file_path)
# print(type(trained_model))

In [None]:
# # Load the entire LSTM model object

# trained_model_file_path = models_dir / f"{NUM_STRATIFIED_SAMPLES}_trained_model.pth"
# trained_model = torch.load(trained_model_file_path)

# # # If you want to use the model for inference
# # trained_model.eval()  # Set the model to evaluation mode

# from scr.game import *

# word = 'may'

# play_game_with_a_word(trained_model, \
#     word, char_frequency, max_word_length)

In [None]:
# # Example usage
# result = play_games_and_calculate_stats(trained_model, \
#     sampled_test_words, char_frequency, max_word_length)

# print(f"Overall Win Rate: {result['overall_win_rate']}%, Overall Average Attempts: {result['overall_avg_attempts']}")

# # for length, data in result["length_wise_stats"].items():
# #     print(f"Length {length}: Win Rate: {data['win_rate']}%, Average Attempts: {data['average_attempts_used']}")

In [None]:
performance_metrics = result['length_wise_stats']

plot_hangman_stats(performance_metrics)

In [None]:
performance_metrics