In [1]:
!ls

new_ai_player.py  sample_data  words.txt


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
import time
from new_ai_player import HangmanLSTM

def generate_training_data(word_list, num_samples, max_word_length=10):
    """Generate training data based on word list."""
    print("Generating training data...")
    data = []
    for _ in range(num_samples):
        word = random.choice(word_list)
        word_letters = set(word)
        guessed_letters = set()
        obscured_word = ['_' for _ in word]

        while len(guessed_letters) < len(word_letters):
            next_letter = random.choice(list(word_letters - guessed_letters))
            guessed_letters.add(next_letter)

            for i, char in enumerate(word):
                if char in guessed_letters:
                    obscured_word[i] = char

            word_input = encode_word_state(''.join(obscured_word), max_word_length)
            guessed_input = encode_guessed_letters(guessed_letters)
            target_letter = ord(next_letter) - ord('a')

            if '_' in ''.join(obscured_word):
                for _ in range(3):  # Repeat to balance the dataset
                    data.append((word_input, guessed_input, target_letter))
            else:
                data.append((word_input, guessed_input, target_letter))
    print(f"Generated {len(data)} training samples.")
    return data

def encode_word_state(word_display, max_word_length):
    """Encode word state as a one-hot matrix."""
    word_vector = np.zeros((max_word_length, 27))  # 27: 26 letters + 1 for '_'
    for i, char in enumerate(word_display[:max_word_length]):
        if char == '_':
            word_vector[i, 26] = 1  # Represent blanks as the 27th feature
        elif 'a' <= char <= 'z':
            word_vector[i, ord(char) - ord('a')] = 1
    return word_vector

def encode_guessed_letters(guessed_letters):
    """Encode guessed letters as a one-hot vector."""
    guessed_vector = np.zeros(26)
    for letter in guessed_letters:
        guessed_vector[ord(letter) - ord('a')] = 1
    return guessed_vector

def train_model(word_list, model_path='large_hangman_model_normal_parallel.pth', num_samples=10000, epochs=25, batch_size=32, lr=0.001):
    """Train the HangmanLSTM model."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Generate training data
    print("Generating training data...")
    start_time_data = time.time()
    data = generate_training_data(word_list, num_samples)
    end_time_data = time.time()
    print(f"Data generation took {end_time_data - start_time_data:.2f} seconds.")

    # Prepare tensors
    print("Preparing data tensors...")
    inputs_word = torch.tensor(np.array([item[0] for item in data]), dtype=torch.float32)
    inputs_guessed = torch.tensor(np.array([item[1] for item in data]), dtype=torch.float32)
    targets = torch.tensor(np.array([item[2] for item in data]), dtype=torch.long)

    # Create DataLoader
    print("Creating DataLoader...")
    dataset = torch.utils.data.TensorDataset(inputs_word, inputs_guessed, targets)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    # Initialize model
    model = HangmanLSTM().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Training loop
    print("Starting training loop...")
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for word_batch, guessed_batch, target_batch in dataloader:
            word_batch = word_batch.to(device)
            guessed_batch = guessed_batch.to(device)
            target_batch = target_batch.to(device)

            optimizer.zero_grad()
            outputs = model(word_batch, guessed_batch)
            loss = criterion(outputs, target_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")

    # Save the trained model
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")

if __name__ == "__main__":
    # Load the word list from file
    words_file = "words.txt"
    try:
        with open(words_file, 'r') as f:
            word_list = [line.strip().lower() for line in f if line.strip()]
        print(f"Loaded {len(word_list)} words from {words_file}.")
    except FileNotFoundError:
        print(f"Error: {words_file} not found.")
        exit(1)

    # Train the model
    print("Starting non-parallel training...")
    start_time = time.time()
    train_model(word_list, model_path="large_hangman_model_normal_parallel.pth", num_samples=20000, epochs=25, batch_size=32, lr=0.001)
    end_time = time.time()
    print(f"Non-parallel training completed in {end_time - start_time:.2f} seconds.")


Loaded 852 words from words.txt.
Starting non-parallel training...
Using device: cuda
Generating training data...
Generating training data...
Generated 245249 training samples.
Data generation took 0.74 seconds.
Preparing data tensors...
Creating DataLoader...
Starting training loop...
Epoch 1/25, Loss: 2.9377
Epoch 2/25, Loss: 2.8326
Epoch 3/25, Loss: 2.8113
Epoch 4/25, Loss: 2.8065
Epoch 5/25, Loss: 2.8026
Epoch 6/25, Loss: 2.7997
Epoch 7/25, Loss: 2.7980
Epoch 8/25, Loss: 2.7950
Epoch 9/25, Loss: 2.7932
Epoch 10/25, Loss: 2.7908
Epoch 11/25, Loss: 2.7880
Epoch 12/25, Loss: 2.7850
Epoch 13/25, Loss: 2.7821
Epoch 14/25, Loss: 2.7799
Epoch 15/25, Loss: 2.7759
Epoch 16/25, Loss: 2.7728
Epoch 17/25, Loss: 2.7695
Epoch 18/25, Loss: 2.7662
Epoch 19/25, Loss: 2.7627
Epoch 20/25, Loss: 2.7589
Epoch 21/25, Loss: 2.7559
Epoch 22/25, Loss: 2.7519
Epoch 23/25, Loss: 2.7474
Epoch 24/25, Loss: 2.7439
Epoch 25/25, Loss: 2.7405
Model saved to large_hangman_model_normal_parallel.pth
Non-parallel trai

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
import time
from multiprocessing import Pool
from new_ai_player import HangmanLSTM

def generate_training_data(word_list, num_samples, max_word_length=10):
    """Generate training data based on word list."""
    data = []
    for _ in range(num_samples):
        word = random.choice(word_list)
        word_letters = set(word)
        guessed_letters = set()
        obscured_word = ['_' for _ in word]

        while len(guessed_letters) < len(word_letters):
            next_letter = random.choice(list(word_letters - guessed_letters))
            guessed_letters.add(next_letter)

            for i, char in enumerate(word):
                if char in guessed_letters:
                    obscured_word[i] = char

            word_input = encode_word_state(''.join(obscured_word), max_word_length)
            guessed_input = encode_guessed_letters(guessed_letters)
            target_letter = ord(next_letter) - ord('a')

            if '_' in ''.join(obscured_word):
                for _ in range(3):  # Repeat to balance the dataset
                    data.append((word_input, guessed_input, target_letter))
            else:
                data.append((word_input, guessed_input, target_letter))
    return data

def encode_word_state(word_display, max_word_length):
    """Encode word state as a one-hot matrix."""
    word_vector = np.zeros((max_word_length, 27))  # 27: 26 letters + 1 for '_'
    for i, char in enumerate(word_display[:max_word_length]):
        if char == '_':
            word_vector[i, 26] = 1  # Represent blanks as the 27th feature
        elif 'a' <= char <= 'z':
            word_vector[i, ord(char) - ord('a')] = 1
    return word_vector

def encode_guessed_letters(guessed_letters):
    """Encode guessed letters as a one-hot vector."""
    guessed_vector = np.zeros(26)
    for letter in guessed_letters:
        guessed_vector[ord(letter) - ord('a')] = 1
    return guessed_vector

def parallel_generate_training_data(args):
    """Wrapper for multiprocessing."""
    word_list, num_samples, max_word_length = args
    return generate_training_data(word_list, num_samples, max_word_length)

def train_model_parallel(word_list, model_path='large_hangman_model_super_parallel.pth', num_samples=10000, epochs=25, batch_size=32, lr=0.001, num_workers=4):
    """Train the HangmanLSTM model with parallel data generation."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Parallel data generation
    print("Generating training data in parallel...")
    chunk_size = num_samples // num_workers
    pool_args = [(word_list, chunk_size, 10) for _ in range(num_workers)]

    with Pool(num_workers) as pool:
        data_chunks = pool.map(parallel_generate_training_data, pool_args)

    data = [item for chunk in data_chunks for item in chunk]  # Flatten the list
    print(f"Generated {len(data)} training samples.")

    # Prepare tensors
    print("Preparing data tensors...")
    inputs_word = torch.tensor(np.array([item[0] for item in data]), dtype=torch.float32)
    inputs_guessed = torch.tensor(np.array([item[1] for item in data]), dtype=torch.float32)
    targets = torch.tensor(np.array([item[2] for item in data]), dtype=torch.long)

    # Create DataLoader
    print("Creating DataLoader...")
    dataset = torch.utils.data.TensorDataset(inputs_word, inputs_guessed, targets)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    # Initialize model
    model = HangmanLSTM().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Training loop
    print("Starting training loop...")
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for word_batch, guessed_batch, target_batch in dataloader:
            word_batch = word_batch.to(device)
            guessed_batch = guessed_batch.to(device)
            target_batch = target_batch.to(device)

            optimizer.zero_grad()
            outputs = model(word_batch, guessed_batch)
            loss = criterion(outputs, target_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")

    # Save the trained model
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")

if __name__ == "__main__":
    # Load the word list from file
    words_file = "words.txt"
    try:
        with open(words_file, 'r') as f:
            word_list = [line.strip().lower() for line in f if line.strip()]
        print(f"Loaded {len(word_list)} words from {words_file}.")
    except FileNotFoundError:
        print(f"Error: {words_file} not found.")
        exit(1)

    # Train the model in parallel
    print("Starting parallel training...")
    start_time = time.time()
    train_model_parallel(word_list, model_path="large_hangman_model_super_parallel.pth", num_samples=20000, epochs=25, batch_size=32, lr=0.001, num_workers=4)
    end_time = time.time()
    print(f"Parallel training completed in {end_time - start_time:.2f} seconds.")


Loaded 852 words from words.txt.
Starting parallel training...
Using device: cuda
Generating training data in parallel...
Generated 244091 training samples.
Preparing data tensors...
Creating DataLoader...
Starting training loop...
Epoch 1/25, Loss: 2.9479
Epoch 2/25, Loss: 2.8439
Epoch 3/25, Loss: 2.8109
Epoch 4/25, Loss: 2.8061
Epoch 5/25, Loss: 2.8026
Epoch 6/25, Loss: 2.7988
Epoch 7/25, Loss: 2.7963
Epoch 8/25, Loss: 2.7958
Epoch 9/25, Loss: 2.7946
Epoch 10/25, Loss: 2.7910
Epoch 11/25, Loss: 2.7888
Epoch 12/25, Loss: 2.7840
Epoch 13/25, Loss: 2.7802
Epoch 14/25, Loss: 2.7763
Epoch 15/25, Loss: 2.7718
Epoch 16/25, Loss: 2.7674
Epoch 17/25, Loss: 2.7634
Epoch 18/25, Loss: 2.7574
Epoch 19/25, Loss: 2.7520
Epoch 20/25, Loss: 2.7473
Epoch 21/25, Loss: 2.7417
Epoch 22/25, Loss: 2.7378
Epoch 23/25, Loss: 2.7330
Epoch 24/25, Loss: 2.7285
Epoch 25/25, Loss: 2.7239
Model saved to large_hangman_model_super_parallel.pth
Parallel training completed in 604.43 seconds.
