##  Cynaptics Bonus Induction Task

Train a model to play the hangman game

You can use the training dataset provided to train your model

The validation dataset should only be used to evaluate your model

Submit your model training script in this notebook as well

Model Definition

In [14]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class HangmanGuessModel(nn.Module):
    def __init__(self, input_size_word=30, input_size_letters=26, hidden_size=128, output_size=26):
        super(HangmanGuessModel, self).__init__()

        # Linear layers for processing inputs
        self.word_fc = nn.Linear(input_size_word, hidden_size)
        self.guessed_fc = nn.Linear(input_size_letters, hidden_size)

        # Output layer
        self.output_fc = nn.Linear(hidden_size, output_size)

    def create_guess_tensors(self , guessed_letters):
        guessed_tensor = torch.zeros((1,26))
        for letter in guessed_letters:
            # Convert the letter to its corresponding index (0 for 'a', 1 for 'b', ..., 25 for 'z')
            index = ord(letter) - ord('a')
            if 0 <= index < 26:  # Ensure the index is within bounds
                guessed_tensor[0][index] = 1
        return guessed_tensor

    def preprocess_word(self, word):
        """
        Preprocess the word by removing spaces and converting the letters to numerical indices.
        """
        clean_word = word.replace(" ", "")
        # Create an empty tensor to hold the numerical representation of the word
        input_tensor = torch.zeros((1, 30))

        pos = 0
        for letter in clean_word:
            if letter == '_':
                input_tensor[0, pos] = 27
            elif letter == ' ':
                input_tensor[0, pos] = 28
            else:
                input_tensor[0, pos] = ord(letter) - 96  # Convert letter to index
            pos += 1

        return input_tensor

    def forward(self, masked_word, guessed_letters):
        """
        Forward pass for the HangmanGuessModel.

        Args:
            masked_word (torch.Tensor): Tensor of shape (batch_size, input_size_word).
            guessed_letters (torch.Tensor): Tensor of shape (batch_size, input_size_letters).

        Returns:
            torch.Tensor: Logits of shape (batch_size, output_size).
        """
        guessed_letters = self.create_guess_tensors(guessed_letters)
        masked_word = self.preprocess_word(masked_word)

        # Process masked word through the first linear layer
        word_features = F.relu(self.word_fc(masked_word))

        # Process guessed letters through the second linear layer
        guessed_features = F.relu(self.guessed_fc(guessed_letters))

        # Combine the features and pass through the output layer
        combined_features = word_features + guessed_features
        logits = self.output_fc(combined_features)

        return logits

Example Use

In [16]:
model = HangmanGuessModel()

model.eval()

logits = model('c n a _ t i c s' , ['c', 'n', 'a', 't', 'i', 's', 'v', 'u', 'e'])

logits

tensor([[ 1.5492, -1.9266,  1.5964, -0.5573,  2.0904, -2.4219,  5.7052, -0.3462,
          1.8838, -4.6508, -3.3523, -1.0865, -1.5030, -0.5102,  0.3036, -2.0485,
          0.2601, -1.7595,  0.5761,  0.3553, -0.4888,  0.2919,  2.6719, -0.0295,
         -2.0799, -0.0568]], grad_fn=<AddmmBackward0>)

Hangman Game

In [41]:
import random
import string
import torch

class CynapticsHangman():

  def __init__(self):
        self.guessed_letters = []     # your current guessed letters
        self.train_dataset_location = "/content/train.txt" # train dataset location
        self.validation_dataset_location = "/content/valid.txt" # validation dataset location

        self.train_dictionary = self.build_dictionary(self.train_dataset_location)
        self.validation_dictionary = self.build_dictionary(self.validation_dataset_location)
        self.lives_remaining = 6 # number of lives remaining in the game
        self.model = HangmanGuessModel()

  def init_model(self , model_path ):
        self.model = self.load_model_weights(model_path)

  # a function to build a dictionary from a dataset
  def build_dictionary(self , dictionary_file_location):
    text_file = open(dictionary_file_location,"r")
    full_dictionary = text_file.read().splitlines()
    text_file.close()
    return full_dictionary

  # the guess function. Put your model here to predict the next guessed letter based on th einput masked word and already guessed letters
  # IT IS ADVISIBLE TO ONLY CHANGE THIS SECTION OF CODE
  def guess(self , masked_word , lives_left):
    # function that returns the guessed word
    # you are to put your model here
    # you can access the guessed words , masked word , lives remaining in this function to make your prediction

    # an example input maybe as follows
    # masked_word: 'c n a _ t i c s'   lives_remaining: 2    guessed_letters: ['c', 'n', 'a', 't', 'i', 's', 'v', 'u', 'e']    your next guess should be ideally 'p'.

    # Given below is a random guess function
    all_letters = list(string.ascii_lowercase)  # List of all lowercase letters
    remaining_letters = [letter for letter in all_letters if letter not in self.guessed_letters] # remaining letters to guess from
    if remaining_letters:
      guess = random.choice(remaining_letters) # randomly selecting a letter from unguessed letters
    else:
      guess = 'e'  # All letters have been guessed , returning a default guess


    # Given below is a ML model based approach
    probabilities = self.model(masked_word , self.guessed_letters)
    mask = torch.ones_like(probabilities)  # Start with a mask of ones
    for letter in self.guessed_letters:
      if letter != ' ':
        letter_index = ord(letter) - 97  # Convert letter to index (a -> 0, b -> 1, ..., z -> 25)
        mask[0][letter_index] = 0  # Set the probability of already guessed letters to zero

    # Apply the mask to the probabilities
    masked_probabilities = probabilities * mask

    # Now select the letter with the highest probability that hasn't been guessed yet
    predicted_letter_index = torch.argmax(masked_probabilities).item()
    guessed_letter = chr(predicted_letter_index + 97)
    return guessed_letter


  # Save the model weights
  def save_model_weights(self , model, file_path):
      """
      Save model weights to a file.

      Args:
          model (nn.Module): The model whose weights are to be saved.
          file_path (str): Path to save the weights.
      """
      torch.save(model.state_dict(), file_path)
      print(f"Model weights saved to {file_path}")

  # Load the model weights
  def load_model_weights(self , file_path):
      """
      Load model weights from a file.

      Args:
          file_path (str): Path to the saved weights file.
      """
      model = HangmanGuessModel()
      model.load_state_dict(torch.load(file_path))
      model.eval()  # Set the model to evaluation mode
      print(f"Model weights loaded from {file_path}")
      return model


  def train(self , total_episodes = 10):
    print("Training started")
    model = HangmanGuessModel()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for episode in range(total_episodes):

        print(f"Episode {episode}")
        word = random.choice(self.train_dictionary)
        masked_word = '_' * len(word)

        guessed_letters = []
        lives_remaining = 6
        ground_truths = []
        predicted_truths = []
        max_tries = len(word) + 6


        while lives_remaining > 0 and '_' in masked_word and max_tries > 0:
            max_tries -= 1
            # Format the masked word for the model
            input_to_model = ' '.join(masked_word)  # e.g., "_ _ _ _ _"

            probabilities = model(input_to_model , guessed_letters)

            # Create a tensor to mask out the probabilities of already guessed letters
            mask = torch.ones_like(probabilities)  # Start with a mask of ones
            for letter in guessed_letters:
                letter_index = ord(letter) - 97  # Convert letter to index (a -> 0, b -> 1, ..., z -> 25)
                mask[0][letter_index] = 0  # Set the probability of already guessed letters to zero

            # Apply the mask to the probabilities
            masked_probabilities = probabilities * mask

            # Now select the letter with the highest probability that hasn't been guessed yet
            predicted_letter_index = torch.argmax(masked_probabilities).item()
            guessed_letter = chr(predicted_letter_index + 97)


            # Prepare ground truth
            ground_truth = torch.zeros((1, 26)).to(device)  # Assuming 26 letters

            norm = 0
            for letter in word:
                if letter not in masked_word:
                    ground_truth[0, ord(letter) - 97] += 1
                    norm += 1
            if norm > 0 :
                ground_truth /= norm

            if guessed_letter in word:
                masked_word = ''.join(
                    [c if c in guessed_letters + [guessed_letter] else '_' for c in word]
                )
            else:
                lives_remaining -= 1

            guessed_letters.append(guessed_letter)
            ground_truths.append(ground_truth)
            predicted_truths.append(probabilities)


        # Compute loss
        total_loss = 0
        for i in range(len(ground_truths)):
            total_loss += criterion(predicted_truths[i], ground_truths[i])

        # Backpropagation
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if (episode + 1) % 10 == 0:
            print(f'Episode: {episode + 1}, Loss: {total_loss.item()}')
        # File path to save/load weights
    weights_file = "hangman_model_weights.pth"

    # Save weights
    self.save_model_weights(model, weights_file)

  # function which returns the status of the current game based on the prediction
  def return_status (self , unmasked_word , masked_word  , guessed_letter):
      self.max_tries -= 1
      if guessed_letter in unmasked_word:
                masked_word = ''.join(
                    [c if c in self.guessed_letters else '_' for c in unmasked_word]
                )
                if '_' in masked_word:
                  return "ongoing" , self.lives_remaining , "Guess correct" , masked_word # successful guess
                else :
                  return "success" , self.lives_remaining , "Guess correct" , masked_word # successful guess and game over
      else:
                self.lives_remaining -= 1
                if self.lives_remaining == 0 :
                  return "failed" , self.lives_remaining , "Guess incorrect" , masked_word # unsuccessful guess
                else:
                  return "ongoing" , self.lives_remaining , "Guess incorrect" , masked_word # unsuccessful guess and game over


  # function to start a game
  def start_game(self, game_id , verbose=True):

        self.guessed_letters = [] #guessed letters
        self.guessed_letters.append(' ')
        word = random.choice(self.validation_dictionary) # a random word from the validation dataset
        masked_word = '_' * len(word)
        masked_word = ' '.join(masked_word)
        word = ' '.join(word)
        self.lives_remaining = 6 # default number of lives remianing at the start of the game

        if verbose:
              print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}. Masked Word: {3}.".format(game_id, self.lives_remaining, word , masked_word))

        self.max_tries = len(word) + 6 # maximum rounds a game can run for

        while self.lives_remaining > 0 and self.max_tries > 0 :

                guess_letter = self.guess(masked_word , self.lives_remaining)
                self.guessed_letters.append(guess_letter)

                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))

                status , self.lives_remaining , res , masked_word = self.return_status(word , masked_word , guess_letter)

                if verbose:
                    print("Sever response: {0}. Masked Word: {1}. Lives remaining: {2}".format(res , masked_word , self.lives_remaining))

                if status=="success":
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                    return True , 1

                elif status=="failed":
                    reason = '# of tries exceeded!'
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False , 0

Model Evaluation Code

Try out your model on 100 iterations before submitting

In [44]:
# creating an instance
hangman_game = CynapticsHangman()

# training the model
hangman_game.train(100)

# loading the model
hangman_game.init_model('/content/hangman_model_weights.pth')



Training started
Episode 0
Episode 1
Episode 2
Episode 3
Episode 4
Episode 5
Episode 6
Episode 7
Episode 8
Episode 9
Episode: 10, Loss: 79.0287857055664
Episode 10
Episode 11
Episode 12
Episode 13
Episode 14
Episode 15
Episode 16
Episode 17
Episode 18
Episode 19
Episode: 20, Loss: 46.92208480834961
Episode 20
Episode 21
Episode 22
Episode 23
Episode 24
Episode 25
Episode 26
Episode 27
Episode 28
Episode 29
Episode: 30, Loss: 31.923555374145508
Episode 30
Episode 31
Episode 32
Episode 33
Episode 34
Episode 35
Episode 36
Episode 37
Episode 38
Episode 39
Episode: 40, Loss: 36.39842987060547
Episode 40
Episode 41
Episode 42
Episode 43
Episode 44
Episode 45
Episode 46
Episode 47
Episode 48
Episode 49
Episode: 50, Loss: 35.47499465942383
Episode 50
Episode 51
Episode 52
Episode 53
Episode 54
Episode 55
Episode 56
Episode 57
Episode 58
Episode 59
Episode: 60, Loss: 56.754310607910156
Episode 60
Episode 61
Episode 62
Episode 63
Episode 64
Episode 65
Episode 66
Episode 67
Episode 68
Episode 69


  model.load_state_dict(torch.load(file_path))


In [46]:
win = 0 # number of games won
played = 0 # number of games played
games_to_play = 10 # number of games to play

for i in range(games_to_play):
    try :
        print('Playing ', i+1 , ' th game')
        status , state = hangman_game.start_game(game_id = i , verbose=True)
        win += state
        played+=1
        print()
    except Exception as e:
        print("Error " , e)


success_rate = win/played
print('Overall Success Rate = %.3f' % success_rate)

Playing  1  th game
Successfully start a new game! Game ID: 0. # of tries remaining: 6. Word: s p i n s t e r d o m. Masked Word: _ _ _ _ _ _ _ _ _ _ _.
Guessing letter: o
Sever response: Guess correct. Masked Word: _ _ _ _ _ _ _ _ _ o _. Lives remaining: 6
Guessing letter: n
Sever response: Guess correct. Masked Word: _ _ _ n _ _ _ _ _ o _. Lives remaining: 6
Guessing letter: e
Sever response: Guess correct. Masked Word: _ _ _ n _ _ e _ _ o _. Lives remaining: 6
Guessing letter: t
Sever response: Guess correct. Masked Word: _ _ _ n _ t e _ _ o _. Lives remaining: 6
Guessing letter: r
Sever response: Guess correct. Masked Word: _ _ _ n _ t e r _ o _. Lives remaining: 6
Guessing letter: a
Sever response: Guess incorrect. Masked Word: _ _ _ n _ t e r _ o _. Lives remaining: 5
Guessing letter: b
Sever response: Guess incorrect. Masked Word: _ _ _ n _ t e r _ o _. Lives remaining: 4
Guessing letter: c
Sever response: Guess incorrect. Masked Word: _ _ _ n _ t e r _ o _. Lives remaining: 3
G