In [1]:
!pip install ipywidgets -q

import numpy as np
import random
from collections import defaultdict, Counter
import pickle
from typing import List, Set, Tuple, Dict
import os
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
import time

%matplotlib inline

print("All imports done!")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m47.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[?25hAll imports done!


In [2]:
# CELL 2: Hidden Markov Model (HMM)
class HangmanHMM:
    """Letter-probability model for Hangman, keyed by word length and position.

    For every word length seen in training (up to ``max_length``) the model
    keeps add-one-smoothed tables over the 26 lowercase letters:

    * ``emission_probs[length][pos][letter]`` - letter frequency at ``pos``.
    * ``transition_probs[length][pos][prev][letter]`` - letter frequency at
      ``pos`` given that the letter at ``pos - 1`` is ``prev``.
    * ``initial_probs[length][letter]`` - first-letter frequency. It is stored
      and persisted but not consulted by ``predict_letter_probs``.

    ``letter_freq`` holds corpus-wide letter counts and is the fallback for
    word lengths that have no tables.
    """

    # The symbols every table is defined over. The smoothing denominators
    # assume exactly this alphabet, so training words are restricted to it.
    ALPHABET = 'abcdefghijklmnopqrstuvwxyz'

    def __init__(self, max_length=20):
        self.max_length = max_length
        self.emission_probs = {}
        self.transition_probs = {}
        self.initial_probs = {}
        self.letter_freq = Counter()

    def _smoothed(self, counts) -> Dict[str, float]:
        """Turn raw letter counts into an add-one-smoothed distribution over ALPHABET."""
        alphabet = self.ALPHABET
        total = sum(counts.values())
        return {c: (counts.get(c, 0) + 1) / (total + len(alphabet)) for c in alphabet}

    def train(self, word_list: List[str]):
        """Fit every probability table from ``word_list``.

        Words are lowercased and stripped. Only non-empty words made entirely
        of the letters a-z are used: any other character (digits, accents,
        punctuation) has no slot in the 26-letter tables and would make the
        smoothed distributions sum to less than 1.

        Previously learned tables and counts are discarded first, so the model
        always reflects exactly the corpus passed to the latest call.

        Args:
            word_list: Training words; entries that do not qualify are skipped.
        """
        print("Training HMM...")
        allowed = set(self.ALPHABET)

        # Start from a clean slate so repeated calls do not mix corpora.
        self.emission_probs = {}
        self.transition_probs = {}
        self.initial_probs = {}
        self.letter_freq = Counter()

        words_by_length = defaultdict(list)
        for word in word_list:
            word = word.lower().strip()
            if word and allowed.issuperset(word):
                words_by_length[len(word)].append(word)
                # Global counts include words longer than max_length; they
                # only feed the length-agnostic fallback.
                self.letter_freq.update(word)

        for length, words in words_by_length.items():
            if length > self.max_length:
                continue

            emission_counts = [Counter() for _ in range(length)]
            initial_counts = Counter()
            # transition_counts[pos][prev] counts the letters that follow
            # `prev` when `prev` sits at position pos - 1.
            transition_counts = {pos: defaultdict(Counter) for pos in range(1, length)}

            for word in words:
                for pos, char in enumerate(word):
                    emission_counts[pos][char] += 1
                    if pos == 0:
                        initial_counts[char] += 1
                    else:
                        transition_counts[pos][word[pos - 1]][char] += 1

            self.emission_probs[length] = {
                pos: self._smoothed(emission_counts[pos]) for pos in range(length)
            }

            self.transition_probs[length] = {}
            for pos in range(1, length):
                seen = transition_counts[pos]
                # A predecessor never observed at pos - 1 smooths to the
                # uniform distribution (1/26 per letter).
                self.transition_probs[length][pos] = {
                    prev: self._smoothed(seen.get(prev, {})) for prev in self.ALPHABET
                }

            self.initial_probs[length] = self._smoothed(initial_counts)

        print(f"HMM trained on {len(word_list)} words")

    def predict_letter_probs(self, masked_word: str, guessed_letters: Set[str]) -> Dict[str, float]:
        """Score every unguessed letter for the given partially revealed word.

        Args:
            masked_word: The word with hidden positions written as ``'_'``.
            guessed_letters: Letters already tried; they are never returned.

        Returns:
            A dict mapping unguessed letters to scores that sum to 1. It is
            empty when every letter has already been guessed.
        """
        alphabet = self.ALPHABET
        length = len(masked_word)

        if length not in self.emission_probs:
            # No tables for this length: fall back to corpus-wide letter
            # counts (an unseen letter counts as 1), normalised over the
            # letters that are still available so the result is a proper
            # distribution just like the main branch below.
            raw = {c: self.letter_freq.get(c, 1)
                   for c in alphabet if c not in guessed_letters}
            if not raw:
                return {}
            total = sum(raw.values())
            return {c: count / total for c, count in raw.items()}

        scores = defaultdict(float)
        for pos, ch in enumerate(masked_word):
            if ch != '_':
                continue
            for let in alphabet:
                if let in guessed_letters:
                    continue
                scores[let] += self.emission_probs[length][pos].get(let, 1/26)

            # When the letter to the left is revealed, add the conditional
            # evidence with a 1.5x weight.
            if pos > 0 and masked_word[pos-1] != '_':
                prev = masked_word[pos-1]
                if prev in self.transition_probs[length][pos]:
                    for let in alphabet:
                        if let not in guessed_letters:
                            scores[let] += self.transition_probs[length][pos][prev].get(let, 1/26) * 1.5

        total = sum(scores.values())
        if total > 0:
            scores = {k: v/total for k, v in scores.items()}
        else:
            # Nothing was scored (e.g. no hidden positions): spread the mass
            # evenly over whatever letters are left.
            remaining = [c for c in alphabet if c not in guessed_letters]
            scores = {c: 1/len(remaining) for c in remaining}
        return dict(scores)

    def save(self, path):
        """Pickle all tables, the letter counts and ``max_length`` to ``path``."""
        with open(path, 'wb') as f:
            pickle.dump({
                'emission': self.emission_probs,
                'transition': self.transition_probs,
                'initial': self.initial_probs,
                'freq': self.letter_freq,
                'max_len': self.max_length
            }, f)
        print(f"HMM saved to {path}")

    def load(self, path):
        """Restore the state written by ``save`` from ``path``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only load files
        that this notebook produced itself.
        """
        with open(path, 'rb') as f:
            data = pickle.load(f)
            self.emission_probs = data['emission']
            self.transition_probs = data['transition']
            self.initial_probs = data['initial']
            self.letter_freq = data['freq']
            self.max_length = data['max_len']
        print(f"HMM loaded from {path}")

In [3]:
# CELL 3: Hangman Environment
class HangmanEnvironment:
    """One game of Hangman for a fixed secret word, with a reset/step API.

    Rewards handed out by ``step``:
      * repeated guess: -2 (no life lost)
      * correct guess: +1 per occurrence of the letter, plus +20 on the
        guess that completes the word
      * wrong guess: -5, plus a further -10 on the guess that uses up the
        last life
    """

    def __init__(self, word: str, max_wrong: int = 6):
        self.max_wrong = max_wrong
        self.word = word.lower()
        self.reset()

    def reset(self):
        """Clear all per-game bookkeeping and return the initial state dict."""
        self.guessed = set()
        self.wrong = 0
        self.repeated = 0
        self.won = False
        self.over = False
        return self.get_state()

    def get_state(self):
        """Return a snapshot of the game as a plain dict.

        ``guessed_letters`` is a copy, so callers may mutate it freely.
        """
        pattern = []
        for ch in self.word:
            pattern.append(ch if ch in self.guessed else '_')

        return {
            'masked_word': ''.join(pattern),
            'guessed_letters': set(self.guessed),
            'wrong_guesses': self.wrong,
            'lives_remaining': self.max_wrong - self.wrong,
            'game_over': self.over,
            'won': self.won,
            'word_length': len(self.word)
        }

    def step(self, letter: str):
        """Apply one guess.

        Returns:
            A ``(state, reward, game_over)`` tuple.
        """
        guess = letter.lower()

        # Repeats are penalised but otherwise leave the game untouched.
        if guess in self.guessed:
            self.repeated += 1
            return self.get_state(), -2, self.over

        self.guessed.add(guess)

        # Miss: lose a life, and end the game once the budget is spent.
        if guess not in self.word:
            self.wrong += 1
            reward = -5
            if self.wrong >= self.max_wrong:
                self.over = True
                reward -= 10
            return self.get_state(), reward, self.over

        # Hit: one point per revealed occurrence, bonus for finishing.
        reward = 1 * self.word.count(guess)
        if set(self.word) <= self.guessed:
            self.won = True
            self.over = True
            reward += 20
        return self.get_state(), reward, self.over

    def get_available_actions(self):
        """Return the lowercase letters not yet guessed, in alphabetical order."""
        untried = []
        for candidate in 'abcdefghijklmnopqrstuvwxyz':
            if candidate not in self.guessed:
                untried.append(candidate)
        return untried

In [4]:
# CELL 4: State Encoder
class StateEncoder:
    """Flattens a game-state dict plus HMM letter scores into a feature vector."""

    def encode(self, state: Dict, hmm_probs: Dict[str, float]) -> np.ndarray:
        """Build the 33-element float32 feature vector.

        Layout: 7 summary features (revealed fraction, lives / 6, guesses / 26,
        length / 20, revealed-vowel fraction, revealed-consonant fraction,
        hidden fraction) followed by the HMM score of each letter a-z
        (0.0 where ``hmm_probs`` has no entry).

        Args:
            state: Must provide ``masked_word``, ``word_length``,
                ``lives_remaining`` and ``guessed_letters``.
            hmm_probs: Letter -> score mapping.
        """
        pattern = state['masked_word']
        word_len = state['word_length']

        hidden = pattern.count('_')
        shown = len(pattern) - hidden
        shown_vowels = sum(pattern.count(v) for v in 'aeiou')
        shown_consonants = shown - shown_vowels

        def share(count):
            # Fraction of the word; 0 for a zero-length word.
            return count / word_len if word_len > 0 else 0

        summary = [
            share(shown),
            state['lives_remaining'] / 6.0,
            len(state['guessed_letters']) / 26.0,
            word_len / 20.0,
            share(shown_vowels),
            share(shown_consonants),
            share(hidden),
        ]
        letter_scores = [hmm_probs.get(letter, 0.0)
                         for letter in 'abcdefghijklmnopqrstuvwxyz']

        return np.array(summary + letter_scores, dtype=np.float32)

In [9]:
# CELL 5: Q-Learning Agent (FULL signature)
class HangmanQLearningAgent:
    """Hangman player using linear Q-learning on top of an HMM letter model.

    There is one weight vector per letter; Q(state, letter) is the dot product
    of that vector with the encoded state. The HMM scores are used three ways:
    as input features, as the sampling distribution while exploring, and as an
    additive bonus while exploiting.
    """

    def __init__(self,
                 hmm,
                 learning_rate: float = 0.01,
                 discount_factor: float = 0.95,
                 epsilon_start: float = 1.0,
                 epsilon_end: float = 0.01,
                 epsilon_decay: float = 0.995):
        self.hmm = hmm
        self.encoder = StateEncoder()

        # Learning hyper-parameters.
        self.lr = learning_rate
        self.gamma = discount_factor

        # Exploration schedule: multiplied by `epsilon_decay` once per
        # training episode, never dropping below `epsilon_end`.
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay

        # Per-letter weight vectors; None until the feature size is known.
        self.weights = dict.fromkeys('abcdefghijklmnopqrstuvwxyz')

        # Running totals across every call to train_episode.
        self.stats = {'episodes': 0, 'wins': 0, 'total_reward': 0.0}

    def _init_weights(self, size: int):
        """Give every still-uninitialised letter a small random vector of ``size``."""
        for letter in 'abcdefghijklmnopqrstuvwxyz':
            if self.weights[letter] is None:
                self.weights[letter] = np.random.randn(size) * 0.01

    def get_q(self, features: np.ndarray, action: str) -> float:
        """Return Q(features, action), creating the weight vectors on first use."""
        vector = self.weights[action]
        if vector is None:
            self._init_weights(len(features))
            vector = self.weights[action]
        return np.dot(vector, features)

    def choose_action(self, state: Dict, actions: List[str], training: bool = True) -> str:
        """Pick the next letter from ``actions``.

        While training, with probability ``epsilon`` the letter is sampled in
        proportion to its HMM score (1e-6 for letters the HMM did not score).
        Otherwise the letter with the highest ``Q + 2.0 * hmm_score`` wins.
        """
        letter_scores = self.hmm.predict_letter_probs(
            state['masked_word'], state['guessed_letters'])
        feats = self.encoder.encode(state, letter_scores)

        # Explore: sample according to the HMM rather than uniformly.
        if training and random.random() < self.epsilon:
            raw = [letter_scores.get(a, 1e-6) for a in actions]
            norm = sum(raw)
            return np.random.choice(actions, p=[w / norm for w in raw])

        # Exploit: learned value plus the HMM bonus; first maximum wins ties.
        def boosted_value(candidate):
            return self.get_q(feats, candidate) + letter_scores.get(candidate, 0) * 2.0

        return max(actions, key=boosted_value)

    def update(self, state: Dict, action: str, reward: float,
               next_state: Dict, done: bool):
        """Apply one TD(0) step to the weight vector of ``action``.

        The target is ``reward`` on terminal transitions, otherwise
        ``reward + gamma * max_a Q(next_state, a)`` over the letters not yet
        guessed in ``next_state`` (0.0 if none remain).
        """
        scores_now = self.hmm.predict_letter_probs(
            state['masked_word'], state['guessed_letters'])
        feats = self.encoder.encode(state, scores_now)
        q_curr = self.get_q(feats, action)

        q_target = reward
        if not done:
            scores_next = self.hmm.predict_letter_probs(
                next_state['masked_word'], next_state['guessed_letters'])
            next_feats = self.encoder.encode(next_state, scores_next)
            already_tried = next_state['guessed_letters']
            candidates = [c for c in 'abcdefghijklmnopqrstuvwxyz'
                          if c not in already_tried]
            if candidates:
                best_next = max(self.get_q(next_feats, a) for a in candidates)
            else:
                best_next = 0.0
            q_target = reward + self.gamma * best_next

        td_error = q_target - q_curr
        self.weights[action] += self.lr * td_error * feats

    def decay_epsilon(self):
        """Shrink epsilon by one decay step, clamped at ``epsilon_end``."""
        decayed = self.epsilon * self.epsilon_decay
        self.epsilon = max(self.epsilon_end, decayed)

    def train_episode(self, word: str) -> Dict:
        """Play ``word`` once with exploration, learning after every guess.

        Returns:
            ``{'won': bool, 'reward': float}`` for this episode.
        """
        env = HangmanEnvironment(word)
        state = env.reset()
        episode_return = 0.0

        while not state['game_over']:
            letter = self.choose_action(state, env.get_available_actions(), training=True)
            successor, reward, finished = env.step(letter)
            self.update(state, letter, reward, successor, finished)
            episode_return += reward
            state = successor

        won = state['won']
        self.stats['episodes'] += 1
        self.stats['total_reward'] += episode_return
        if won:
            self.stats['wins'] += 1

        self.decay_epsilon()
        return {'won': won, 'reward': episode_return}

    def play(self, word: str) -> Dict:
        """Play ``word`` greedily (no exploration, no learning).

        Returns:
            ``{'won': bool, 'word': str, 'wrong': int}``.
        """
        env = HangmanEnvironment(word)
        state = env.reset()
        while not state['game_over']:
            letter = self.choose_action(state, env.get_available_actions(), training=False)
            state = env.step(letter)[0]

        return {
            'won': state['won'],
            'word': word,
            'wrong': state['wrong_guesses']
        }

    def save(self, path: str):
        """Pickle the weights, running stats and current epsilon to ``path``."""
        checkpoint = {
            'weights': self.weights,
            'stats': self.stats,
            'epsilon': self.epsilon
        }
        with open(path, 'wb') as f:
            pickle.dump(checkpoint, f)
        print(f"Agent saved → {path}")

    def load(self, path: str):
        """Restore a checkpoint written by ``save``.

        A checkpoint without an epsilon entry falls back to ``epsilon_end``.
        Note that unpickling runs code embedded in the file, so only load
        checkpoints from a trusted source.
        """
        with open(path, 'rb') as f:
            checkpoint = pickle.load(f)
            self.weights = checkpoint['weights']
            self.stats = checkpoint['stats']
            self.epsilon = checkpoint.get('epsilon', self.epsilon_end)
        print(f"Agent loaded ← {path}")

In [11]:
# CELL 6: Upload Your Real Datasets
from google.colab import files
import io

# Ask for the two word lists. The result is kept in `uploaded`, which the
# loader below indexes by file name and whose values it decodes from bytes.
print("Please upload your 'corpus.txt' and 'test.txt' files:")
uploaded = files.upload()

# Verify and load
def load_words_from_uploaded(name):
    """Extract the cleaned word list from an uploaded file.

    Reads the module-level ``uploaded`` mapping (file name -> raw bytes).
    Each line is stripped and lowercased; blank lines and lines containing
    anything other than letters are dropped.

    Args:
        name: Key of the file inside ``uploaded``.

    Returns:
        The list of lowercase words, in file order.

    Raises:
        FileNotFoundError: If ``name`` was not part of the upload.
    """
    if name not in uploaded:
        raise FileNotFoundError(f"{name} not uploaded!")

    # 'utf-8-sig' reads plain UTF-8 and also strips a leading byte-order mark.
    # With plain 'utf-8' the BOM stays attached to the first line, which then
    # fails isalpha() and the first word of the file is silently lost.
    content = uploaded[name].decode('utf-8-sig')

    words = []
    for line in content.splitlines():
        token = line.strip()
        if token.isalpha():  # also False for the empty string
            words.append(token.lower())

    print(f"Loaded {len(words)} words from {name}")
    return words

# Parse the training corpus and the held-out test list from the upload.
train_words = load_words_from_uploaded("corpus.txt")
test_words = load_words_from_uploaded("test.txt")

# Write the cleaned lists back to disk, one word per line, so that later
# cells can read them by file name (required for training).
for filename, cleaned_words in (("corpus.txt", train_words), ("test.txt", test_words)):
    with open(filename, "w") as f:
        f.write("\n".join(cleaned_words))

print("Your real datasets are ready for training!")

Please upload your 'corpus.txt' and 'test.txt' files:


Saving test.txt to test.txt
Saving corpus.txt to corpus.txt
Loaded 49979 words from corpus.txt
Loaded 2000 words from test.txt
Your real datasets are ready for training!


In [15]:
# CELL 7: Train the System on YOUR Data
def load_words(path):
    """Read a one-word-per-line text file into a list of lowercase words.

    Lines are stripped of surrounding whitespace; blank lines and lines that
    contain anything other than letters are skipped.
    """
    cleaned = []
    with open(path, 'r') as f:
        for raw_line in f:
            token = raw_line.strip()
            # isalpha() is False for the empty string, so blanks drop out too.
            if token.isalpha():
                cleaned.append(token.lower())
    return cleaned

# --- Run configuration ---------------------------------------------------
# Number of training games. This is the loop bound the cell has always used;
# the start-up banner is now derived from it so the two cannot disagree.
NUM_EPISODES = 100000
# Fixed seed so that re-running the notebook reproduces the same training run.
SEED = 42

# The agent draws from both Python's `random` (word choice, epsilon test) and
# NumPy's global generator (weight init, exploration sampling): seed both.
random.seed(SEED)
np.random.seed(SEED)

print("Loading your corpus...")
words = load_words("corpus.txt")
if not words:
    # Fail here with a clear message instead of an IndexError from
    # random.choice after the models have already been built.
    raise ValueError("corpus.txt contains no usable words; upload a non-empty corpus first")

print(f"Training HMM on {len(words)} words...")
hmm = HangmanHMM(max_length=20)
hmm.train(words)
hmm.save("hangman_hmm.pkl")

print("Initializing Q-Learning Agent...")
agent = HangmanQLearningAgent(
    hmm=hmm,
    learning_rate=0.01,
    discount_factor=0.95,
    epsilon_start=1.0,
    epsilon_end=0.01,
    epsilon_decay=0.995
)

print(f"Starting training ({NUM_EPISODES:,} episodes)...")
eval_every = 1000  # progress is reported once per this many episodes
for ep in range(NUM_EPISODES):
    word = random.choice(words)
    result = agent.train_episode(word)

    if (ep + 1) % eval_every == 0:
        # Both figures are running averages over every episode so far, not
        # over the most recent `eval_every` episodes only.
        win_rate = agent.stats['wins'] / agent.stats['episodes']
        avg_reward = agent.stats['total_reward'] / agent.stats['episodes']
        print(f"Episode {ep+1}/{NUM_EPISODES} | Win Rate: {win_rate:.2%} | Avg Reward: {avg_reward:+.2f} | ε: {agent.epsilon:.3f}")

agent.save("hangman_agent.pkl")
print("Training complete! Models saved.")

Loading your corpus...
Training HMM on 49979 words...
Training HMM...
HMM trained on 49979 words
HMM saved to hangman_hmm.pkl
Initializing Q-Learning Agent...
Starting training (10,000 episodes)...
Episode 1000/100000 | Win Rate: 13.80% | Avg Reward: -27.93 | ε: 0.010
Episode 2000/100000 | Win Rate: 18.05% | Avg Reward: -25.64 | ε: 0.010
Episode 3000/100000 | Win Rate: 20.27% | Avg Reward: -24.52 | ε: 0.010
Episode 4000/100000 | Win Rate: 20.88% | Avg Reward: -24.22 | ε: 0.010
Episode 5000/100000 | Win Rate: 21.66% | Avg Reward: -23.81 | ε: 0.010
Episode 6000/100000 | Win Rate: 22.17% | Avg Reward: -23.51 | ε: 0.010
Episode 7000/100000 | Win Rate: 22.83% | Avg Reward: -23.18 | ε: 0.010
Episode 8000/100000 | Win Rate: 23.15% | Avg Reward: -22.98 | ε: 0.010
Episode 9000/100000 | Win Rate: 23.66% | Avg Reward: -22.72 | ε: 0.010
Episode 10000/100000 | Win Rate: 24.13% | Avg Reward: -22.47 | ε: 0.010
Episode 11000/100000 | Win Rate: 24.35% | Avg Reward: -22.36 | ε: 0.010
Episode 12000/10000

In [17]:
# CELL 8: Evaluate on Your Test Set
# Rebuild both models from the checkpoints on disk rather than reusing the
# in-memory objects, so evaluation reflects exactly what was saved.
print("Loading models for evaluation...")
hmm = HangmanHMM()
hmm.load("hangman_hmm.pkl")
agent = HangmanQLearningAgent(hmm)
agent.load("hangman_agent.pkl")

test_words = load_words("test.txt")
print(f"Evaluating on {len(test_words)} test words...")

# One greedy game per test word; each result has 'won', 'word' and 'wrong'.
results = list(map(agent.play, test_words))

wins = len([r for r in results if r['won']])
total_wrong = sum(map(lambda r: r['wrong'], results))
success_rate = wins / len(results)

# NOTE(review): the final score subtracts half of the *total* wrong-guess
# count from the win percentage, so it scales with the size of the test set.
# Confirm this is the intended scoring formula.
banner = "=" * 50
report_lines = [
    "\n" + banner,
    "EVALUATION RESULTS",
    banner,
    f"Total Games:     {len(results)}",
    f"Wins:            {wins} ({success_rate:.2%})",
    f"Losses:          {len(results) - wins}",
    f"Avg Wrong Guesses: {total_wrong/len(results):.2f}",
    f"Final Score:     {success_rate * 100 - total_wrong * 0.5:.2f}",
    banner,
]
print("\n".join(report_lines))

Loading models for evaluation...
HMM loaded from hangman_hmm.pkl
Agent loaded ← hangman_agent.pkl
Evaluating on 2000 test words...

EVALUATION RESULTS
Total Games:     2000
Wins:            605 (30.25%)
Losses:          1395
Avg Wrong Guesses: 5.27
Final Score:     -5239.75
