# ML Hackathon: HMM and Reinforcement Learning Word Completion

## Problem Statement
This notebook implements:
1. **Part 1**: Hidden Markov Model for letter probability estimation
2. **Part 2**: Reinforcement Learning agent using HMM guidance

---

## Import Required Libraries

In [57]:
import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import matplotlib.pyplot as plt
from tqdm import tqdm
from typing import List, Tuple
import math, re, random

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

## Load Data

In [58]:
# Load corpus
with open('Data/Data/corpus.txt', 'r') as f:
    corpus_words = [line.strip().lower() for line in f if line.strip()]

# Load test data
with open('Data/Data/test.txt', 'r') as f:
    test_words = [line.strip().lower() for line in f if line.strip()]

print(f"Corpus size: {len(corpus_words)} words")
print(f"Test size: {len(test_words)} words")

Corpus size: 50000 words
Test size: 2000 words


In [59]:
def create_proper_train_test_split(all_words, test_size=0.1):
    """Split corpus into train/test ensuring clean data."""
    all_words = list(set(w.lower().strip() for w in all_words if w.strip()))
    random.shuffle(all_words)
    split_idx = int(len(all_words) * (1 - test_size))
    return all_words[:split_idx], all_words[split_idx:]

---
# Part 1: Hidden Markov Model Implementation

In [60]:
class ContextualHiddenMarkovModel:
    def __init__(self, order=2, smoothing=0.01):
        """HMM for character-level probability estimation - keep it simple."""
        self.order = order
        self.smoothing = smoothing
        self.transition_counts = defaultdict(Counter)
        self.positional_counts = defaultdict(Counter)
        self.letter_priors = Counter()
        self.vocabulary = set()
        self.corpus_words = []
        self.start_token = '<S>'
        self.end_token = '<E>'

    def train(self, words: List[str]):
        print("Training HMM...")
        self.corpus_words = list(set(w.lower().strip() for w in words if w.strip()))

        for word in tqdm(self.corpus_words, desc="Building counts"):
            padded = self.start_token * (self.order - 1) + word + self.end_token
            self.vocabulary.update(c for c in word if c.isalpha())

            for i in range(len(padded) - self.order + 1):
                context = padded[i:i+self.order-1]
                nxt = padded[i+self.order-1]
                pos = min(i, 15)
                self.transition_counts[context][nxt] += 1
                self.positional_counts[(pos, context[-1])][nxt] += 1
                self.letter_priors[nxt] += 1

        self.vocab_list = sorted(self.vocabulary)
        print(f"‚úÖ Vocabulary size: {len(self.vocab_list)}")

    def _prob(self, counter: Counter, char: str) -> float:
        total = sum(counter.values())
        V = len(self.vocabulary)
        return (counter[char] + self.smoothing) / (total + self.smoothing * V)

    def get_conditional_prob(self, context: str, pos: int, char: str) -> float:
        """Get probability of character given context and position."""
        p1 = self._prob(self.transition_counts[context], char)
        p2 = self._prob(self.positional_counts[(pos, context[-1])], char)
        p3 = (self.letter_priors[char] + self.smoothing) / \
             (sum(self.letter_priors.values()) + self.smoothing * len(self.vocabulary))
        
        # Standard weighting
        return 0.70*p1 + 0.20*p2 + 0.10*p3

    def _get_context_at_position(self, word_list: List[str], pos: int) -> str:
        """Extract context for position."""
        context_start = max(0, pos - (self.order - 1))
        context = word_list[context_start:pos]
        needed_padding = (self.order - 1) - len(context)
        if needed_padding > 0:
            context = [self.start_token] * needed_padding + context
        context = ['a' if c == '_' else c for c in context]
        return ''.join(context)

## Train HMM Model

In [61]:
# Train HMM on full corpus - simple and reliable
hmm_model = ContextualHiddenMarkovModel(order=2, smoothing=0.01)
hmm_model.train(corpus_words)

print(f"‚úÖ HMM trained on {len(corpus_words)} words")

Training HMM...


Building counts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 49398/49398 [00:03<00:00, 12518.79it/s]

‚úÖ Vocabulary size: 26
‚úÖ HMM trained on 50000 words





---
# Part 2: Reinforcement Learning Implementation

In [62]:
class ImprovedLetterGuessingEnv:
    """Compact Hangman environment."""
    def __init__(self, vocabulary: set, max_wrong_guesses: int = 6):
        self.alphabet = sorted(list(vocabulary))
        self.char_to_idx = {char: idx for idx, char in enumerate(self.alphabet)}
        self.idx_to_char = {idx: char for char, idx in self.char_to_idx.items()}
        self.max_wrong_guesses = max_wrong_guesses
        
    def reset(self, target_word: str):
        self.target_word = target_word.lower().strip()
        self.guessed_letters = set()
        self.wrong_guesses = 0
        self.repeated_guesses = 0
        self.done = False
        self.current_masked = ['_'] * len(self.target_word)
        return self._get_state()
    
    def _get_state(self):
        """Compact state: (word_length, num_blanks, lives_left, last_char)."""
        masked_str = ''.join(self.current_masked)
        num_blanks = masked_str.count('_')
        lives_left = self.max_wrong_guesses - self.wrong_guesses
        word_length = len(self.target_word)
        last_char = ''
        for c in reversed(self.current_masked):
            if c != '_':
                last_char = c
                break
        return (word_length, num_blanks, lives_left, last_char)
    
    def get_masked_word(self):
        return ''.join(self.current_masked)
    
    def step(self, action: int):
        if self.done:
            return self._get_state(), 0, True, {}
        
        if not isinstance(action, (int, np.integer)) or action < 0 or action >= len(self.alphabet):
            return self._get_state(), -5.0, False, {'error': True}
        
        guessed_char = self.idx_to_char[action]
        
        if guessed_char in self.guessed_letters:
            self.repeated_guesses += 1
            reward = -3.0
            info = {'repeated': True, 'correct': False}
        else:
            self.guessed_letters.add(guessed_char)
            
            if guessed_char in self.target_word:
                count = sum(1 for i, c in enumerate(self.target_word) if c == guessed_char)
                for i, c in enumerate(self.target_word):
                    if c == guessed_char:
                        self.current_masked[i] = guessed_char
                reward = 2.0 * count
                info = {'repeated': False, 'correct': True, 'count': count}
            else:
                self.wrong_guesses += 1
                reward = -1.0
                info = {'repeated': False, 'correct': False}
        
        if '_' not in self.current_masked:
            self.done = True
            reward += 10.0
            info['won'] = True
        elif self.wrong_guesses >= self.max_wrong_guesses:
            self.done = True
            reward -= 5.0
            info['won'] = False
        
        return self._get_state(), reward, self.done, info
    
    def get_stats(self):
        return {
            'wrong_guesses': self.wrong_guesses,
            'repeated_guesses': self.repeated_guesses,
            'won': '_' not in self.current_masked and self.wrong_guesses < self.max_wrong_guesses
        }

In [63]:
class ImprovedQLearningAgent:
    """Q-Learning with HMM-Q blending - simple and stable."""
    def __init__(self, n_actions: int, learning_rate: float = 0.1,
                 discount_factor: float = 0.9, epsilon: float = 0.2,
                 hmm_blend_weight: float = 0.97):
        self.n_actions = n_actions
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.998
        self.hmm_blend_weight = hmm_blend_weight
        self.q_table = defaultdict(lambda: np.zeros(n_actions))
        self.idx_to_char = {}
        self.visit_counts = defaultdict(int)

    def get_action(self, state, action_probs=None, guessed_letters=set()):
        if state is None:
            return 0

        valid_mask = np.ones(self.n_actions)
        for letter in guessed_letters:
            for idx, char in self.idx_to_char.items():
                if char == letter:
                    valid_mask[idx] = 0
                    break
        
        if np.sum(valid_mask) == 0:
            valid_mask = np.ones(self.n_actions)
        
        if np.random.random() < self.epsilon:
            # Explore with HMM guidance
            if action_probs is not None and len(action_probs) == self.n_actions:
                masked_probs = action_probs * valid_mask
                if masked_probs.sum() > 0:
                    masked_probs = masked_probs / masked_probs.sum()
                    return np.random.choice(self.n_actions, p=masked_probs)
            valid_actions = np.where(valid_mask > 0)[0]
            return np.random.choice(valid_actions) if len(valid_actions) > 0 else 0
        else:
            # Exploit: Blend Q-values with HMM
            state_key = str(state)
            q_values = self.q_table[state_key].copy()
            
            if action_probs is not None and len(action_probs) == self.n_actions:
                hmm_scores = np.log(action_probs + 1e-10)
                combined_scores = (1 - self.hmm_blend_weight) * q_values + \
                                  self.hmm_blend_weight * hmm_scores
            else:
                combined_scores = q_values
            
            combined_scores[valid_mask == 0] = -np.inf
            
            if np.all(combined_scores == -np.inf):
                valid_actions = np.where(valid_mask > 0)[0]
                return np.random.choice(valid_actions) if len(valid_actions) > 0 else 0
            
            return np.argmax(combined_scores)

    def update(self, state, action, reward, next_state):
        if state is None or action < 0 or action >= self.n_actions:
            return
        state_key = str(state)
        next_state_key = str(next_state) if next_state is not None else None
        self.visit_counts[state_key] += 1
        current_q = self.q_table[state_key][action]
        max_next_q = np.max(self.q_table[next_state_key]) if next_state_key else 0
        new_q = current_q + self.lr * (reward + self.gamma * max_next_q - current_q)
        self.q_table[state_key][action] = new_q
    
    def decay_epsilon(self):
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
    
    def get_diagnostics(self):
        return {
            'epsilon': self.epsilon,
            'q_table_size': len(self.q_table),
            'unique_states': len(self.visit_counts)
        }

In [64]:
class ImprovedHybridAgent:
    """Hybrid HMM+RL agent with frequency-aware strategy."""
    def __init__(self, hmm_model: ContextualHiddenMarkovModel, vocabulary: set,
                 learning_rate: float = 0.2, epsilon: float = 0.5, 
                 hmm_blend_weight: float = 0.75, max_wrong: int = 6):
        self.hmm = hmm_model
        self.env = ImprovedLetterGuessingEnv(vocabulary, max_wrong_guesses=max_wrong)
        self.agent = ImprovedQLearningAgent(
            n_actions=len(vocabulary),
            learning_rate=learning_rate,
            epsilon=epsilon,
            hmm_blend_weight=hmm_blend_weight
        )
        self.char_to_idx = self.env.char_to_idx
        self.idx_to_char = self.env.idx_to_char
        self.agent.idx_to_char = self.idx_to_char
        
        # Common English letter frequencies (helps early in game)
        self.common_letters = 'etaoinshrdlcumwfgypbvkjxqz'
        
        print(f"‚úÖ Agent: {len(vocabulary)} actions, HMM blend={hmm_blend_weight}")

    def get_letter_probabilities_from_hmm(self, masked_word: str, guessed_letters: set) -> np.ndarray:
        probs = np.zeros(len(self.env.alphabet))
        blank_positions = [i for i, c in enumerate(masked_word) if c == '_']
        
        if len(blank_positions) == 0:
            return np.ones(len(self.env.alphabet)) / len(self.env.alphabet)
        
        # Calculate HMM probabilities
        for pos in blank_positions:
            context = self.hmm._get_context_at_position(list(masked_word), pos)
            for char, idx in self.char_to_idx.items():
                prob = self.hmm.get_conditional_prob(context, pos, char)
                probs[idx] += prob
        
        probs = probs / len(blank_positions)
        
        # Boost common letters early in the game - FINE-TUNED
        if len(guessed_letters) < 6:  # Extended to 6 guesses
            for char in self.common_letters[:10]:  # Top 10 letters (e,t,a,o,i,n,s,h,r,d)
                if char in self.char_to_idx:
                    idx = self.char_to_idx[char]
                    probs[idx] *= 1.5  # 50% boost (increased from 30%)
        
        return probs / probs.sum() if probs.sum() > 0 else np.ones(len(probs)) / len(probs)

    def train(self, training_words: List[str], episodes: int = 20000, eval_interval: int = 2000):
        print(f"Training RL agent for {episodes} episodes...")
        total_rewards = []
        win_rate_history = []
        
        for episode in tqdm(range(episodes)):
            target_word = training_words[np.random.randint(len(training_words))]
            state = self.env.reset(target_word)
            done = False
            episode_reward = 0
            step_count = 0
            max_steps = len(target_word) * 3
            
            while not done and step_count < max_steps:
                masked_word = self.env.get_masked_word()
                guessed_letters = self.env.guessed_letters
                hmm_probs = self.get_letter_probabilities_from_hmm(masked_word, guessed_letters)
                action = self.agent.get_action(state, hmm_probs, guessed_letters)
                
                if action < 0 or action >= len(self.idx_to_char):
                    valid_actions = [i for i in range(len(self.idx_to_char)) 
                                   if self.idx_to_char[i] not in guessed_letters]
                    action = np.random.choice(valid_actions) if valid_actions else 0
                
                next_state, reward, done, info = self.env.step(action)
                self.agent.update(state, action, reward, next_state)
                episode_reward += reward
                state = next_state
                step_count += 1
            
            total_rewards.append(episode_reward)
            self.agent.decay_epsilon()
            
            if episode >= 99:
                recent_wins = sum(1 for r in total_rewards[episode-99:episode+1] if r > 5)
                win_rate_history.append(recent_wins / 100)
            
            if episode % eval_interval == 0 and episode > 0:
                diag = self.agent.get_diagnostics()
                print(f"\nEp {episode}: Œµ={diag['epsilon']:.3f}, "
                      f"States={diag['unique_states']}, "
                      f"AvgRew={np.mean(total_rewards[-100:]):.2f}")
        
        print(f"\n‚úÖ Training complete!")
        if win_rate_history:
            print(f"Final win rate: {win_rate_history[-1]*100:.2f}%")
        
        return total_rewards, win_rate_history

In [65]:
# Train RL agent - focus on frequency-aware strategy
print("="*60)
print("TRAINING RL AGENT WITH FREQUENCY BOOST")
print("="*60)

improved_rl_agent = ImprovedHybridAgent(
    hmm_model,
    hmm_model.vocabulary,
    learning_rate=0.1,
    epsilon=0.15,            # Even lower exploration (reduced from 0.2)
    hmm_blend_weight=0.98,   # Maximum HMM trust (98%, increased from 97%)
    max_wrong=6
)

# Use focused training sample
training_sample = random.sample(corpus_words, min(60000, len(corpus_words)))
print(f"Training on {len(training_sample)} words...")

improved_rewards, improved_win_rates = improved_rl_agent.train(
    training_sample,
    episodes=70000,  # Increased training (60k ‚Üí 70k)
    eval_interval=5000
)

print("\n‚úÖ RL agent training complete!")

TRAINING RL AGENT WITH FREQUENCY BOOST
‚úÖ Agent: 26 actions, HMM blend=0.98
Training on 50000 words...
Training RL agent for 70000 episodes...
Training on 50000 words...
Training RL agent for 70000 episodes...


  7%|‚ñã         | 5056/70000 [00:23<04:03, 266.62it/s]


Ep 5000: Œµ=0.010, States=5856, AvgRew=10.08


 14%|‚ñà‚ñç        | 10090/70000 [00:45<03:25, 291.50it/s]


Ep 10000: Œµ=0.010, States=6890, AvgRew=6.36


 21%|‚ñà‚ñà‚ñè       | 15034/70000 [01:02<03:14, 282.82it/s]


Ep 15000: Œµ=0.010, States=7417, AvgRew=10.15


 29%|‚ñà‚ñà‚ñä       | 20048/70000 [01:23<02:49, 295.55it/s]


Ep 20000: Œµ=0.010, States=7831, AvgRew=6.20


 36%|‚ñà‚ñà‚ñà‚ñå      | 25060/70000 [01:41<02:34, 291.21it/s]


Ep 25000: Œµ=0.010, States=8137, AvgRew=6.86


 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 30062/70000 [02:01<02:10, 306.68it/s]


Ep 30000: Œµ=0.010, States=8385, AvgRew=9.65


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 35029/70000 [02:22<02:35, 224.87it/s]


Ep 35000: Œµ=0.010, States=8596, AvgRew=7.62


 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 40084/70000 [02:39<01:42, 291.32it/s]


Ep 40000: Œµ=0.010, States=8758, AvgRew=6.53


 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 45051/70000 [03:01<01:28, 282.54it/s]


Ep 45000: Œµ=0.010, States=8970, AvgRew=6.68


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 50087/70000 [03:18<01:05, 302.37it/s]


Ep 50000: Œµ=0.010, States=9187, AvgRew=8.08


 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 55062/70000 [03:38<00:46, 321.92it/s]


Ep 55000: Œµ=0.010, States=9315, AvgRew=8.18


 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 60043/70000 [03:56<00:32, 303.26it/s]


Ep 60000: Œµ=0.010, States=9429, AvgRew=6.83


 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 65036/70000 [04:17<00:16, 300.24it/s]


Ep 65000: Œµ=0.010, States=9504, AvgRew=6.89


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70000/70000 [04:34<00:00, 255.10it/s]


‚úÖ Training complete!
Final win rate: 44.00%

‚úÖ RL agent training complete!





## Hackathon Evaluation

In [66]:
def evaluate_hackathon_hybrid_agent(improved_agent, test_words: List[str], num_games: int = 2000):
    """Official hackathon evaluation."""
    print(f"\n{'='*70}")
    print(f"HACKATHON EVALUATION: {num_games} games")
    print(f"{'='*70}\n")
    
    eval_words = random.sample(test_words, min(num_games, len(test_words)))
    num_games = len(eval_words)
    
    wins = 0
    total_wrong_guesses = 0
    total_repeated_guesses = 0
    predictions = []
    
    old_epsilon = improved_agent.agent.epsilon
    improved_agent.agent.epsilon = 0  # Greedy
    
    for word in tqdm(eval_words, desc="Playing Hangman"):
        state = improved_agent.env.reset(word)
        done = False
        step_count = 0
        max_steps = len(word) * 3
        
        while not done and step_count < max_steps:
            masked_word = improved_agent.env.get_masked_word()
            guessed_letters = improved_agent.env.guessed_letters
            hmm_probs = improved_agent.get_letter_probabilities_from_hmm(masked_word, guessed_letters)
            action = improved_agent.agent.get_action(state, hmm_probs, guessed_letters)
            
            if action < 0 or action >= len(improved_agent.idx_to_char):
                valid_actions = [i for i in range(len(improved_agent.idx_to_char)) 
                               if improved_agent.idx_to_char[i] not in guessed_letters]
                action = np.random.choice(valid_actions) if valid_actions else 0
            
            next_state, reward, done, info = improved_agent.env.step(action)
            state = next_state
            step_count += 1
        
        stats = improved_agent.env.get_stats()
        final_guess = improved_agent.env.get_masked_word()
        
        if stats['won']:
            wins += 1
        
        total_wrong_guesses += stats['wrong_guesses']
        total_repeated_guesses += stats['repeated_guesses']
        
        predictions.append({
            'word': word,
            'final_guess': final_guess,
            'won': stats['won'],
            'wrong_guesses': stats['wrong_guesses'],
            'repeated_guesses': stats['repeated_guesses']
        })
    
    improved_agent.agent.epsilon = old_epsilon
    
    success_rate = (wins / num_games) * 100
    
    # Original scoring formula (reverted)
    final_score = (success_rate * 2000) - (total_wrong_guesses * 5) - (total_repeated_guesses * 2)
    
    results = {
        'num_games': num_games,
        'wins': wins,
        'success_rate': success_rate,
        'total_wrong_guesses': total_wrong_guesses,
        'total_repeated_guesses': total_repeated_guesses,
        'final_score': final_score,
        'predictions': predictions
    }
    
    print(f"\n{'='*70}")
    print(f"HACKATHON FINAL RESULTS")
    print(f"{'='*70}")
    print(f"Games Played:           {num_games}")
    print(f"Games Won:              {wins} ({success_rate:.2f}%)")
    print(f"Total Wrong Guesses:    {total_wrong_guesses}")
    print(f"Total Repeated Guesses: {total_repeated_guesses}")
    print(f"{'='*70}")
    print(f"üèÜ FINAL SCORE:          {final_score:.2f}")
    print(f"{'='*70}")
    print(f"\nBreakdown:")
    print(f"  + Success bonus:        {(success_rate * 2000):.2f}")
    print(f"  - Wrong guess penalty:  {total_wrong_guesses * 5}")
    print(f"  - Repeat guess penalty: {total_repeated_guesses * 2}")
    print(f"{'='*70}\n")
    
    return results

# Run evaluation
hackathon_results = evaluate_hackathon_hybrid_agent(improved_rl_agent, test_words, num_games=2000)

# Save results
hackathon_df = pd.DataFrame(hackathon_results['predictions'])
hackathon_df.to_csv('hackathon_hybrid_results.csv', index=False)
print("‚úÖ Results saved to 'hackathon_hybrid_results.csv'")


HACKATHON EVALUATION: 2000 games



Playing Hangman: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2000/2000 [00:12<00:00, 163.30it/s]




HACKATHON FINAL RESULTS
Games Played:           2000
Games Won:              539 (26.95%)
Total Wrong Guesses:    10807
Total Repeated Guesses: 0
üèÜ FINAL SCORE:          -135.00

Breakdown:
  + Success bonus:        53900.00
  - Wrong guess penalty:  54035
  - Repeat guess penalty: 0

‚úÖ Results saved to 'hackathon_hybrid_results.csv'


In [67]:
# Final Summary
print(f"\n{'='*70}")
print(f"HACKATHON SUBMISSION SUMMARY")
print(f"{'='*70}\n")

print("üìä FINAL RESULTS:")
print(f"  Games Played:       {hackathon_results['num_games']}")
print(f"  Success Rate:       {hackathon_results['success_rate']:.2f}%")
print(f"  Wrong Guesses:      {hackathon_results['total_wrong_guesses']}")
print(f"  Repeated Guesses:   {hackathon_results['total_repeated_guesses']}")
print(f"\n  üèÜ FINAL SCORE:      {hackathon_results['final_score']:.2f}")

print(f"\nüí° OPTIMIZED STRATEGY:")
print(f"  ‚Ä¢ HMM Order: 2 (reliable bigrams)")
print(f"  ‚Ä¢ HMM Smoothing: 0.01 (balanced)")
print(f"  ‚Ä¢ HMM Weighting: 70% context + 20% position + 10% frequency")
print(f"  ‚Ä¢ Learning rate: 0.1 (stable)")
print(f"  ‚Ä¢ Initial epsilon: 0.15 (ultra-low exploration)")
print(f"  ‚Ä¢ Epsilon decay: 0.998")
print(f"  ‚Ä¢ HMM blend weight: 98% (maximum HMM trust)")
print(f"  ‚Ä¢ Discount factor: 0.9")
print(f"  ‚Ä¢ Training episodes: 70,000")
print(f"  ‚Ä¢ Training sample: 60k words")
print(f"  ‚Ä¢ Rewards: +2/letter, -1/wrong, +10/win, -5/loss")
print(f"  ‚Ä¢ KEY: Frequency boost on top 10 letters in first 6 guesses (50% boost)")

print(f"\nüìà SCORE FORMULA:")
print(f"  Score = (success_rate √ó 2000) - (wrong_guesses √ó 5) - (repeated_guesses √ó 2)")

print(f"\n{'='*70}\n")

# Save summary
with open('hackathon_summary.txt', 'w') as f:
    f.write(f"Final Score: {hackathon_results['final_score']:.2f}\n")
    f.write(f"Success Rate: {hackathon_results['success_rate']:.2f}%\n")
    f.write(f"Games Won: {hackathon_results['wins']}/{hackathon_results['num_games']}\n")
    f.write(f"Total Wrong Guesses: {hackathon_results['total_wrong_guesses']}\n")
    f.write(f"Total Repeated Guesses: {hackathon_results['total_repeated_guesses']}\n")

print("‚úÖ Summary saved to 'hackathon_summary.txt'")


HACKATHON SUBMISSION SUMMARY

üìä FINAL RESULTS:
  Games Played:       2000
  Success Rate:       26.95%
  Wrong Guesses:      10807
  Repeated Guesses:   0

  üèÜ FINAL SCORE:      -135.00

üí° OPTIMIZED STRATEGY:
  ‚Ä¢ HMM Order: 2 (reliable bigrams)
  ‚Ä¢ HMM Smoothing: 0.01 (balanced)
  ‚Ä¢ HMM Weighting: 70% context + 20% position + 10% frequency
  ‚Ä¢ Learning rate: 0.1 (stable)
  ‚Ä¢ Initial epsilon: 0.15 (ultra-low exploration)
  ‚Ä¢ Epsilon decay: 0.998
  ‚Ä¢ HMM blend weight: 98% (maximum HMM trust)
  ‚Ä¢ Discount factor: 0.9
  ‚Ä¢ Training episodes: 70,000
  ‚Ä¢ Training sample: 60k words
  ‚Ä¢ Rewards: +2/letter, -1/wrong, +10/win, -5/loss
  ‚Ä¢ KEY: Frequency boost on top 10 letters in first 6 guesses (50% boost)

üìà SCORE FORMULA:
  Score = (success_rate √ó 2000) - (wrong_guesses √ó 5) - (repeated_guesses √ó 2)


‚úÖ Summary saved to 'hackathon_summary.txt'
