In [178]:
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers, Sequential
from collections import deque

In [179]:
wordle_data = pd.read_csv('wordle.csv')
wordle_data.drop('day', axis=1, inplace=True)
wordle_data.head()

Unnamed: 0,word,occurrence
0,aahed,1.85095e-09
1,aalii,6.224471e-10
2,aargh,2.158188e-10
3,aarti,7.668332e-10
4,abaca,6.320646e-08


In [180]:
class WordleEnvironment:
  def __init__(self, word_list):
    self.word_list = word_list
    self.current_state = np.zeros([6, 5, 3])
    self.target_word = None
    self.max_guesses = 6
    self.guess_count = 0

  def reset(self):
    probabilities = wordle_data['occurrence'] / wordle_data['occurrence'].sum()
    self.current_state = np.zeros([6, 5, 3])
    self.target_word = np.random.choice(self.word_list, p=probabilities)
    self.guess_count = 0
    for i in range(5):
      self.current_state[0][i][2] = ord(self.target_word[i]) - ord('a') + 1
    return self.current_state

  def step(self, action):
    guessed_word = self.word_list[action]
    self.guess_count += 1
    reward = 0 
    
    for i in range(5):
      self.current_state[self.guess_count - 1][i][0] = ord(guessed_word[i]) - ord('a') + 1
      if guessed_word[i] == self.target_word[i]:
        self.current_state[self.guess_count - 1][i][1] = 3
        reward += 2
      elif guessed_word[i] in self.target_word:
        self.current_state[self.guess_count - 1][i][1] = 2
        reward += 1
      else:
        self.current_state[self.guess_count - 1][i][1] = 1

    
    if guessed_word == self.target_word:
      reward += 10
      reward += 3 * (5 - self.guess_count)
    
    done = (guessed_word == self.target_word) or (self.guess_count >= self.max_guesses)
    
    if done and guessed_word != self.target_word:
        reward -= 10
    
    return self.current_state, reward, done

  def render(self):
    feedback_colors = {0: 'None', 1: 'Grey', 2: 'Yellow', 3: 'Green'}
    
    for i in range(self.guess_count):
      guessed_word_display = []
      for j in range(5):
        letter = chr(int(self.current_state[i][j][0] + ord('a') - 1) if self.current_state[i][j][0] != 0 else '-')
        feedback = feedback_colors[self.current_state[i][j][1]]
        guessed_word_display.append(f"{letter}({feedback})")
      print(f"Target Word: {self.target_word}")
      print(f"Guess {i + 1}: {' '.join(guessed_word_display)}")


In [181]:
word_list = list(wordle_data['word'])
env = WordleEnvironment(word_list)

env.reset()

for a in range(6):
  action = random.randint(0, len(word_list) - 1) 
  state, reward, done = env.step(action)
  env.render()
  print(f"Reward: {reward}\n")
  if done:
    break

Target Word: water
Guess 1: b(Grey) u(Grey) l(Grey) s(Grey) e(Yellow)
Reward: -2.0

Target Word: water
Guess 1: b(Grey) u(Grey) l(Grey) s(Grey) e(Yellow)
Target Word: water
Guess 2: p(Grey) a(Green) c(Grey) e(Green) r(Green)
Reward: 0.5

Target Word: water
Guess 1: b(Grey) u(Grey) l(Grey) s(Grey) e(Yellow)
Target Word: water
Guess 2: p(Grey) a(Green) c(Grey) e(Green) r(Green)
Target Word: water
Guess 3: l(Grey) u(Grey) g(Grey) e(Green) r(Green)
Reward: -0.5

Target Word: water
Guess 1: b(Grey) u(Grey) l(Grey) s(Grey) e(Yellow)
Target Word: water
Guess 2: p(Grey) a(Green) c(Grey) e(Green) r(Green)
Target Word: water
Guess 3: l(Grey) u(Grey) g(Grey) e(Green) r(Green)
Target Word: water
Guess 4: b(Grey) e(Yellow) i(Grey) g(Grey) e(Yellow)
Reward: -1.5

Target Word: water
Guess 1: b(Grey) u(Grey) l(Grey) s(Grey) e(Yellow)
Target Word: water
Guess 2: p(Grey) a(Green) c(Grey) e(Green) r(Green)
Target Word: water
Guess 3: l(Grey) u(Grey) g(Grey) e(Green) r(Green)
Target Word: water
Guess 4: b

In [182]:
def create_q_network(input_dim, output_dim):
  model = Sequential()
  model.add(layers.Dense(128, activation='relu', input_shape=(input_dim,)))
  model.add(layers.Dense(256, activation='relu'))
  model.add(layers.Dense(output_dim))
  model.compile(optimizer='adam', loss='mse')
  return model

In [183]:
class ReplayBuffer:
  def __init__(self, capacity):
    self.buffer = deque(maxlen=capacity)

  def push(self, state, action, reward, next_state, done):
    self.buffer.append((state, action, reward, next_state, done))

  def sample(self, batch_size):
    state, action, reward, next_state, done = zip(*random.sample(self.buffer, batch_size))
    return state, action, reward, next_state, done

  def __len__(self):
    return len(self.buffer)


In [184]:
class DQNAgent:
  def __init__(self, input_dim, output_dim):
    self.q_network = create_q_network(input_dim, output_dim)
    self.target_network = create_q_network(input_dim, output_dim)
    self.target_network.set_weights(self.q_network.get_weights())
    self.replay_buffer = ReplayBuffer(10000)

  def select_action(self, state, epsilon):
    if random.random() < epsilon:
      return random.randint(0, len(word_list) - 1)
    else:
      state_flat = np.reshape(state, (1, -1))
      q_values = self.q_network.predict(state_flat)
      return np.argmax(q_values[0])

In [None]:
agent = DQNAgent(input_dim=6*5*3, output_dim=len(word_list))

epsilon_initial = 1.0
epsilon_final = 0.01
discount_factor = 0.99
epsilon_decay = 0.995
batch_size = 32
max_episodes = 1000

for episode in range(max_episodes):
  state = env.reset()
  total_reward = 0
  for timestep in range(env.max_guesses):
    epsilon = max(epsilon_final, epsilon_initial * epsilon_decay**episode)
    action = agent.select_action(state, epsilon)
    next_state, reward, done = env.step(action)
    agent.replay_buffer.push(state, action, reward, next_state, done)
    if len(agent.replay_buffer) >= batch_size:
      state_batch, action_batch, reward_batch, next_state_batch, done_batch = agent.replay_buffer.sample(batch_size)
      next_state_batch = np.reshape(next_state_batch, (batch_size, -1))
      q_values_next = agent.target_network.predict(np.array(next_state_batch))
      q_values_next_target = agent.q_network.predict(np.array(next_state_batch))
      target_q_values = reward_batch + (1 - np.array(done_batch)) * discount_factor * np.max(q_values_next_target, axis=1)
      agent.q_network.fit(np.reshape(state_batch, (batch_size, -1)), target_q_values, verbose=0)           
    total_reward += reward
    state = next_state
        
    if done:
      break
            
  print(f"Episode {episode + 1}: Total Reward = {total_reward}")

  if episode % 50 == 0:
    agent.target_network.set_weights(agent.q_network.get_weights())

Episode 1: Total Reward = -18.5
Episode 2: Total Reward = -20.0
Episode 3: Total Reward = -20.0
Episode 4: Total Reward = -22.0
Episode 5: Total Reward = -22.5
Episode 6: Total Reward = -16.0
Episode 7: Total Reward = -19.0
Episode 8: Total Reward = -19.0
Episode 9: Total Reward = -21.5
Episode 10: Total Reward = -21.5
Episode 11: Total Reward = -19.5
Episode 12: Total Reward = -22.0
Episode 13: Total Reward = -20.5
Episode 14: Total Reward = -18.5
Episode 15: Total Reward = -17.5
Episode 16: Total Reward = -20.5
Episode 17: Total Reward = -18.5
Episode 18: Total Reward = -20.0
Episode 19: Total Reward = -16.0
Episode 20: Total Reward = -18.5
Episode 21: Total Reward = -19.0
Episode 22: Total Reward = -21.0
Episode 23: Total Reward = -18.0
Episode 24: Total Reward = -17.5
Episode 25: Total Reward = -21.5
Episode 26: Total Reward = -23.0
Episode 27: Total Reward = -19.0
Episode 28: Total Reward = -20.5
Episode 29: Total Reward = -19.5
Episode 30: Total Reward = -19.5
Episode 31: Total R

Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x000001F881DF0A40>
Traceback (most recent call last):
  File "c:\Users\Shrey\AppData\Local\Programs\Python\Python311\Lib\weakref.py", line 370, in remove
    self = selfref()
           ^^^^^^^^^
KeyboardInterrupt: 


Episode 39: Total Reward = -20.5
Episode 40: Total Reward = -21.0
Episode 41: Total Reward = -22.5
Episode 42: Total Reward = -23.0
Episode 43: Total Reward = -18.5
Episode 44: Total Reward = -20.5
Episode 45: Total Reward = -18.5
Episode 46: Total Reward = -19.5


Episode 47: Total Reward = -20.5
Episode 48: Total Reward = -20.0
