# Environment Wrapper for Farm

In [50]:
import numpy as np
import random
from mcts import MCTS
from agents import * 

import farmgame 

# Define the FarmEnv class
class FarmEnv:
    """Gym-style wrapper that drives a farm game one action at a time.

    The wrapped game object is expected to provide ``whose_turn()``,
    ``take_action(action, inplace=True)``, ``is_done()``, ``print_farm()``
    and a ``playersDict`` mapping, and to be iterable (see ``get_state``).
    """

    def __init__(self, farm: Farm, reward_function=None):
        """Wrap an existing game instance.

        Args:
            farm: The game instance to wrap; stored as ``self.game``.
            reward_function: Optional callable ``(game, player_name) -> reward``.
                Any falsy value selects ``default_reward``.
        """
        if not reward_function:
            reward_function = self.default_reward
        self.reward_function = reward_function
        self.done = False
        self.game = farm

    def reset(self):
        """Start a fresh game and return its initial state.

        The game passed to ``__init__`` is discarded and replaced by the
        result of a no-argument ``configure_game()`` call.
        NOTE(review): ``configure_game`` is not defined in this cell; it is
        presumably brought in by ``from agents import *`` - confirm.
        """
        self.done = False
        self.game = configure_game()
        return self.get_state()

    def step(self, action):
        """Apply ``action`` for the player whose turn it is.

        Returns:
            A ``(state, reward, done, info)`` tuple. The reward is computed
            for the player who just moved, and ``info`` is always an empty
            dict.
        """
        # Capture the mover before the action is applied, so the reward is
        # credited to the player who actually made the move.
        mover = self.game.whose_turn()["name"]
        self.game = self.game.take_action(action, inplace=True)
        reward = self.reward_function(self.game, mover)
        self.done = self.game.is_done()
        return self.get_state(), reward, self.done, {}

    def get_state(self):
        """Return the current game converted to a tuple by iterating over it."""
        snapshot = tuple(self.game)
        return snapshot

    def render(self):
        """Print the farm using the game's own ``print_farm`` method."""
        self.game.print_farm()

    def default_reward(self, game, player):
        """Return the ``"score"`` entry stored for ``player`` in ``game.playersDict``."""
        player_record = game.playersDict[player]
        return player_record["score"]

# Q-Learning Agent

In [28]:
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    Q-values are stored in a dict keyed by ``(state, action)``, so both
    states and actions must be hashable. Pairs that were never updated
    read as ``0.0``.
    """

    def __init__(self, actions, learning_rate=0.1, discount_factor=0.99, epsilon=0.1):
        """Create an agent.

        Args:
            actions: Default sequence of candidate actions, used whenever a
                call does not supply its own action list.
            learning_rate: Step size applied to each temporal-difference error.
            discount_factor: Weight given to the best next-state value.
            epsilon: Probability of picking a uniformly random action.
        """
        self.q_table = {}  # (state, action) -> learned Q-value
        self.actions = actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon

    def get_q_value(self, state, action):
        """Return the stored Q-value for ``(state, action)``, or 0.0 if unseen."""
        return self.q_table.get((state, action), 0.0)

    def choose_action(self, state, actions=None):
        """Pick an action for ``state`` with an epsilon-greedy rule.

        Args:
            state: The current (hashable) state.
            actions: Optional candidate actions for this decision, e.g. the
                moves that are legal right now. Defaults to ``self.actions``.

        Returns:
            A random candidate with probability ``epsilon``; otherwise the
            candidate with the highest Q-value (the first one on ties).

        Raises:
            ValueError: If there are no candidate actions.
        """
        candidates = self.actions if actions is None else list(actions)
        # Fail the same way on both branches instead of surfacing an
        # IndexError (explore) or an argmax error (exploit).
        if len(candidates) == 0:
            raise ValueError("choose_action needs at least one candidate action")
        if random.random() < self.epsilon:  # Explore
            return random.choice(candidates)
        # Exploit: max() keeps the first maximal candidate on ties.
        return max(candidates, key=lambda a: self.get_q_value(state, a))

    def update_q_value(self, state, action, reward, next_state, next_actions=None, done=False):
        """Apply one Q-learning update for the observed transition.

        Args:
            state: State in which ``action`` was taken.
            action: The action that was taken.
            reward: Reward observed for the transition.
            next_state: State reached after the action.
            next_actions: Optional actions available in ``next_state``.
                Defaults to ``self.actions``.
            done: When true, ``next_state`` is treated as terminal and
                contributes no future value.
        """
        if done:
            best_next = 0.0
        else:
            candidates = self.actions if next_actions is None else next_actions
            # default= covers an empty candidate list.
            best_next = max(
                (self.get_q_value(next_state, a) for a in candidates),
                default=0.0,
            )
        current = self.get_q_value(state, action)
        td_target = reward + self.discount_factor * best_next
        self.q_table[(state, action)] = current + self.learning_rate * (td_target - current)


# Training Loop

In [3]:
def train_rl_agent_against_self(rl_agent, episodes=1000, reward_function=None, verbose=True):
    """Train ``rl_agent`` by letting it play both sides of the farm game.

    Every episode builds a fresh game with ``configure_game()``, wraps it in
    a ``FarmEnv`` and plays until the environment reports ``done``. The same
    agent chooses the move for whichever player is on turn and is updated
    after every step.

    Args:
        rl_agent: Agent exposing an ``actions`` attribute plus
            ``choose_action(state)`` and
            ``update_q_value(state, action, reward, next_state)``.
        episodes: Number of games to play.
        reward_function: Optional ``(game, player_name) -> reward`` callable
            forwarded to ``FarmEnv``.
        verbose: When true (default), print the per-step and per-episode
            debugging output.

    Returns:
        A list with one entry per episode: the sum of all step rewards
        credited to any player during that episode.

    NOTE(review): the reward used elsewhere in this notebook is the player's
    current ``"score"``. If that score is a running total, summing it every
    step overcounts - confirm this is intended.
    """
    results = []
    # The agent draws its candidates from `rl_agent.actions`. That list is
    # narrowed to the legal moves each turn, and the caller's list is
    # restored when training ends, even on error.
    default_actions = rl_agent.actions
    try:
        for episode in range(episodes):
            if verbose:
                print(f"Episode {episode + 1}/{episodes}")

            # Build the game once; env.reset() would discard it and build
            # a second one.
            env = FarmEnv(configure_game(), reward_function=reward_function)
            state = env.get_state()

            # Rewards per player name; any name the game reports is accepted.
            episode_reward = {}
            done = False

            while not done:
                current_player = env.game.whose_turn()["name"]
                legal_actions = env.game.legal_actions()
                if verbose:
                    print(f"Legal actions for {current_player}: {legal_actions}")  # Debugging

                # Restrict the choice to currently legal moves. Fall back to
                # the agent's own list if the game reports none.
                rl_agent.actions = list(legal_actions) or default_actions
                action = rl_agent.choose_action(state)
                if verbose:
                    print(f"{current_player.capitalize()} chooses: {action}")

                # Step the environment
                next_state, reward, done, _ = env.step(action)
                episode_reward[current_player] = episode_reward.get(current_player, 0) + reward
                if verbose:
                    print(f"Reward for action: {reward}")  # Debugging

                # Bootstrap only from moves available in the next state.
                # There are none once the game is over.
                rl_agent.actions = [] if done else list(env.game.legal_actions())
                rl_agent.update_q_value(state, action, reward, next_state)

                state = next_state

            total_reward = sum(episode_reward.values())
            results.append(total_reward)
            if verbose:
                print(f"Episode {episode + 1}: Total Reward = {total_reward}")
    finally:
        rl_agent.actions = default_actions

    return results

In [None]:
# Build a default game, wrap it, and seed the agent with the actions that are
# legal in the initial position.
config = configure_game()
env = FarmEnv(config)
actions = env.game.legal_actions()
rl_agent = QLearningAgent(actions=actions)


def reward_function(game, player):
    """Reward a player with the "score" stored for them in the game."""
    return game.playersDict[player]["score"]


# Self-play training run; one total-reward entry is returned per episode.
training_rewards = train_rl_agent_against_self(
    rl_agent=rl_agent, episodes=1000, reward_function=reward_function
)

# Plot the per-episode totals on the current axes.
import matplotlib.pyplot as plt

ax = plt.gca()
ax.plot(training_rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Total Reward")
ax.set_title("RL Agent Training Rewards Against Itself")
plt.show()


In [62]:
# Scratch rollout: play up to 10 turns of a fixed game configuration directly
# against the game object (no FarmEnv), letting one agent move for both players.
#
# NOTE(review): the no-argument constructor and the `update(state)`,
# `choose_action()` and `update_q_value(action, reward, state)` calls below do
# not match the QLearningAgent class defined earlier in this notebook.
# Presumably they target a different QLearningAgent (e.g. one exported by
# `agents`) - confirm.
rl_agent = QLearningAgent()

# Track rewards per player so the total below is defined within this cell.
episode_reward = {"red": 0, "purple": 0}

TheFarm = farmgame.configure_game(
    layer="Items00",
    resourceCond="even",
    costCond="low",
    visibilityCond="full",
    redFirst=True,
)
state = TheFarm

rl_agent.update(state)
for i in range(10):
    current_player = state.players[state.turn]
    legal_actions = state.legal_actions()
    print(f"Legal actions for {current_player['name']}: {[a.name for a in legal_actions]}")  # Debugging

    rl_agent.update(state)
    # RL agent takes action for both players
    action = rl_agent.choose_action()
    print(f"{current_player['name']} chooses: {action}")

    # Apply the chosen action, then read the mover's reward.
    state = state.take_action(action, inplace=True)
    rwd, done = state.reward(current_player['name'])
    episode_reward[current_player['name']] = episode_reward.get(current_player['name'], 0) + rwd

    # Update the RL agent with the reward just observed. The original passed
    # `reward`, which this cell never assigns.
    rl_agent.update_q_value(action, rwd, state)

    # Presumably `done` flags the end of the game - stop instead of asking
    # for moves in a finished game.
    if done:
        break

total_reward = episode_reward["red"] + episode_reward["purple"]


Legal actions for red: ['tomato', 'turnip', 'turnip', 'strawberry', 'strawberry', 'eggplant', 'tomato', 'turnip', 'pillow']
red chooses: Tomato00(8,7)
Legal actions for purple: ['turnip', 'turnip', 'strawberry', 'strawberry', 'eggplant', 'tomato', 'turnip', 'pillow']
purple chooses: Turnip01(13,13)
Legal actions for red: ['turnip', 'strawberry', 'strawberry', 'eggplant', 'tomato', 'turnip', 'box', 'pillow']
red chooses: Turnip00(12,13)
Legal actions for purple: ['strawberry', 'strawberry', 'eggplant', 'tomato', 'turnip', 'box', 'pillow']
purple chooses: Strawberry01(8,8)
Legal actions for red: ['strawberry', 'eggplant', 'tomato', 'turnip', 'box', 'pillow']
red chooses: Strawberry00(7,7)
Legal actions for purple: ['eggplant', 'tomato', 'turnip', 'box', 'pillow']
purple chooses: Eggplant00(12,14)
Legal actions for red: ['tomato', 'turnip', 'box', 'pillow']
red chooses: Tomato01(7,8)
Legal actions for purple: ['turnip', 'box', 'pillow']
purple chooses: Turnip02(13,14)
Legal actions for re