# Run Games

In [1]:
# imports
from random import randint
import numpy as np
import random
import datetime # for limiting calculation to wall clock time
import math
import copy
import matplotlib.pyplot as plt
import csv
import sys
import pandas as pd

import farmgame
from mcts import MCTS
from agents import RandomPolicy


In [2]:
# Game configuration
def configure_game():
    """Build the starting game used by every simulation in this notebook.

    Returns:
        Whatever ``farmgame.configure_game`` returns for the fixed settings
        listed below.
    """
    # One shared configuration for every simulated game.
    settings = {
        "layer": "Items00",
        "resourceCond": "even",
        "costCond": "low",
        "visibilityCond": "full",
        "redFirst": True,
    }
    return farmgame.configure_game(**settings)

In [3]:
# Policy names given to the MCTS agents through their `policy` argument; the
# simulation loop further down plays every (red, purple) pairing of these.
policies = ["selfish", "altruistic", "collaborative"]

In [4]:
# Simulate a game for a given combination of policies and count helping actions
def simulate_game_with_helping(red_policy, purple_policy, search_time=2.0, C=2, max_moves=10, verbose=True):
    """Play one game between two MCTS agents and count each side's helping actions.

    Args:
        red_policy: Policy name passed to the red MCTS agent.
        purple_policy: Policy name passed to the purple MCTS agent.
        search_time: Forwarded as the ``time`` argument of both MCTS agents
            (presumably the per-move search budget in seconds -- confirm in
            ``mcts.MCTS``). Defaults to 2.0.
        C: Forwarded as the ``C`` argument of both MCTS agents. Defaults to 2.
        max_moves: Forwarded as the ``max_moves`` argument of both MCTS
            agents. Defaults to 10.
        verbose: When True (default), print a start line and an end-of-game
            summary.

    Returns:
        A tuple ``(red_rewards, purple_rewards, helping_actions)`` where the
        first two are lists holding the value returned by ``state.reward``
        for each colour after every move, and ``helping_actions`` is a dict
        ``{"red": int, "purple": int}`` counting moves for which
        ``Transition.is_helping()`` was true.
    """
    if verbose:
        print(f"Simulating a game: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
    game = configure_game()
    red_agent = MCTS(time=search_time, C=C, max_moves=max_moves, color="red", policy=red_policy)
    purple_agent = MCTS(time=search_time, C=C, max_moves=max_moves, color="purple", policy=purple_policy)

    # Initialize agents with the starting state
    red_agent.update(game)
    purple_agent.update(game)

    state = game
    done = False
    red_rewards = []
    purple_rewards = []
    helping_actions = {"red": 0, "purple": 0}

    while not done:
        current_player = state.players[state.turn]["name"]

        # Any player that is not "red" is handled by the purple agent.
        acting_agent = red_agent if current_player == "red" else purple_agent
        action = acting_agent.choose_action()

        # Classify the move against the pre-move state: take_action is called
        # with inplace=True below, so the check has to happen first.
        transition = farmgame.Transition(state, action)
        if transition.is_helping():
            helping_actions[current_player] += 1

        # Take the action and let both agents observe the new state
        state = state.take_action(action, inplace=True)
        red_agent.update(state)
        purple_agent.update(state)

        # Get rewards; the game ends only once both colours report done
        red_rwd, red_done = state.reward("red")
        purple_rwd, purple_done = state.reward("purple")
        done = red_done and purple_done

        # Log rewards
        red_rewards.append(red_rwd)
        purple_rewards.append(purple_rwd)

    if verbose:
        print(f"Game completed: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
        print(f"Total Red Reward: {sum(red_rewards)}, Total Purple Reward: {sum(purple_rewards)}")
        print(f"Helping Actions - Red: {helping_actions['red']}, Purple: {helping_actions['purple']}")
    return red_rewards, purple_rewards, helping_actions

# Simulate games with helping actions
# Number of games played for each (red policy, purple policy) pairing.
N_GAMES_PER_COMBINATION = 100

# NOTE: this plays len(policies)**2 * N_GAMES_PER_COMBINATION full games, each
# driven by MCTS searches, so expect the cell to be long-running.
results_with_helping = []
for red_policy in policies:
    for purple_policy in policies:
        print(f"\nStarting simulations: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
        for i in range(N_GAMES_PER_COMBINATION):
            print(f"  Simulation {i+1}: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
            red_rewards, purple_rewards, helping_actions = simulate_game_with_helping(red_policy, purple_policy)
            # Sum each side's logged rewards once and reuse the totals.
            red_total = sum(red_rewards)
            purple_total = sum(purple_rewards)
            results_with_helping.append({
                "Red Policy": red_policy,
                "Purple Policy": purple_policy,
                "Red Reward": red_total,
                "Purple Reward": purple_total,
                "Total Reward": red_total + purple_total,
                "Red Helping Actions": helping_actions["red"],
                "Purple Helping Actions": helping_actions["purple"]
            })

# Convert the results to a DataFrame (one row per simulated game)
results_with_helping_df = pd.DataFrame(results_with_helping)




Starting simulations: Red Policy = selfish, Purple Policy = selfish
  Simulation 1: Red Policy = selfish, Purple Policy = selfish
Simulating a game: Red Policy = selfish, Purple Policy = selfish
Game completed: Red Policy = selfish, Purple Policy = selfish
Total Red Reward: 196, Total Purple Reward: 216
Helping Actions - Red: 2, Purple: 2
  Simulation 2: Red Policy = selfish, Purple Policy = selfish
Simulating a game: Red Policy = selfish, Purple Policy = selfish
Game completed: Red Policy = selfish, Purple Policy = selfish
Total Red Reward: 196, Total Purple Reward: 256
Helping Actions - Red: 1, Purple: 1
  Simulation 3: Red Policy = selfish, Purple Policy = selfish
Simulating a game: Red Policy = selfish, Purple Policy = selfish
Game completed: Red Policy = selfish, Purple Policy = selfish
Total Red Reward: 188, Total Purple Reward: 216
Helping Actions - Red: 2, Purple: 2
  Simulation 4: Red Policy = selfish, Purple Policy = selfish
Simulating a game: Red Policy = selfish, Purple Po

In [5]:
# Show the per-game results table (one row per simulated game).
results_with_helping_df

Unnamed: 0,Red Policy,Purple Policy,Red Reward,Purple Reward,Total Reward,Red Helping Actions,Purple Helping Actions
0,selfish,selfish,196,216,412,2,2
1,selfish,selfish,196,256,452,1,1
2,selfish,selfish,188,216,404,2,2
3,selfish,selfish,168,208,376,3,3
4,selfish,selfish,172,240,412,2,2
...,...,...,...,...,...,...,...
895,collaborative,collaborative,196,216,412,3,3
896,collaborative,collaborative,136,172,308,2,2
897,collaborative,collaborative,192,112,304,1,1
898,collaborative,collaborative,228,240,468,3,3


In [6]:
# Mean rewards and helping-action counts for each (red policy, purple policy) pairing.
results_with_helping_df.groupby(['Red Policy','Purple Policy']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Red Reward,Purple Reward,Total Reward,Red Helping Actions,Purple Helping Actions
Red Policy,Purple Policy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
altruistic,altruistic,186.6,207.12,393.72,1.79,1.77
altruistic,collaborative,184.16,205.72,389.88,1.85,1.77
altruistic,selfish,187.48,216.16,403.64,1.87,1.78
collaborative,altruistic,195.92,208.16,404.08,2.0,1.97
collaborative,collaborative,196.88,211.64,408.52,1.96,1.88
collaborative,selfish,188.32,213.48,401.8,1.95,1.84
selfish,altruistic,201.96,206.56,408.52,1.77,1.76
selfish,collaborative,197.64,209.76,407.4,1.95,1.89
selfish,selfish,200.68,214.88,415.56,1.9,1.88


In [7]:
# Mean number of red helping actions per red policy, averaged over all purple policies.
results_with_helping_df.groupby('Red Policy')['Red Helping Actions'].mean()

Red Policy
altruistic       1.836667
collaborative    1.970000
selfish          1.873333
Name: Red Helping Actions, dtype: float64