# Run Games

In [1]:
# imports
from random import randint
import numpy as np
import random
import datetime # for limiting calculation to wall clock time
import math
import copy
import matplotlib.pyplot as plt
import csv
import sys
import pandas as pd

import farmgame
from mcts import MCTS
from agents import RandomPolicy


In [2]:
# Game configuration
def configure_game():
    """Build the starting game used by every simulation in this notebook.

    Returns:
        Whatever ``farmgame.configure_game`` produces for the fixed settings
        below (layer "Items00", even resources, low cost, full visibility,
        red moving first).
    """
    # Keep all experiment settings in one place so they are easy to scan.
    settings = dict(
        layer="Items00",
        resourceCond="even",
        costCond="low",
        visibilityCond="full",
        redFirst=True,
    )
    return farmgame.configure_game(**settings)

In [3]:
# Policy names handed to the MCTS agents; the simulation loop iterates over
# this list for both the red and the purple player.
policies = [
    "selfish",
    "altruistic",
    "collaborative",
]

# Goal: count helping actions separately for the period before and the period after a player has picked up all of its own objects

In [4]:
# Simulate a game for a given combination of policies and count helping actions
def simulate_game_with_helping(
    red_policy,
    purple_policy,
    mcts_time=2.0,
    mcts_nsims=5000000,
    mcts_C=1.0,
    mcts_max_moves=10,
):
    """Play one game between two MCTS agents and count each player's helping actions.

    A helping action (as judged by ``farmgame.Transition.is_helping``) is counted
    under "before" while the acting player has not yet picked up all of its
    objects, and under "after" once ``state.all_objects_picked_up`` has returned
    a truthy value for that player.

    Args:
        red_policy: Policy name passed to the red MCTS agent.
        purple_policy: Policy name passed to the purple MCTS agent.
        mcts_time: Forwarded to both agents as the ``time`` argument of ``MCTS``.
        mcts_nsims: Forwarded to both agents as ``nsims``.
        mcts_C: Forwarded to both agents as ``C``.
        mcts_max_moves: Forwarded to both agents as ``max_moves``.

    Returns:
        A tuple ``(red_rewards, purple_rewards, helping_actions)`` where the two
        reward lists hold the value reported by ``state.reward(color)`` after
        every turn, and ``helping_actions`` maps ``"red"``/``"purple"`` to a dict
        with ``"before"`` and ``"after"`` counts.
    """
    print(f"Simulating a game: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
    game = configure_game()

    # Both agents share the same search settings; only colour and policy differ.
    mcts_settings = dict(time=mcts_time, nsims=mcts_nsims, C=mcts_C, max_moves=mcts_max_moves)
    red_agent = MCTS(color="red", policy=red_policy, **mcts_settings)
    purple_agent = MCTS(color="purple", policy=purple_policy, **mcts_settings)

    # Initialize agents with the starting state
    red_agent.update(game)
    purple_agent.update(game)

    state = game
    done = False
    red_rewards = []
    purple_rewards = []

    # Helping action counters
    helping_actions = {
        "red": {"before": 0, "after": 0},
        "purple": {"before": 0, "after": 0}
    }

    # Flags for object pickup completion
    objects_picked_up = {"red": False, "purple": False}

    while not done:
        current_player = state.players[state.turn]["name"]

        if current_player == "red":
            action = red_agent.choose_action()
        else:
            action = purple_agent.choose_action()

        # Classify the action BEFORE applying it: take_action below is called
        # with inplace=True, so the transition must be built from the
        # pre-action state.
        transition = farmgame.Transition(state, action)
        is_helping = transition.is_helping()

        # Bucket the helping action by the player's pickup status at this point.
        if is_helping:
            phase = "after" if objects_picked_up[current_player] else "before"
            helping_actions[current_player][phase] += 1

        # Take the action and let both agents observe the resulting state
        state = state.take_action(action, inplace=True)
        red_agent.update(state)
        purple_agent.update(state)

        # Latch the pickup flag; once set it is never re-checked or cleared.
        # NOTE(review): relies on farmgame states exposing all_objects_picked_up(player).
        if not objects_picked_up[current_player] and state.all_objects_picked_up(current_player):
            objects_picked_up[current_player] = True

        # Get rewards; the game ends only when both players report done
        red_rwd, red_done = state.reward("red")
        purple_rwd, purple_done = state.reward("purple")
        done = red_done and purple_done

        # Log rewards
        red_rewards.append(red_rwd)
        purple_rewards.append(purple_rwd)

    print(f"Game completed: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
    print(f"Total Red Reward: {sum(red_rewards)}, Total Purple Reward: {sum(purple_rewards)}")
    print(f"Helping Actions Before - Red: {helping_actions['red']['before']}, Purple: {helping_actions['purple']['before']}")
    print(f"Helping Actions After - Red: {helping_actions['red']['after']}, Purple: {helping_actions['purple']['after']}")
    return red_rewards, purple_rewards, helping_actions

# Simulate games with helping actions
# Number of games played for every (red policy, purple policy) combination.
N_GAMES_PER_COMBINATION = 100

results_with_helping = []
for red_policy in policies:
    for purple_policy in policies:
        print(f"\nStarting simulations: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
        for i in range(N_GAMES_PER_COMBINATION):
            print(f"  Simulation {i+1}: Red Policy = {red_policy}, Purple Policy = {purple_policy}")
            red_rewards, purple_rewards, helping_actions = simulate_game_with_helping(red_policy, purple_policy)
            # Sum each reward list once and reuse the totals in the record.
            red_total = sum(red_rewards)
            purple_total = sum(purple_rewards)
            results_with_helping.append({
                "Red Policy": red_policy,
                "Purple Policy": purple_policy,
                "Red Reward": red_total,
                "Purple Reward": purple_total,
                "Total Reward": red_total + purple_total,
                "Red Helping Actions Before": helping_actions["red"]["before"],
                "Purple Helping Actions Before": helping_actions["purple"]["before"],
                "Red Helping Actions After": helping_actions["red"]["after"],
                "Purple Helping Actions After": helping_actions["purple"]["after"]
            })

# Convert the results to a DataFrame (one row per simulated game)
results_with_helping_df = pd.DataFrame(results_with_helping)


Starting simulations: Red Policy = selfish, Purple Policy = selfish
  Simulation 1: Red Policy = selfish, Purple Policy = selfish
Simulating a game: Red Policy = selfish, Purple Policy = selfish
Game completed: Red Policy = selfish, Purple Policy = selfish
Total Red Reward: 196, Total Purple Reward: 236
Helping Actions Before - Red: 1, Purple: 0
Helping Actions After - Red: 0, Purple: 1
  Simulation 2: Red Policy = selfish, Purple Policy = selfish
Simulating a game: Red Policy = selfish, Purple Policy = selfish
Game completed: Red Policy = selfish, Purple Policy = selfish
Total Red Reward: 148, Total Purple Reward: 208
Helping Actions Before - Red: 2, Purple: 2
Helping Actions After - Red: 0, Purple: 0
  Simulation 3: Red Policy = selfish, Purple Policy = selfish
Simulating a game: Red Policy = selfish, Purple Policy = selfish
Game completed: Red Policy = selfish, Purple Policy = selfish
Total Red Reward: 236, Total Purple Reward: 208
Helping Actions Before - Red: 0, Purple: 1
Helping

In [5]:
# Show the per-game results table: one row per simulated game, with both
# players' policies, reward totals, and helping-action counts.
results_with_helping_df

Unnamed: 0,Red Policy,Purple Policy,Red Reward,Purple Reward,Total Reward,Red Helping Actions Before,Purple Helping Actions Before,Red Helping Actions After,Purple Helping Actions After
0,selfish,selfish,196,236,432,1,0,0,1
1,selfish,selfish,148,208,356,2,2,0,0
2,selfish,selfish,236,208,444,0,1,1,0
3,selfish,selfish,244,200,444,3,3,0,0
4,selfish,selfish,180,276,456,3,4,1,0
...,...,...,...,...,...,...,...,...,...
895,collaborative,collaborative,88,224,312,1,2,2,0
896,collaborative,collaborative,212,160,372,2,2,0,0
897,collaborative,collaborative,220,188,408,3,3,0,0
898,collaborative,collaborative,196,208,404,1,1,0,0


In [6]:
# Average every remaining column within each (red policy, purple policy) pairing.
policy_pair_groups = results_with_helping_df.groupby(['Red Policy','Purple Policy'])
policy_pair_groups.mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Red Reward,Purple Reward,Total Reward,Red Helping Actions Before,Purple Helping Actions Before,Red Helping Actions After,Purple Helping Actions After
Red Policy,Purple Policy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
altruistic,altruistic,190.04,208.6,398.64,1.88,1.82,0.1,0.16
altruistic,collaborative,188.4,208.0,396.4,1.91,1.73,0.06,0.2
altruistic,selfish,182.6,214.84,397.44,1.8,1.64,0.15,0.22
collaborative,altruistic,194.84,207.48,402.32,1.69,1.65,0.14,0.17
collaborative,collaborative,191.28,209.44,400.72,1.73,1.71,0.15,0.11
collaborative,selfish,188.68,206.84,395.52,1.67,1.51,0.14,0.21
selfish,altruistic,201.32,201.12,402.44,1.72,1.61,0.06,0.2
selfish,collaborative,207.52,213.36,420.88,1.68,1.51,0.11,0.25
selfish,selfish,194.16,212.4,406.56,1.73,1.71,0.18,0.14


In [7]:
# Mean number of red's "before" helping actions, broken down by red's policy
# (averaged over all purple policies).
red_before_by_policy = results_with_helping_df.groupby('Red Policy')['Red Helping Actions Before']
red_before_by_policy.mean()

Red Policy
altruistic       1.863333
collaborative    1.696667
selfish          1.710000
Name: Red Helping Actions Before, dtype: float64

In [8]:
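# Optional: uncomment to save the per-game results. The filename records the MCTS
# settings used above (time=2, nsims=5000000, C=1) and the 900 simulated games.
# results_with_helping_df.to_csv('results_time_2_nsims_5000000_C_1_900_trials.csv')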