# Q-Learning for Pacman Capture the Flag

In [3]:
import json
import random

def update_parameters(param_json, reset_chance=0.05):
    """
    Append new epsilon, alpha, and discount values to a parameter JSON file.

    The file holds a JSON object whose "epsilon", "alpha", "discount" and
    "total_reward" entries are lists (one history per key). The most recent
    two rewards are compared and every parameter is scaled by the same
    factor: 0.95 if the reward improved, 1.1 if it got worse, unchanged if
    it stayed the same. Results are clipped to [0.0, 1.0].

    The initial values (epsilon=1.0, alpha=0.2, discount=0.8) are used
    instead when fewer than two rewards have been recorded, or at random
    with probability ``reset_chance``.

    Args:
        param_json: Path to the JSON parameter file; it is rewritten in place.
        reset_chance: Probability of resetting all parameters to their
            initial values on a call that has enough reward history.

    Raises:
        OSError: If the file cannot be read or written.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    initial_values = {"epsilon": 1.0, "alpha": 0.2, "discount": 0.8}

    # Load parameters from the JSON file
    with open(param_json, 'r') as file:
        params = json.load(file)

    rewards = params.get("total_reward", [])

    # The length check comes first so that a fresh file (empty histories)
    # never has its lists indexed and never consumes a random draw.
    if len(rewards) < 2 or random.random() < reset_chance:
        updated = dict(initial_values)
    else:
        total_reward, prev_reward = rewards[-1], rewards[-2]

        # All three parameters react to the reward trend in the same way.
        if total_reward > prev_reward:
            factor = 0.95  # Reward improved: shrink the parameters
        elif total_reward < prev_reward:
            factor = 1.1   # Reward dropped: grow the parameters
        else:
            factor = 1.0   # No change in reward: keep the parameters

        updated = {}
        for name, default in initial_values.items():
            history = params.get(name) or []
            # Fall back to the initial value if this history is missing/empty
            current = history[-1] if history else default
            # Clip values to ensure they remain within valid ranges
            updated[name] = max(0.0, min(1.0, current * factor))

    # Update the parameters dictionary, creating a history list if needed
    for name, value in updated.items():
        params.setdefault(name, []).append(value)

    # Save the updated parameters back to the JSON file
    with open(param_json, 'w') as file:
        json.dump(params, file, indent=4)


In [10]:
import os

# Move into the contest directory so capture.py and the parameter JSON files
# resolve by relative path. Only change directory when not already there:
# re-running this cell would otherwise raise FileNotFoundError, because the
# relative path no longer exists from inside the project directory.
if os.path.basename(os.getcwd()) != "pacman-contest-3-master":
    os.chdir("pacman-contest-3-master/")
os.getcwd()

'c:\\Users\\ryanb\\OneDrive\\Desktop\\RIT\\AI Explorations\\AI_Explorations_Pacman_Capture_the_Flag\\pacman-contest-3-master'

In [18]:
# Apply one parameter update to each of the two parameter files.
for param_file in ("offensiveParams.json", "defensiveParams.json"):
    update_parameters(param_file)

In [19]:
# Training loop: play 1000 games, updating both parameter files after each one.
for episode in range(1000):
    # IPython shell escape (not plain Python): runs one game with myteam as
    # both the red (-r) and blue (-b) team.
    # NOTE(review): --delay-step 0 and -q presumably turn off the step delay
    # and graphics output -- confirm against capture.py's option parser.
    !python capture.py -r myteam -b myteam  --delay-step 0 -q
    # Append the next epsilon/alpha/discount values to each JSON file.
    # NOTE(review): assumes the game run above records "total_reward" in
    # these files -- confirm in myteam.py.
    update_parameters("offensiveParams.json")
    update_parameters("defensiveParams.json")


Red team myteam with {}:
myteam.py
Loading Team: myteam.py
Arguments: {}

Blue team myteam with {}:
myteam.py
Loading Team: myteam.py
Arguments: {}
Red team starts
Time is up.
Tie game!

Total Time Game: 3.0


  import sys, util, types, time, random, imp
