# Results Formatting

This notebook was used for collecting and formatting the results of the experiments, for the evaluation section of the paper.

### Imports

In [1]:
import pandas as pd
import os
import src.Common.Utils.Config.ConfigHelper as ConfigHelper
import shutil
import src.Common.EpisodeReplay.EpisodeReplay as EpisodeReplay

### Define Constants

In [9]:
# Name of the run group; used as a folder name under Data/<envName>/.
RunGroup = "12"
# Environments processed by every cell below.
EnvNames = ["FrozenLake"]
# Behaviour types whose "<type>_Episodes.json" file gets the human demo ids added.
BehaviouralTypes = ["Human", "Curated", "HighScore"]
# Subset of behaviour types for which review files are written and summarised.
BehaviouralTypesToReview = ["Human", "Curated"]

# Configuration of the manual review of the results.
# NOTE(review): MaxChoicesPerAgent is not referenced by any cell visible here — confirm it is still needed.
MaxChoicesPerAgent = 5
# Maximum number of replay ids per review row (number of "Replay_<i>" columns).
MaxReplaysPerChoice = 5


In [3]:
# copy the human demos from the "dev" folder to the run group folder
def CopyDemos(envName, runGroup):
	"""Replace the run group's human demo folder with a fresh copy of the "dev" one.

	Copies Data/<envName>/dev/replays/Human to
	Data/<envName>/<runGroup>/replays/Human, removing any existing destination
	first so that the copy is an exact mirror.

	Args:
		envName: Environment folder name under "Data".
		runGroup: Run group folder name under the environment folder.

	Raises:
		FileNotFoundError: If the source demo folder does not exist. The
			destination is left untouched in that case.
	"""
	fromPath = os.path.join("Data", envName, "dev", "replays", "Human")
	toPath = os.path.join("Data", envName, runGroup, "replays", "Human")

	# Check the source BEFORE deleting anything: otherwise a missing source
	# would wipe the existing run-group copy and only then fail in copytree.
	if not os.path.isdir(fromPath):
		raise FileNotFoundError(f"Human demo folder not found: {fromPath}")

	# copytree refuses to write into an existing folder, so clear it first
	if os.path.exists(toPath):
		shutil.rmtree(toPath)
	shutil.copytree(fromPath, toPath)

	print("Copied Human Demos for: " + envName)

def AddDemoIdsToBehaviour(envName, runGroup, behaviourType):
	"""Store the human demo episode ids in the behaviour type's episode-id file.

	Reads the stats table of the run group's human demos, keeps the episodes
	whose behaviour tag equals the lower-cased behaviour type (every episode is
	kept for "HighScore"), and writes the unique episode ids under the key
	"Human_<behaviourType>" of Data/<envName>/<runGroup>/<behaviourType>_Episodes.json.

	Args:
		envName: Environment folder name under "Data".
		runGroup: Run group folder name under the environment folder.
		behaviourType: Behaviour type name; selects both the filter and the file.
	"""
	runFolder = os.path.join("Data", envName, runGroup)

	# per-episode stats of the human demos
	demoStats = pd.read_csv(os.path.join(runFolder, "replays", "Human", "stats.tsv"), sep="\t")

	# the behaviour tag is the second-to-last "_"-separated token of the logger name
	demoStats["Behaviour"] = demoStats["loggerName"].apply(lambda name: name.split("_")[-2])

	# "highscore" is not filtered: it takes the demos of every behaviour
	wantedBehaviour = behaviourType.lower()
	if wantedBehaviour != "highscore":
		demoStats = demoStats[demoStats["Behaviour"] == wantedBehaviour]

	demoEpisodeIds = demoStats["EpisodeId"].unique().tolist()

	# add (or overwrite) the human entry in the behaviour's episode-id json
	jsonPath = os.path.join(runFolder, f"{behaviourType}_Episodes.json")
	episodesByLogger = ConfigHelper.LoadConfig(jsonPath)
	episodesByLogger["Human_" + behaviourType] = demoEpisodeIds
	ConfigHelper.SaveConfig(episodesByLogger, jsonPath)

	print(f"Added {len(demoEpisodeIds)} {behaviourType} episodes to {envName}")
	return

# For every environment: refresh the run group's copy of the human demos, then
# add those demo episode ids to the episode-id file of each behaviour type.
for envName in EnvNames:
	CopyDemos(envName, RunGroup)

	for behaviourType in BehaviouralTypes:
		AddDemoIdsToBehaviour(envName, RunGroup, behaviourType)


Copied Human Demos for: FrozenLake
Added 59 Human episodes to FrozenLake
Added 23 Curated episodes to FrozenLake
Added 82 HighScore episodes to FrozenLake


## Collect Replays for Manual Review

In [5]:
def LoadReplay(envName, runGroup, agentType, episodeId):
	"""Load one episode replay from the run group folder, or return None on failure.

	The replay is read from Data/<envName>/<runGroup>/replays/<agentType>/<episodeId>.

	Args:
		envName: Environment folder name under "Data".
		runGroup: Run group folder name under the environment folder.
		agentType: Agent type folder name under "replays".
		episodeId: Episode folder name under the agent type folder.

	Returns:
		The object returned by EpisodeReplay.LoadFromFolder, or None when
		loading raised an exception.
	"""
	path = os.path.join("Data", envName, runGroup, "replays", agentType, episodeId)
	try:
		return EpisodeReplay.EpisodeReplay.LoadFromFolder(path)
	except Exception:
		# Best-effort load: a missing/corrupt replay is reported as None.
		# `Exception` (not a bare except) so that KeyboardInterrupt and
		# SystemExit still stop a long-running cell.
		return None

def CollectReplaysToReview(envName, runGroup, behaviourType):
	"""Build the table of replay groups to be reviewed for one behaviour type.

	Reads Data/<envName>/<runGroup>/<behaviourType>_Episodes.json (a mapping of
	agent id to episode ids), drops the episodes whose replay cannot be loaded,
	and splits the remaining ids of each agent, in order, into groups of at most
	MaxReplaysPerChoice. Each group becomes one row.

	Args:
		envName: Environment folder name under "Data".
		runGroup: Run group folder name under the environment folder.
		behaviourType: Behaviour type name; selects the episode-id file.

	Returns:
		DataFrame with the columns "AgentId", "Predicted" (always None here),
		"AgentType" (text before the first "_" of the agent id) and
		"Replay_0" .. "Replay_<MaxReplaysPerChoice - 1>"; unused replay slots
		of a partial group are left missing.
	"""
	episodeIdsPath = os.path.join("Data", envName, runGroup, f"{behaviourType}_Episodes.json")
	replays = ConfigHelper.LoadConfig(episodeIdsPath)

	columns = ["AgentId", "Predicted", "AgentType"]
	columns += [f"Replay_{i}" for i in range(MaxReplaysPerChoice)]

	# Collect plain records and build the frame once at the end, instead of
	# concatenating onto an (initially empty) DataFrame for every row.
	rows = []

	for agentId, episodeIds in replays.items():
		agentType = agentId.split("_")[0]

		# only episodes whose replay actually loads can be reviewed
		loadableIds = [
			episodeId
			for episodeId in episodeIds
			if LoadReplay(envName, runGroup, agentType, episodeId) is not None
		]

		# Chunk AFTER filtering so the trailing partial group is always kept,
		# even when the last episode id of the agent failed to load.
		for start in range(0, len(loadableIds), MaxReplaysPerChoice):
			group = loadableIds[start:start + MaxReplaysPerChoice]

			row = {"AgentId": agentId, "Predicted": None, "AgentType": agentType}
			for slot, episodeId in enumerate(group):
				row[f"Replay_{slot}"] = episodeId
			rows.append(row)

	return pd.DataFrame(rows, columns=columns)
	
# Fixed seed so that Restart & Run All writes the review rows in the same order every time.
ReviewShuffleSeed = 0

# NOTE(review): this cell overwrites ReplaysToReview_<behaviourType>.json, which is the
# file the summary cell reads "Predicted" from — re-running it discards any labels
# already entered in that file.
for envName in EnvNames:
	for behaviourType in BehaviouralTypesToReview:

		replaysToReview = CollectReplaysToReview(envName, RunGroup, behaviourType)
		# shuffle the rows (presumably so the row order does not reveal the agent — confirm)
		replaysToReview = replaysToReview.sample(frac=1, random_state=ReviewShuffleSeed)
		replaysToReviewPath = os.path.join("Data", envName, RunGroup, f"ReplaysToReview_{behaviourType}.json")
		replaysToReview.to_json(replaysToReviewPath, orient="records", indent=4)

		print(f"Collected {len(replaysToReview)} replays to review for {behaviourType} in {envName}")


Collected 92 replays to review for Human in FrozenLake
Collected 85 replays to review for Curated in FrozenLake


## Format the Results of the Review

In [12]:
def LoadReplaysToReview(envName, runGroup, behaviourType):
	"""Summarise the reviewed replay file of one behaviour type per agent type.

	Reads Data/<envName>/<runGroup>/ReplaysToReview_<behaviourType>.json and,
	for every "AgentType", computes the count and sum of the "Predicted"
	column, their ratio ("Percent") and that ratio divided by the ratio of the
	"Human" agent type ("Norm_Percent").

	Args:
		envName: Environment folder name under "Data".
		runGroup: Run group folder name under the environment folder.
		behaviourType: Behaviour type name; selects the review file.

	Returns:
		DataFrame indexed by "AgentType" with the columns ("Predicted", "count"),
		("Predicted", "sum"), "Percent" and "Norm_Percent". The two percent
		columns are strings formatted with "{:.0%}"; rows without any non-null
		"Predicted" value, and every "Norm_Percent" when there is no "Human"
		row, are rendered as "nan%".
	"""
	replaysToReviewPath = os.path.join("Data", envName, runGroup, f"ReplaysToReview_{behaviourType}.json")
	replaysToReview = pd.read_json(replaysToReviewPath)

	grouped = replaysToReview.groupby("AgentType").aggregate({"Predicted": ["count", "sum"]})

	# "count" ignores null labels, so unreviewed rows do not dilute the ratio
	percent = grouped[("Predicted", "sum")] / grouped[("Predicted", "count")]

	# Baseline for normalisation; NaN (instead of a KeyError) when the file
	# contains no "Human" rows, so the rest of the table is still produced.
	humanPercent = percent.get("Human", float("nan"))
	normPercent = percent / humanPercent

	# format the percent columns
	grouped["Percent"] = percent.apply(lambda x: f"{x:.0%}")
	grouped["Norm_Percent"] = normPercent.apply(lambda x: f"{x:.0%}")

	return grouped


# Show one summary table per environment and reviewed behaviour type.
for envName in EnvNames:
	for behaviourType in BehaviouralTypesToReview:
		grouped = LoadReplaysToReview(envName, RunGroup, behaviourType)
		# plain-text caption, then the rich (HTML) rendering of the table
		print(f"{envName} - {behaviourType}")
		display(grouped)

FrozenLake - Human


Unnamed: 0_level_0,Predicted,Predicted,Percent,Norm_Percent
Unnamed: 0_level_1,count,sum,Unnamed: 3_level_1,Unnamed: 4_level_1
AgentType,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
HardCoded,0,0.0,nan%,nan%
Human,0,0.0,nan%,nan%
ML,0,0.0,nan%,nan%
Random,0,0.0,nan%,nan%


FrozenLake - Curated


Unnamed: 0_level_0,Predicted,Predicted,Percent,Norm_Percent
Unnamed: 0_level_1,count,sum,Unnamed: 3_level_1,Unnamed: 4_level_1
AgentType,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
HardCoded,0,0.0,nan%,nan%
Human,0,0.0,nan%,nan%
ML,0,0.0,nan%,nan%
Random,0,0.0,nan%,nan%
