# Results Formatting

This notebook was used for collecting and formatting the results of the experiments, for the evaluation section of the paper.

### Imports

In [19]:
import pandas as pd
import os
import src.Common.Utils.Config.ConfigHelper as ConfigHelper
import shutil
import src.Common.EpisodeReplay.EpisodeReplay as EpisodeReplay
from tqdm import tqdm
import pyperclip as pc
import scipy.stats as stats
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Define Constants

In [280]:
RunGroup = "13"
EnvNames = ["FrozenLake", "CartPole"]
BehaviouralTypes = ["Human", "Curated", "HighScore"]
BehaviouralTypesToReview = ["Human", "Curated"]
AgentTypes = ["HardCoded", "ML", "Random", "Human"]

# manual Review of the results config
MaxChoicesPerAgent = 5
MaxReplaysPerChoice = 5


In [131]:
# copy demos of each behavioural type to the run group folder
def CopyDemos(envName, runGroup):
	fromPath = os.path.join("Data", envName, "dev", "replays", "Human")
	toPath = os.path.join("Data", envName, runGroup, "replays", "Human")

	if os.path.exists(toPath):
		shutil.rmtree(toPath)
	shutil.copytree(fromPath, toPath)

	print("Copied Human Demos for: " + envName)
	return

def AddDemoIdsToBehaviour(envName, runGroup, behaviourType):
	# load the results
	statsPath = os.path.join("Data", envName, runGroup, "replays", "Human", "stats.tsv")
	stats = pd.read_csv(statsPath, sep="\t")

	lowerBehaviourType = behaviourType.lower()

	stats["Behaviour"] = stats["metricName"].apply(lambda x: x.split("_")[-2])

	if lowerBehaviourType != "highscore":
		stats = stats[stats["Behaviour"] == lowerBehaviourType]

	episodeIds = stats["EpisodeId"].unique().tolist()

	metricName = "Human_" + behaviourType

	# load the json with the episode Ids of the behavioural type
	episodeIdsPath = os.path.join("Data", envName, runGroup, f"{behaviourType}_Episodes.json")
	
	episodeIdsJson = ConfigHelper.LoadConfig(episodeIdsPath)
	episodeIdsJson[metricName] = episodeIds

	ConfigHelper.SaveConfig(episodeIdsJson, episodeIdsPath)

	print(f"Added {len(episodeIds)} {behaviourType} episodes to {envName}")
	return

for envName in EnvNames:
	CopyDemos(envName, RunGroup)

	for behaviourType in BehaviouralTypes:
		AddDemoIdsToBehaviour(envName, RunGroup, behaviourType)


Copied Human Demos for: FrozenLake
Added 59 Human episodes to FrozenLake
Added 23 Curated episodes to FrozenLake
Added 82 HighScore episodes to FrozenLake


## Collect Replays For Manual Reviewed

In [286]:
def LoadReplay(envName, runGroup, agentType, episodeId):
	
	path = os.path.join("Data", envName, runGroup, "replays", agentType, episodeId)
	try:
		replay = EpisodeReplay.EpisodeReplay.LoadFromFolder(path)
		return replay
	except:
		return None

def CollectReplaysToReview(envName, runGroup, behaviourType):
	episodeIdsPath = os.path.join("Data", envName, runGroup, f"{behaviourType}_Episodes.json")
	replays =  ConfigHelper.LoadConfig(episodeIdsPath)

	columns = ["AgentId", "Predicted", "AgentType"]
	columns += [f"Replay_{i}" for i in range(MaxReplaysPerChoice)]

	replaysToReview = pd.DataFrame(columns=columns)

	for agentId, episodeIds in replays.items():
		
		agentType = agentId.split("_")[0]
		ids = []

		for i in range(len(episodeIds)):
			episodeId = episodeIds[i]

			replay = LoadReplay(envName, runGroup, agentType, episodeId)
			if replay is None:
				continue

			ids.append(episodeId)

			if len(ids) >= MaxReplaysPerChoice or i == len(episodeIds) - 1:
				row = {}
				row["AgentId"] = [agentId]
				row["Predicted"] = [None]
				row["AgentType"] = [agentType]
				for i, id in enumerate(ids):
					row[f"Replay_{i}"] = [id]

				replaysToReview = pd.concat([replaysToReview, pd.DataFrame(row)], ignore_index=True)
				ids = []

	return replaysToReview
	
for envName in EnvNames:
	for behaviourType in BehaviouralTypesToReview:

		replaysToReview = CollectReplaysToReview(envName, RunGroup, behaviourType)
		replaysToReview = replaysToReview.sample(frac=1)
		replaysToReviewPath = os.path.join("Data", envName, RunGroup, f"ReplaysToReview_{behaviourType}.json")

		replaysToReview.to_json(replaysToReviewPath, orient="records", indent=4)

		print(f"Collected {len(replaysToReview)} replays to review for {behaviourType} in {envName}")


Collected 92 replays to review for Human in FrozenLake
Collected 39 replays to review for Curated in FrozenLake


Exception: configPath Data\CartPole\13\Human_Episodes.json does not exist

## Formate the results of the review

In [3]:
def CalculateBinaryCI(positives, totalCount, confidenceLevel=0.95):
	# Calculate the sample proportion (p)
	p = positives / totalCount
	
	# Calculate the standard error
	se = math.sqrt((p * (1 - p)) / totalCount)
	
	# Calculate the Z-score for the desired confidence level
	z = stats.norm.ppf(1 - (1 - confidenceLevel) / 2)
	
	# Calculate the margin of error
	margin_of_error = z * se
	
	# Calculate the lower and upper bounds of the confidence interval
	lower_bound = p - margin_of_error
	upper_bound = p + margin_of_error
	
	return lower_bound, upper_bound

In [26]:
def CalStandardError(positives, totalCount):
	p = positives / totalCount
	return math.sqrt((p * (1 - p)) / totalCount)

In [31]:
def LoadReplaysToReview(envName, runGroup, behaviourType):
	replaysToReviewPath = os.path.join("Data", envName, runGroup, f"ReplaysToReview_{behaviourType}.json")
	replaysToReview = pd.read_json(replaysToReviewPath)

	grouped = replaysToReview.groupby("AgentType").aggregate({"Predicted": ["count", "sum"]})


	grouped["Percent"] = grouped["Predicted"]["sum"] / grouped["Predicted"]["count"]
	grouped["Norm_Percent"] = grouped["Percent"] / grouped["Percent"]["Human"]

	# calculate the confidence intervals
	grouped["Error"] = grouped.apply(lambda x: CalStandardError(x["Predicted"]["sum"], x["Predicted"]["count"]), axis=1)
	return grouped

for envName in EnvNames:
	for behaviourType in BehaviouralTypesToReview:
		grouped = LoadReplaysToReview(envName, RunGroup, behaviourType)
		print(f"{envName} - {behaviourType}")
		display(grouped)

FrozenLake - Human


Unnamed: 0_level_0,Predicted,Predicted,Percent,Norm_Percent,Error
Unnamed: 0_level_1,count,sum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AgentType,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
HardCoded,20,1,0.05,0.066667,0.048734
Human,12,9,0.75,1.0,0.125
ML,40,34,0.85,1.133333,0.056458
Random,20,0,0.0,0.0,0.0


FrozenLake - Curated


  p = positives / totalCount


Unnamed: 0_level_0,Predicted,Predicted,Percent,Norm_Percent,Error
Unnamed: 0_level_1,count,sum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AgentType,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
HardCoded,0,0.0,,,
Human,0,0.0,,,
ML,0,0.0,,,
Random,0,0.0,,,


In [49]:
import math

positiveCount = 16
totalCount = 20


p = positiveCount / totalCount
error = math.sqrt((p * (1 - p)) / totalCount)

p *= 100
error *= 100
print(f"{p:.3g}% ({error:.3g}%)")

80% (8.94%)


## Formating tables and graphs

In [50]:
def CollectEvalIds(runGroup, envNames, behaviouralTypes):
	columns = ["EnvName", "AgentId", "AgentType", "EpisodeId", "Behaviour"]
	df = pd.DataFrame(columns=columns)

	for envName in envNames:
		for behaviourType in behaviouralTypes:
			episodeIdsPath = os.path.join("Data", envName, runGroup, f"{behaviourType}_Episodes.json")
			replays =  ConfigHelper.LoadConfig(episodeIdsPath)


			for agentId, episodeIds in replays.items():
				
				agentType = agentId.split("_")[0]

				for i in range(len(episodeIds)):
					episodeId = episodeIds[i]

					row = {}
					row["EnvName"] = [envName]
					row["AgentId"] = [agentId]
					row["AgentType"] = [agentType]
					row["Behaviour"] = [behaviourType]
					row["EpisodeId"] = [episodeId]
					df = pd.concat([df, pd.DataFrame(row)], ignore_index=True)

	return df

evalIds = CollectEvalIds(RunGroup, EnvNames, BehaviouralTypes)

In [51]:
def CleanStats(df):
	prefixesToDrop = ["LearnerConfig", "ModelConfigs", "DataTables"]
	columnsToDrop = [col for col in df.columns if col.startswith(tuple(prefixesToDrop))]
	df = df.drop(columns=columnsToDrop)

	# add duration column
	df["Duration"] = (df["EndTime"] - df["StartTime"])  / 1e9
	return df

def CombinedStats(runGroup, envNames, agentTypes):
	combinedStats = None

	for envName in envNames:
		for agentType in agentTypes:
			statsPath = os.path.join("Data", envName, runGroup, "replays", agentType, "stats.tsv")
			stats = pd.read_csv(statsPath, sep="\t")
			stats = CleanStats(stats)

			combinedStats = pd.concat([combinedStats, stats], ignore_index=True)
	return combinedStats

stats = CombinedStats(RunGroup, EnvNames, AgentTypes)
evalDf = pd.merge(evalIds, stats, on=["EpisodeId"], how="left")

# drop rows with nan values
evalDf = evalDf.dropna()

In [None]:
def ConvertToLatex(df):
	# Get column names
	columns = df.columns.tolist()

	headerCode = "\hline\n"
	headerCode += "\t\multicolumn{1}{|c|}{\\textbf{"
	headerCode += "}} &\n\t\multicolumn{1}{c|}{\\textbf{".join(columns)

	headerCode += "}} \\\\\n\hline\n"


	# Generate LaTeX table code
	latex_code = "\\begin{longtable}{|" + "c|" * len(columns) + "}\n"

	# add caption and label
	latex_code += "\\caption{Insert Caption Here.}\n"
	latex_code += "\\label{tab:InsertLabelHere} \\\\\n"

	latex_code += headerCode
	latex_code += "\endfirsthead\n\n"

	latex_code += "\multicolumn{" + str(len(columns)) + "}{c}%\n"
	latex_code += "{{\\bfseries \\tablename\\ \\thetable{} -- continued from previous page}} \\\\\n"
	latex_code += headerCode
	latex_code += "\endhead\n\n"

	latex_code += "\hline \multicolumn{" + str(len(columns)) + "}{|c|}{{Continued on next page}} \\\\ \hline\n\n"
	latex_code += "\endfoot\n"

	latex_code += "\hline\n"
	latex_code += "\endlastfoot\n"

	latex_code += "\n"


	# Add data rows
	for index, row in df.iterrows():
		values = row.tolist()
		latex_code += "\t" + " & ".join(str(value) for value in values) + " \\\\\n"
	latex_code += "\\hline\n"
	# Complete LaTeX table code
	latex_code += "\\end{longtable}"

	return latex_code

In [52]:
aggregateSettings = {}
aggregateSettings["EpisodeId"] = "count"
aggregateSettings["Duration"] = ["mean"]
aggregateSettings["EpisodeTotalReward"] = ["mean", "std"]
aggregateSettings["EpisodeTotalCuratedReward"] = ["mean", "std"]

evalDf.groupby(["EnvName", "Behaviour", "AgentId"]).aggregate(aggregateSettings)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,EpisodeId,Duration,EpisodeTotalReward,EpisodeTotalReward,EpisodeTotalCuratedReward,EpisodeTotalCuratedReward
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,mean,std,mean,std
EnvName,Behaviour,AgentId,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FrozenLake,Curated,ML_D_10_RT_False_Curated,100,0.424349,1.0,0.0,0.0,0.0
FrozenLake,Curated,ML_D_10_RT_True_Curated,100,0.250854,1.0,0.0,0.0,0.0
FrozenLake,HighScore,HardCoded_D_1_RT_True_HighScore,1000,0.020397,1.0,0.0,0.0,0.0
FrozenLake,HighScore,ML_D_10_RT_False_HighScore,1000,0.380918,1.0,0.0,0.0,0.0
FrozenLake,HighScore,ML_D_10_RT_True_HighScore,1000,0.138895,1.0,0.0,0.0,0.0
FrozenLake,HighScore,Random_D_1_RT_True_HighScore,1008,2.385675,0.03869,0.192952,0.124008,0.329754
FrozenLake,Human,HardCoded_D_1_RT_True_Human,100,0.014433,1.0,0.0,0.0,0.0
FrozenLake,Human,ML_D_10_RT_False_Human,100,0.884688,1.0,0.0,0.0,0.0
FrozenLake,Human,ML_D_10_RT_True_Human,100,0.345229,1.0,0.0,0.0,0.0
FrozenLake,Human,Random_D_1_RT_True_Human,116,1.755059,0.025862,0.159412,0.103448,0.305865


In [None]:


evalDf[evalDf["Behaviour"] == "HighScore"].groupby(["EnvName", "AgentId"])[["EpisodeTotalReward"]].aggregate(aggregateSettings["EpisodeTotalReward"])

Unnamed: 0_level_0,Unnamed: 1_level_0,EpisodeTotalReward,EpisodeTotalReward
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
EnvName,AgentId,Unnamed: 2_level_2,Unnamed: 3_level_2
FrozenLake,HardCoded_D_1_RT_True_HighScore,1.0,0.0
FrozenLake,ML_D_10_RT_False_HighScore,1.0,0.0
FrozenLake,ML_D_10_RT_True_HighScore,1.0,0.0
FrozenLake,Random_D_1_RT_True_HighScore,0.03869,0.192952


In [74]:
def CreateEnvAgentTypeTable(evalDf, envNames, agentTypes, metric):

	columns = ["EnvName"]
	columns += agentTypes

	df = pd.DataFrame(columns=columns)

	for envName in envNames:
		row = {}
		row["EnvName"] = [envName]

		for agentType in agentTypes:
			agentTypeDf = evalDf[evalDf["AgentType"] == agentType]
			agentTypeDf = agentTypeDf[agentTypeDf["EnvName"] == envName]

			avg = agentTypeDf[metric].mean()
			error = agentTypeDf[metric].std()
			cell = f"{avg:.2f} ({error:.2f})"
			row[agentType] = [cell]

		df = pd.concat([df, pd.DataFrame(row)], ignore_index=True)

	
	# set the index to be the env name
	df = df.set_index("EnvName")

	text = ConvertToLatex(df)
	pc.copy(text)
	print(text.replace("\n", " "))
	display(df)
	print("Copied to clipboard")

	return

In [None]:
curatedDf = evalDf[evalDf["Behaviour"] == "Curated"]
humanDf = evalDf[evalDf["Behaviour"] == "Human"]
highScoreDf = evalDf[evalDf["Behaviour"] == "HighScore"]

NameError: name 'evalDf' is not defined

In [None]:
CreateEnvAgentTypeTable(highScoreDf, EnvNames, AgentTypes, "EpisodeTotalReward")

\begin{longtable}{|c|c|c|c|} \caption{Insert Caption Here.} \label{tab:InsertLabelHere} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endfirsthead  \multicolumn{4}{c}% {{\bfseries \tablename\ \thetable{} -- continued from previous page}} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endhead  \hline \multicolumn{4}{|c|}{{Continued on next page}} \\ \hline  \endfoot \hline \endlastfoot  	1.00 ±0.00 & 1.00 ±0.00 & 0.04 ±0.19 & nan ±nan \\ \hline \end{longtable}


Unnamed: 0_level_0,HardCoded,ML,Random,Human
EnvName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FrozenLake,1.00 ±0.00,1.00 ±0.00,0.04 ±0.19,nan ±nan


Copied to clipboard


In [None]:
CreateEnvAgentTypeTable(curatedDf, EnvNames, AgentTypes, "EpisodeTotalCuratedReward")

\begin{longtable}{|c|c|c|c|} \caption{Insert Caption Here.} \label{tab:InsertLabelHere} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endfirsthead  \multicolumn{4}{c}% {{\bfseries \tablename\ \thetable{} -- continued from previous page}} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endhead  \hline \multicolumn{4}{|c|}{{Continued on next page}} \\ \hline  \endfoot \hline \endlastfoot  	1.00 ±0.00 & 0.00 ±0.00 & 0.09 ±0.29 & nan ±nan \\ \hline \end{longtable}


Unnamed: 0_level_0,HardCoded,ML,Random,Human
EnvName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FrozenLake,1.00 ±0.00,0.00 ±0.00,0.09 ±0.29,nan ±nan


Copied to clipboard


In [53]:
CreateEnvAgentTypeTable(highScoreDf, EnvNames, AgentTypes, "Duration")

\begin{longtable}{|c|c|c|c|} \caption{Insert Caption Here.} \label{tab:InsertLabelHere} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endfirsthead  \multicolumn{4}{c}% {{\bfseries \tablename\ \thetable{} -- continued from previous page}} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endhead  \hline \multicolumn{4}{|c|}{{Continued on next page}} \\ \hline  \endfoot \hline \endlastfoot  	0.02 & 0.26 & 2.39 & nan \\ \hline \end{longtable}


Unnamed: 0_level_0,HardCoded,ML,Random,Human
EnvName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FrozenLake,0.02,0.26,2.39,


Copied to clipboard


In [56]:
highScoreDf["TimePerStep"] = highScoreDf["Duration"] / highScoreDf["EpisodeSteps"]

highScoreDf["TimePerStep"]

690     9.979701
691     7.378715
692     6.106398
693     2.483409
694     1.422509
          ...   
4705    0.002112
4706    0.002134
4707    0.002237
4708    0.002278
4709    0.002250
Name: TimePerStep, Length: 4008, dtype: float64

In [75]:
CreateEnvAgentTypeTable(highScoreDf, EnvNames, AgentTypes, "TimePerStep")

\begin{longtable}{|c|c|c|c|} \caption{Insert Caption Here.} \label{tab:InsertLabelHere} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endfirsthead  \multicolumn{4}{c}% {{\bfseries \tablename\ \thetable{} -- continued from previous page}} \\ \hline 	\multicolumn{1}{|c|}{\textbf{HardCoded}} & 	\multicolumn{1}{c|}{\textbf{ML}} & 	\multicolumn{1}{c|}{\textbf{Random}} & 	\multicolumn{1}{c|}{\textbf{Human}} \\ \hline \endhead  \hline \multicolumn{4}{|c|}{{Continued on next page}} \\ \hline  \endfoot \hline \endlastfoot  	0.01 (0.00) & 0.06 (0.67) & 0.36 (0.12) & nan (nan) \\ \hline \end{longtable}


Unnamed: 0_level_0,HardCoded,ML,Random,Human
EnvName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FrozenLake,0.01 (0.00),0.06 (0.67),0.36 (0.12),nan (nan)


Copied to clipboard


In [229]:
envName = "CartPole"

In [220]:
envName = "FrozenLake"

In [263]:
runGroup = "dev"
agent = "HardCoded"
stats = pd.read_csv(f"Data//{envName}//{runGroup}//replays//{agent}//stats.tsv", sep="\t")


Unnamed: 0,StartTime,EndTime,EpisodeId,Terminated,Truncated,EpisodeTotalReward,EpisodeTotalCuratedReward,EpisodeSteps,metricName,Agent,...,ModelConfigs_PlayStyle_Discriminator_MaxDeploymentBatchSize,ModelConfigs_PlayStyle_Discriminator_DataTable,ModelConfigs_PlayStyle_Discriminator_ReplayExamples,ModelConfigs_PlayStyle_Discriminator_LearningRate,ModelConfigs_PlayStyle_Discriminator_DenseLayers,ModelConfigs_PlayStyle_Discriminator_Activations,ModelConfigs_PlayStyle_Discriminator_Dropout,ModelConfigs_PlayStyle_Discriminator_L1,ModelConfigs_PlayStyle_Discriminator_L2,DataTables
0,1694554919748454300,1694554921488929900,2023-09-12_22-41-59-748,False,True,500.0,324.213065,500,HardCoded_D_1_T_1_RT_True_HighScore,HardCoded,...,2048,PlayStyle_Trajectories,curated,0.001,[256],relu,0.5,0.5,0.5,"[{'TableName': 'Forward_Trajectories', 'StepCo..."
1,1694554922133490800,1694554924172616700,2023-09-12_22-42-02-133,False,True,500.0,341.217789,500,HardCoded_D_1_T_1_RT_True_HighScore,HardCoded,...,2048,PlayStyle_Trajectories,curated,0.001,[256],relu,0.5,0.5,0.5,"[{'TableName': 'Forward_Trajectories', 'StepCo..."
2,1694554924890528600,1694554926591721100,2023-09-12_22-42-04-890,False,True,500.0,386.926498,500,HardCoded_D_1_T_1_RT_True_HighScore,HardCoded,...,2048,PlayStyle_Trajectories,curated,0.001,[256],relu,0.5,0.5,0.5,"[{'TableName': 'Forward_Trajectories', 'StepCo..."
3,1694554927202768500,1694554928969381100,2023-09-12_22-42-07-202,False,True,500.0,358.574428,500,HardCoded_D_1_T_1_RT_True_HighScore,HardCoded,...,2048,PlayStyle_Trajectories,curated,0.001,[256],relu,0.5,0.5,0.5,"[{'TableName': 'Forward_Trajectories', 'StepCo..."
4,1694554929588415700,1694554932137082700,2023-09-12_22-42-09-588,False,True,500.0,330.244899,500,HardCoded_D_1_T_1_RT_True_HighScore,HardCoded,...,2048,PlayStyle_Trajectories,curated,0.001,[256],relu,0.5,0.5,0.5,"[{'TableName': 'Forward_Trajectories', 'StepCo..."


In [266]:
stats["Duration"] = (stats["EndTime"] - stats["StartTime"])  / 1e9
stats["DurationPerStep"] = stats["Duration"] / stats["EpisodeSteps"]

In [273]:
keys = ["EpisodeTotalReward", "EpisodeTotalCuratedReward", "DurationPerStep"]

print(f"Stats for {agent} in {envName}")

for key in keys:

	values = stats[key].values

	print(f"{key:<30}: {values.mean():.3f} ({values.std():.3f})")

Stats for HardCoded in CartPole
EpisodeTotalReward            : 500.000 (0.000)
EpisodeTotalCuratedReward     : 347.596 (57.922)
DurationPerStep               : 0.004 (0.001)


In [401]:
stats = pd.read_csv(f"Data//FrozenLake//dev//replays//HardCoded//stats.tsv", sep="\t")

logName = "loggerName"
if "metricName" in stats.columns:
	logName = "metricName"
stats[logName].unique()

ParserError: Error tokenizing data. C error: Expected 105 fields in line 202, saw 109


In [366]:
behaviourMatch = stats[logName].str.lower().str.contains("curated")

temp = stats[behaviourMatch]
temp = temp[temp["Agent"] == "ML"]

In [403]:
keys = ["EpisodeTotalReward", "EpisodeTotalCuratedReward", "DurationPerStep"]
# keys = ["EpisodeTotalCuratedReward"]

runGroup = "dev"
for envName in EnvNames:
	for agent in AgentTypes:
		try:
			stats = pd.read_csv(f"Data//{envName}//{runGroup}//replays//{agent}//stats.tsv", sep="\t")
		

			stats["Duration"] = (stats["EndTime"] - stats["StartTime"])  / 1e9
			stats["DurationPerStep"] = stats["Duration"] / stats["EpisodeSteps"]

			if envName == "CartPole":
				stats["EpisodeTotalCuratedReward"] = stats["EpisodeTotalCuratedReward"] / stats["EpisodeSteps"]

			print()
			print(f"Stats for {agent} in {envName} runGroup: {runGroup}")

			for key in keys:

				values = stats[key].values


				if key == "EpisodeTotalCuratedReward":
					logName = "loggerName"
					if "metricName" in stats.columns:
						logName = "metricName"
					
					metricNames = stats[logName].str.lower()
					behaviourMatch = metricNames.str.contains("curated")

					values = stats[behaviourMatch][key].values


				print(f"{key:<30}: {values.mean():.2f} ({values.std():.2f})")

				
		except Exception as e:
			# print(f"Failed to load stats for {agent} in {envName} {e}")
			pass
			


Stats for Human in FrozenLake runGroup: dev
EpisodeTotalReward            : 0.96 (0.19)
EpisodeTotalCuratedReward     : 1.00 (0.00)
DurationPerStep               : 0.41 (0.24)

Stats for ML in CartPole runGroup: dev
EpisodeTotalReward            : 496.60 (10.20)
EpisodeTotalCuratedReward     : nan (nan)
DurationPerStep               : 0.25 (0.06)

Stats for Random in CartPole runGroup: dev
EpisodeTotalReward            : 22.08 (11.60)
EpisodeTotalCuratedReward     : nan (nan)
DurationPerStep               : 0.07 (0.02)

Stats for Human in CartPole runGroup: dev
EpisodeTotalReward            : 189.00 (139.51)
EpisodeTotalCuratedReward     : 0.78 (0.07)
DurationPerStep               : 0.43 (0.38)


  print(f"{key:<30}: {values.mean():.2f} ({values.std():.2f})")
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  print(f"{key:<30}: {values.mean():.2f} ({values.std():.2f})")
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
