# Analysis of competition results

In [4]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pickle
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import pylab as plt

import DDPG
import torch

## 0.0 Load a pkl from the competition

In [34]:
# Open the recording of one competition game and print a few facts about its layout.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so only
# open files that come from a trusted source.

with open('results_competition/f98b6cbf-bb38-45f8-81d1-c01ac3465613.pkl', 'rb') as pkl_file:
    data = pickle.load(pkl_file)

# Names of all entries stored for this game.
print(data.keys())

# Number of rounds recorded for this game.
print(data["num_rounds"])

# Shape of the combined action array: a game has at most 4 rounds of 251 actions each.
# NOTE(review): the remaining axes presumably index the player and the per-player
# action values -- confirm against the recorder.
print(data["actions"].shape)

# Length of the first round: one more observation than actions is stored.
for key in ("actions_round_0", "observations_round_0"):
    print(len(data[key]))

# Width of a single entry: number of values per action and per observation
# (e.g. x pos player one, y pos player one, ...).
# NOTE(review): an action entry presumably holds the action values of both players
# back to back -- confirm.
for key in ("actions_round_0", "observations_round_0"):
    print(len(data[key][0]))


# more evaluations follow

dict_keys(['actions_round_0', 'observations_round_0', 'actions_round_1', 'observations_round_1', 'actions_round_2', 'observations_round_2', 'actions_round_3', 'observations_round_3', 'num_rounds', 'actions'])
[array([4])]
(1004, 2, 4)
251
252
8
18


## 0.1 Helper Functions

In [5]:
def running_mean(x, N):
    """Return the running mean of ``x`` over a sliding window of ``N`` samples.

    Only full windows are evaluated, so the result has ``len(x) - N + 1``
    entries (and is empty when ``N`` exceeds the number of samples).

    Args:
        x: Sequence or array of numbers; it is flattened by ``np.insert``.
        N: Window size, an integer >= 1.

    Returns:
        A float numpy array holding the mean of every full window.

    Raises:
        ValueError: If ``N`` is smaller than 1. Without this check a negative
            window silently yields meaningless values and ``N == 0`` fails
            with an unrelated broadcasting error.
    """
    if N < 1:
        raise ValueError(f"window size N must be >= 1, got {N}")
    # Prepending a zero makes cumsum[i] the sum of the first i samples, so the
    # sum of the window ending at sample i is cumsum[i] - cumsum[i - N].
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / float(N)

In [6]:
def run(env, agent, n_episodes=100, noise=0, max_steps=2000):
    """Roll out ``agent`` in ``env`` and collect the visited data.

    Each episode starts with ``env.reset()`` and ends when the environment
    reports termination or truncation, or after ``max_steps`` steps. The mean
    episode reward is printed once all episodes are finished.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns
            ``(state, reward, terminated, truncated, info)``.
        agent: Object providing ``act(state, noise)``.
        n_episodes: Number of episodes to play.
        noise: Value passed unchanged to ``agent.act`` on every step.
        max_steps: Upper bound on the number of steps per episode.

    Returns:
        Tuple ``(observations, actions, rewards)``. ``observations`` and
        ``actions`` are numpy arrays with one entry per executed step,
        concatenated over all episodes; ``rewards`` is a list holding the
        summed reward of each episode.
    """
    rewards = []
    observations = []
    actions = []
    for _ in range(n_episodes):
        ep_reward = 0
        state, _info = env.reset()
        for _ in range(max_steps):
            action = agent.act(state, noise)
            state, reward, terminated, truncated, _info = env.step(action)
            # The stored observation is the state reached *after* the action;
            # the observation returned by reset() is not recorded.
            observations.append(state)
            actions.append(action)
            ep_reward += reward
            if terminated or truncated:
                break
        rewards.append(ep_reward)
    print(f'Mean reward: {np.mean(rewards)}')
    return np.asarray(observations), np.asarray(actions), rewards

## TODO 1. Analyse actions and observations