# Plot results for Aintelope
Run these blocks for all tests, then scroll to the title you're interested in:


In [23]:
%%capture
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from matplotlib import cm
import matplotlib.collections as mcoll
import matplotlib.path as mpath

from torch import Tensor

import dateutil.parser as dparser
import sys
import os
import seaborn as sns

import aintelope.training.dqn_training
from aintelope.training.dqn_training import ReplayMemory
import aintelope.agents
from aintelope.agents import get_agent_class
#from aintelope.agents.inference_agent import InferenceAgent
from aintelope.environments.savanna_gym import SavannaGymEnv
from omegaconf import DictConfig, OmegaConf

In [14]:
root_dir = os.path.split(os.path.split(os.getcwd())[0])[0]

if root_dir not in sys.path:
    sys.path.append(root_dir)

In [15]:
# Paths and directories
outputs_dir = root_dir+'/outputs/' 
conf_dir = root_dir+'/aintelope/config/config_experiment.yaml'

available_records = os.listdir(outputs_dir)
dirs = [os.path.join(outputs_dir, f) for f in available_records] # add path to each file
dirs.sort(key=lambda x: os.path.getmtime(x))
latest_exp_dir = dirs[-1] #arbitrary use of latest

print(available_records)

#last_checkpoint = latest_exp_dir+"/checkpoints/last.ckpt"

# Load a model for evaluation
def load_checkpoint(PATH):
    #https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
    model = Net()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    #epoch = checkpoint['epoch']
    #loss = checkpoint['loss']

    model.eval()
    return model
    


['hunger_20231127181536']


# Old plots

In [None]:

print(latest_exp_dir)
print(dparser.parse(latest_exp_dir,fuzzy=True))
df = pd.read_csv(latest_exp_dir+"/memory_records.csv")
print(df.columns)

In [None]:
%%capture
'''
WIP, plot what the agent sees (needs changes to InferenceAgent etc.)
Check action values per location. Now expected reward for moving into location, but could also be
eating in any location, or of course mapping where the food/agents are. 
'''
cfg = OmegaConf.load(conf_dir)

# load environment agent
env = SavannaGymEnv(env_params=cfg.hparams.env_params)
env.reset() #this is also init...
# get the brains from memory checkpoints
model = load_checkpoint(last_checkpoint)

In [None]:
from collections import namedtuple

keys = (["agent_coords"] + 
        [f"grass_patch_{i}" for i in range(env.metadata["amount_grass_patches"])] + 
        [f"water_hole_{i}" for i in range(env.metadata["amount_water_holes"])])
StateTuple = namedtuple("StateTuple", {k: np.ndarray for k in keys})

x = []
y = []
event_x = []
event_y = []
event_type = []
food_x = []
food_y = []
water_x = []
water_y = []
for _ ,row in df.iterrows():
    
    state = eval(row['state'])
    #print(state)
    x.append(state[0][0])
    y.append(state[0][1])
    
    #refactor
    food_x.append(state[1][0])
    food_y.append(state[1][1])
    #food_x.append(state[2][0])
    #food_y.append(state[2][1])    

    if row['instinct_events'] != '[]':
        event_x.append(x[-1])
        event_y.append(y[-1])
        event_type.append(row['instinct_events'])

In [None]:
agent_df = pd.DataFrame(data={'x':x, 'y':y})
print(agent_df.head(), len(agent_df))

food_df = pd.DataFrame(data={'x':food_x, 'y':food_y})
print(food_df.head(), len(food_df))

#water_df = pd.DataFrame(data={'x':water_x, 'y':water_y})
#print(water_df.head(), len(water_df))

In [None]:
event_df = pd.DataFrame(data={'x':event_x, 'y':event_y, 'event_type':event_type})
print(len(event_df))
print(event_df.head())

In [None]:
plt.plot(agent_df['x'], agent_df['y'], '.r-')
plt.plot(food_df['x'], food_df['y'], '.g', markersize=15)
#plt.plot(water_df['x'], water_df['y'], '.b', markersize=15)
plt.show()

In [None]:
# plot reward received over time
df['reward'].plot()

In [None]:
color_map = 'autumn' # starts yellow, goes orange, then red
n_points = len(agent_df)

fig = plt.figure()
ax1 = fig.add_subplot(111) 
cm = plt.get_cmap(color_map)
for i in range(10):
    ax1.set_prop_cycle('color', cm(np.linspace(0, 1, n_points - 1, endpoint=False)))
    for i in range(n_points - 1):
        plt.plot(agent_df['x'][i:i+2], agent_df['y'][i:i+2])
plt.plot(food_df['x'], food_df['y'], '.g', markersize=15)
#plt.plot(water_df['x'], water_df['y'], '.b', markersize=15)
plt.show()

# Valuemaps for actions
Note, these maps are new ones and don't correlate with the above ones as we randomly regenerate them.

In [None]:
%%capture
'''
WIP, plot what the agent sees (needs changes to InferenceAgent etc.)
Check action values per location. Now expected reward for moving into location, but could also be
eating in any location, or of course mapping where the food/agents are. 
'''
cfg = OmegaConf.load(conf_dir)

# load environment agent
env = SavannaGymEnv(env_params=cfg.hparams.env_params)
env.reset() #this is also init...
# get the brains from memory checkpoints
model = load_checkpoint(latest_exp_dir+"/checkpoints/") # FIXME was last.cktp

In [None]:
# move the agent into each square and ask for its values for each action, then add that direction into the map
valuemap = np.zeros((env.metadata['map_max']+2,env.metadata['map_max']+2,4))
agent = env.agents[0]

ACTION_MAP = np.array([[0, 1], [1, 0], [0, -1], [-1, 0]]) # This is a copy from savanna.py, should be an accessible param
for x in range(0, env.metadata['map_max']):
    for y in range(0, env.metadata['map_max']):
        if (env.grass_patches == [x,y]).all(1).any():
            continue
        if (env.water_holes == [x,y]).all(1).any():
            continue
        env.set_agent_position(agent, np.array([x,y]))
        observation = env.observe(agent)
        #print(env.agent_states[agent])
        action_vals = model(Tensor(observation)).detach().numpy()
        offset = ACTION_MAP
        for action in range(len(ACTION_MAP)):
            x_ = offset[action][0]+x
            y_ = offset[action][1]+y
            valuemap[x_,y_,action] = action_vals[action]
            
valuemap = np.sum(valuemap,2)/len(ACTION_MAP)

#print(valuemap)

In [None]:
sns.heatmap(valuemap[1:-1,1:-1])

In [None]:
#env.render() isnt working atm
maps = np.zeros((env.metadata['map_max'],env.metadata['map_max']))
for grs in env.grass_patches:
    print(grs[0])
    maps[int(grs[0]),int(grs[1])] = 2.0
for wtr in env.water_holes:
    maps[int(wtr[0]),int(wtr[1])] = 4.0
sns.heatmap(maps)
# RED FOOD, LIGHT water

In [None]:
env.set_agent_position(agent, np.array([2,2]))
observation = env.observe(agent)
action_vals = model(Tensor(observation)).detach().numpy()
print(action_vals)

# Model performance plots
These plots don't have the exploration bonus as confabulators (such as epsilon-greedy).
Run the model N times, then change n_latest to this N and run the block.

In [11]:
def testrun(model,env):
    # Run each model for 10 different resets, with 10 different locations on the map.
    # Gather the cumulative reward, -1 has to be given on each step though
    # Or reset each time the food is found?
    
    action_space = env.action_space
    observation, info = env.reset()
    n_observations = len(observation)

    trainer = Trainer(
        cfg, n_observations, action_space
    ) 
    get_agent_class(cfg.hparams.agent_id)(
        0,
        trainer,
        0,#cfg.hparams.warm_start_steps,
        **cfg.hparams.agent_params,
    )
    epsilon = 0.0
    device = "cpu"
    start_pos = [[0,0],
                [env.metadata['map_max'],env.metadata['map_max']],
                [env.metadata['map_max'],0],
                [0,env.metadata['map_max']]
                ] #list of starting positions for agent, to test robustly each model
    results = []
    rewards = []
    for j in range(len(start_pos)):
        agent.reset()
        env.set_agent_position(agent, np.array(start_pos[j]))
        score = 0
        for i in range(20):
            reward, done = agent.play_step(model, epsilon, device)
            rewards.append(reward)
            score -= 1 # to make sure that longer play is penalized? TODO
            if reward > 0:
                break
        results.append(score)
    return results, rewards


In [18]:
%%capture
# Load all the models. Note that to make a statistical thing, run the training several times and then
# take N-latest.
n_latest = 5 # sample of n runs you've just 'make run-training'ed a moment ago
latest_dirs = dirs[-n_latest:-1]

# Read and sort models for testing
models = []
for exp_dir in latest_dirs:
    mod_dir = os.listdir(exp_dir+"/checkpoints/")
    runs_dir = [os.path.join(exp_dir+"/checkpoints/", m) for m in mod_dir]
    runs_dir.sort(key=lambda x: os.path.getmtime(x))
    models.append(runs_dir)

cfg = OmegaConf.load(conf_dir)
env = SavannaGymEnv(env_params=cfg.hparams.env_params)

results = np.zeros([len(models),len(models[0])])
rewards = []
for i in range(len(models)): # statistical significance
    for j in range(len(models[0])): # number of epochs per run
        model = load_checkpoint(models[i][j])
        model.eval()
        # run model
        result, reward = testrun(model, env)
        rewards.append(reward)
        results[i,j] = sum(result)

IndexError: list index out of range

In [22]:
#print(rewards)
print(dirs)

['/home/joel/project/aintelope/outputs/hunger_20231127181536']


In [None]:
# Plot results
print(results)
print(results.mean(axis=1))
print(results.var(axis=1))
event_df = pd.DataFrame(data={'x':event_x, 'y':event_y})
df['reward'].plot()
# what are these rewards?
# which way is the matrix?
# you should do preset envs with static distances
# check agent epsilon
# add nosmell smell to agent somehow
# figure out REWARD...
# qagent 120 add reward for emotions
#print(agent.)

In [None]:
hparams = cfg.hparams
print(hparams.agent_params)

In [None]:
import math
import random
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from aintelope.agents.memory import ReplayBuffer, Experience

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)