# Benchmarks and plots of SVM PPO optim envs

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
import pickle

%matplotlib qt

In [None]:
## All the data of a run is stored in that run's folder
run = 'run_6/'
name_dir = 'runs_optim_envs/'+run

## Print the info of this run
# Use a context manager so the file handle is closed as soon as the pickle
# has been read (the bare open() call left it open).
with open(name_dir+'info.p', 'rb') as info_file:
    info = pickle.load(info_file)
for k, value in info.items():
    print(k, ' : ', value)

In [None]:
## Number of episodes = number of files in the run folder whose name starts with 'rew'
rew_files = f'{name_dir}rew*'
matching_rew_files = glob.glob(rew_files)
num_ep = len(matching_rew_files)
print('The number of episodes is then:', num_ep)

In [None]:
## Score data analysis
# The score of episode i is the mean, over the entries stored in rew_{i}.p,
# of the summed rewards of each entry.
# NOTE: the loop reads rew_0.p ... rew_{num_ep-1}.p, so the reward files are
# expected to be numbered contiguously from 0.
scores = np.zeros(num_ep)
for i in range(num_ep):
    # Context manager closes each reward file right after it is read,
    # instead of leaking one open handle per episode.
    with open(name_dir+f'rew_{i}.p', 'rb') as rew_file:
        rew = pickle.load(rew_file)
    scores[i] = np.mean([np.sum(ep_rews) for ep_rews in rew])

## Max and min
index_max = np.argmax(scores)
index_min = np.argmin(scores)
print(f'The max score is at episode {index_max} and is equal to {scores[index_max]}')
print(f'The min score is at episode {index_min} and is equal to {scores[index_min]}')

## Plot scores
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')

In [None]:
## Rewards of the episode with max score
file_rew_max = name_dir+f'rew_{index_max}.p'
# Context manager so the pickle file is closed once loaded
with open(file_rew_max, 'rb') as rew_file:
    rew_max = pickle.load(rew_file)
print(rew_max)

# Summed reward of each entry of rew_max; computed once and reused for both
# the plot and the printed score.
rew_max_sums = [np.sum(ep_rews) for ep_rews in rew_max]
plt.plot(np.arange(len(rew_max)), rew_max_sums)
plt.ylabel('Reward')
plt.xlabel('Step #')

print('This episode collected a score of = ', np.mean(rew_max_sums))

In [None]:
## Plot energies of the episode with max score
file_en_max = name_dir+f'en_{index_max}.p'
# Context manager so the pickle file is closed once loaded
with open(file_en_max, 'rb') as en_file:
    en_max = pickle.load(en_file)
print(en_max)
# For every index along the first axis, plot the last element along the second axis.
# NOTE(review): presumably axis 0 is the step and axis 1 the trajectory — confirm.
plt.plot(np.arange(len(en_max)), [en_max[i,-1] for i in range(len(en_max))])
plt.ylabel('Energy')
plt.xlabel('Step #')

print('The last energy of the episode with max score is = ', en_max[-1,-1])

In [None]:
## Plot principal dim of the episode with max score
file_pri_max = name_dir+f'pri_dim_{index_max}.p'
# Context manager so the pickle file is closed once loaded
with open(file_pri_max, 'rb') as pri_file:
    pri_max = pickle.load(pri_file)
print(pri_max)
# One point per entry of pri_max: the mean of that entry's values
plt.plot(np.arange(len(pri_max)), [np.mean(step_dims) for step_dims in pri_max])
plt.ylabel('Pri dims')
plt.xlabel('Step #')

print('The mean over trajs of last prin dim of the episode with max score is = ', np.mean(pri_max[-1]))

In [None]:
## Plot full dim of the episode with max score
file_full_max = name_dir+f'full_dim_{index_max}.p'
# Context manager so the pickle file is closed once loaded
with open(file_full_max, 'rb') as full_file:
    full_max = pickle.load(full_file)
print(full_max)
# One point per entry of full_max: the mean of that entry's values
plt.plot(np.arange(len(full_max)), [np.mean(step_dims) for step_dims in full_max])
plt.ylabel('Full dims')
plt.xlabel('Step #')

print('The mean over trajs of last full dim of the episode with max score is = ', np.mean(full_max[-1]))

In [None]:
## Sigmas analysis (comparing random sigmas with the ones collected in the episode with max score)
sigmas_random = np.loadtxt('sigmas_random.out')
# Sum of squares of the first three columns of each row
sigmas_random_squared = sigmas_random[:,0]**2 + sigmas_random[:,1]**2 + sigmas_random[:,2]**2

file_sigmas_max = name_dir+f'sigmas_{index_max}.p'
# Context manager so the pickle file is closed once loaded
with open(file_sigmas_max, 'rb') as sigmas_file:
    sigmas_rl = pickle.load(sigmas_file)
# Affine rescaling x -> 54.5*x + 55.5 (sends -1 to 1 and +1 to 110).
# NOTE(review): presumably undoes a [-1, 1] normalization of the sigmas — confirm against the env.
sigmas_rl = sigmas_rl*109/2 + 111/2
# Last element along the first two axes
sigmas_rl_last = sigmas_rl[-1,-1]
print(sigmas_rl_last)
sigmas_rl_squared = sigmas_rl_last[:,0]**2 + sigmas_rl_last[:,1]**2 + sigmas_rl_last[:,2]**2
# .detach().numpy() suggests a torch tensor here — TODO confirm
sigmas_rl_squared = sigmas_rl_squared.detach().numpy()
# Overlaid, semi-transparent histograms: random sigmas first, RL sigmas second
plt.hist(sigmas_random_squared, bins=15, alpha=0.5)
plt.hist(sigmas_rl_squared, bins=15, alpha=0.5)

In [None]:
## Rewards of the episode with min score
file_rew_min = name_dir+f'rew_{index_min}.p'
# Context manager so the pickle file is closed once loaded
with open(file_rew_min, 'rb') as rew_file:
    rew_min = pickle.load(rew_file)
print(rew_min)

# Summed reward of each entry of rew_min; computed once and reused for both
# the plot and the printed score.
rew_min_sums = [np.sum(ep_rews) for ep_rews in rew_min]
plt.plot(np.arange(len(rew_min)), rew_min_sums)
plt.ylabel('Reward')
plt.xlabel('Step #')
# Fix: the score is computed from rew_min (the original used rew_max here,
# so it reported the max-score episode's score in the min-score cell).
print('This episode collected a score of = ', np.mean(rew_min_sums))

In [None]:
## Plot energies of the episode with min score
file_en_min = name_dir+f'en_{index_min}.p'
# Context manager so the pickle file is closed once loaded
with open(file_en_min, 'rb') as en_file:
    en_min = pickle.load(en_file)
print(en_min)
# For every index along the first axis, plot the last element along the second axis.
# NOTE(review): presumably axis 0 is the step and axis 1 the trajectory — confirm.
plt.plot(np.arange(len(en_min)), [en_min[i,-1] for i in range(len(en_min))])
plt.ylabel('Energy')
plt.xlabel('Step #')

# Fix: the message now says "min score" (it was copy-pasted as "max score")
print('The last energy of the episode with min score is = ', en_min[-1,-1])

In [None]:
## Plot principal dim of the episode with min score
file_pri_min = name_dir+f'pri_dim_{index_min}.p'
# Context manager so the pickle file is closed once loaded
with open(file_pri_min, 'rb') as pri_file:
    pri_min = pickle.load(pri_file)
print(pri_min)
# One point per entry of pri_min: the mean of that entry's values
plt.plot(np.arange(len(pri_min)), [np.mean(step_dims) for step_dims in pri_min])
plt.ylabel('Pri dims')
plt.xlabel('Step #')

# Fix: the message now says "min score" (it was copy-pasted as "max score")
print('The mean over trajs of last prin dim of the episode with min score is = ', np.mean(pri_min[-1]))

In [None]:
## Plot full dim of the episode with min score
file_full_min = name_dir+f'full_dim_{index_min}.p'
# Context manager so the pickle file is closed once loaded
with open(file_full_min, 'rb') as full_file:
    full_min = pickle.load(full_file)
print(full_min)
# One point per entry of full_min: the mean of that entry's values
plt.plot(np.arange(len(full_min)), [np.mean(step_dims) for step_dims in full_min])
plt.ylabel('Full dims')
plt.xlabel('Step #')

# Fix: the message now says "min score" (it was copy-pasted as "max score")
print('The mean over trajs of last full dim of the episode with min score is = ', np.mean(full_min[-1]))

In [None]:
## Sigmas analysis (comparing random sigmas with the ones collected in the episode with min score)
sigmas_random = np.loadtxt('sigmas_random.out')
# Sum of squares of the first three columns of each row
sigmas_random_squared = sigmas_random[:,0]**2 + sigmas_random[:,1]**2 + sigmas_random[:,2]**2

file_sigmas_min = name_dir+f'sigmas_{index_min}.p'
# Context manager so the pickle file is closed once loaded
with open(file_sigmas_min, 'rb') as sigmas_file:
    sigmas_rl = pickle.load(sigmas_file)
# Affine rescaling x -> 54.5*x + 55.5 (sends -1 to 1 and +1 to 110).
# NOTE(review): presumably undoes a [-1, 1] normalization of the sigmas — confirm against the env.
sigmas_rl = sigmas_rl*109/2 + 111/2
# Last element along the first two axes
sigmas_rl_last = sigmas_rl[-1,-1]
print(sigmas_rl_last)
sigmas_rl_squared = sigmas_rl_last[:,0]**2 + sigmas_rl_last[:,1]**2 + sigmas_rl_last[:,2]**2
# .detach().numpy() suggests a torch tensor here — TODO confirm
sigmas_rl_squared = sigmas_rl_squared.detach().numpy()
# Overlaid, semi-transparent histograms: random sigmas first, RL sigmas second
plt.hist(sigmas_random_squared, bins=15, alpha=0.5)
plt.hist(sigmas_rl_squared, bins=15, alpha=0.5)