In [None]:
from matplotlib import pyplot as plt
import matplotlib.colors as colors
%matplotlib inline

import torch
from dqn_arch.model import *
import torchvision
from torchvision import datasets, transforms
import numpy as np
import torch.nn as nn

from itertools import product

# Device identifier handed to tnet.set_weight_vector(...) in later cells when
# perturbed weight vectors are written back into the network.
device = 'cpu'

In [None]:
from rl.evaluate import evaluate_net
from rl.model import Policy
from rl.envs import make_vec_envs

# Build the evaluation environment. The positional arguments are forwarded
# unchanged to the project helper rl.envs.make_vec_envs; their meaning is
# defined there (NOTE(review): presumably env name, seed, number of processes,
# ... -- confirm against rl/envs.py).
env = make_vec_envs('BeamRiderNoFrameskip-v4', 1001, 1,
                    None, None, False, device, False)

# The checkpoint stores a (state_dict, ob_rms) pair. map_location keeps the
# load working on this machine even if the checkpoint was written from a GPU.
# NOTE(review): torch.load unpickles arbitrary objects -- only load
# checkpoints from a trusted source.
state_dict, ob_rms = torch.load('./BeamRiderNoFrameskip-v4.pt', map_location=device)

# Non-recurrent policy with the same observation/action spaces as the
# environment, initialised from the saved weights.
actor_critic = Policy(env.observation_space.shape, env.action_space, base_kwargs={'recurrent': False})

actor_critic.load_state_dict(state_dict)

In [None]:
# ---------------------------------------------------------------------------
# Evaluate the policy on a 2-D grid spanned by two random directions in
# weight space, centred on the trained weights.
# ---------------------------------------------------------------------------
zero_vector = np.zeros(actor_critic.get_weight_vector().shape)
seed = 8
np.random.seed(seed + 1000)

# First direction: Gaussian sample rescaled to unit length.
b1 = np.random.normal(loc=zero_vector, scale=1)
b1 = b1 / np.linalg.norm(b1)

# Second direction: draw once, remove the component along b1 (Gram-Schmidt)
# and renormalise. This yields an exactly orthogonal unit vector in a single
# draw instead of re-sampling full-size vectors until |b1.b2| < 1e-5, which
# has no bound on the number of iterations.
b2 = np.random.normal(loc=zero_vector, scale=1)
b2 = b2 - np.dot(b1, b2) * b1
b2 = b2 / np.linalg.norm(b2)
assert abs(np.dot(b1, b2)) < 1e-5, "b1 and b2 should be orthogonal"

# filterwisely_normalize is a method of the project Policy class.
# NOTE(review): presumably it rescales each filter of the direction relative
# to the trained weights -- confirm in rl/model.py. The result is not
# guaranteed to stay orthogonal or unit length.
b1, b2 = actor_critic.filterwisely_normalize(b1), actor_critic.filterwisely_normalize(b2)

# Grid of coefficients: `scale` points in [-r, r] along each direction.
r = 1.0
scale = 11
gx = np.linspace(-r, r, scale)
gy = np.linspace(-r, r, scale)
nx, ny = len(gx), len(gy)

# loss_map[i, j] / std_map[i, j] hold the mean / standard deviation of the
# repeated evaluate_net results at origin + gx[i] * b1 + gy[j] * b2.
# Note: np.meshgrid(gx, gy) with default indexing is laid out as [j, i], so
# these arrays must be transposed when plotted against such a meshgrid.
loss_map = np.zeros((nx, ny))
std_map = np.zeros((nx, ny))

# Perturb a separate copy of the network so actor_critic keeps the trained
# weights.
tnet = Policy(env.observation_space.shape, env.action_space, base_kwargs={'recurrent': False})
tnet.load_state_dict(actor_critic.state_dict())
origin = tnet.get_weight_vector()

# Number of evaluate_net calls per grid point.
rep = 5
for i, j in product(range(nx), range(ny)):
    v = origin + b1 * gx[i] + b2 * gy[j]
    tnet.set_weight_vector(v, device)
    loss = [evaluate_net(tnet, env) for _ in range(rep)]
    loss_map[i, j] = np.mean(loss)
    std_map[i, j] = np.std(loss)
    print(i, j, loss_map[i, j])

In [None]:
# Contour plots of the grid evaluation.
# np.meshgrid(gx, gy) uses 'xy' indexing by default: X[row, col] == gx[col]
# and Y[row, col] == gy[row]. loss_map / std_map are filled as [i, j] with i
# indexing gx and j indexing gy, so they are transposed here to put the b1
# coefficient on the x-axis and the b2 coefficient on the y-axis.
X, Y = np.meshgrid(gx, gy)

plt.figure()
cs = plt.contour(X, Y, loss_map.T)
plt.clabel(cs, inline=1, fontsize=10)

plt.figure()
plt.contour(X, Y, std_map.T)

In [None]:
# Collect training data for the Gaussian process: n_train random coefficient
# pairs (each component drawn from [-0.1, 0.1)) and one evaluate_net result
# per pair at origin + c1 * b1 + c2 * b2.
n_train = 25
GP_X = 0.2 * (np.random.random(size=(n_train, 2)) - 0.5)
GP_Y = []

for idx in range(len(GP_X)):
    c1, c2 = GP_X[idx]
    # Move the scratch network to the sampled point in the (b1, b2) plane.
    tnet.set_weight_vector(origin + b1 * c1 + b2 * c2, device)
    value = evaluate_net(tnet, env)
    GP_Y.append(value)
    print(idx, c1, c2, value)
print()

In [None]:
# Side-by-side contour plots of the mean and standard deviation maps.
# np.meshgrid(gx, gy) uses 'xy' indexing (X[row, col] == gx[col]), whereas
# loss_map / std_map are stored as [gx index, gy index]; transpose them so the
# b1 coefficient is on the x-axis and the b2 coefficient on the y-axis.
X, Y = np.meshgrid(gx, gy)

fig, axes = plt.subplots(1, 2, figsize=(15, 10))

# Label each contour set through its own Axes rather than the implicit
# "current axes" used by plt.clabel.
cs = axes[0].contour(X, Y, loss_map.T)
axes[0].clabel(cs, inline=1, fontsize=10)
axes[0].set_title('Loss')

cs = axes[1].contour(X, Y, std_map.T)
axes[1].clabel(cs, inline=1, fontsize=10)
axes[1].set_title('Std')

fig.suptitle('beamrider')


In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, RBF, WhiteKernel, ConstantKernel, ExpSineSquared ,Matern

In [None]:
# Sanity check: number of coefficient pairs available for fitting the GP.
len(GP_X)

In [None]:
# Gaussian-process surrogate over the (b1, b2) coefficient plane.
# The RBF length scale below is only the starting value; fitting optimises
# the kernel hyperparameters, restarting the optimiser 9 additional times.
kernel = RBF(length_scale=1e-5)
gp = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=9,
)

gp.fit(GP_X, GP_Y)

In [None]:
# Log marginal likelihood of the fitted GP; called without arguments, so it
# reports the value for the kernel hyperparameters selected during fit.
gp.log_marginal_likelihood()

In [None]:
test_X

In [None]:
test_X = np.vstack((X.flatten(), Y.flatten())).T
test_X.shape

In [None]:
# GP posterior at every grid point: mean vector and (because return_cov=True)
# the covariance between the query points.
y_mean, y_cov = gp.predict(test_X, return_cov=True)

# NOTE(review): posterior_nums is not used in any of the visible cells.
posterior_nums = 3



In [None]:
# Display the evaluated grid; loss_map[i, j] is the mean evaluate_net result
# at coefficients (gx[i], gy[j]).
loss_map

In [None]:
# View the GP mean as a 2-D grid with `scale` columns. test_X was built from
# np.meshgrid(gx, gy), so rows here follow gy and columns follow gx -- the
# transpose of loss_map's [gx index, gy index] layout.
y_mean.reshape(-1,scale)

In [None]:
# Compare the directly evaluated grid (top) with the GP posterior mean
# (bottom) on the same axes.
fig, axs = plt.subplots(2, figsize=(10, 20))

# X, Y come from np.meshgrid(gx, gy) ('xy' indexing: X[row, col] == gx[col]),
# while loss_map is stored as [gx index, gy index]; transpose it so both
# panels share the same orientation (b1 coefficient on x, b2 on y).
ax = axs[0]
cs = ax.contour(X, Y, loss_map.T)
ax.clabel(cs, inline=1, fontsize=10)
ax.set_title('Grid evaluation (mean)')

# y_mean was predicted at the flattened (X, Y) points, so reshaping it to
# X.shape restores the meshgrid layout directly.
ax = axs[1]
cs = ax.contour(X, Y, y_mean.reshape(X.shape))
ax.clabel(cs, inline=1, fontsize=10)
ax.set_title('GP posterior mean')

plt.tight_layout()
plt.show()