In [1]:
# Export the cuBLAS workspace configuration for this kernel session.
# NOTE(review): ':4096:8' is one of the values PyTorch documents for deterministic
# cuBLAS behaviour; presumably this is here for reproducibility and must run before
# any CUDA work starts - confirm.
%set_env CUBLAS_WORKSPACE_CONFIG=:4096:8

env: CUBLAS_WORKSPACE_CONFIG=:4096:8


In [2]:
import torch
import argparse
from pydreamer.models.dreamer import Dreamer
from pydreamer.tools import mlflow_load_checkpoint
from pydreamer.tools import (configure_logging, mlflow_log_params,
                             mlflow_init, print_once, read_yamls)
from distutils.util import strtobool
import os
import json
import mlflow
from torchsummary import summary
import pickle
from PIL import Image
import numpy as np


def get_worker_info():
    """Derive this process's role from the ``TF_CONFIG`` environment variable.

    ``TF_CONFIG`` is expected to hold a JSON document with a ``cluster`` mapping
    and a ``task`` mapping (``type`` and ``index``).

    Returns:
        ``(worker_type, worker_index)``. Both are ``None`` when ``TF_CONFIG`` is
        not set or when the cluster lists no ``worker`` entries. Otherwise
        ``worker_type`` is ``'learner'`` for a ``chief`` task or ``'generator'``
        for a ``worker`` task, and ``worker_index`` is the task index as ``int``.

    Raises:
        KeyError: if the task type is neither ``chief`` nor ``worker``, or if
            the expected keys are missing from the JSON document.
    """
    raw_tf_config = os.environ.get('TF_CONFIG')
    if raw_tf_config is None:
        # No TF_CONFIG: not a Google Vertex AI run.
        return None, None

    # TF_CONFIG indicates Google Vertex AI run
    tf_config = json.loads(raw_tf_config)
    print_once('TF_CONFIG is set:', tf_config)

    if not tf_config['cluster'].get('worker'):
        # Without workers in the cluster this is not a distributed run.
        return None, None

    role_by_task_type = {'chief': 'learner', 'worker': 'generator'}
    role = role_by_task_type[str(tf_config['task']['type'])]
    index = int(tf_config['task']['index'])
    print_once('Distributed run detected, current worker is:', f'{role} ({index})')
    return role, index

configure_logging('[launcher]')

# A notebook has no command line, so the launcher invocation
#   --configs defaults atari --env_id Atari-Alien-V5
# is emulated with the two lists below.
args_list = ['defaults', 'atari']
remaining = ['--env_id', 'Atari-Alien-V5']

# Config from YAML: sections are merged in order, later ones override earlier ones.
# Each entry may itself be a comma-separated list of section names.
conf = {}
configs = read_yamls('./config')
for name in args_list:
    for n in name.split(','):
        conf.update(configs[n])


def _str_to_bool(text):
    """Parse a command-line boolean ('true', 'no', '1', ...) into a real bool."""
    return bool(strtobool(text))


# Override config from command-line: every YAML key becomes a `--key` option
# whose type is taken from the YAML default (str when the default is None).
parser = argparse.ArgumentParser()
for key, value in conf.items():
    type_ = type(value) if value is not None else str
    if type_ is bool:
        # bool('False') is True, so booleans need an explicit string parser.
        type_ = _str_to_bool
    parser.add_argument(f'--{key}', type=type_, default=value)
conf = parser.parse_args(remaining)

print(conf)

worker_type, worker_index = get_worker_info()
is_main_worker = worker_type is None or worker_type == 'learner'
mlrun = mlflow_init(wait_for_resume=not is_main_worker)
artifact_uri = mlrun.info.artifact_uri
mlflow_log_params(vars(conf))

# Build the model and restore its weights from a checkpoint.
model = Dreamer(conf)
optimizers = tuple()  # no optimizers are restored here; the loop below is a no-op
map_location = None  # None: tensors are restored to the device they were saved from
# The default is the original author's local checkpoint; set PYDREAMER_CHECKPOINT
# to point at another checkpoint without editing the notebook.
path = os.environ.get(
    'PYDREAMER_CHECKPOINT',
    "/home/theomichel/work/pyDreamer/pydreamer-minigrid/mlruns/0/c1a7c69b35fa4570915c6be36f57f2c9/artifacts/checkpoints/latest.pt",
)
checkpoint = torch.load(path, map_location=map_location)
model.load_state_dict(checkpoint['model_state_dict'])
for i, opt in enumerate(optimizers):
    opt.load_state_dict(checkpoint[f'optimizer_{i}_state_dict'])


## Evaluate the model
from train import evaluate
from pydreamer.data import DataSequential, MlflowEpisodeRepository
from torch.utils.data import DataLoader
from pydreamer.preprocessing import Preprocessor, WorkerInfoPreprocess

# Resolve the target device first: the Preprocessor arguments below read
# `device.type`, so `device` has to exist before that expression is evaluated
# (otherwise enabling `conf.amp` raises NameError on a fresh kernel).
device = torch.device(conf.device)

preprocess = Preprocessor(image_categorical=conf.image_channels if conf.image_categorical else None,
                          image_key=conf.image_key,
                          map_categorical=conf.map_channels if conf.map_categorical else None,
                          map_key=conf.map_key,
                          action_dim=conf.action_dim,
                          clip_rewards=conf.clip_rewards,
                          # mixed precision is only requested when running on CUDA
                          amp=conf.amp and device.type == 'cuda')

steps = 10
model.to(device)  # Important: the model must live on the same device as the inputs below

# To be replaced with your state.
# SECURITY: pickle.load can execute arbitrary code - only load files you created or trust.
with open('states/in_state_alien_new10.pkl', 'rb') as f:
    in_state = pickle.load(f)
with open('states/obs_alien_new10.pkl', 'rb') as f:
    obs = pickle.load(f)
print(obs['action'].shape)

# Move every observation entry to the target device (keys are unchanged).
for key in obs:
    obs[key] = obs[key].to(device)
# TODO: the input states are being re-saved in the loop; capture one clean copy instead.

## IMPORTANT: The commented-out cells below have not been retested and might need some adjustments to work


  from .autonotebook import tqdm as notebook_tqdm


Namespace(action_dim=18, actor_dist='onehot', actor_grad='reinforce', adam_eps=1e-05, adam_lr=0.0003, adam_lr_actor=0.0001, adam_lr_critic=0.0001, allow_mid_reset=True, amp=False, aux_critic=False, aux_critic_weight=1.0, batch_length=48, batch_size=32, buffer_size=10000000, buffer_size_offline=0, clip_rewards='tanh', cnn_depth=48, data_workers=4, deter_dim=1024, device='cuda:0', enable_profiler=False, entropy=0.001, env_action_repeat=4, env_id='Atari-Alien-V5', env_id_eval=None, env_no_terminal=False, env_time_limit=27000, eval_batch_size=32, eval_batches=61, eval_interval=2000, eval_samples=1, eval_save_size=1, gamma=0.99, gamma_aux=0.99, generator_prefill_policy='random', generator_prefill_steps=50000, generator_workers=1, generator_workers_eval=0, generator_workers_train=0, goals_size=0, grad_clip=200, grad_clip_ac=200, gru_layers=1, gru_type='gru', hidden_dim=1000, imag_horizon=15, image_categorical=False, image_channels=3, image_decoder='cnn', image_decoder_layers=0, image_decoder

In [3]:
from PIL import Image
from tqdm import tqdm


# Copy the first two components of the loaded input state onto the target device.
in_state_new = tuple(component.to(device) for component in (in_state[0], in_state[1]))

# `forward` is only used when the model translates the world into features for
# the policy to take a decision:
# features, out_state = model.wm.forward(obs,in_state_new)
# `training_step` does everything, even the image prediction:
# loss, features, states, out_state, metrics, tensors = model.wm.training_step(obs,in_state_new,do_image_pred=True)

def tensor_to_image(tensors,image_num=0,dream_num=0):
    """Convert one predicted frame from ``tensors['image_pred']`` to a uint8 image.

    Args:
        tensors: mapping holding an ``'image_pred'`` torch tensor with five
            axes; the last three are treated as (channel, height, width).
        image_num: index along the first axis of ``image_pred``.
        dream_num: index along the second axis of ``image_pred``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` and shape (height, width, channel).
        Values are mapped with ``(x + 0.5) * 255`` and clipped to ``[0, 255]``.
    """
    # Select the single frame before leaving the device so only one image is
    # copied to host memory, and detach so tensors that still track gradients
    # can be converted to numpy as well.
    frame = tensors['image_pred'][image_num, dream_num, :, :, :].detach().cpu().numpy()
    image = frame.transpose(1, 2, 0)  # (C, H, W) -> (H, W, C)
    return ((image + 0.5) * 255.0).clip(0, 255).astype('uint8')

def save_avg(obs,in_state_new,img_shape,save_path="images/avg",image_num=2,dream_num=2,l=1000,imag_horizon=15):
    """Average the predicted dream image over ``l`` runs of ``model.training_step``.

    Uses the notebook-level ``model``. Each iteration calls ``training_step``
    with image prediction and dream tensors enabled, converts the selected
    entry of the dream tensors' ``'image_pred'`` to an image and accumulates it.

    Args:
        obs: observation batch passed unchanged to ``model.training_step``.
        in_state_new: input state passed unchanged to ``model.training_step``.
        img_shape: shape of one converted image; sizes the accumulator.
        save_path: output path prefix; the file is written to ``f"{save_path}-{l}.png"``.
        image_num: first index forwarded to ``tensor_to_image``.
        dream_num: second index forwarded to ``tensor_to_image``.
        l: number of runs to average over (must be >= 1).
        imag_horizon: imagination horizon forwarded to ``model.training_step``.

    Returns:
        The float average image as a numpy array of shape ``img_shape``.

    Raises:
        ValueError: if ``l`` is smaller than 1.
    """
    if l < 1:
        raise ValueError(f"l must be at least 1, got {l}")

    # Create the output directory up front so a missing folder does not make
    # the save fail only after all forward passes have been computed.
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    avg = np.zeros(img_shape)
    with torch.no_grad():
        for _run in tqdm(range(l)):
            _, _, _, tensors, dream_tensors = model.training_step(
                obs, in_state_new, imag_horizon=imag_horizon,
                do_image_pred=True, do_dream_tensors=True)
            # NOTE(review): the original author was unsure whether `dream_tensors`
            # or `tensors` is the right source here - confirm.
            avg += tensor_to_image(dream_tensors, image_num=image_num, dream_num=dream_num)
            # Drop the references promptly so the buffers can be freed between runs.
            del tensors
            del dream_tensors
    avg = avg / l

    im = Image.fromarray(avg.astype(np.uint8))
    im.save(f"{save_path}-{l}.png")
    return avg

# im = Image.fromarray(img_rgb)
# im.save("predicted_dream_alien.jpeg")

In [21]:
# Test direct decoding with the world model: feed a saved batch of latent
# features straight into the image decoder.
# Alternative input file: 'features_dream2/feature_alien_new8.pkl'
with open('features_dream2/feature_alien_new50.pkl', 'rb') as feature_file:
    features_dream = pickle.load(feature_file)
print(features_dream.shape)

with torch.no_grad():
    out = model.wm.decoder.image.forward(features_dream)
print(out.shape)

# Take the frame at index [2, 2], reorder (C, H, W) -> (H, W, C) and map it to
# uint8 pixel values before saving it.
img = np.transpose(out.cpu().numpy()[2, 2, :, :, :], (1, 2, 0))
img = np.clip((img + 0.5) * 255.0, 0, 255).astype('uint8')
im = Image.fromarray(img)
im.save("dream_from_features_new_2.png")

torch.Size([48, 32, 2048])
torch.Size([48, 32, 3, 64, 64])


In [5]:
# #drop each latent variable to zero and see the image prediction
# with torch.no_grad():#imag_horizon=15,
#     losses, new_state, loss_metrics, tensors, dream_tensors = model.training_step(obs,in_state_new,imag_horizon=15,do_image_pred=True,do_dream_tensors=True)#Changed to model.training_step

# #pick image and dream

# origin_prediction = tensor_to_image(tensors,image_num=2,dream_num=2)
# print(tensors.keys())
# del tensors

# print(origin_prediction.shape)


# l = 10
# fig, axs = plt.subplots(1, l,figsize=(20,80))
# for i in range(l):
#     # instate_copy = in_state_new[1].clone()
#     # instate_copy[:,i] = 0
#     # in_state_new =(in_state_new[0],instate_copy)
#     with torch.no_grad():#imag_horizon=15,
#         losses, new_state, loss_metrics, tensors, dream_tensors = model.training_step(obs,in_state_new,imag_horizon=15,do_image_pred=True,do_dream_tensors=True)#Changed to model.training_step
#     img_rgb = tensor_to_image(tensors,image_num=2,dream_num=2)
#     del tensors
#     axs[i].imshow(img_rgb)




In [32]:
# Compare the averaged predictions before and after perturbing the input state.
in_state_new = (in_state[0].to(device),in_state[1].to(device))

with torch.no_grad():
    losses, new_state, loss_metrics, tensors, dream_tensors = model.training_step(obs,in_state_new,imag_horizon=15,do_image_pred=True,do_dream_tensors=True)

# Pick one image/dream; only the shape of this reference frame is used below.
origin_prediction = tensor_to_image(tensors,image_num=2,dream_num=2)
del tensors

# Baseline: average prediction with the unmodified input state.
avg1 = save_avg(obs,in_state_new,origin_prediction.shape,save_path="images/avg_new_unmodif",image_num=2,dream_num=2,l=3)

# Perturbation: set the first 8 columns of the second state component to 1
# and leave the first component untouched (both are cloned so the tensors
# used for the baseline are not modified).
a = in_state_new[1].clone()
print(a.shape)
a[:,0:8] = 1

b = in_state_new[0].clone()
in_state_new = (b,a)

avg2 = save_avg(obs,in_state_new,origin_prediction.shape,save_path="images/avg_new_modif_one_8",image_num=2,dream_num=2,l=3)

# Per-pixel absolute difference, amplified x3 and summed over the colour axis.
# The sum can reach 3 * 3 * 255, so clip to the uint8 range before casting;
# a bare cast would wrap around and corrupt the strongest differences.
diff = np.sum(np.abs(avg1 - avg2)*3,axis=2)
diff = np.clip(diff, 0, 255).astype(np.uint8)
im = Image.fromarray(diff)
im.save("images/diff-same_state_ones_8.png")

# Overlay the difference on the baseline image: it is added to the first
# (red) channel, again clipped so bright pixels saturate instead of wrapping.
overlap = avg1.copy()

print(overlap.shape)
overlap[:,:,0] = np.clip(overlap[:,:,0] + diff, 0, 255)
im = Image.fromarray(overlap.astype(np.uint8))
im.save("images/overlap_8.png")


100%|██████████| 3/3 [00:01<00:00,  2.39it/s]


torch.Size([32, 1024])


100%|██████████| 3/3 [00:01<00:00,  2.44it/s]

(64, 64, 3)





In [7]:
# # Compare the difference when modifying the latent variables
# in_state_new = (in_state[0].to(device),in_state[1].to(device))

# with torch.no_grad():#imag_horizon=15,
#     losses, new_state, loss_metrics, tensors, dream_tensors = model.training_step(obs,in_state_new,imag_horizon=15,do_image_pred=True,do_dream_tensors=True)#Changed to model.training_step

# #pick image and dream
# origin_prediction = tensor_to_image(tensors,image_num=2,dream_num=2)
# del tensors

# #now we compare the difference when modyfing h
# avg1 = save_avg(obs,in_state_new,origin_prediction.shape,save_path="images/avg_new_unmodif",image_num=2,dream_num=2,l=100)
# copy = (in_state_new[0].clone(),in_state_new[1].clone())
# #modify the in state and see the effect
# a = torch.zeros_like(in_state_new[1])#set everything to zero in h
# b = in_state_new[0].clone()
# in_state_new = (b,a)

# avg2 = save_avg(obs,in_state_new,origin_prediction.shape,save_path="images/avg_new_modif",image_num=2,dream_num=2,l=100)

# diff = np.sum(np.abs(avg1 - avg2),axis=2)
# diff = diff.astype(np.uint8)
# im = Image.fromarray(diff)
# im.save("images/diff-dif_state.jpeg")


In [None]:
# Possible next steps: state interpolation, or more advanced neural-network interpretation methods such as LRP (layer-wise relevance propagation).