In [1]:
import sys
import time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import os
import wandb
from transformers import ViTMAEConfig

from custom_models.CustomViT import CustomViT
from custom_models.CustomViTMAE import CustomViTMAE
from transformers.models.vit_mae.modeling_vit_mae import ViTMAEModel
# from tem_dataloader import MultimodalDatasetPerTrajectory
from torch.utils.data import DataLoader

from d3rlpy.algos import CQL
from d3rlpy.dataset import Episode, MDPDataset, Transition
wandb.login() 

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33myadix[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
from custom_models.CustomViT import CustomViT
from custom_models.CustomViTMAE import CustomViTMAE
import torch
# call CustomViT
from transformers import AutoImageProcessor, ViTMAEForPreTraining, ViTMAEConfig
from PIL import Image

# output_dir='/home/tyz/Desktop/11_777/camelmera/weights'
# Location of the fine-tuned multimodal checkpoint.
# NOTE(review): absolute, machine-specific path — consider moving it to a config cell.
trained_model_name = 'multimodal'
output_dir = '/home/ubuntu/weights/' + trained_model_name

# Both models start from the same pretrained ViT-MAE configuration, with hidden
# states exposed in the outputs. Each model gets its own config object.
model_name = "facebook/vit-mae-base"

# Initialize a new CustomViT model
vit_config = ViTMAEConfig.from_pretrained(model_name)
vit_config.output_hidden_states = True
vit_model = CustomViT(config=vit_config)

# Initialize a new CustomViTMAE model and swap in the custom encoder
config = ViTMAEConfig.from_pretrained(model_name)
config.output_hidden_states = True
custom_model = CustomViTMAE(config=config)
custom_model.vit = vit_model

# Load the state_dict from the saved model.
# map_location="cpu" lets a checkpoint that was saved from GPU load on a machine
# without CUDA, and avoids a transient GPU copy; load_state_dict then copies the
# values into the (CPU-constructed) model parameters.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
state_dict = torch.load(os.path.join(output_dir, "pytorch_model.bin"), map_location="cpu")
custom_model.load_state_dict(state_dict)

# don't need decoders: keep only the encoder for embedding extraction
vit_encoder = custom_model.vit

In [None]:
# NOTE(review): stray shell command that is not valid Python and looks unrelated
# to this notebook. Kept commented out so Restart & Run All does not stop on a
# SyntaxError; delete the cell if it is not needed.
# ffmpeg -i C:/Users/szxkd/Documents/Zoom/save/video3487945569.mp4 -filter:v "setpts=PTS/1.25" -filter:a "atempo=1.25" C:/Users/szxkd/Documents/Zoom/save/final_part.mp4

In [3]:
from tem_dataloader import MultimodalDatasetPerTrajectory
# environment_name = 'AmericanDinerExposure'
# environemnt_directory = f'/media/tyz/3B6FFE7354FF3296/11_777/tartanairv2filtered/{environment_name}/Data_easy'
# my_dataset = MultimodalDatasetPerTrajectory(environemnt_directory)
environment_name = 'AbandonedCableExposure'
environemnt_directory = f'/mnt/data/tartanairv2filtered/{environment_name}/Data_easy'
OBSERVATION_SIZE = 768  # number of columns in the observation array built later
ACTION_SIZE = 7         # number of columns in the action array built later
BATCH_SIZE = 64

# Build a dataset/dataloader for the FIRST trajectory folder only.
# os.listdir returns entries in arbitrary order, so sort them to make the
# selected trajectory deterministic across runs and machines.
for folder in sorted(os.listdir(environemnt_directory)):
    trajectory_folder_path = os.path.join(environemnt_directory, folder)
    if not os.path.isdir(trajectory_folder_path):
        continue
    my_dataset = MultimodalDatasetPerTrajectory(trajectory_folder_path)
    # shuffle=False keeps frames in dataset order; the action computation in a
    # later cell takes differences between consecutive poses.
    train_dataloader = DataLoader(my_dataset, batch_size=BATCH_SIZE, shuffle=False)
    break
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise FileNotFoundError(
        f"No trajectory folder found in {environemnt_directory}"
    )

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Processing folder: /mnt/data/tartanairv2filtered/AbandonedCableExposure/Data_easy/P001
Number of images: 1183
Number of depth: 1183
Number of lidar: 1183
Number of pose: 1183


In [4]:
import numpy as np

def reward_function(state_embedding, goal_embedding, threshold=0.05, goal_reward=100):
    """Score a state embedding by its distance to a goal embedding.

    Args:
        state_embedding: array-like embedding of the current state.
        goal_embedding: array-like embedding of the goal; must be
            subtractable from ``state_embedding``.
        threshold: largest distance (inclusive) that counts as reaching
            the goal.
        goal_reward: value returned when the goal is reached.

    Returns:
        ``goal_reward`` if the norm of the difference is at most
        ``threshold``; otherwise the negated norm, so states farther from
        the goal are penalised more.
    """
    gap = np.linalg.norm(state_embedding - goal_embedding)
    # Reaching the goal pays the bonus; any other state pays minus its distance.
    return goal_reward if gap <= threshold else -gap

In [5]:
import functools
import numpy as np
import torch

# Build the transition arrays for offline RL from the trajectory dataloader.
# Each batch is handled as one episode: its last frame is the goal, and its
# last transition is flagged terminal.

# Initialize empty arrays for observations, actions, rewards, and terminals
all_observations = np.empty((0, OBSERVATION_SIZE))
all_actions = np.empty((0, ACTION_SIZE))
all_rewards = np.empty(0)
all_terminals = np.empty(0, dtype=bool)

# Move the encoder to the GPU and switch to eval mode once, not on every batch.
vit_encoder.cuda()
vit_encoder.eval()

for batch_idx, data in enumerate(train_dataloader):
    # get embedding (inference only, so skip building the autograd graph)
    pixel_values = data["pixel_values"].cuda()
    pixel_values1 = data["pixel_values1"].cuda()
    pixel_values2 = data["pixel_values2"].cuda()
    with torch.no_grad():
        outputs = vit_encoder(pixel_values, pixel_values1, pixel_values2, noise=None)
    # Token 0 of the last hidden state is used as the observation vector.
    embedding = outputs.last_hidden_state[:, 0, :]
    observation = embedding.detach().cpu().numpy()
    n_steps = len(observation)  # the final batch may be shorter than BATCH_SIZE

    # get action: difference between consecutive poses inside the batch,
    # padded with one all-zero row so there is one action per observation
    pose = data["pose_values"]
    action = torch.diff(pose, dim=0).numpy()
    action = np.concatenate((action, np.zeros((1, ACTION_SIZE))), axis=0)

    # get reward: distance-based reward towards the last frame of the batch
    goal = observation[-1]
    partial_function = functools.partial(reward_function, goal_embedding=goal)
    reward = np.apply_along_axis(partial_function, 1, observation)

    # get terminals: sized from the actual batch so that all four arrays stay
    # aligned even when the last batch holds fewer than BATCH_SIZE samples
    terminals = np.zeros(n_steps, dtype=bool)
    terminals[-1] = True

    if batch_idx == 0:
        print(observation.shape)
        print(action.shape)
        print(reward.shape)
        print(terminals.shape)

    # Concatenate observations, actions, rewards, and terminals.
    # Appending per batch keeps the partial results usable if the cell is
    # interrupted part-way through.
    all_observations = np.vstack((all_observations, observation))
    all_actions = np.vstack((all_actions, action))
    all_rewards = np.hstack((all_rewards, reward))
    all_terminals = np.hstack((all_terminals, terminals))

(64, 768)
(64, 7)
(64,)
(64,)


KeyboardInterrupt: 

In [6]:
# Report the shape of each accumulated array, one line per array.
for label, array in (
    ("All observations shape:", all_observations),
    ("All actions shape:", all_actions),
    ("All rewards shape:", all_rewards),
    ("All terminals shape:", all_terminals),
):
    print(label, array.shape)

All observations shape: (448, 768)
All actions shape: (448, 7)
All rewards shape: (448,)
All terminals shape: (448,)


In [None]:
# Persist each accumulated array to its own .npy file in the working directory.
for filename, array in (
    ('all_observations.npy', all_observations),
    ('all_actions.npy', all_actions),
    ('all_rewards.npy', all_rewards),
    ('all_terminals.npy', all_terminals),
):
    np.save(filename, array)

In [7]:
# Argument reference for MDPDataset, as recorded in the original cell:
#   observations (numpy.ndarray): N-D array. If the observation is a vector,
#       the shape should be `(N, dim_observation)`. If the observation is an
#       image, the shape should be `(N, C, H, W)`.
#   actions (numpy.ndarray): N-D array. If the action-space is continuous, the
#       shape should be `(N, dim_action)`. If the action-space is discrete,
#       the shape should be `(N,)`.
#   rewards (numpy.ndarray): array of scalar rewards. The reward function
#       should be defined as r_t = r(s_t, a_t).
#   terminals (numpy.ndarray): array of binary terminal flags.
#   episode_terminals (numpy.ndarray): array of binary episode terminal flags.
#       The given data will be split based on this flag. This is useful if you
#       want to specify the non-environment terminations (e.g. timeout).
#       If None, the episode terminations match the environment terminations.
#   discrete_action (bool): flag to use the given actions as discrete
#       action-space actions. If None, the action type is automatically
#       determined.

# Wrap the accumulated arrays in a d3rlpy dataset. The same flags are passed
# for environment terminations and episode boundaries.
# (The stray "/" after each argument in the original was a syntax error;
# no continuation character is needed inside parentheses.)
cql_dataset = MDPDataset(
    observations=all_observations,
    actions=all_actions,
    rewards=all_rewards,
    terminals=all_terminals,
    episode_terminals=all_terminals,
)

In [8]:
from d3rlpy.algos import CQL

# Create the CQL learner on the CPU; every other hyperparameter is left at the
# library default.
cql = CQL(use_gpu=False)

# No train/test split is made here; the earlier attempt is kept for reference.
# train_episodes, test_episodes = train_test_split(cql_dataset, test_size=0.25)

# Train for a single epoch on the full dataset, with no evaluation episodes
# and no scorers.
fit_options = dict(eval_episodes=None, n_epochs=1, scorers=None)
cql.fit(cql_dataset, **fit_options)

[2m2023-05-02 16:34:26[0m [[32m[1mdebug    [0m] [1mRoundIterator is selected.[0m
[2m2023-05-02 16:34:26[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/CQL_20230502163426[0m
[2m2023-05-02 16:34:26[0m [[32m[1mdebug    [0m] [1mBuilding models...[0m
[2m2023-05-02 16:34:26[0m [[32m[1mdebug    [0m] [1mModels have been built.[0m
[2m2023-05-02 16:34:26[0m [[32m[1minfo     [0m] [1mParameters are saved to d3rlpy_logs/CQL_20230502163426/params.json[0m [36mparams[0m=[35m{'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0001, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_learning_rate': 0.0001, 'alpha_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'alpha_threshold': 10.0, 'bat

Epoch 1/1:   0%|          | 0/1 [00:00<?, ?it/s]

[2m2023-05-02 16:34:28[0m [[32m[1minfo     [0m] [1mCQL_20230502163426: epoch=1 step=1[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008671283721923828, 'time_algorithm_update': 1.88875150680542, 'temp_loss': 11.679020881652832, 'temp': 0.9998999834060669, 'alpha_loss': -31.257404327392578, 'alpha': 1.000100016593933, 'critic_loss': 506.60784912109375, 'actor_loss': -5.022002220153809, 'time_step': 1.890986680984497}[0m [36mstep[0m=[35m1[0m
[2m2023-05-02 16:34:28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/CQL_20230502163426/model_1.pt[0m


[(1,
  {'time_sample_batch': 0.0008671283721923828,
   'time_algorithm_update': 1.88875150680542,
   'temp_loss': 11.679020881652832,
   'temp': 0.9998999834060669,
   'alpha_loss': -31.257404327392578,
   'alpha': 1.000100016593933,
   'critic_loss': 506.60784912109375,
   'actor_loss': -5.022002220153809,
   'time_step': 1.890986680984497})]