This notebook measures the MACs (number of multiply-accumulate operations) and parameter counts of ARP models, Diffusion Policy, ACT, and RVT-2. Both quantities are measured using the [thop](https://pypi.org/project/thop/) library.

[thop](https://pypi.org/project/thop/) only counts the parameters and computations that are actually needed for inference, which is the behavior we want. For example, it does not count the VAE encoder of ACT.

In [1]:
import torch
import sys
import hydra
import arp
from thop import profile, clever_format
from arp import AutoRegressivePolicy, TokenType, LayerType, ModelConfig
from argparse import Namespace
from as_list import as_list
from omegaconf import OmegaConf

def run_profile(mod, inputs):
    """Profile `mod` with thop and return its formatted (MACs, params) pair.

    Args:
        mod: The module handed to `thop.profile`.
        inputs: The positional input(s) for the module; normalized with
            `as_list` before being passed to `thop.profile`.

    Returns:
        The result of `clever_format` applied to `[macs, params]` with the
        "%.3f" format.
    """
    call_args = as_list(inputs)
    total_macs, total_params = profile(mod, call_args)
    formatted = clever_format([total_macs, total_params], "%.3f")
    return formatted

# Remember the current working directory; later cells `%cd` into its per-benchmark subfolders.
root = %pwd
root

'/common/users/xz653/Workspace/iclr2025/release'

First, download `datasets/input_captured/{pusht, aloha, rlb}` from this Box folder <https://rutgers.box.com/s/uzozemx67kje58ycy3lyzf1zgddz8tyq> and put them into the `pusht/outputs/input_captured/`, `aloha/outputs/input_captured/`, and `rlb/outputs/input_captured/` folders, respectively.

These files contain input data captured while evaluating the policies in these environments. Using them saves us the trouble of running the evaluation code.

# Push-T

In [6]:
%cd {root}/pusht

/common/users/xz653/Workspace/iclr2025/release/pusht


In [7]:
# Load the OmegaConf configs of the two Push-T policies (paths are relative to the current working directory).
diff_policy_cfg = OmegaConf.load('./diffusion_policy/config/train_diffusion_transformer_hybrid_workspace.single.yaml')
arp_cfg = OmegaConf.load('./configs/arp.yaml')

In [8]:
# Build both Push-T policies from the `policy` node of their configs.
diff_policy = hydra.utils.instantiate(diff_policy_cfg.policy)
arp_policy = hydra.utils.instantiate(arp_cfg.policy)

from diffusion_policy.dataset.base_dataset import BaseImageDataset

# In this notebook the dataset is only used to obtain a normalizer for the diffusion policy.
# NOTE(review): only diff_policy receives the normalizer here; confirm arp_policy does not need one for predict_action.
dataset: BaseImageDataset = hydra.utils.instantiate(arp_cfg.task.dataset)
diff_policy.set_normalizer(dataset.get_normalizer())



using obs modality: low_dim with keys: ['agent_pos']
using obs modality: rgb with keys: ['image']
using obs modality: depth with keys: []
using obs modality: scan with keys: []






using obs modality: rgb with keys: ['image']
using obs modality: depth with keys: []
using obs modality: scan with keys: []
using obs modality: low_dim with keys: []


In [9]:
# Load the captured Push-T evaluation inputs onto the CPU.
# NOTE(review): torch.load unpickles its input and these files come from an external download; only load copies you trust.
obs_dict = torch.load('outputs/input_captured/pusht/obs_dict.pth', map_location='cpu')
# NOTE(review): `batch` is not used by any visible Push-T cell and is reassigned in the ALOHA section; confirm it is still needed.
batch = torch.load('outputs/input_captured/pusht/batch.pth', map_location='cpu')
diff_policy.eval();

In [10]:
# Keep only the first slice of every entry so a single sample is profiled (assumes dim 0 is the batch axis; TODO confirm).
obs_dict = {k: v[:1] for k, v in obs_dict.items()}

In [11]:
# thop's profile() invokes the module itself, so point `forward` at the inference entry point before profiling.
diff_policy.forward = diff_policy.predict_action
run_profile(diff_policy, [obs_dict]) # MACs, params

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.


('6.751G', '27.132M')

In [12]:
# thop's profile() invokes the module itself, so point `forward` at the inference entry point before profiling.
arp_policy.forward = arp_policy.predict_action
run_profile(arp_policy, [obs_dict]) # MACs, params

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.


('2.653G', '25.494M')

# ALOHA

In [13]:
%cd {root}/aloha

/common/users/xz653/Workspace/iclr2025/release/aloha


In [14]:
from lerobot.common.policies.autoregressive_policy.modeling import AutoregressiveModel 
from lerobot.common.policies.autoregressive_policy.configuration import ARPConfig
from lerobot.common.policies.factory import _policy_cfg_from_hydra_cfg

from lerobot.common.policies.act.modeling_act import ACT
from lerobot.common.policies.act.configuration_act import ACTConfig

In [15]:
# Build the ALOHA ARP model from its config and switch it to evaluation mode.
arp_cfg = _policy_cfg_from_hydra_cfg(ARPConfig, OmegaConf.load('configs/arp.yaml'))
arp_model = AutoregressiveModel(arp_cfg)
arp_model.eval();





In [16]:
# Build the ALOHA ACT model from its config and switch it to evaluation mode.
act_cfg = _policy_cfg_from_hydra_cfg(ACTConfig, OmegaConf.load('lerobot/configs/policy/act.single.yaml'))
act_model = ACT(act_cfg)
act_model.eval();

In [17]:
# Load the captured ALOHA batch onto the CPU.
# NOTE(review): torch.load unpickles its input and this file comes from an external download; only load copies you trust.
batch = torch.load('outputs/input_captured/aloha/batch.pth', map_location='cpu')
# Keep only the first slice of every entry so a single sample is profiled (assumes dim 0 is the batch axis; TODO confirm).
batch = {k:v[:1] for k, v in batch.items()}

In [18]:
run_profile(act_model, batch) # MACs, params

[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.


('17.799G', '50.905M')

In [19]:
run_profile(arp_model, batch) # MACs, params

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.


('17.789G', '47.559M')

# RLBench

In [2]:
%cd {root}/rlb

/common/users/xz653/Workspace/iclr2025/release/rlb


In [3]:
from importlib import import_module
# CUDA device index used below for the captured observation and for the RLBench models.
rlb_device = 0

In [4]:
# Load the OmegaConf configs of the three RLBench policies (paths are relative to the current working directory).
rvt2_cfg = OmegaConf.load('./configs/rvt2.yaml')
arp_cfg = OmegaConf.load('./configs/arp.yaml')
arp_plus_cfg = OmegaConf.load('./configs/arp_plus.yaml')

In [5]:
# Load the captured RLBench observation directly onto the device selected by `rlb_device`.
# NOTE(review): torch.load unpickles its input and this file comes from an external download; only load copies you trust.
observation = torch.load('outputs/input_captured/rlb/observation.pth', map_location=torch.device(rlb_device))

In [10]:
def load_rlb_model(cfg):
    """Build the RLBench policy described by `cfg`.

    The module named by `cfg.py_module` is imported; its `PolicyNetwork` is
    constructed from `cfg.model.hp` and `cfg.env` with
    `render_device="cuda:<rlb_device>"`, moved to `rlb_device`, and then
    wrapped in that module's `Policy` together with `cfg.model.hp`.

    Args:
        cfg: Config object exposing `py_module`, `model.hp` and `env`.

    Returns:
        The constructed `Policy` instance.
    """
    policy_module = import_module(cfg.py_module)
    policy_cls = policy_module.Policy
    render_target = f"cuda:{rlb_device}"
    network = policy_module.PolicyNetwork(
        cfg.model.hp,
        cfg.env,
        render_device=render_target,
    )
    network = network.to(rlb_device)
    return policy_cls(network, cfg.model.hp)

In [11]:
# Instantiate the three RLBench policies from their configs.
# NOTE(review): `arp_cfg` / `arp_model` reuse names from the Push-T and ALOHA sections; the earlier objects are overwritten.
rvt_model = load_rlb_model(rvt2_cfg)
arp_model = load_rlb_model(arp_cfg)
arp_plus_model = load_rlb_model(arp_plus_cfg)

MVT Vars: {'training': True, '_parameters': OrderedDict(), '_buffers': OrderedDict(), '_non_persistent_buffers_set': set(), '_backward_pre_hooks': OrderedDict(), '_backward_hooks': OrderedDict(), '_is_full_backward_hook': None, '_forward_hooks': OrderedDict(), '_forward_hooks_with_kwargs': OrderedDict(), '_forward_hooks_always_called': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_forward_pre_hooks_with_kwargs': OrderedDict(), '_state_dict_hooks': OrderedDict(), '_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_post_hooks': OrderedDict(), '_modules': OrderedDict(), 'depth': 8, 'img_feat_dim': 3, 'img_size': 224, 'add_proprio': True, 'proprio_dim': 3, 'add_lang': True, 'lang_dim': 512, 'lang_len': 77, 'im_channels': 64, 'img_patch_size': 14, 'attn_dropout': 0.1, 'add_corr': True, 'add_pixel_loc': True, 'add_depth': True, 'pe_fix': True, 'attn_dim': 512, 'attn_heads': 8, 'attn_dim_head': 64, 'use_xformers': True, 'feat_dim': 22

In [12]:
# thop's profile() invokes the module itself, so point `forward` at the policy's `act` entry point before profiling.
rvt_model.forward = rvt_model.act
# NOTE(review): presumably these disable the language input and pin the device for profiling; confirm against the RVT-2 Policy implementation.
rvt_model.add_lang = False
rvt_model._device = rlb_device
# The leading 0 is passed as the first positional argument of `act` (presumably a step index; TODO confirm).
run_profile(rvt_model, [0, observation])

[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm1d'>.


('57.100G', '72.091M')

In [13]:
run_profile(arp_model._network, [observation]) # MACs, params

[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.


('56.191G', '71.914M')

In [14]:
run_profile(arp_plus_model._network, [observation]) # MACs, params

[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.normalization.LayerNorm'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.


('57.362G', '73.813M')

: 