In [1]:
import torch
import torch.nn as nn
import rkgb
from rkgb.core.partitioned import PartitionerSequence, PartitionerBottomToTop
import rockmate
from rockmate import Rockmate
from rockmate.solvers import HILP, RK_rotor
from rockmate.solvers.main import add_sched


In [30]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
device='cpu'

model = torch.nn.Transformer(num_encoder_layers=1,num_decoder_layers=1)
model.to(device)

batchsize = 8
sample = [
    torch.rand((200, batchsize, 512),device=device),
    torch.rand((200, batchsize, 512),device=device),
]



cuda




In [31]:
# Partitioner to get a sequence of blocks
sub_partitioner = PartitionerBottomToTop()
sub_partitioner.config.max_estimate_for_main_graph=10**7
sub_partitioner.config.max_estimate_per_sub_graph=10**6

partitioner = PartitionerSequence(sub_partitioner=sub_partitioner)

In [32]:
# Build graphs based on the partitioner
rkgb_res = rkgb.rkgb.Result(
                    model,
                    model_args=sample,
                    # model_kwargs=model_kwargs,
                    # verbose=verbose,
                    # wanted_graphs={"FB"},
                    partitioners=[partitioner],
                    inspection_device=torch.device("cuda"),
                    # print_time_in_each_stage=True
                )



In [33]:
for hcn in rkgb_res.hierarchical_cluster.partitionings[0].list_HCNs:
    print(hcn.sub_cluster)
    try:
        print(hcn.sub_cluster.list_schedules)
    except:
        print('None cluster!!!!!!!\n')

BottomHCluster(__3_addmm)
[]
BottomHCluster(__9_clone)
[]
H_Cluster_2_Ano_id_2
[]
BottomHCluster(__41_clone_1)
[]
BottomHCluster(__44_addmm_1)
[]
BottomHCluster(__47_getitem_2)
[]
BottomHCluster(__48_add)
[]
BottomHCluster(__50_getitem_4)
[]
H_Cluster_3_Ano_id_3
[]
BottomHCluster(__66_getitem_11)
[]
BottomHCluster(__68_getitem_14)
[]
H_Cluster_4_Ano_id_4
[]
BottomHCluster(__177_getitem_32)
[]
H_Cluster_5_Ano_id_3
None
BottomHCluster(__193_getitem_39)
[]
BottomHCluster(__195_getitem_42)
[]
None
None cluster!!!!!!!

BottomHCluster(__195_getitem_42)
[]
BottomHCluster(__193_getitem_39)
[]
H_Cluster_5_Ano_id_3
None
BottomHCluster(__177_getitem_32)
[]
H_Cluster_4_Ano_id_4
[]
BottomHCluster(__68_getitem_14)
[]
BottomHCluster(__66_getitem_11)
[]
H_Cluster_3_Ano_id_3
[]
BottomHCluster(__50_getitem_4)
[]
BottomHCluster(__48_add)
[]
BottomHCluster(__47_getitem_2)
[]
BottomHCluster(__44_addmm_1)
[]
BottomHCluster(__41_clone_1)
[]
H_Cluster_2_Ano_id_2
[]
BottomHCluster(__9_clone)
[]
BottomHCluster(

In [34]:
rkmod = Rockmate(
        model,
        sample,
        budget=5*10**7,
        # list_solvers=[],
        rkgb_res=rkgb_res,
        solve_sched=False,
        # verbose=False,
        # ilp_solver="PULP_CBC_CMD",
        # ilp_time_limit=1 * 60 // 360,
        # ilp_time_limit_top=10 * 60,
        # model_kwargs=None,
        # partitioners=None,
        # max_size_S_graph_for_no_partitioning=40,
        # cpu_optim = torch.optim.Adam,
        # gpu_optim = torch.optim.Adam,
        # optim_kwargs = {},
        # minor_param_size = 10*1024**2,
    )

Using PULP_CBC_CMD to solve ILP


In [35]:
for hcn in rkgb_res.hierarchical_cluster.partitionings[0].list_HCNs:
    print(hcn.sub_cluster)
    try:
        print(hcn.sub_cluster.list_schedules)
    except:
        print('None cluster!!!!!!!\n')

BottomHCluster(__3_addmm)
[]
BottomHCluster(__9_clone)
[]
H_Cluster_2_Ano_id_2
[]
BottomHCluster(__41_clone_1)
[]
BottomHCluster(__44_addmm_1)
[]
BottomHCluster(__47_getitem_2)
[]
BottomHCluster(__48_add)
[]
BottomHCluster(__50_getitem_4)
[]
H_Cluster_3_Ano_id_3
[]
BottomHCluster(__66_getitem_11)
[]
BottomHCluster(__68_getitem_14)
[]
H_Cluster_4_Ano_id_4
[]
BottomHCluster(__177_getitem_32)
[]
H_Cluster_5_Ano_id_3
None
BottomHCluster(__193_getitem_39)
[]
BottomHCluster(__195_getitem_42)
[]
None
None cluster!!!!!!!

BottomHCluster(__195_getitem_42)
[]
BottomHCluster(__193_getitem_39)
[]
H_Cluster_5_Ano_id_3
None
BottomHCluster(__177_getitem_32)
[]
H_Cluster_4_Ano_id_4
[]
BottomHCluster(__68_getitem_14)
[]
BottomHCluster(__66_getitem_11)
[]
H_Cluster_3_Ano_id_3
[]
BottomHCluster(__50_getitem_4)
[]
BottomHCluster(__48_add)
[]
BottomHCluster(__47_getitem_2)
[]
BottomHCluster(__44_addmm_1)
[]
BottomHCluster(__41_clone_1)
[]
H_Cluster_2_Ano_id_2
[]
BottomHCluster(__9_clone)
[]
BottomHCluster(

In [36]:
rkmod.preprocess()

In [37]:
for hcn in rkgb_res.hierarchical_cluster.partitionings[0].list_HCNs:
    print(hcn.sub_cluster)
    if hcn.sub_cluster is None:
        print('None cluster!!!!!!!\n')
    elif hcn.sub_cluster.list_schedules is None:
        print('None schedule!!!!!!!\n')
    else:
        print(hcn.sub_cluster.list_schedules, '\n')


BottomHCluster(__3_addmm)
[Op_sched takes 0.72 ms with 18.75048828125 MiB peak mem, Op_sched takes 1.13 ms with 18.75048828125 MiB peak mem] 

BottomHCluster(__9_clone)
[Op_sched takes 0.30 ms with 0.0 MiB peak mem, Op_sched takes 0.43 ms with 0.0 MiB peak mem] 

H_Cluster_2_Ano_id_2
[Op_sched takes 2.25 ms with 50.87890625 MiB peak mem, Op_sched takes 3.21 ms with 50.87890625 MiB peak mem] 

BottomHCluster(__41_clone_1)
[Op_sched takes 0.19 ms with 0.0 MiB peak mem, Op_sched takes 0.28 ms with 0.0 MiB peak mem] 

BottomHCluster(__44_addmm_1)
[Op_sched takes 0.44 ms with 6.25048828125 MiB peak mem, Op_sched takes 0.63 ms with 6.25048828125 MiB peak mem] 

BottomHCluster(__47_getitem_2)
[Op_sched takes 0.16 ms with 1.5625 MiB peak mem, Op_sched takes 0.23 ms with 1.5625 MiB peak mem] 

BottomHCluster(__48_add)
[Op_sched takes 0.15 ms with 0.0 MiB peak mem, Op_sched takes 0.20 ms with 0.0 MiB peak mem] 

BottomHCluster(__50_getitem_4)
[Op_sched takes 0.22 ms with 0.025390625 MiB peak mem

In [18]:
# rkmod.preprocess()

for hcn in rkmod.rkgb_res.hierarchical_cluster.partitionings[0].list_HCNs:
    print(hcn.sub_cluster)

    if hcn.sub_cluster is None: continue
    if not hcn.is_fwd: continue
    if not hcn.sub_cluster.name.startswith('H'): continue
    
    solver = HILP(ilp_solver="PULP_CBC_CMD")
    solver.config.optimize_metrics = {"minor_param_size": 10*1024**2}

    list_sched = solver(hcn.sub_cluster)
    if hcn.sub_cluster.list_schedules is None:
        hcn.sub_cluster.list_schedules = []
    for sched in list_sched:
        add_sched(hcn.sub_cluster, sched)


BottomHCluster(__3_addmm)
BottomHCluster(__9_clone)
H_Cluster_2_Ano_id_2
Using PULP_CBC_CMD to solve ILP
solving H_Cluster_2_Ano_id_2
BottomHCluster(__41_clone_1)
BottomHCluster(__44_addmm_1)
BottomHCluster(__47_getitem_2)
BottomHCluster(__48_add)
BottomHCluster(__50_getitem_4)
H_Cluster_3_Ano_id_3
Using PULP_CBC_CMD to solve ILP
solving H_Cluster_3_Ano_id_3
BottomHCluster(__66_getitem_11)
BottomHCluster(__68_getitem_14)
H_Cluster_4_Ano_id_4
Using PULP_CBC_CMD to solve ILP
solving H_Cluster_4_Ano_id_4
BottomHCluster(__177_getitem_32)
H_Cluster_5_Ano_id_3
Using PULP_CBC_CMD to solve ILP
solving H_Cluster_5_Ano_id_3
BottomHCluster(__193_getitem_39)
BottomHCluster(__195_getitem_42)
None
BottomHCluster(__195_getitem_42)
BottomHCluster(__193_getitem_39)
H_Cluster_5_Ano_id_3
BottomHCluster(__177_getitem_32)
H_Cluster_4_Ano_id_4
BottomHCluster(__68_getitem_14)
BottomHCluster(__66_getitem_11)
H_Cluster_3_Ano_id_3
BottomHCluster(__50_getitem_4)
BottomHCluster(__48_add)
BottomHCluster(__47_getit

In [38]:
for hcn in rkgb_res.hierarchical_cluster.partitionings[0].list_HCNs:
    print(hcn.sub_cluster)
    if hcn.sub_cluster is None:
        print('None cluster!!!!!!!\n')
    elif hcn.sub_cluster.list_schedules is None:
        print('None schedule!!!!!!!\n')
    else:
        print(hcn.sub_cluster.list_schedules, '\n')

BottomHCluster(__3_addmm)
[Op_sched takes 0.72 ms with 18.75048828125 MiB peak mem, Op_sched takes 1.13 ms with 18.75048828125 MiB peak mem] 

BottomHCluster(__9_clone)
[Op_sched takes 0.30 ms with 0.0 MiB peak mem, Op_sched takes 0.43 ms with 0.0 MiB peak mem] 

H_Cluster_2_Ano_id_2
[Op_sched takes 2.25 ms with 50.87890625 MiB peak mem, Op_sched takes 3.21 ms with 50.87890625 MiB peak mem] 

BottomHCluster(__41_clone_1)
[Op_sched takes 0.19 ms with 0.0 MiB peak mem, Op_sched takes 0.28 ms with 0.0 MiB peak mem] 

BottomHCluster(__44_addmm_1)
[Op_sched takes 0.44 ms with 6.25048828125 MiB peak mem, Op_sched takes 0.63 ms with 6.25048828125 MiB peak mem] 

BottomHCluster(__47_getitem_2)
[Op_sched takes 0.16 ms with 1.5625 MiB peak mem, Op_sched takes 0.23 ms with 1.5625 MiB peak mem] 

BottomHCluster(__48_add)
[Op_sched takes 0.15 ms with 0.0 MiB peak mem, Op_sched takes 0.20 ms with 0.0 MiB peak mem] 

BottomHCluster(__50_getitem_4)
[Op_sched takes 0.22 ms with 0.025390625 MiB peak mem

In [42]:
# rkmod.solve_sched(list_solvers=[RK_rotor()], recursive=False)

solver = RK_rotor()
solver.solve(rkmod.rkgb_res.hierarchical_cluster, [5*10**7])

[]

In [43]:
rkmod.op_sched, rkmod.list_solutions

(None, [])

In [45]:
hcluster = rkmod.rkgb_res.hierarchical_cluster
hcluster.name

'H_Cluster_1_Ano_id_1'

In [21]:
rkmod.get_compiled_fct()


AttributeError: 'NoneType' object has no attribute 'simulate_update'