In [None]:
%reload_ext autoreload
%autoreload 2

import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, Adagrad
from torch_geometric.utils import degree
import pickle
from torch import optim

# For visualization
# from experiments.cross_design.utils import *

# PyTorch data loader
# from torch.utils.data import DataLoader

# PyTorch geometric data loader
from torch_geometric.loader import DataLoader
from pyg_dataset import pyg_dataset
import numpy as np
import os
import time
import argparse
import scipy
from tqdm import tqdm
# For Laplacian position encoding
from scipy.sparse import csgraph

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
torch.set_num_threads(4)

In [61]:
args = argparse.ArgumentParser(description = 'Supervised learning')
args.dir = './train_gnn_hetero/demand'
args.target = 'demand'
args.data_dir = '../../data/chips/clean_data/'
args.name = 'test'
args.num_epoch = 10
args.batch_size = 1
args.learning_rate = 0.001
args.seed = 123456789
args.n_layers = 3
args.hidden_dim = 20
args.test_mode = 0
args.pe = 'lap'
args.pos_dim = 10
args.virtual_node = 0
args.gnn_type = 'gcn'
args.load_global_info = 0
args.load_pd = False
args.fold = 0
args.device = 'cpu'
args.design = 19
args.pl = 0

In [23]:
import sys
from gnn_hetero import GNN
from yacs.config import CfgNode as CN

In [24]:
if args.test_mode == 0:
    log_name = args.dir + "/" + args.name + ".log"
else:
    print("Test mode")
    log_name = args.dir + "/" + args.name + ".test_mode.log"
model_name = args.dir + "/" + args.name + ".model"

In [25]:
# Fix CPU torch random seed
torch.manual_seed(args.seed)

# Fix GPU torch random seed
torch.cuda.manual_seed(args.seed)

# Fix the Numpy random seed
np.random.seed(args.seed)

# Train on CPU (hide GPU) due to memory constraints
# os.environ['CUDA_VISIBLE_DEVICES'] = ""

# device = 'cpu'
device = args.device
print(device)

cpu


In [26]:
if args.gnn_type not in ["gcn", "gat"]:
    sparse = True
else:
    sparse = False

if args.virtual_node >= 1:
    virtual_node = True
    single = False
    if args.virtual_node == 2:
        single = True
else:
    single = False
    virtual_node = False

if sparse:
    from pyg_dataset_sparse import *
    concat = True
else:
    concat = True

print(f'Create data loaders: concat {concat}, sparse {sparse}, gnntype {args.gnn_type}')
pe = args.pe
pos_dim = args.pos_dim

Create data loaders: concat True, sparse False, gnntype gcn


In [57]:
config = None
use_signnet = False
if pe == 'signnet':
    use_signnet = True
    config = CN()
    config = set_cfg_posenc(config)
    config.posenc_SignNet.model = 'DeepSet'
    config.posenc_SignNet.post_layers = 2
    config.posenc_SignNet.dim_pe = pos_dim

In [27]:
print(args.data_dir)

load_global_info = False
if args.load_global_info == 1:
    load_global_info = True

load_pd = False
if args.load_pd == 1:
    load_pd = True

../../data/chips/clean_data/


In [135]:
if pe == 'lap':
    train_dataset = pyg_dataset(design = args.design, pl = args.pl, data_dir = args.data_dir, fold_index = args.fold, split = 'train', target = args.target, load_pe = True, num_eigen = pos_dim, load_global_info = load_global_info, load_pd = load_pd, vn = virtual_node, concat = concat, net = False)
    valid_dataset = pyg_dataset(design = args.design, pl = args.pl, data_dir = args.data_dir, fold_index = args.fold, split = 'valid', target = args.target, load_pe = True, num_eigen = pos_dim, load_global_info = load_global_info, load_pd = load_pd, vn = virtual_node, concat = concat, net = False)
    test_dataset = pyg_dataset(design = args.design, pl = args.pl, data_dir = args.data_dir, fold_index = args.fold, split = 'test', target = args.target, load_pe = True, num_eigen = pos_dim, load_global_info = load_global_info, load_pd = load_pd, vn = virtual_node, concat = concat, net = False)
else:
    train_dataset = pyg_dataset(data_dir = args.data_dir, fold_index = args.fold, design = args.design, pl = args.pl, split = 'train', target = args.target, load_global_info = load_global_info, load_pd = load_pd, load_pe = False, vn=args.virtual_node, concat = concat)
    valid_dataset = pyg_dataset(data_dir = args.data_dir, fold_index = args.fold, design = args.design, pl = args.pl, split = 'valid', target = args.target, load_global_info = load_global_info, load_pd = load_pd, load_pe = False, vn = args.virtual_node, concat = concat)
    test_dataset = pyg_dataset(data_dir = args.data_dir, fold_index = args.fold, design = args.design, pl = args.pl, split = 'test', target = args.target, load_global_info = load_global_info, load_pd = load_pd, load_pe = False, vn=args.virtual_node, concat = concat)


Learning target: demand
Number of samples: 11


100%|██████████| 11/11 [00:00<00:00, 196.91it/s]


0
torch.Size([3952, 6])
torch.Size([3952, 4])
torch.Size([3952, 4])
1
torch.Size([6872, 6])
torch.Size([6872, 4])
torch.Size([6872, 4])
2
torch.Size([6913, 6])
torch.Size([6913, 4])
torch.Size([6913, 4])
3
torch.Size([7323, 6])
torch.Size([7323, 4])
torch.Size([7323, 4])
4
torch.Size([7258, 6])
torch.Size([7258, 4])
torch.Size([7258, 4])
5
torch.Size([7120, 6])
torch.Size([7120, 4])
torch.Size([7120, 4])
6
torch.Size([7879, 6])
torch.Size([7879, 4])
torch.Size([7879, 4])
7
torch.Size([7626, 6])
torch.Size([7626, 4])
torch.Size([7626, 4])
8
torch.Size([7620, 6])
torch.Size([7620, 4])
torch.Size([7620, 4])
9
torch.Size([7772, 6])
torch.Size([7772, 4])
torch.Size([7772, 4])
10
torch.Size([7814, 6])
torch.Size([7814, 4])
torch.Size([7814, 4])
Done reading data
Learning target: demand
Number of samples: 1


100%|██████████| 1/1 [00:00<00:00, 278.78it/s]


11
torch.Size([6529, 6])
torch.Size([6529, 4])
torch.Size([6529, 4])
Done reading data
Learning target: demand
Number of samples: 1


100%|██████████| 1/1 [00:00<00:00, 332.46it/s]

12
torch.Size([6548, 6])
torch.Size([6548, 4])
torch.Size([6548, 4])
Done reading data





In [137]:
train_dataset[0]

Data(x=[8434, 5], num_instances=3952, y=[3952, 1, 13], edge_index_node_net=[2, 17444], edge_index_net_node=[2, 17444], cell_degrees=[3952], net_degrees=[4482], evects=[8434, 10])

In [138]:
with open(args.data_dir + '0.eigen.10.pkl', 'rb') as f:
    d = pickle.load(f)

In [139]:
d['evects'].shape

(8434, 10)

In [140]:
with open(args.data_dir + '0.node_features.pkl', 'rb') as f:
    d = pickle.load(f)

In [141]:
d

{'num_instances': 3952,
 'num_nets': 4482,
 'x_min': 512,
 'x_max': 84096,
 'y_min': 1536,
 'y_max': 87552,
 'min_cell_width': 256,
 'max_cell_width': 397440,
 'min_cell_height': 1536,
 'max_cell_height': 503056,
 'instance_features': array([[4.96171516e-01, 5.00000000e-01, 2.30000000e+01, 4.83403158e-03,
         0.00000000e+00, 0.00000000e+00],
        [4.96171516e-01, 5.35714286e-01, 2.30000000e+01, 4.83403158e-03,
         0.00000000e+00, 6.00000000e+00],
        [5.22205207e-01, 5.00000000e-01, 2.30000000e+01, 4.83403158e-03,
         0.00000000e+00, 0.00000000e+00],
        ...,
        [2.72588055e-01, 7.50000000e-01, 3.40000000e+01, 3.22268772e-04,
         0.00000000e+00, 0.00000000e+00],
        [4.79326187e-01, 5.00000000e-01, 1.10000000e+01, 9.66806316e-04,
         0.00000000e+00, 0.00000000e+00],
        [5.54364472e-01, 5.00000000e-01, 1.10000000e+01, 9.66806316e-04,
         0.00000000e+00, 6.00000000e+00]]),
 'sample_name': '../../data/chips/NCSU-DigIC-GraphData-2023-0

In [142]:
batch_size = args.batch_size
print(batch_size)
train_dataloader = DataLoader(train_dataset, batch_size, shuffle = True)
valid_dataloader = DataLoader(valid_dataset, batch_size, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle = False)

print('Number of training examples:', len(train_dataset))
print('Number of testing examples:', len(test_dataset))

1
Number of training examples: 11
Number of testing examples: 1


In [143]:
for batch_idx, data in enumerate(train_dataloader):
    print(batch_idx)
    print(data)
    node_dim = data.x.size(1)
    edge_dim = 1
    # if sparse:
    #     edge_dim = 1
    #     #net_dim = data.x_net.size(1)
    # else:
    #     edge_dim = data.edge_attr.size(1)
    #     #net_dim = data.x_net.size(1)
        
    if args.target == 'classify':
        num_outputs = 2#data.y.size(1)
    else:
        num_outputs = data.y.size(1)


print('Number of node features:', node_dim)
print('Number of edge features:', edge_dim)
print('Number of outputs:', num_outputs)

0
DataBatch(x=[15784, 5], num_instances=[1], y=[7626, 1, 13], edge_index_node_net=[2, 25645], edge_index_net_node=[2, 25645], cell_degrees=[7626], net_degrees=[8158], evects=[15784, 10], batch=[15784], ptr=[2])
1
DataBatch(x=[16160, 5], num_instances=[1], y=[7814, 1, 13], edge_index_node_net=[2, 26023], edge_index_net_node=[2, 26023], cell_degrees=[7814], net_degrees=[8346], evects=[16160, 10], batch=[16160], ptr=[2])
2
DataBatch(x=[15178, 5], num_instances=[1], y=[7323, 1, 13], edge_index_node_net=[2, 25033], edge_index_net_node=[2, 25033], cell_degrees=[7323], net_degrees=[7855], evects=[15178, 10], batch=[15178], ptr=[2])
3
DataBatch(x=[8434, 5], num_instances=[1], y=[3952, 1, 13], edge_index_node_net=[2, 17444], edge_index_net_node=[2, 17444], cell_degrees=[3952], net_degrees=[4482], evects=[8434, 10], batch=[8434], ptr=[2])
4
DataBatch(x=[16076, 5], num_instances=[1], y=[7772, 1, 13], edge_index_node_net=[2, 25926], edge_index_net_node=[2, 25926], cell_degrees=[7772], net_degrees=

In [144]:
if pe == 'lap':
    node_dim += pos_dim

    print('Number of eigenvectors:', pos_dim)
    print('Number of node features + position encoding:', node_dim)

Number of eigenvectors: 10
Number of node features + position encoding: 15


In [145]:
y = []
for batch_idx, data in enumerate(train_dataloader):
    y.append(data.y.detach().numpy())
y = np.concatenate(y)

y_min = np.min(y)
y_max = np.max(y)
y_mean = np.mean(y)
y_std = np.std(y)

print('y min:', y_min)
print('y max:', y_max)
print('y mean:', y_mean)
print('y std:', y_std)

y min: 0.0
y max: 34.0
y mean: 2.083835
y std: 3.6699028


In [146]:
if args.virtual_node == 1:
    virtual_node = True
else:
    virtual_node = False
gnn_type = args.gnn_type

print('GNN type:', gnn_type)
print('Virtual node:', virtual_node)

GNN type: gcn
Virtual node: False


In [147]:
if gnn_type == 'pna':
    aggregators = ['mean', 'min', 'max', 'std']
    scalers = ['identity', 'amplification', 'attenuation']

    print('Computing the in-degree histogram')
    deg = torch.zeros(10, dtype = torch.long)
    for batch_idx, data in enumerate(train_dataloader):
        d = degree(data.edge_index[1], num_nodes = data.num_nodes, dtype = torch.long)
        deg += torch.bincount(d, minlength = deg.numel())
    print('Done computing the in-degree histogram')

    model = GNN(gnn_type = gnn_type, num_tasks = num_outputs, virtual_node = virtual_node, num_layer = args.n_layers, emb_dim = args.hidden_dim,
            aggregators = aggregators, scalers = scalers, deg = deg, edge_dim = edge_dim, 
            use_signnet = use_signnet, node_dim = node_dim, cfg_posenc = config,
            device = device, single = single).to(device)
else:
    model = GNN(gnn_type = gnn_type, num_tasks = num_outputs, virtual_node = virtual_node, num_layer = args.n_layers, emb_dim = args.hidden_dim,
            use_signnet = use_signnet, node_dim = node_dim, edge_dim = edge_dim, cfg_posenc = config,
            device = device, single = single).to(device)

GNN_node(
  (node_encoder): Sequential(
    (0): Linear(in_features=15, out_features=40, bias=True)
    (1): LeakyReLU(negative_slope=0.1)
    (2): Linear(in_features=40, out_features=20, bias=True)
    (3): LeakyReLU(negative_slope=0.1)
  )
  (convs): ModuleList(
    (0): GCNConv()
    (1): GCNConv()
    (2): GCNConv()
  )
  (re_convs): ModuleList(
    (0): GCNConv()
    (1): GCNConv()
    (2): GCNConv()
  )
  (norms): ModuleList(
    (0): LayerNorm((20,), eps=1e-05, elementwise_affine=True)
    (1): LayerNorm((20,), eps=1e-05, elementwise_affine=True)
    (2): LayerNorm((20,), eps=1e-05, elementwise_affine=True)
  )
)


In [148]:
optimizer = Adagrad(model.parameters(), lr = args.learning_rate)
num_parameters = sum(param.numel() for param in model.parameters() if param.requires_grad)
print('Number of learnable parameters:', num_parameters)
print('Done with model creation')

Number of learnable parameters: 46205
Done with model creation


In [149]:
if args.test_mode == 1:
    print("Skip the training")
    num_epoch = 0
else:
    num_epoch = args.num_epoch

In [150]:
best_mae = 1e9
patience = 300
stop = False
for epoch in range(num_epoch):
    if stop:
        break
    print('--------------------------------------')
    print('Epoch', epoch)
    # LOG.write('--------------------------------------\n')
    # LOG.write('Epoch ' + str(epoch) + '\n')

    # Training
    t = time.time()
    total_loss = 0.0
    nBatch = 0
    sum_error = 0.0
    num_samples = 0
    
    for batch_idx, data in enumerate(train_dataloader):
        data = data.to(device = device)
        if args.target == 'classify':
            target = data.y
        else:
            target = (data.y - y_mean) / y_std
        #weights = data.weights.to(device = device)

        if pe == 'lap':
            data.x = torch.cat([data.x, data.evects], dim = 1)
            #print(data.x.shape, data.evects.shape)

        if use_signnet == True:
            data.x = data.x.type(torch.FloatTensor).to(device = device)

        if gnn_type == 'pna':
            data.edge_attr = data.edge_attr.type(torch.FloatTensor).to(device = device)
        
        #print(data.x.shape)
        
        predict = model(data)
        predict = predict[: target.size(0), :]

        optimizer.zero_grad()

        # Mean squared error loss
        if args.target == 'classify':
            loss = F.nll_loss(predict, target.view(-1))
        else:
            loss = F.mse_loss(predict.view(-1), target.view(-1), reduction = 'mean')
 
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        nBatch += 1

        if args.target == 'classify':
            sum_error += loss.item()
        else:
            sum_error += torch.sum(torch.abs(predict.view(-1) - target.view(-1))).detach().cpu().numpy()
        num_samples += predict.size(0)

        if batch_idx % 10 == 0:
            print('Batch', batch_idx, '/', len(train_dataloader),': Loss =', loss.item())
            # LOG.write('Batch ' + str(batch_idx) + '/' + str(len(train_dataloader)) + ': Loss = ' + str(loss.item()) + '\n')

    train_mae = sum_error / (num_samples * num_outputs)
    avg_loss = total_loss / nBatch

    print('Train average loss:', avg_loss)
    # LOG.write('Train average loss: ' + str(avg_loss) + '\n')
    print('Train MAE:', train_mae)
    # LOG.write('Train MAE: ' + str(train_mae) + '\n')
    print('Train MAE (original scale):', train_mae * y_std)
    # LOG.write('Train MAE (original scale): ' + str(train_mae * y_std) + '\n')
    print("Train time =", "{:.5f}".format(time.time() - t))
    # LOG.write("Train time = " + "{:.5f}".format(time.time() - t) + "\n")

    # Validation
    t = time.time()
    model.eval()
    total_loss = 0.0
    nBatch = 0
 
    with torch.no_grad():
        sum_error = 0.0
        num_samples = 0
        for batch_idx, data in enumerate(valid_dataloader):
            data = data.to(device = device)
            if args.target == 'classify':
                target = data.y
            else:
                target = (data.y - y_mean) / y_std

            if pe == 'lap':
                data.x = torch.cat([data.x, data.evects], dim = 1)

            if use_signnet == True:
                data.x = data.x.type(torch.FloatTensor).to(device = device)

            if gnn_type == 'pna':
                data.edge_attr = data.edge_attr.type(torch.FloatTensor).to(device = device)

            predict = model(data)
            predict = predict[: target.size(0), :]

            # Mean squared error loss
            if args.target == 'classify':
                loss = F.nll_loss(predict, target.view(-1))
            else:
                loss = F.mse_loss(predict.view(-1), target.view(-1), reduction = 'mean')

            total_loss += loss.item()
            nBatch += 1

            if args.target == 'classify':
                sum_error += loss.item()
            else:
                sum_error += torch.sum(torch.abs(predict.view(-1) - target.view(-1))).detach().cpu().numpy()
            num_samples += predict.size(0)
             
            if batch_idx % 10 == 0:
                print('Valid Batch', batch_idx, '/', len(valid_dataloader),': Loss =', loss.item())
                # LOG.write('Valid Batch ' + str(batch_idx) + '/' + str(len(valid_dataloader)) + ': Loss = ' + str(loss.item()) + '\n')

    valid_mae = sum_error / (num_samples * num_outputs)
    avg_loss = total_loss / nBatch

    print('Valid average loss:', avg_loss)
    # LOG.write('Valid average loss: ' + str(avg_loss) + '\n')
    print('Valid MAE:', valid_mae)
    # LOG.write('Valid MAE: ' + str(valid_mae) + '\n')
    print('Valid MAE (original scale):', valid_mae * y_std)
    # LOG.write('Valid MAE (original scale): ' + str(valid_mae * y_std) + '\n')
    print("Valid time =", "{:.5f}".format(time.time() - t))
    # LOG.write("Valid time = " + "{:.5f}".format(time.time() - t) + "\n")
    
    if valid_mae < best_mae:
        best_mae = valid_mae
        patience = 300
        print('Current best MAE updated:', best_mae)
        # LOG.write('Current best MAE updated: ' + str(best_mae) + '\n')
        print('Current best MAE (original scale) updated:', best_mae * y_std)
        # LOG.write('Current best MAE (original scale) updated: ' + str(best_mae * y_std) + '\n')
        
        torch.save(model.state_dict(), model_name)
        print("Save the best model to " + model_name)
        # LOG.write("Save the best model to " + model_name + "\n")
    else:
        patience -= 1
        print(f'Patience: {patience}')
        if patience <= 0:
            stop = True

--------------------------------------
Epoch 0


  loss = F.mse_loss(predict.view(-1), target.view(-1), reduction = 'mean')


RuntimeError: The size of tensor a (6913) must match the size of tensor b (89869) at non-singleton dimension 0