L2-Regularization Method on Graph Classification Task of Proteins Dataset
--------------------------

### All libraries we need

In [1]:
import warnings
warnings.filterwarnings('ignore')


import os
import os.path as osp
import shutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
import time
import statistics as stat
import psutil
import itertools
import tracemalloc
import gc
import argparse



import torch
import torch.nn as nn
import torch.nn.functional as F

from sparse_softmax import Sparsemax
from torch.nn import Parameter
from torch_geometric.data import Data
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.pool.topk_pool import topk, filter_adj
from torch_geometric.utils import softmax, dense_to_sparse, add_remaining_self_loops
from torch_scatter import scatter_add
from torch_sparse import spspmm, coalesce
from torch.utils.data import random_split
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.nn import GCNConv

from utils import *


### Device

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Regularization Rate
#### Regularization rates range from the following numbers:

In [3]:
0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2,0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1e2, 1e3, 1e6
;

''

### Functions for Mmeasuring criterias

In [4]:
def get_num_parameters(model: nn.Module, count_nonzero_only=False) -> int:
    """
    calculate the total number of parameters of model
    :param count_nonzero_only: only count nonzero weights
    """
    num_counted_elements = 0
    for param in model.parameters():
        if count_nonzero_only:
            num_counted_elements += param.count_nonzero()
        else:
            num_counted_elements += param.numel()
    return num_counted_elements

# Function to get CPU usage
def get_cpu_usage():
    return psutil.cpu_percent(interval=1)



# Function to approximate power consumption (Assume some average power usage per CPU percentage point)
def estimate_power_usage(cpu_usage):
    base_power_usage = 10  # Assumed base power usage in watts
    power_per_percent = 0.5  # Assumed additional watts per CPU usage percent
    return base_power_usage + (power_per_percent * cpu_usage)

# The model size based on the number of parameters
def get_model_size_param(model: nn.Module, data_width=32, count_nonzero_only=False) -> int:
    """
    calculate the model size in bits
    :param data_width: #bits per element
    :param count_nonzero_only: only count nonzero weights
    """
    return get_num_parameters(model, count_nonzero_only) * data_width

Byte = 8
KiB = 1024 * Byte
MiB = 1024 * KiB
GiB = 1024 * MiB




### Setting Arguments

In [5]:
import sys
sys.argv=['']
del sys

parser = argparse.ArgumentParser()

parser.add_argument('--seed', type=int, default=777, help='random seed')
parser.add_argument('--batch_size', type=int, default=512, help='batch size')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--weight_decay', type=float, default=0.001, help='weight decay')
parser.add_argument('--nhid', type=int, default=128, help='hidden size')
parser.add_argument('--sample_neighbor', type=bool, default=True, help='whether sample neighbors')
parser.add_argument('--sparse_attention', type=bool, default=True, help='whether use sparse attention')
parser.add_argument('--structure_learning', type=bool, default=True, help='whether perform structure learning')
parser.add_argument('--pooling_ratio', type=float, default=0.5, help='pooling ratio')
parser.add_argument('--dropout_ratio', type=float, default=0.0, help='dropout ratio')
parser.add_argument('--lamb', type=float, default=1.0, help='trade-off parameter')
parser.add_argument('--dataset', type=str, default='PROTEINS', help='DD/PROTEINS/NCI1/NCI109/Mutagenicity/ENZYMES')
parser.add_argument('--device', type=str, default='cpu', help='specify cuda devices')
parser.add_argument('--epochs', type=int, default=2, help='maximum number of epochs')
parser.add_argument('--patience', type=int, default=100, help='patience for early stopping')
parser.add_argument('--model_name', type=str, default='HGPSL', help='-')

args = parser.parse_args()
torch.manual_seed(args.seed)

<torch._C.Generator at 0x2c47495a9d0>

### save path for model

In [6]:

if not os.path.isdir('checkpoint'):
    os.mkdir('checkpoint')
if not os.path.isdir(os.path.join('checkpoint', args.dataset)):
    os.mkdir(os.path.join('checkpoint', f"{args.dataset}"))
ckpt_dir = f"./checkpoint/{args.dataset}/"



def save_best(ckpt_dir, epoch, state, model_name, eval_acc, is_best, is_reg):
    print('saving....')
    model.to(device)
    state_save = {
        'net':state,
        'epoch':epoch,
        'acc': eval_acc 
        }
    best_pth_name = f'{args.model_name}_best.pth'
    reg_pth_name = f'{args.model_name}_reg_best.pth'
    
  
    if is_reg & is_best:
        ckpt_path = os.path.join(ckpt_dir, reg_pth_name) 
        torch.save(state_save, ckpt_path)
    
     
    if is_reg == False & is_best:
        ckpt_path = os.path.join(ckpt_dir, best_pth_name)  
        torch.save(state_save, ckpt_path)
           
        
    model.to(device)

### start loading data

In [7]:
dataset = TUDataset(os.path.join('data', args.dataset), name=args.dataset, use_node_attr=True)

args.num_classes = dataset.num_classes
args.num_features = dataset.num_features

print(args)

Downloading https://www.chrsmrrs.com/graphkerneldatasets/PROTEINS.zip
Processing...


Namespace(seed=777, batch_size=512, lr=0.001, weight_decay=0.001, nhid=128, sample_neighbor=True, sparse_attention=True, structure_learning=True, pooling_ratio=0.5, dropout_ratio=0.0, lamb=1.0, dataset='PROTEINS', device='cpu', epochs=2, patience=100, model_name='HGPSL', num_classes=2, num_features=4)


Done!


### Preprocessing  dataset

In [8]:
num_training = int(len(dataset) * 0.8)
num_val = int(len(dataset) * 0.1)
num_test = len(dataset) - (num_training + num_val)
training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])

train_loader = DataLoader(training_set, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(validation_set, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

### Model for Training

In [9]:


class TwoHopNeighborhood(object):
    def __call__(self, data):
        edge_index, edge_attr = data.edge_index, data.edge_attr
        n = data.num_nodes

        fill = 1e16
        value = edge_index.new_full((edge_index.size(1),), fill, dtype=torch.float)

        index, value = spspmm(edge_index, value, edge_index, value, n, n, n, True)

        edge_index = torch.cat([edge_index, index], dim=1)
        if edge_attr is None:
            data.edge_index, _ = coalesce(edge_index, None, n, n)
        else:
            value = value.view(-1, *[1 for _ in range(edge_attr.dim() - 1)])
            value = value.expand(-1, *list(edge_attr.size())[1:])
            edge_attr = torch.cat([edge_attr, value], dim=0)
            #, fill_value=fill
            data.edge_index, edge_attr = coalesce(edge_index, edge_attr, n, n, op='min')
            edge_attr[edge_attr >= fill] = 0
            data.edge_attr = edge_attr

        return data

    def __repr__(self):
        return '{}()'.format(self.__class__.__name__)


class GCN(MessagePassing):
    def __init__(self, in_channels, out_channels, cached=False, bias=True, **kwargs):
        super(GCN, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.cached = cached
        self.cached_result = None
        self.cached_num_edges = None

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))
        nn.init.xavier_uniform_(self.weight.data)

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
            nn.init.zeros_(self.bias.data)
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, device=edge_index.device)

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        x = torch.matmul(x, self.weight)

        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}'.format(self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight, x.dtype)
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, self.out_channels)


class NodeInformationScore(MessagePassing):
    def __init__(self, improved=False, cached=False, **kwargs):
        super(NodeInformationScore, self).__init__(aggr='add', **kwargs)

        self.improved = improved
        self.cached = cached
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, device=edge_index.device)

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        edge_index, edge_weight = add_remaining_self_loops(edge_index, edge_weight, 0, num_nodes)

        row, col = edge_index
        expand_deg = torch.zeros((edge_weight.size(0),), dtype=dtype, device=edge_index.device)
        expand_deg[-num_nodes:] = torch.ones((num_nodes,), dtype=dtype, device=edge_index.device)

        return edge_index, expand_deg - deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight):
        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}'.format(self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight, x.dtype)
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        return aggr_out


class HGPSLPool(torch.nn.Module):
    def __init__(self, in_channels, ratio=0.8, sample=False, sparse=False, sl=True, lamb=1.0, negative_slop=0.2):
        super(HGPSLPool, self).__init__()
        self.in_channels = in_channels
        self.ratio = ratio
        self.sample = sample
        self.sparse = sparse
        self.sl = sl
        self.negative_slop = negative_slop
        self.lamb = lamb

        self.att = Parameter(torch.Tensor(1, self.in_channels * 2))
        nn.init.xavier_uniform_(self.att.data)
        self.sparse_attention = Sparsemax()
        self.neighbor_augment = TwoHopNeighborhood()
        self.calc_information_score = NodeInformationScore()

    def forward(self, x, edge_index, edge_attr, batch=None):
        if batch is None:
            batch = edge_index.new_zeros(x.size(0))

        x_information_score = self.calc_information_score(x, edge_index, edge_attr)
        score = torch.sum(torch.abs(x_information_score), dim=1)

        # Graph Pooling
        original_x = x
        perm = topk(score, self.ratio, batch)
        x = x[perm]
        batch = batch[perm]
        induced_edge_index, induced_edge_attr = filter_adj(edge_index, edge_attr, perm, num_nodes=score.size(0))

        # Discard structure learning layer, directly return
        if self.sl is False:
            return x, induced_edge_index, induced_edge_attr, batch

        # Structure Learning
        if self.sample:
            # A fast mode for large graphs.
            # In large graphs, learning the possible edge weights between each pair of nodes is time consuming.
            # To accelerate this process, we sample it's K-Hop neighbors for each node and then learn the
            # edge weights between them.
            k_hop = 3
            if edge_attr is None:
                edge_attr = torch.ones((edge_index.size(1),), dtype=torch.float, device=edge_index.device)

            hop_data = Data(x=original_x, edge_index=edge_index, edge_attr=edge_attr)
            for _ in range(k_hop - 1):
                hop_data = self.neighbor_augment(hop_data)
            hop_edge_index = hop_data.edge_index
            hop_edge_attr = hop_data.edge_attr
            new_edge_index, new_edge_attr = filter_adj(hop_edge_index, hop_edge_attr, perm, num_nodes=score.size(0))

            new_edge_index, new_edge_attr = add_remaining_self_loops(new_edge_index, new_edge_attr, 0, x.size(0))
            row, col = new_edge_index
            weights = (torch.cat([x[row], x[col]], dim=1) * self.att).sum(dim=-1)
            weights = F.leaky_relu(weights, self.negative_slop) + new_edge_attr * self.lamb
            adj = torch.zeros((x.size(0), x.size(0)), dtype=torch.float, device=x.device)
            adj[row, col] = weights
            new_edge_index, weights = dense_to_sparse(adj)
            row, col = new_edge_index
            if self.sparse:
                new_edge_attr = self.sparse_attention(weights, row)
            else:
                new_edge_attr = softmax(weights, row, x.size(0))
            # filter out zero weight edges
            adj[row, col] = new_edge_attr
            new_edge_index, new_edge_attr = dense_to_sparse(adj)
            # release gpu memory
            del adj
            torch.cuda.empty_cache()
        else:
            # Learning the possible edge weights between each pair of nodes in the pooled subgraph, relative slower.
            if edge_attr is None:
                induced_edge_attr = torch.ones((induced_edge_index.size(1),), dtype=x.dtype,
                                               device=induced_edge_index.device)
            num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0)
            shift_cum_num_nodes = torch.cat([num_nodes.new_zeros(1), num_nodes.cumsum(dim=0)[:-1]], dim=0)
            cum_num_nodes = num_nodes.cumsum(dim=0)
            adj = torch.zeros((x.size(0), x.size(0)), dtype=torch.float, device=x.device)
            # Construct batch fully connected graph in block diagonal matirx format
            for idx_i, idx_j in zip(shift_cum_num_nodes, cum_num_nodes):
                adj[idx_i:idx_j, idx_i:idx_j] = 1.0
            new_edge_index, _ = dense_to_sparse(adj)
            row, col = new_edge_index

            weights = (torch.cat([x[row], x[col]], dim=1) * self.att).sum(dim=-1)
            weights = F.leaky_relu(weights, self.negative_slop)
            adj[row, col] = weights
            induced_row, induced_col = induced_edge_index

            adj[induced_row, induced_col] += induced_edge_attr * self.lamb
            weights = adj[row, col]
            if self.sparse:
                new_edge_attr = self.sparse_attention(weights, row)
            else:
                new_edge_attr = softmax(weights, row, x.size(0))
            # filter out zero weight edges
            adj[row, col] = new_edge_attr
            new_edge_index, new_edge_attr = dense_to_sparse(adj)
            # release gpu memory
            del adj
            torch.cuda.empty_cache()

        return x, new_edge_index, new_edge_attr, batch


In [10]:



class Model(torch.nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        self.args = args
        self.num_features = args.num_features
        self.nhid = args.nhid
        self.num_classes = args.num_classes
        self.pooling_ratio = args.pooling_ratio
        self.dropout_ratio = args.dropout_ratio
        self.sample = args.sample_neighbor
        self.sparse = args.sparse_attention
        self.sl = args.structure_learning
        self.lamb = args.lamb

        self.conv1 = GCNConv(self.num_features, self.nhid)
        self.conv2 = GCN(self.nhid, self.nhid)
        self.conv3 = GCN(self.nhid, self.nhid)

        self.pool1 = HGPSLPool(self.nhid, self.pooling_ratio, self.sample, self.sparse, self.sl, self.lamb)
        self.pool2 = HGPSLPool(self.nhid, self.pooling_ratio, self.sample, self.sparse, self.sl, self.lamb)

        self.lin1 = torch.nn.Linear(self.nhid * 2, self.nhid)
        self.lin2 = torch.nn.Linear(self.nhid, self.nhid // 2)
        self.lin3 = torch.nn.Linear(self.nhid // 2, self.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        edge_attr = None

        x = F.relu(self.conv1(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch = self.pool1(x, edge_index, edge_attr, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch = self.pool2(x, edge_index, edge_attr, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index, edge_attr))
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(x1) + F.relu(x2) + F.relu(x3)

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=self.dropout_ratio, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.dropout(x, p=self.dropout_ratio, training=self.training)
        x = F.log_softmax(self.lin3(x), dim=-1)

        return x


In [11]:
model = Model(args)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

### Required functions  for training with regularization

In [12]:
def train(model, train_loader, reg_rate):
    loss_train = 0.0
    correct = 0
    model.train()
    optimizer.zero_grad()
    for i, data in enumerate(train_loader):
            #data = data.to(args.device)
            out = model(data)
            loss = F.nll_loss(out, data.y)
            l2_reg = torch.tensor(0.)
            for param in model.parameters():
                l2_reg += torch.norm(param)

            # Combine the loss function with L2 regularization
            loss += (reg_rate * l2_reg)
    
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
            pred = out.max(dim=1)[1]
            correct += pred.eq(data.y).sum().item()
    acc_train = correct / len(train_loader.dataset)
    
    return   loss_train,acc_train  


   


def compute_test(loader):
    model.eval()
    correct = 0.0
    loss_test = 0.0
    for data in loader:
        #data = data.to(args.device)
        out = model(data)
        pred = out.max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
        loss_test += F.nll_loss(out, data.y).item()
    return correct / len(loader.dataset), loss_test


In [13]:
def run(model, train_loader, l2_lambda):
    min_loss = 1e10
    patience_cnt = 0
    val_loss_values = []
    best_epoch = 0
    save_epoch=100
    print(f"the reregularization rate is : {l2_lambda}")
    if l2_lambda==0:
        is_reg=False
    else:
        is_reg=True
   
    #model.train()
    t = time.time()
    for epoch in range(args.epochs):
        #loss_train = 0.0
        #correct = 0
        loss_train,acc_train =train(model, train_loader,l2_lambda)
        
        acc_val, loss_val = compute_test(val_loader)
        
        if epoch % 20 == 0:
            print('Epoch: {:04d}'.format(epoch ), 'loss_train: {:.6f}'.format(loss_train),
                  'acc_train: {:.6f}'.format(acc_train), 'loss_val: {:.6f}'.format(loss_val),
                  'acc_val: {:.6f}'.format(acc_val), 'time: {:.6f}s'.format(time.time() - t))

        val_loss_values.append(loss_val)

        if val_loss_values[-1] < min_loss:
            min_loss = val_loss_values[-1]
            best_epoch = epoch
            patience_cnt = 0
            is_best=True
            save_best(ckpt_dir, epoch, model.state_dict(), args.model_name, acc_val, is_best, is_reg)
        else:
            patience_cnt += 1

        if patience_cnt == args.patience:
            break
  

    print('Optimization Finished! Total time elapsed: {:.6f}'.format(time.time() - t))
    
  
    return best_epoch

## Measurement

#### Setting Regularization rate

In [None]:

### Regularization Rate

l2_lambda = 0.9
# The number of iteration
num_iterations=1
# The number of epochs
args.epochs=5

In [None]:
# This is a dictionary to save all measurements. Aftre measuring, we can compute mean and std of each item.
Eva_final=dict()

# The following are all list of criteria for measurements. 
# We collect all desired datas of each list across iterations. 
# Then, we compute average and std of each list.

#Base model
Base_model_accuracy=[]
T_base_model=[]
Num_parm_base_model=[]
Base_model_size=[]
Base_Energy_Consumption=[]
Base_Cpu_Usage=[]
Base_Memory_Usage=[]

#regularized model
Reg_model_accuracy=[]
T_Reg_model=[]
Num_parm_Reg_model=[]
Reg_model_size=[]
Reg_Energy_Consumption=[]
Reg_Cpu_Usage=[]
Reg_Memory_Usage=[]


# Here is the dictionary to record the list of all measurements
Eva_measure={'base model accuracy':Base_model_accuracy,
            'time inference of base model':T_base_model,
            'number parmameters of base model':Num_parm_base_model,
            'base model size':Base_model_size,
            'energy consumption of base model':Base_Energy_Consumption,
            'cpu usage of base model':Base_Cpu_Usage,
            'memory usage of base model':Base_Memory_Usage,
            'regularized model accuracy': Reg_model_accuracy,
            'time inference of regularized model':T_Reg_model,
            'number parmameters of regularized model':Num_parm_Reg_model,
            'regularized model size':Reg_model_size,
            'energy consumption of regularized model':Reg_Energy_Consumption,
            'cpu usage of regularized model':Reg_Cpu_Usage,
            'memory usage of regularized model':Reg_Memory_Usage
            }

       

### Training and Regularization

In [18]:

for i in range(num_iterations):
        print('________________________________________________')
        print('************************************************')
        print(f"This is iteration :{i+1}")

        Eva=dict() # It is a dictionary to arrange output of this iteration

        print(f'Training and evaluation before regularization ')
        print("Starting training...")
        reg_rate=0
        model = Model(args)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        run(model, train_loader,reg_rate)
        #### load the best model
        base_model_path = os.path.join(ckpt_dir, f'{args.model_name}_best.pth') 
        checkpoint = torch.load(base_model_path)
        model.load_state_dict(checkpoint['net'])      
        recover_model = lambda: model.load_state_dict(checkpoint['net'])

        # Start monitoring CPU and memory usage, model size, number of parametes, time inference and  power consumption
        gc.collect()
        time.sleep(5)  # Add a 5-second delay to stabilize the initial state
        tracemalloc.start()  # Start tracking memory allocations
        snapshot_before = tracemalloc.take_snapshot()#take a snapshot of the current memory state before starting the measurement.

        t0 = time.perf_counter()
        initial_cpu_usage = get_cpu_usage()
        power_usage = estimate_power_usage(initial_cpu_usage)

        base_model_accuracy, test_loss = compute_test(test_loader)

        base_cpu_usage = get_cpu_usage()
        t1 = time.perf_counter()
        t_base_model=t1-t0

        snapshot_after = tracemalloc.take_snapshot()
        tracemalloc.stop()
        top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

        base_total_memory_diff = sum([stat.size_diff for stat in top_stats])
        base_energy_consumption = power_usage * t_base_model
        base_model_size = os.path.getsize(base_model_path)
        num_parm_base_model=get_num_parameters(model, count_nonzero_only=True)

        gc.collect()
        time.sleep(5) 

        print(f'*****Results of base model*********')

        print(f"base model has accuracy on test set={base_model_accuracy:.2f}%")
        print(f"base model has size={base_model_size:.2f} bit")
        print(f"The time inference of base model is ={t_base_model}") 
        print(f"The number of parametrs of base model is:{num_parm_base_model}") 

        print(f"Energy Consumption : {base_energy_consumption:.3f}")
        print(f"total memory usage of base model':{base_total_memory_diff} ")
        print(f"cpu usage of base model':{base_cpu_usage:.3f} %")


        #Update Eva dictionary
        Eva.update({'base model accuracy': base_model_accuracy,
                    'time inference of base model': t_base_model,
                    'number parmameters of base model': num_parm_base_model,
                    'size of base model': base_model_size, 
                    'energy consumption of base model':base_energy_consumption,
                    'total memory usage of base model':base_total_memory_diff,
                    'cpu usage of base model':base_cpu_usage
                   })

        gc.collect()
        time.sleep(5)  

        #### Regularization of the Model
        gc.collect()
        time.sleep(5)   

        print('___________*******************************__________')
        print(f'Regularized Model')
      
        reg_rate=l2_lambda
 
        model = Model(args)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        run(model, train_loader,reg_rate)
      

        reg_pth_name = f'{args.model_name}_reg_best.pth'
        reg_model_path = os.path.join(ckpt_dir, reg_pth_name) 
        checkpoint = torch.load(reg_model_path)
        model.load_state_dict(checkpoint['net'])
        recover_model = lambda: model.load_state_dict(checkpoint['net'])


        # Result of regularization


        gc.collect()
        time.sleep(5)  
        tracemalloc.start() 
        snapshot_before = tracemalloc.take_snapshot()

        t0 = time.perf_counter()
        initial_cpu_usage = get_cpu_usage()
        power_usage = estimate_power_usage(initial_cpu_usage)

        regularized_model_accuracy, test_loss = compute_test(test_loader)

        regularized_cpu_usage = get_cpu_usage()
        t1 = time.perf_counter()
        t_regularized_model=t1-t0

        snapshot_after = tracemalloc.take_snapshot()
        tracemalloc.stop()
        top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

        regularized_total_memory_diff = sum([stat.size_diff for stat in top_stats])
        regularized_energy_consumption = power_usage * t_regularized_model
        regularized_model_size = os.path.getsize( reg_model_path )
        num_parm_regularized_model=get_num_parameters(model, count_nonzero_only=True)

        gc.collect()
        time.sleep(5)  # Add a 5-second delay to stabilize the initial state    



        print('****************Results of regularized model ******************')


        print(f"{l2_lambda} regularized model has accuracy on test set={regularized_model_accuracy:.2f}%")
        print(f"{l2_lambda} regularized model has size={regularized_model_size:.2f} bit")
        print(f"The time inference of {l2_lambda} regularized model is ={t_regularized_model}") 
        print(f"The number of parametrs of {l2_lambda} regularized model is:{num_parm_regularized_model}") 

        print(f"Energy Consumption of {l2_lambda} regularized model: {regularized_energy_consumption:.3f}")
        print(f"total memory usage of {l2_lambda} regularized model':{regularized_total_memory_diff} ")
        print(f"cpu usage of {l2_lambda} regularized model':{regularized_cpu_usage:.3f} %")


        #Update Eva dictionary
        Eva.update({'regularized model accuracy': regularized_model_accuracy,
                    'time inference of regularized model': t_regularized_model,
                    'number parmameters of regularized model': num_parm_regularized_model,
                    'size of regularized model': regularized_model_size, 
                    'energy consumption of regularized model':regularized_energy_consumption,
                    'total memory usage of regularized model':regularized_total_memory_diff,
                    'cpu usage of regularized model':regularized_cpu_usage
                   })

        gc.collect()
        time.sleep(5)   



        Base_model_accuracy.append(Eva['base model accuracy'])
        T_base_model.append(Eva['time inference of base model'])
        Num_parm_base_model.append(int(Eva['number parmameters of base model']))
        Base_model_size.append(int(Eva['size of base model']))
        Base_Energy_Consumption.append(Eva['energy consumption of base model'])
        Base_Cpu_Usage.append(Eva['cpu usage of base model'])
        Base_Memory_Usage.append(Eva['total memory usage of base model'])

        Reg_model_accuracy.append(Eva['regularized model accuracy'])
        T_Reg_model.append(Eva['time inference of regularized model'])
        Num_parm_Reg_model.append(int(Eva['number parmameters of regularized model']))
        Reg_model_size.append(int(Eva['size of regularized model']))
        Reg_Energy_Consumption.append(Eva['energy consumption of regularized model'])
        Reg_Cpu_Usage.append(Eva['cpu usage of regularized model'])
        Reg_Memory_Usage.append(Eva['total memory usage of regularized model'])


________________________________________________
************************************************
This is iteration :1
Training and evaluation before regularization 
Starting training...
the reregularization rate is : 0
Epoch: 0000 loss_train: 1.338034 acc_train: 0.621348 loss_val: 0.646126 acc_val: 0.621622 time: 9.124206s
saving....
saving....
saving....
saving....
saving....
saving....
saving....
saving....
saving....
Epoch: 0020 loss_train: 1.243334 acc_train: 0.666292 loss_val: 0.603657 acc_val: 0.711712 time: 188.603925s
saving....
saving....
saving....
saving....
saving....
saving....
saving....
Epoch: 0040 loss_train: 1.178836 acc_train: 0.706742 loss_val: 0.589234 acc_val: 0.729730 time: 360.740319s
saving....
saving....
Epoch: 0060 loss_train: 1.174197 acc_train: 0.695506 loss_val: 0.579936 acc_val: 0.720721 time: 532.519994s
saving....
Epoch: 0080 loss_train: 1.117458 acc_train: 0.733708 loss_val: 0.597407 acc_val: 0.702703 time: 702.769598s
Optimization Finished! Total time

KeyboardInterrupt: 

In [44]:
Eva_final=dict()
base_model_accuracy_mean = stat.mean(Base_model_accuracy)
base_model_accuracy_std =  stat.stdev(Base_model_accuracy)
Eva_final.update({'Ave of base model accuracy':float(format(base_model_accuracy_mean, '.3f'))})
Eva_final.update({'Std of base model accuracy':float(format(base_model_accuracy_std, '.3f'))})
base_model_accuracy = "{:.3f} ± {:.3f}".format(base_model_accuracy_mean ,base_model_accuracy_std)
print(f"Base model accuracy is:{base_model_accuracy}")

                 
t_base_model_mean =stat.mean(T_base_model)
t_base_model_std =stat.stdev(T_base_model)  
Eva_final.update({'Ave of time inference of base model':float(format(t_base_model_mean, '.3f'))})
Eva_final.update({'Std of time inference of base model':float(format(t_base_model_std, '.3f'))})
t_base_model = "{:.3f} ± {:.3f}".format(t_base_model_mean ,t_base_model_std)
print(f"Time inference of Base model :{t_base_model}")


num_parm_base_model_mean = stat.mean(Num_parm_base_model)
num_parm_base_model_std = stat.stdev(Num_parm_base_model)
Eva_final.update({'Ave of number parmameters of base model':num_parm_base_model_mean})
Eva_final.update({'Std of number parmameters of base model':num_parm_base_model_std})
num_parm_base_model = "{:.3f} ± {:.3f}".format(num_parm_base_model_mean ,num_parm_base_model_std)
print(f"Time number of parameters of Base model :{num_parm_base_model}")

base_model_size_mean = stat.mean(Base_model_size)
base_model_size_std = stat.stdev(Base_model_size)
Eva_final.update({'Ave of base model size':base_model_size_mean})
Eva_final.update({'Std of base model size':base_model_size_std})
base_model_size_model = "{:.3f} ± {:.3f}".format(base_model_size_mean ,base_model_size_std)
print(f"The size of Base model :{base_model_size} bytes")


base_energy_consumption_mean = stat.mean(Base_Energy_Consumption)
base_energy_consumption_std = stat.stdev(Base_Energy_Consumption)
Eva_final.update({'Ave of energy consumption of base model':base_energy_consumption_mean })
Eva_final.update({'Std of energy consumption of base model':base_energy_consumption_std})
base_energy_consumption = "{:.3f} ± {:.3f}".format(base_energy_consumption_mean ,base_energy_consumption_std)
print(f"The energy consumption of Base model :{base_energy_consumption} ")


base_cpu_usage_mean = stat.mean(Base_Cpu_Usage)
base_cpu_usage_std = stat.stdev(Base_Cpu_Usage)
Eva_final.update({'Ave of cpu usage of base model':base_cpu_usage_mean})
Eva_final.update({'Std of cpu usage of base model':base_cpu_usage_std})
base_cpu_usage = "{:.3f} ± {:.3f}".format(base_cpu_usage_mean ,base_cpu_usage_std)
print(f"The CPU usage of Base model :{base_cpu_usage} ")


base_memory_usage_mean = stat.mean(Base_Memory_Usage)
base_memory_usage_std = stat.stdev(Base_Memory_Usage)
Eva_final.update({'Ave of memory usage of base model':base_memory_usage_mean})
Eva_final.update({'Std of memory usage of base model':base_memory_usage_std})
base_memory_usage = "{:.3f} ± {:.3f}".format(base_memory_usage_mean ,base_memory_usage_std)
print(f"The memory usage of Base model :{base_memory_usage} ")

print(100 * "=")
####################################################

reg_model_accuracy_mean =stat.mean(Reg_model_accuracy)
reg_model_accuracy_std = stat.stdev(Reg_model_accuracy)
Eva_final.update({'Ave of regularized model accuracy':float(format(reg_model_accuracy_mean, '.3f'))})
Eva_final.update({'Std of regularized model accuracy':float(format(reg_model_accuracy_std, '.3f'))})
reg_model_accuracy = "{:.3f} ± {:.3f}".format(reg_model_accuracy_mean ,reg_model_accuracy_std)
print(f"Regularized model accuracy is:{reg_model_accuracy}")
                 

t_reg_model_mean = stat.mean(T_Reg_model)
t_reg_model_std =stat.stdev(T_Reg_model)
Eva_final.update({'Ave of time inference of regularized model':float(format(t_reg_model_mean, '.3f'))})
Eva_final.update({'Std of time inference of regularized model':float(format(t_reg_model_std, '.3f'))})
t_reg_model = "{:.3f} ± {:.3f}".format(t_reg_model_mean ,t_reg_model_std)
print(f"Time inference of Regularized model :{t_reg_model}")

num_parm_reg_model_mean = stat.mean(Num_parm_Reg_model)
num_parm_reg_model_std = stat.stdev(Num_parm_Reg_model)
Eva_final.update({'Ave of number parmameters of regularized model':num_parm_reg_model_mean})
Eva_final.update({'Std of number parmameters of regularized model':num_parm_reg_model_std})
num_parm_reg_model = "{:.3f} ± {:.3f}".format(num_parm_reg_model_mean ,num_parm_reg_model_std)
print(f"Time number of parameters of Regularized model :{num_parm_reg_model}")

reg_model_size_mean =stat.mean( Reg_model_size)
reg_model_size_std = stat.stdev(Reg_model_size)
Eva_final.update({'Ave of regularized model size':reg_model_size_mean})
Eva_final.update({'Std of regularized model size':reg_model_size_std })
reg_model_size = "{:.3f} ± {:.3f}".format(reg_model_size_mean ,reg_model_size_std)
print(f"The size of Regularized model :{reg_model_size} bytes")

reg_energy_consumption_mean = stat.mean(Reg_Energy_Consumption)
reg_energy_consumption_std = stat.stdev(Reg_Energy_Consumption)
Eva_final.update({'Ave of energy consumption of regularized model':reg_energy_consumption_mean })
Eva_final.update({'Std of energy consumption of regularized model':reg_energy_consumption_std})
reg_energy_consumption = "{:.3f} ± {:.3f}".format(reg_energy_consumption_mean ,reg_energy_consumption_std)
print(f"The energy consumption of Regularized model :{reg_energy_consumption} ")


reg_cpu_usage_mean = stat.mean(Reg_Cpu_Usage)
reg_cpu_usage_std = stat.stdev(Reg_Cpu_Usage)
Eva_final.update({'Ave of cpu usage of regularized model':reg_cpu_usage_mean})
Eva_final.update({'Std of cpu usage of regularized model':reg_cpu_usage_std})
reg_cpu_usage = "{:.3f} ± {:.3f}".format(reg_cpu_usage_mean ,reg_cpu_usage_std)
print(f"The CPU usage of Regularized model :{reg_cpu_usage} ")


reg_memory_usage_mean = stat.mean(Reg_Memory_Usage)
reg_memory_usage_std = stat.stdev(Reg_Memory_Usage)
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'Ave of memory usage of regularized model':reg_memory_usage_mean})
Eva_final.update({'Std of memory usage of regularized model':reg_memory_usage_std})
reg_memory_usage = "{:.3f} ± {:.3f}".format(reg_memory_usage_mean ,reg_memory_usage_std)
print(f"The memory usage of Regularized model :{reg_memory_usage} ")



#################################

print(100 * "=")
print(f"All measurement about regularization process of rate:{reg_rate} ")   
Eva_final

Base model accuracy is:0.757 ± 0.026
Time inference of Base model :2.631 ± 0.048
Time number of parameters of Base model :75420.000 ± 6.595
The size of Base model :306667 bytes
The energy consumption of Base model :56.884 ± 16.359 
The CPU usage of Base model :21.560 ± 10.104 
The memory usage of Base model :36201.400 ± 336.838 
Regularized model accuracy is:0.736 ± 0.043
Time inference of Regularized model :2.680 ± 0.056
Time number of parameters of Regularized model :75423.000 ± 2.236
The size of Regularized model :306731.000 ± 0.000 bytes
The energy consumption of Regularized model :75.679 ± 39.322 
The CPU usage of Regularized model :24.360 ± 6.916 
The memory usage of Regularized model :35879.800 ± 117.001 
All measurement about regularization process of rate:0.9 


{'Ave of base model accuracy': 0.757,
 'Std of base model accuracy': 0.026,
 'Ave of time inference of base model': 2.631,
 'Std of time inference of base model': 0.048,
 'Ave of number parmameters of base model': 75420,
 'Std of number parmameters of base model': 6.59545297913646,
 'Ave of base model size': 306667,
 'Std of base model size': 0.0,
 'Ave of energy consumption of base model': 56.883636994300176,
 'Std of energy consumption of base model': 16.359177016313676,
 'Ave of cpu usage of base model': 21.56,
 'Std of cpu usage of base model': 10.104108075431498,
 'Ave of memory usage of base model': 36201.4,
 'Std of memory usage of base model': 336.83794323086585,
 'Ave of regularized model accuracy': 0.736,
 'Std of regularized model accuracy': 0.043,
 'Ave of time inference of regularized model': 2.68,
 'Std of time inference of regularized model': 0.056,
 'Ave of number parmameters of regularized model': 75423,
 'Std of number parmameters of regularized model': 2.236067977499

### Recording results in txt file

In [45]:

dataset_name = 'Proteins'
Pruning_Method='Regularization'
max_epoch = 100
result_folder ='pathresult/'
if not os.path.exists(result_folder):
    os.makedirs(result_folder)



file_name = result_folder+Pruning_Method+'_'+'with rate of regularization of'+'_'+str(reg_rate)+'_on_'+dataset_name+'_'+str(max_epoch)+'.txt'

with open(file_name, 'w') as f:
        f.write('%s:%s\n'%('dataset_name', 'Proteins'))
        f.write('%s:%s\n'%('max_epoch', max_epoch))
        f.write('%s:%s\n'%('sparsity', l2_lambda))
        for key, value in Eva_final.items():
            f.write('%s:%s\n'%(key, value))
        for key, value in Eva_measure.items():
            f.write('%s:%s\n' % (key, ','.join(map(str, value))))             
       