Global Pruning Method on graph Prediction  Task of Proteins Dataset
----------------------------

### Libraries

In [5]:
import warnings
warnings.filterwarnings('ignore')

import argparse
import os
import os.path as osp
import shutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
import time
import statistics as stat
import psutil
import itertools
import tracemalloc
import gc


import torch
import torch.nn as nn
import torch.nn.functional as F

from sparse_softmax import Sparsemax
from torch.nn import Parameter
from torch_geometric.data import Data
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.pool.topk_pool import topk, filter_adj
from torch_geometric.utils import softmax, dense_to_sparse, add_remaining_self_loops
from torch_scatter import scatter_add
from torch_sparse import spspmm, coalesce
from torch.utils.data import random_split
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.nn import GCNConv


import torch.nn.utils.prune as prune
from torch.nn.utils.prune import global_unstructured, L1Unstructured


### Device

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Sparsity
- The sparsity is the parameter that is determines the rate of pruning across the layer. It is a value in range(0,0.1,1). This parameter is fixed for this notebook and change for remaining experiment. We determine it before training process. Here is all values of sparsities.

In [7]:
0.1, 0.2, 0.3, 0.4,  0.5, 0.6 , 0.7,  0.8, 0.9
;

''

### Functions for pruning and loading pruned model

In [8]:

def global_pruning_torch(model, parameters_to_prune, sparsity, path):
    # Pruning the model layers globally using PyTorch library
    global_unstructured(parameters_to_prune, pruning_method=L1Unstructured, amount=sparsity)
    
    # Removing pruning reparameterization to save only the final weights
    for module in parameters_to_prune:
           prune.remove(module[0],'weight')
    
    state = state_sparse_model(model, eval_acc=None)
    torch.save(state, path)
    
    return  model

def state_sparse_model(model, eval_acc=None, epoch=None):
    '''
    This funcrion Removes Zeroed Weights
    and saves non-zero weights as state dictionary
    '''
    state_dict = model.state_dict()
    non_zero_state = {
        k: v.to_sparse() if torch.count_nonzero(v) < v.numel() else v
        for k, v in state_dict.items()
    }
   
    non_zero_state_dict = {'net': non_zero_state, 'epoch': epoch, 'acc': eval_acc}
    return non_zero_state_dict
    
import torch

def load_sparse_model(state_path, original_model):
    """
    This function provides a practical approach to loading sparse models into dense environments,
    offering a good balance between memory efficiency and model functionality.
    """
    non_zero_state_dict = torch.load(state_path)
    sparse_model_state = original_model.state_dict()

    for k, v in non_zero_state_dict['net'].items():
        if isinstance(v, torch.Tensor) and v.is_sparse:
            sparse_model_state[k] = v.to_dense()
        else:
            sparse_model_state[k] = v

    original_model.load_state_dict(sparse_model_state, strict=False)
    
    
    return original_model


def load_and_evaluate_pruned_model(args, model_path):
    """
    This function loads the pruned model from disk and evaluates it.
    """
    # Instantiate the model
    model = Model(args)

    # Load the pruned model
    sparse_model = load_sparse_model(model_path, model)
    print("Pruned model loaded.")

    return  sparse_model




### Functions for Mmeasuring criterias

In [9]:
def get_num_parameters(model: nn.Module, count_nonzero_only=False) -> int:
    """
    calculate the total number of parameters of model
    :param count_nonzero_only: only count nonzero weights
    """
    num_counted_elements = 0
    for param in model.parameters():
        if count_nonzero_only:
            num_counted_elements += param.count_nonzero()
        else:
            num_counted_elements += param.numel()
    return num_counted_elements

# Function to get CPU usage
def get_cpu_usage():
    return psutil.cpu_percent(interval=1)



# Function to approximate power consumption (Assume some average power usage per CPU percentage point)
def estimate_power_usage(cpu_usage):
    base_power_usage = 10  # Assumed base power usage in watts
    power_per_percent = 0.5  # Assumed additional watts per CPU usage percent
    return base_power_usage + (power_per_percent * cpu_usage)

# The model size based on the number of parameters
def get_model_size_param(model: nn.Module, data_width=32, count_nonzero_only=False) -> int:
    """
    calculate the model size in bits
    :param data_width: #bits per element
    :param count_nonzero_only: only count nonzero weights
    """
    return get_num_parameters(model, count_nonzero_only) * data_width


def get_model_sparsity(model: nn.Module) -> float:
    ''' 
    The input is layers of pruned model and the output is the sparsity after pruning.
    '''
    Sparsity=dict()
    global_zero=0
    global_nzero=0
    layyers=[]
    spars=[]
    for name, param in model.named_parameters(): 
        if 'weight' in name:
                    zero=float(torch.sum(param == 0))
                    nzero=float(param.nelement())
                    sparsity=  float(zero)/ float(nzero)
                    print( f'Sparsity in {name}: {sparsity:.3f}' )
                    layyers.append(name)
                    spars.append(sparsity)
                    global_zero +=zero
                    global_nzero +=nzero



    Sparsity={key: value for key, value in zip(layyers,spars)}
    global_sparsity= float(global_zero) /float(global_nzero)
    Sparsity.update({'Global sparsity':  global_sparsity})
    print("Global sparsity: {:.3f}".format(global_sparsity))
    return   Sparsity   

### Setting Arguments

In [10]:
import sys
sys.argv=['']
del sys

parser = argparse.ArgumentParser()

parser.add_argument('--seed', type=int, default=777, help='random seed')
parser.add_argument('--batch_size', type=int, default=512, help='batch size')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--weight_decay', type=float, default=0.001, help='weight decay')
parser.add_argument('--nhid', type=int, default=128, help='hidden size')
parser.add_argument('--sample_neighbor', type=bool, default=True, help='whether sample neighbors')
parser.add_argument('--sparse_attention', type=bool, default=True, help='whether use sparse attention')
parser.add_argument('--structure_learning', type=bool, default=True, help='whether perform structure learning')
parser.add_argument('--pooling_ratio', type=float, default=0.5, help='pooling ratio')
parser.add_argument('--dropout_ratio', type=float, default=0.0, help='dropout ratio')
parser.add_argument('--lamb', type=float, default=1.0, help='trade-off parameter')
parser.add_argument('--dataset', type=str, default='PROTEINS', help='DD/PROTEINS/NCI1/NCI109/Mutagenicity/ENZYMES')
parser.add_argument('--device', type=str, default='cpu', help='specify cuda devices')
parser.add_argument('--epochs', type=int, default=2, help='maximum number of epochs')
parser.add_argument('--patience', type=int, default=100, help='patience for early stopping')
parser.add_argument('--model_name', type=str, default='HGPSL', help='-')

args = parser.parse_args()
torch.manual_seed(args.seed)

<torch._C.Generator at 0x1613329a9d0>

### save path for model

In [11]:

if not os.path.isdir('checkpoint'):
    os.mkdir('checkpoint')
if not os.path.isdir(os.path.join('checkpoint', args.dataset)):
    os.mkdir(os.path.join('checkpoint', f"{args.dataset}"))
ckpt_dir = f"./checkpoint/{args.dataset}/"




def save_best(ckpt_dir, epoch, state, model_name, eval_acc, is_best, is_pruned):
    print('saving....')
            
    model.to(device)
    state_save = {
        'net':state,
        'epoch':epoch,
        'acc': eval_acc 
        }
    best_pth_name = f'{args.model_name}_best.pth'
    fine_tuned_pth_name = f'{args.model_name}_fine_tuned_best.pth'
  
    if is_pruned & is_best:
        ckpt_path = os.path.join(ckpt_dir, fine_tuned_pth_name) 
        torch.save(state_save, ckpt_path)
    
     
    if is_pruned== False & is_best:
        ckpt_path = os.path.join(ckpt_dir, best_pth_name)  
        torch.save(state_save, ckpt_path)
                   
            
    model.to(device)

### start loading data

In [12]:
dataset = TUDataset(os.path.join('data', args.dataset), name=args.dataset, use_node_attr=True)

args.num_classes = dataset.num_classes
args.num_features = dataset.num_features

print(args)

Namespace(seed=777, batch_size=512, lr=0.001, weight_decay=0.001, nhid=128, sample_neighbor=True, sparse_attention=True, structure_learning=True, pooling_ratio=0.5, dropout_ratio=0.0, lamb=1.0, dataset='PROTEINS', device='cpu', epochs=2, patience=100, model_name='HGPSL', num_classes=2, num_features=4)


### Preprocessing  dataset

In [13]:
num_training = int(len(dataset) * 0.8)
num_val = int(len(dataset) * 0.1)
num_test = len(dataset) - (num_training + num_val)
training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])

train_loader = DataLoader(training_set, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(validation_set, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

### Model for Training

In [14]:


class TwoHopNeighborhood(object):
    def __call__(self, data):
        edge_index, edge_attr = data.edge_index, data.edge_attr
        n = data.num_nodes

        fill = 1e16
        value = edge_index.new_full((edge_index.size(1),), fill, dtype=torch.float)

        index, value = spspmm(edge_index, value, edge_index, value, n, n, n, True)

        edge_index = torch.cat([edge_index, index], dim=1)
        if edge_attr is None:
            data.edge_index, _ = coalesce(edge_index, None, n, n)
        else:
            value = value.view(-1, *[1 for _ in range(edge_attr.dim() - 1)])
            value = value.expand(-1, *list(edge_attr.size())[1:])
            edge_attr = torch.cat([edge_attr, value], dim=0)
            #, fill_value=fill
            data.edge_index, edge_attr = coalesce(edge_index, edge_attr, n, n, op='min')
            edge_attr[edge_attr >= fill] = 0
            data.edge_attr = edge_attr

        return data

    def __repr__(self):
        return '{}()'.format(self.__class__.__name__)


class GCN(MessagePassing):
    def __init__(self, in_channels, out_channels, cached=False, bias=True, **kwargs):
        super(GCN, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.cached = cached
        self.cached_result = None
        self.cached_num_edges = None

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))
        nn.init.xavier_uniform_(self.weight.data)

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
            nn.init.zeros_(self.bias.data)
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, device=edge_index.device)

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        x = torch.matmul(x, self.weight)

        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}'.format(self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight, x.dtype)
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, self.out_channels)


class NodeInformationScore(MessagePassing):
    def __init__(self, improved=False, cached=False, **kwargs):
        super(NodeInformationScore, self).__init__(aggr='add', **kwargs)

        self.improved = improved
        self.cached = cached
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, device=edge_index.device)

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        edge_index, edge_weight = add_remaining_self_loops(edge_index, edge_weight, 0, num_nodes)

        row, col = edge_index
        expand_deg = torch.zeros((edge_weight.size(0),), dtype=dtype, device=edge_index.device)
        expand_deg[-num_nodes:] = torch.ones((num_nodes,), dtype=dtype, device=edge_index.device)

        return edge_index, expand_deg - deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight):
        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}'.format(self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight, x.dtype)
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        return aggr_out


class HGPSLPool(torch.nn.Module):
    def __init__(self, in_channels, ratio=0.8, sample=False, sparse=False, sl=True, lamb=1.0, negative_slop=0.2):
        super(HGPSLPool, self).__init__()
        self.in_channels = in_channels
        self.ratio = ratio
        self.sample = sample
        self.sparse = sparse
        self.sl = sl
        self.negative_slop = negative_slop
        self.lamb = lamb

        self.att = Parameter(torch.Tensor(1, self.in_channels * 2))
        nn.init.xavier_uniform_(self.att.data)
        self.sparse_attention = Sparsemax()
        self.neighbor_augment = TwoHopNeighborhood()
        self.calc_information_score = NodeInformationScore()

    def forward(self, x, edge_index, edge_attr, batch=None):
        if batch is None:
            batch = edge_index.new_zeros(x.size(0))

        x_information_score = self.calc_information_score(x, edge_index, edge_attr)
        score = torch.sum(torch.abs(x_information_score), dim=1)

        # Graph Pooling
        original_x = x
        perm = topk(score, self.ratio, batch)
        x = x[perm]
        batch = batch[perm]
        induced_edge_index, induced_edge_attr = filter_adj(edge_index, edge_attr, perm, num_nodes=score.size(0))

        # Discard structure learning layer, directly return
        if self.sl is False:
            return x, induced_edge_index, induced_edge_attr, batch

        # Structure Learning
        if self.sample:
            # A fast mode for large graphs.
            # In large graphs, learning the possible edge weights between each pair of nodes is time consuming.
            # To accelerate this process, we sample it's K-Hop neighbors for each node and then learn the
            # edge weights between them.
            k_hop = 3
            if edge_attr is None:
                edge_attr = torch.ones((edge_index.size(1),), dtype=torch.float, device=edge_index.device)

            hop_data = Data(x=original_x, edge_index=edge_index, edge_attr=edge_attr)
            for _ in range(k_hop - 1):
                hop_data = self.neighbor_augment(hop_data)
            hop_edge_index = hop_data.edge_index
            hop_edge_attr = hop_data.edge_attr
            new_edge_index, new_edge_attr = filter_adj(hop_edge_index, hop_edge_attr, perm, num_nodes=score.size(0))

            new_edge_index, new_edge_attr = add_remaining_self_loops(new_edge_index, new_edge_attr, 0, x.size(0))
            row, col = new_edge_index
            weights = (torch.cat([x[row], x[col]], dim=1) * self.att).sum(dim=-1)
            weights = F.leaky_relu(weights, self.negative_slop) + new_edge_attr * self.lamb
            adj = torch.zeros((x.size(0), x.size(0)), dtype=torch.float, device=x.device)
            adj[row, col] = weights
            new_edge_index, weights = dense_to_sparse(adj)
            row, col = new_edge_index
            if self.sparse:
                new_edge_attr = self.sparse_attention(weights, row)
            else:
                new_edge_attr = softmax(weights, row, x.size(0))
            # filter out zero weight edges
            adj[row, col] = new_edge_attr
            new_edge_index, new_edge_attr = dense_to_sparse(adj)
            # release gpu memory
            del adj
            torch.cuda.empty_cache()
        else:
            # Learning the possible edge weights between each pair of nodes in the pooled subgraph, relative slower.
            if edge_attr is None:
                induced_edge_attr = torch.ones((induced_edge_index.size(1),), dtype=x.dtype,
                                               device=induced_edge_index.device)
            num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0)
            shift_cum_num_nodes = torch.cat([num_nodes.new_zeros(1), num_nodes.cumsum(dim=0)[:-1]], dim=0)
            cum_num_nodes = num_nodes.cumsum(dim=0)
            adj = torch.zeros((x.size(0), x.size(0)), dtype=torch.float, device=x.device)
            # Construct batch fully connected graph in block diagonal matirx format
            for idx_i, idx_j in zip(shift_cum_num_nodes, cum_num_nodes):
                adj[idx_i:idx_j, idx_i:idx_j] = 1.0
            new_edge_index, _ = dense_to_sparse(adj)
            row, col = new_edge_index

            weights = (torch.cat([x[row], x[col]], dim=1) * self.att).sum(dim=-1)
            weights = F.leaky_relu(weights, self.negative_slop)
            adj[row, col] = weights
            induced_row, induced_col = induced_edge_index

            adj[induced_row, induced_col] += induced_edge_attr * self.lamb
            weights = adj[row, col]
            if self.sparse:
                new_edge_attr = self.sparse_attention(weights, row)
            else:
                new_edge_attr = softmax(weights, row, x.size(0))
            # filter out zero weight edges
            adj[row, col] = new_edge_attr
            new_edge_index, new_edge_attr = dense_to_sparse(adj)
            # release gpu memory
            del adj
            torch.cuda.empty_cache()

        return x, new_edge_index, new_edge_attr, batch


In [15]:



class Model(torch.nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        self.args = args
        self.num_features = args.num_features
        self.nhid = args.nhid
        self.num_classes = args.num_classes
        self.pooling_ratio = args.pooling_ratio
        self.dropout_ratio = args.dropout_ratio
        self.sample = args.sample_neighbor
        self.sparse = args.sparse_attention
        self.sl = args.structure_learning
        self.lamb = args.lamb

        self.conv1 = GCNConv(self.num_features, self.nhid)
        self.conv2 = GCN(self.nhid, self.nhid)
        self.conv3 = GCN(self.nhid, self.nhid)

        self.pool1 = HGPSLPool(self.nhid, self.pooling_ratio, self.sample, self.sparse, self.sl, self.lamb)
        self.pool2 = HGPSLPool(self.nhid, self.pooling_ratio, self.sample, self.sparse, self.sl, self.lamb)

        self.lin1 = torch.nn.Linear(self.nhid * 2, self.nhid)
        self.lin2 = torch.nn.Linear(self.nhid, self.nhid // 2)
        self.lin3 = torch.nn.Linear(self.nhid // 2, self.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        edge_attr = None

        x = F.relu(self.conv1(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch = self.pool1(x, edge_index, edge_attr, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch = self.pool2(x, edge_index, edge_attr, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index, edge_attr))
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(x1) + F.relu(x2) + F.relu(x3)

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=self.dropout_ratio, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.dropout(x, p=self.dropout_ratio, training=self.training)
        x = F.log_softmax(self.lin3(x), dim=-1)

        return x


In [16]:
model = Model(args)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

### Required functions  for training 

In [17]:
def train(model, train_loader):
    loss_train = 0.0
    correct = 0
    model.train()
    optimizer.zero_grad()
    for i, data in enumerate(train_loader):
            #data = data.to(args.device)
            out = model(data)
            loss = F.nll_loss(out, data.y)
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
            pred = out.max(dim=1)[1]
            correct += pred.eq(data.y).sum().item()
    acc_train = correct / len(train_loader.dataset)
    
    return   loss_train,acc_train      


def compute_test(loader, model):
    model.eval()
    correct = 0.0
    loss_test = 0.0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
        loss_test += F.nll_loss(out, data.y).item()
    return correct / len(loader.dataset), loss_test


In [18]:
def run(model, train_loader, callbacks = None):
    min_loss = 1e10
    patience_cnt = 0
    val_loss_values = []
    best_epoch = 0
    if callbacks == None:
        is_prune=False
    
    else:
        is_prune= True

    #model.train()
    t = time.time()
    for epoch in range(args.epochs):
        #loss_train = 0.0
        #correct = 0
        loss_train,acc_train =train(model, train_loader)
        
        acc_val, loss_val = compute_test(val_loader, model)
        
        if epoch % 10 == 0:
            print('Epoch: {:04d}'.format(epoch ), 'loss_train: {:.6f}'.format(loss_train),
                  'acc_train: {:.6f}'.format(acc_train), 'loss_val: {:.6f}'.format(loss_val),
                  'acc_val: {:.6f}'.format(acc_val), 'time: {:.6f}s'.format(time.time() - t))
        if callbacks is not None:
            for callback in callbacks:
                callback()

        val_loss_values.append(loss_val)
        if val_loss_values[-1] < min_loss:
            min_loss = val_loss_values[-1]
            best_epoch = epoch
            patience_cnt = 0
            is_best=True
            if is_prune:
                non_zero_state_dict=state_sparse_model(model,  acc_val, epoch)
                save_best(ckpt_dir, epoch, non_zero_state_dict, args.model_name, acc_val, is_best, is_prune)
            else:
                save_best(ckpt_dir, epoch, model.state_dict(), args.model_name, acc_val, is_best, is_prune)
        else:
            patience_cnt += 1

        if patience_cnt == args.patience:
            break


     
    

    print('Optimization Finished! Total time elapsed: {:.6f}'.format(time.time() - t))
    

    return best_epoch

In [19]:
### Here is all parameters candidates for pruning
'''

parameters_to_prune = [
    (model.conv1.lin, 'weight'),
    (model.conv2, 'weight'),
    (model.conv3, 'weight'),
     (model.lin1, 'weight'),
     (model.lin2, 'weight'),
     (model.lin3, 'weight'),
]

'''

"\n\nparameters_to_prune = [\n    (model.conv1.lin, 'weight'),\n    (model.conv2, 'weight'),\n    (model.conv3, 'weight'),\n     (model.lin1, 'weight'),\n     (model.lin2, 'weight'),\n     (model.lin3, 'weight'),\n]\n\n"

### Criteria for measurements

In [None]:
### Setting Sparsity
sparsity=0.9


# The number of epochs 
args.epochs=100
# The number of iterations
num_iterations=10

In [None]:

# This is a dictionary to save all measurements. Aftre measuring, we can compute mean and std of each item.
Eva_final=dict()

# The following are all list of criteria for measurements. 
# We collect all desired datas of each list across iterations. 
# Then, we compute average and std of each list.

#Base model
Base_model_accuracy=[]
T_base_model=[]
Num_parm_base_model=[]
Base_model_size=[]
Base_Energy_Consumption=[]
Base_Cpu_Usage=[]
Base_Memory_Usage=[]

#Pruned model
Pruned_model_accuracy=[]
T_pruned_model=[]
Num_parm_pruned_model=[]
Pruned_model_size=[]
Pruned_Energy_Consumption=[]
Pruned_Cpu_Usage=[]
Pruned_Memory_Usage=[]

#Pruned and finetune model
Pruned_finetune_model_accuracy=[]
T_pruned_finetune_model=[]
Num_parm_pruned_finetune_model=[]
Pruned_finetune_model_size=[]
Pruned_finetune_Energy_Consumption=[]
Pruned_finetune_Cpu_Usage=[]
Pruned_finetune_Memory_Usage=[]

#recording sparsities of layers
Spar_model_conv1_lin_w=[]
Spar_model_conv2_w=[]
Spar_model_conv3_w=[]
Spar_model_lin1_w=[]
Spar_model_lin2_w=[]
Spar_model_lin3_w=[]
Global_spar=[]  


# Here is the dictionary to record the list of all measurements
Eva_measure={'base model accuracy':Base_model_accuracy,
            'time inference of base model':T_base_model,
            'number parmameters of base model':Num_parm_base_model,
            'base model size':Base_model_size,
            'energy consumption of base model':Base_Energy_Consumption,
            'cpu usage of base model':Base_Cpu_Usage,
            'memory usage of base model':Base_Memory_Usage,
            'pruned model accuracy': Pruned_model_accuracy,
            'time inference of pruned model':T_pruned_model,
            'number parmameters of pruned model':Num_parm_pruned_model,
            'pruned model size':Pruned_model_size,
            'energy consumption of pruned model':Pruned_Energy_Consumption,
            'cpu usage of pruned model':Pruned_Cpu_Usage,
            'memory usage of pruned model':Pruned_Memory_Usage,
            'pruned finetune model accuracy':Pruned_finetune_model_accuracy,
            'time inference of pruned finetune model':T_pruned_finetune_model,
            'number parmameters of pruned finetune model':Num_parm_pruned_finetune_model,
            'pruned finetune model size':Pruned_finetune_model_size,
            'energy consumption of pruned_finetune model':Pruned_finetune_Energy_Consumption,
            'cpu usage of pruned_finetune model':Pruned_finetune_Cpu_Usage,
            'memory usage of pruned_finetune model':Pruned_finetune_Memory_Usage,
            'Sparsity in conv1.lin.weight':Spar_model_conv1_lin_w,
            'Sparsity in conv2.weight': Spar_model_conv2_w,
            'Sparsity in conv3.weight': Spar_model_conv3_w,
            'Sparsity in lin1.weight':  Spar_model_lin1_w,
            'Sparsity in lin2.weight':  Spar_model_lin2_w,
            'Sparsity in lin3.weight' :  Spar_model_lin3_w,
            'Global sparsity':  Global_spar
            }                         

### Training, Pruning, Finetuning

In [None]:

for i in range(num_iterations):
        print(f'This is iteration {i}')   
        Eva=dict() # It is a dictionary to arrange output of this iteration
        print(f'Training and evaluation before pruning ')
        print("Starting training...")

        model = Model(args)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        best_model =run(model, train_loader)

        #### load the best model
        base_model_path = os.path.join(ckpt_dir, f'{args.model_name}_best.pth') 
        checkpoint = torch.load(base_model_path)
        model.load_state_dict(checkpoint['net'])
        recover_model = lambda: model.model.load_state_dict(checkpoint['net'])

        # Start monitoring CPU and memory usage, model size, number of parametes, time inference and  power consumption
        gc.collect()
        time.sleep(5)  # Add a 5-second delay to stabilize the initial state
        tracemalloc.start()  # Start tracking memory allocations
        snapshot_before = tracemalloc.take_snapshot()#take a snapshot of the current memory state before starting the measurement.

        t0 = time.perf_counter()
        initial_cpu_usage = get_cpu_usage()
        power_usage = estimate_power_usage(initial_cpu_usage)

        base_model_accuracy, test_loss = compute_test(test_loader, model)

        base_cpu_usage = get_cpu_usage()
        t1 = time.perf_counter()
        t_base_model=t1-t0

        snapshot_after = tracemalloc.take_snapshot()
        tracemalloc.stop()
        top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

        base_total_memory_diff = sum([stat.size_diff for stat in top_stats])
        base_energy_consumption = power_usage * t_base_model
        base_model_size = os.path.getsize(base_model_path)
        num_parm_base_model=get_num_parameters(model, count_nonzero_only=True)

        gc.collect()
        time.sleep(5) 

        print(f'*****Results of base model*********')

        print(f"base model has accuracy on test set={base_model_accuracy:.2f}%")
        print(f"base model has size={base_model_size:.2f} byte")
        print(f"The time inference of base model is ={t_base_model}") 
        print(f"The number of parametrs of base model is:{num_parm_base_model}") 

        print(f"Energy Consumption : {base_energy_consumption:.3f}")
        print(f"total memory usage of base model':{base_total_memory_diff} ")
        print(f"cpu usage of base model':{base_cpu_usage:.3f} %")


        #Update Eva dictionary
        Eva.update({'base model accuracy': base_model_accuracy,
                'time inference of base model': t_base_model,
                'number parmameters of base model': num_parm_base_model,
                'size of base model': base_model_size, 
                'energy consumption of base model':base_energy_consumption,
                'total memory usage of base model':base_total_memory_diff,
                'cpu usage of base model':base_cpu_usage
               })

        gc.collect()
        time.sleep(5)  

        print('_________******************************_____________')
        print(f'Pruning the Model')
        
        # Parameters for pruning
        parameters_to_prune = [
        (model.conv1.lin, 'weight'),
        (model.conv2, 'weight'),
        (model.conv3, 'weight'),
         (model.lin1, 'weight'),
         (model.lin2, 'weight'),
         (model.lin3, 'weight'),
        ]
        # Pruned model path
        pth_name = f"pruned_model.pth"   
        ckpt_pruned_path = os.path.join(ckpt_dir, pth_name)  
        # Applying Pruning method
        pruned_model=global_pruning_torch(model,parameters_to_prune, sparsity, ckpt_pruned_path)

        ###Sparsities of layyer
        spar_dict=get_model_sparsity(pruned_model)
        #recording sparsities of layers
        Eva.update(spar_dict)


        print('****************Result of pruning ******************')



        
        pruned_model=load_and_evaluate_pruned_model(args,ckpt_pruned_path)


        gc.collect()
        time.sleep(5)  
        tracemalloc.start()  
        snapshot_before = tracemalloc.take_snapshot()

        t0 = time.perf_counter()
        initial_cpu_usage = get_cpu_usage()
        power_usage = estimate_power_usage(initial_cpu_usage)

        pruned_model_accuracy, test_loss = compute_test(test_loader, pruned_model)

        pruned_cpu_usage = get_cpu_usage()
        t1 = time.perf_counter()
        t_pruned_model=t1-t0

        snapshot_after = tracemalloc.take_snapshot()
        tracemalloc.stop()
        top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

        pruned_total_memory_diff = sum([stat.size_diff for stat in top_stats])
        pruned_energy_consumption = power_usage * t_pruned_model
        pruned_model_size = os.path.getsize(ckpt_pruned_path)
        num_parm_pruned_model=get_num_parameters(pruned_model, count_nonzero_only=True)

        gc.collect()
        time.sleep(5)  

        ###### Report of pruning 
        print(f"pruned model has accuracy on test set={pruned_model_accuracy:.2f}%")
        print(f"pruned model has size={pruned_model_size:.2f} byte")
        print(f"The time inference of pruned model is ={t_pruned_model}") 
        print(f"The number of parametrs of pruned model is:{num_parm_pruned_model}") 

        print(f"Energy Consumption : {pruned_energy_consumption:.3f}")
        print(f"total memory usage of pruned model':{pruned_total_memory_diff} ")
        print(f"cpu usage of pruned model':{pruned_cpu_usage:.3f} %")


        #Update Eva dictionary
        Eva.update({'pruned model accuracy': pruned_model_accuracy,
                'time inference of pruned model': t_pruned_model,
                'number parmameters of pruned model': num_parm_pruned_model,
                'size of pruned model': pruned_model_size, 
                'energy consumption of pruned model':pruned_energy_consumption,
                'total memory usage of pruned model':pruned_total_memory_diff,
                'cpu usage of pruned model':pruned_cpu_usage
               })

        gc.collect()
        time.sleep(5)   

        print('________*******************************_____________')
        print(f'Finetuning Pruned Sparse Model')
        # Pruned model path
        fine_tuned_pth_name=f'{args.model_name}_fine_tuned.pth'
        fine_tuned_model_path = os.path.join(ckpt_dir, fine_tuned_pth_name)
        #Fine tuning pruned model
        run(pruned_model, train_loader, callbacks=[lambda:global_pruning_torch(model,parameters_to_prune, sparsity, fine_tuned_model_path)])

        # load the best fine-tune model
        fine_tuned_model=load_and_evaluate_pruned_model(args,fine_tuned_model_path )

        # Now you can use the sparse model for evaluation or further training
        print('****************Result of fine-tuning of pruned model ******************')


        gc.collect()
        time.sleep(5)  
        tracemalloc.start() 
        snapshot_before = tracemalloc.take_snapshot()

        t0 = time.perf_counter()
        initial_cpu_usage = get_cpu_usage()
        power_usage = estimate_power_usage(initial_cpu_usage)

        pruned_finetune_model_accuracy, test_loss = compute_test(test_loader,fine_tuned_model)


        pruned_finetune_cpu_usage = get_cpu_usage()
        t1 = time.perf_counter()
        t_pruned_finetune_model=t1-t0

        snapshot_after = tracemalloc.take_snapshot()
        tracemalloc.stop()
        top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

        pruned_finetune_total_memory_diff = sum([stat.size_diff for stat in top_stats])
        pruned_finetune_energy_consumption = power_usage * t_pruned_finetune_model
        pruned_finetune_model_size = os.path.getsize( fine_tuned_model_path)
        num_parm_pruned_finetune_model=get_num_parameters(pruned_model, count_nonzero_only=True)

        gc.collect()
        time.sleep(5)  # Add a 5-second delay to stabilize the initial state    

        ###### Report  

        print(f"pruned_finetune model has accuracy on test set={pruned_finetune_model_accuracy:.2f}%")
        print(f"pruned_finetune model has size={pruned_finetune_model_size:.2f} byte ")
        print(f"The time inference of pruned_finetune model is ={t_pruned_finetune_model}") 
        print(f"The number of parametrs of pruned_finetune model is:{num_parm_pruned_finetune_model}") 

        print(f"Energy Consumption of pruned_finetune model: {pruned_finetune_energy_consumption:.3f}")
        print(f"total memory usage of pruned_finetune model':{pruned_finetune_total_memory_diff} ")
        print(f"cpu usage of pruned_finetune model':{pruned_finetune_cpu_usage:.3f} %")


        #Update my Eva dictionary
        Eva.update({'pruned and finetune model accuracy': pruned_finetune_model_accuracy,
                'time inference of pruned and finetune model': t_pruned_finetune_model,
                'number parmameters of pruned and finetune model': num_parm_pruned_finetune_model,
                'size of pruned and finetune model': pruned_finetune_model_size, 
                'energy consumption of pruned and finetune model':pruned_finetune_energy_consumption,
                'total memory usage of pruned and finetune model':pruned_finetune_total_memory_diff,
                'cpu usage of pruned and finetune model':pruned_finetune_cpu_usage
               })

        gc.collect()
        time.sleep(5) 


        Base_model_accuracy.append(Eva['base model accuracy'])
        T_base_model.append(Eva['time inference of base model'])
        Num_parm_base_model.append(int(Eva['number parmameters of base model']))
        Base_model_size.append(int(Eva['size of base model']))
        Base_Energy_Consumption.append(Eva['energy consumption of base model'])
        Base_Cpu_Usage.append(Eva['cpu usage of base model'])
        Base_Memory_Usage.append(Eva['total memory usage of base model'])

        Pruned_model_accuracy.append(Eva['pruned model accuracy'])
        T_pruned_model.append(Eva['time inference of pruned model'])
        Num_parm_pruned_model.append(int(Eva['number parmameters of pruned model']))
        Pruned_model_size.append(int(Eva['size of pruned model']))
        Pruned_Energy_Consumption.append(Eva['energy consumption of pruned model'])
        Pruned_Cpu_Usage.append(Eva['cpu usage of pruned model'])
        Pruned_Memory_Usage.append(Eva['total memory usage of pruned model'])


        Pruned_finetune_model_accuracy.append(Eva['pruned and finetune model accuracy'])
        T_pruned_finetune_model.append(Eva['time inference of pruned and finetune model'])
        Num_parm_pruned_finetune_model.append(int(Eva['number parmameters of pruned and finetune model']))
        Pruned_finetune_model_size.append(int(Eva['size of pruned and finetune model']))
        Pruned_finetune_Energy_Consumption.append(Eva['energy consumption of pruned and finetune model'])
        Pruned_finetune_Cpu_Usage.append(Eva['cpu usage of pruned and finetune model'])
        Pruned_finetune_Memory_Usage.append(Eva['total memory usage of pruned and finetune model'])

        
        

        Spar_model_conv1_lin_w.append(Eva['conv1.lin.weight'])
        Spar_model_conv2_w.append(Eva['conv2.weight'])
        Spar_model_conv3_w.append(Eva['conv3.weight'])
        Spar_model_lin1_w.append(Eva['lin1.weight'])
        Spar_model_lin2_w.append(Eva['lin2.weight'])
        Spar_model_lin3_w.append(Eva['lin3.weight'])
        Global_spar.append(Eva['Global sparsity'])     


### Computing the mean and std 

In [23]:
Eva_final=dict()

base_model_accuracy_mean = stat.mean(Base_model_accuracy)
base_model_accuracy_std =  stat.stdev(Base_model_accuracy)
Eva_final.update({'Ave of base model accuracy':float(format(base_model_accuracy_mean, '.3f'))})
Eva_final.update({'Std of base model accuracy':float(format(base_model_accuracy_std, '.3f'))})
                 
t_base_model_mean =stat.mean(T_base_model)
t_base_model_std =stat.stdev(T_base_model)  
Eva_final.update({'Ave of time inference of base model':float(format(t_base_model_mean, '.3f'))})
Eva_final.update({'Std of time inference of base model':float(format(t_base_model_std, '.3f'))})


num_parm_base_model_mean = stat.mean(Num_parm_base_model)
num_parm_base_model_std = stat.stdev(Num_parm_base_model)
Eva_final.update({'Ave of number parmameters of base model':num_parm_base_model_mean})
Eva_final.update({'Std of number parmameters of base model':num_parm_base_model_std})

base_model_size_mean = stat.mean(Base_model_size)
base_model_size_std = stat.stdev(Base_model_size)
Eva_final.update({'Ave of base model size':base_model_size_mean})
Eva_final.update({'Std of base model size':base_model_size_std})


base_energy_consumption_mean = stat.mean(Base_Energy_Consumption)
base_energy_consumption_std = stat.stdev(Base_Energy_Consumption)
Eva_final.update({'Ave of energy consumption of base model':base_energy_consumption_mean })
Eva_final.update({'Std of energy consumption of base model':base_energy_consumption_std})


base_cpu_usage_mean = stat.mean(Base_Cpu_Usage)
base_cpu_usage_std = stat.stdev(Base_Cpu_Usage)
#desc = "{:.3f} ± {:.3f}".format(acc_mean,acc_std)
Eva_final.update({'Ave of cpu usage of base model':base_cpu_usage_mean})
Eva_final.update({'Std of cpu usage of base model':base_cpu_usage_std})

base_memory_usage_mean = stat.mean(Base_Memory_Usage)
base_memory_usage_std = stat.stdev(Base_Memory_Usage)
Eva_final.update({'Ave of memory usage of base model':base_memory_usage_mean})
Eva_final.update({'Std of memory usage of base model':base_memory_usage_std})




#################################

pruned_model_accuracy_mean =stat.mean(Pruned_model_accuracy)
pruned_model_accuracy_std = stat.stdev(Pruned_model_accuracy)
Eva_final.update({'Ave of pruned model accuracy':float(format(pruned_model_accuracy_mean, '.3f'))})
Eva_final.update({'Std of pruned model accuracy':float(format(pruned_model_accuracy_std, '.3f'))})
                 

t_pruned_model_mean = stat.mean(T_pruned_model)
t_pruned_model_std =stat.stdev(T_pruned_model)
Eva_final.update({'Ave of time inference of pruned model':float(format(t_pruned_model_mean, '.3f'))})
Eva_final.update({'Std of time inference of pruned model':float(format(t_pruned_model_std, '.3f'))})

num_parm_pruned_model_mean = stat.mean(Num_parm_pruned_model)
num_parm_pruned_model_std = stat.stdev(Num_parm_pruned_model)
Eva_final.update({'Ave of number parmameters of pruned model':num_parm_pruned_model_mean})
Eva_final.update({'Std of number parmameters of pruned model':num_parm_pruned_model_std})

pruned_model_size_mean =stat.mean( Pruned_model_size)
pruned_model_size_std = stat.stdev(Pruned_model_size)
Eva_final.update({'Ave of pruned model size':pruned_model_size_mean})
Eva_final.update({'Std of pruned_model_size':pruned_model_size_std })

pruned_energy_consumption_mean = stat.mean(Pruned_Energy_Consumption)
pruned_energy_consumption_std = stat.stdev(Pruned_Energy_Consumption)
Eva_final.update({'Ave of energy consumption of pruned model':pruned_energy_consumption_mean })
Eva_final.update({'Std of energy consumption of pruned model':pruned_energy_consumption_std})


pruned_cpu_usage_mean = stat.mean(Pruned_Cpu_Usage)
pruned_cpu_usage_std = stat.stdev(Pruned_Cpu_Usage)
Eva_final.update({'Ave of cpu usage of pruned model':pruned_cpu_usage_mean})
Eva_final.update({'Std of cpu usage of pruned model':pruned_cpu_usage_std})

pruned_memory_usage_mean = stat.mean(Pruned_Memory_Usage)
pruned_memory_usage_std = stat.stdev(Pruned_Memory_Usage)
Eva_final.update({'Ave of memory usage of pruned model':pruned_memory_usage_mean})
Eva_final.update({'Std of memory usage of pruned model':pruned_memory_usage_std})


#################################
pruned_finetune_model_accuracy_mean =stat.mean(Pruned_finetune_model_accuracy)
pruned_finetune_model_accuracy_std = stat.stdev(Pruned_finetune_model_accuracy)
Eva_final.update({'Ave of pruned finetune model accuracy':float(format(pruned_finetune_model_accuracy_mean, '.3f'))})
Eva_final.update({'Std of pruned finetune model accuracy':float(format(pruned_finetune_model_accuracy_std, '.3f'))})                 

t_pruned_finetune_model_mean =stat.mean(T_pruned_finetune_model)
t_pruned_finetune_model_std =stat.stdev(T_pruned_finetune_model)
Eva_final.update({'Ave of time inference of pruned finetune model':float(format(t_pruned_finetune_model_mean,'.3f'))})
Eva_final.update({'Std of time inference of pruned finetune model':float(format(t_pruned_finetune_model_std,'.3f'))})

num_parm_pruned_finetune_model_mean =stat.mean(Num_parm_pruned_finetune_model)
num_parm_pruned_finetune_model_std = stat.stdev(Num_parm_pruned_finetune_model)
Eva_final.update({'Ave of number parmameters of pruned finetune model':num_parm_pruned_finetune_model_mean})
Eva_final.update({'Std of number parmameters of pruned finetune model':num_parm_pruned_finetune_model_std })

pruned_finetune_model_size_mean = stat.mean(Pruned_finetune_model_size)
pruned_finetune_model_size_std = stat.stdev(Pruned_finetune_model_size)
Eva_final.update({'Ave of pruned finetune model size':pruned_finetune_model_size_mean})
Eva_final.update({'Std of pruned finetune model size':pruned_finetune_model_size_std})


pruned_finetune_energy_consumption_mean = stat.mean(Pruned_finetune_Energy_Consumption)
pruned_finetune_energy_consumption_std = stat.stdev(Pruned_finetune_Energy_Consumption)
Eva_final.update({'Ave of energy consumption of pruned_finetune model':pruned_finetune_energy_consumption_mean })
Eva_final.update({'Std of energy consumption of pruned_finetune model':pruned_finetune_energy_consumption_std})


pruned_finetune_cpu_usage_mean = stat.mean(Pruned_finetune_Cpu_Usage)
pruned_finetune_cpu_usage_std = stat.stdev(Pruned_finetune_Cpu_Usage)
Eva_final.update({'Ave of cpu usage of pruned_finetune model':pruned_finetune_cpu_usage_mean})
Eva_final.update({'Std of cpu usage of pruned_finetune model':pruned_finetune_cpu_usage_std})

pruned_finetune_memory_usage_mean = stat.mean(Pruned_finetune_Memory_Usage)
pruned_finetune_memory_usage_std = stat.stdev(Pruned_finetune_Memory_Usage)
Eva_final.update({'Ave of memory usage of pruned_finetune model':pruned_finetune_memory_usage_mean})
Eva_final.update({'Std of memory usage of pruned_finetune model':pruned_finetune_memory_usage_std})

sparsity_conv1_lin_w_mean = stat.mean(Spar_model_conv1_lin_w)
sparsity_conv1_lin_w_std = stat.stdev(Spar_model_conv1_lin_w)
Eva_final.update({'Sparsity in conv1.lin.weight':float(format(sparsity_conv1_lin_w_mean,'.3f'))})
Eva_final.update({'Std of Sparsity in conv1.lin.weight':float(format(sparsity_conv1_lin_w_std,'.3f'))})

sparsity_conv2_w_mean = stat.mean(Spar_model_conv2_w)
sparsity_conv2_w_std = stat.stdev(Spar_model_conv2_w)
Eva_final.update({'Sparsity in conv2.weight':float(format(sparsity_conv2_w_mean,'.3f'))})
Eva_final.update({'Std of Sparsity in conv2.weight':float(format(sparsity_conv2_w_std,'.3f'))})

sparsity_conv3_w_mean = stat.mean(Spar_model_conv3_w)
sparsity_conv3_w_std = stat.stdev(Spar_model_conv3_w)
Eva_final.update({'Sparsity in conv3.weight':float(format(sparsity_conv3_w_mean,'.3f'))})
Eva_final.update({'Std of Sparsity in conv3.weight':float(format(sparsity_conv3_w_std,'.3f'))})

sparsity_model_lin1_w_mean = stat.mean(Spar_model_lin1_w)
sparsity_model_lin1_w_std = stat.stdev(Spar_model_lin1_w)
Eva_final.update({'Sparsity in lin1.weight':float(format(sparsity_model_lin1_w_mean,'.3f'))})
Eva_final.update({'Std of Sparsity in lin1.weight':float(format(sparsity_model_lin1_w_std,'.3f'))})

sparsity_model_lin2_w_mean = stat.mean(Spar_model_lin2_w)
sparsity_model_lin2_w_std = stat.stdev(Spar_model_lin2_w)
Eva_final.update({'Sparsity in lin2.weight':float(format(sparsity_model_lin2_w_mean,'.3f'))})
Eva_final.update({'Std of Sparsity in lin2.weight':float(format(sparsity_model_lin2_w_std,'.3f'))})

sparsity_model_lin3_w_mean = stat.mean(Spar_model_lin3_w)
sparsity_model_lin3_w_std = stat.stdev(Spar_model_lin3_w)
Eva_final.update({'Sparsity in lin3.weight':float(format(sparsity_model_lin3_w_mean,'.3f'))})
Eva_final.update({'Std of Sparsity in lin3.weight':float(format(sparsity_model_lin3_w_std,'.3f'))})

Global_sparsity_mean = stat.mean(Global_spar)
Global_sparsity_std = stat.stdev(Global_spar)
Eva_final.update({'Global sparsity':float(format(Global_sparsity_mean,'.3f'))})
Eva_final.update({'Std of Global sparsity':float(format(Global_sparsity_std,'.3f'))})

#################################


print(f"All measurement about pruning process of sparsity:{sparsity*100}% ")   
Eva_final

All measurement about pruning process of sparsity:90.0% 


{'Ave of base model accuracy': 0.772,
 'Std of base model accuracy': 0.019,
 'Ave of time inference of base model': 2.64,
 'Std of time inference of base model': 0.06,
 'Ave of number parmameters of base model': 75427,
 'Std of number parmameters of base model': 2.8284271247461903,
 'Ave of base model size': 306667,
 'Std of base model size': 0.0,
 'Ave of energy consumption of base model': 34.55097634207341,
 'Std of energy consumption of base model': 3.6774241057955277,
 'Ave of cpu usage of base model': 6.75,
 'Std of cpu usage of base model': 0.21213203435596475,
 'Ave of memory usage of base model': 84825,
 'Std of memory usage of base model': 69188.9843255413,
 'Ave of pruned model accuracy': 0.696,
 'Std of pruned model accuracy': 0.0,
 'Ave of time inference of pruned model': 2.62,
 'Std of time inference of pruned model': 0.022,
 'Ave of number parmameters of pruned model': 8496,
 'Std of number parmameters of pruned model': 2.8284271247461903,
 'Ave of pruned model size': 160

### Recording results on txt file

In [24]:

### The sparsity changes across range(0, 1, .01)
dataset_name = 'Proteins'
Pruning_Method='Global_Pruning'
max_epoch = 100
resume = True
result_folder ='pathresult/'
if not os.path.exists(result_folder):
    os.makedirs(result_folder)



file_name = result_folder+Pruning_Method+'_'+'with sparsity of'+'_'+str(sparsity)+'_on_'+dataset_name+'_'+str(max_epoch)+'.txt'

with open(file_name, 'w') as f:
        f.write('%s:%s\n'%('dataset_name', 'Proteins'))
        f.write('%s:%s\n'%('max_epoch', max_epoch))
        f.write('%s:%s\n'%('sparsity', sparsity))
        for key, value in Eva_final.items():
            f.write('%s:%s\n'%(key, value))
       
        for key, value in Eva_measure.items():
            f.write('%s:%s\n' % (key, ','.join(map(str, value))))   
       