In [2]:
#from google.colab import drive
#drive.mount('/content/drive')

In [3]:
!pip install captum
!pip install fvcore

Collecting captum
  Downloading captum-0.6.0-py3-none-any.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: captum
Successfully installed captum-0.6.0
Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.7->fvcore)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Building wheels for collect

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from captum.attr import visualization as viz
from fvcore.nn import FlopCountAnalysis, flop_count_table
from captum.attr import IntegratedGradients, LayerConductance, DeepLift, LayerDeepLift,LayerIntegratedGradients
import itertools
import pandas as pd
import os
import random
import numpy as np
from itertools import product

In [5]:
# Names of the two Captum layer-attribution classes imported above.
method_names = ["LayerIntegratedGradients", "LayerDeepLift"]
# Number of training images kept when the training indices are sampled.
train_size = 25000
# Presumably (batch, channels, height, width) for one CIFAR-10 image -- TODO confirm where this is consumed.
INPUT_SHAPE= (1, 3, 32, 32)
# Device the models are created on for training: first CUDA GPU if available, else CPU.
Training_Device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Device passed to the attribution functions; fixed to CPU here.
Feature_Attribution_Device = torch.device("cpu")

# Model structure definition

In [6]:
class InceptionBlock(nn.Module):
    """Inception-style block with four parallel branches joined on the channel axis.

    Branches (all keep the spatial size of the input):
      * ``conv1x1``          -- 1x1 convolution
      * ``conv1x1_3x3``      -- 1x1 reduction followed by a 3x3 convolution (padding 1)
      * ``conv1x1_5x5``      -- 1x1 reduction followed by a 5x5 convolution (padding 2)
      * ``pool3x3_conv1x1``  -- 3x3 max-pool (stride 1, padding 1) followed by a 1x1 convolution

    Every convolution is followed by BatchNorm2d and an in-place ReLU.
    The output has ``n1x1 + n3x3 + n5x5 + pool_planes`` channels.

    :param in_channels: Channels of the incoming feature map.
    :param n1x1: Output channels of the 1x1 branch.
    :param n3x3red: Channels after the 1x1 reduction in the 3x3 branch.
    :param n3x3: Output channels of the 3x3 branch.
    :param n5x5red: Channels after the 1x1 reduction in the 5x5 branch.
    :param n5x5: Output channels of the 5x5 branch.
    :param pool_planes: Output channels of the pooling branch.
    """

    def __init__(self, in_channels, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super().__init__()

        def conv_bn_relu(c_in, c_out, kernel_size, padding=0):
            # One Conv -> BatchNorm -> ReLU triple, returned as a list so that
            # several triples can be spliced into a single flat nn.Sequential.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=kernel_size, padding=padding),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]

        # Branch 1: plain 1x1 convolution.
        self.conv1x1 = nn.Sequential(*conv_bn_relu(in_channels, n1x1, 1))

        # Branch 2: 1x1 reduction, then 3x3 convolution.
        self.conv1x1_3x3 = nn.Sequential(
            *conv_bn_relu(in_channels, n3x3red, 1),
            *conv_bn_relu(n3x3red, n3x3, 3, padding=1),
        )

        # Branch 3: 1x1 reduction, then 5x5 convolution.
        self.conv1x1_5x5 = nn.Sequential(
            *conv_bn_relu(in_channels, n5x5red, 1),
            *conv_bn_relu(n5x5red, n5x5, 5, padding=2),
        )

        # Branch 4: size-preserving max-pool, then 1x1 convolution.
        self.pool3x3_conv1x1 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            *conv_bn_relu(in_channels, pool_planes, 1),
        )

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate the results along dim 1."""
        branches = (
            self.conv1x1,
            self.conv1x1_3x3,
            self.conv1x1_5x5,
            self.pool3x3_conv1x1,
        )
        return torch.cat([branch(x) for branch in branches], 1)

In [7]:
class Inception(nn.Module):
    """Inception-style classifier built from a conv stem and seven ``InceptionBlock`` modules.

    Layout: 3x3 conv stem (192 channels) -> blocks 3a, 3b -> stride-2 max-pool ->
    blocks 4a..4e -> global average pool -> dropout (p=0.2) -> linear classifier.

    :param in_channels: Channels of the input images.
    :param num_classes: Number of output logits.
    """

    # Constructor arguments for each InceptionBlock, in registration order.
    # Each block's in_channels equals the summed branch outputs of the previous block.
    _STAGE3_SPECS = (
        ("3a", (192,  64,  96, 128, 16, 32, 32)),
        ("3b", (256, 128, 128, 192, 32, 96, 64)),
    )
    _STAGE4_SPECS = (
        ("4a", (480, 192,  96, 208, 16,  48,  64)),
        ("4b", (512, 160, 112, 224, 24,  64,  64)),
        ("4c", (512, 128, 128, 256, 24,  64,  64)),
        ("4d", (512, 112, 144, 288, 32,  64,  64)),
        ("4e", (528, 256, 160, 320, 32, 128, 128)),
    )

    def __init__(self, in_channels, num_classes):
        super().__init__()

        # Stem: a single 3x3 convolution that lifts the input to 192 channels.
        self.initial_conv_layers = nn.Sequential(
            nn.Conv2d(in_channels, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        # Stage 3 blocks (registered as inception_block_3a / inception_block_3b).
        for tag, spec in self._STAGE3_SPECS:
            setattr(self, f"inception_block_{tag}", InceptionBlock(*spec))

        # Down-sampling between stage 3 and stage 4.
        self.inter_stage_pooling = nn.MaxPool2d(3, stride=2, padding=1)

        # Stage 4 blocks (registered as inception_block_4a .. inception_block_4e).
        for tag, spec in self._STAGE4_SPECS:
            setattr(self, f"inception_block_{tag}", InceptionBlock(*spec))

        # Classification head: block 4e emits 256 + 320 + 128 + 128 = 832 channels.
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout_layer = nn.Dropout(0.2)
        self.final_fc_layer = nn.Linear(832, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.initial_conv_layers(x)

        for tag, _ in self._STAGE3_SPECS:
            x = getattr(self, f"inception_block_{tag}")(x)

        x = self.inter_stage_pooling(x)

        for tag, _ in self._STAGE4_SPECS:
            x = getattr(self, f"inception_block_{tag}")(x)

        pooled = self.global_avg_pool(x)
        # Collapse the trailing 1x1 spatial dims so the linear layer sees (batch, 832).
        features = pooled.view(pooled.size(0), -1)
        features = self.dropout_layer(features)

        return self.final_fc_layer(features)


# FLOP Count

In [8]:
def count_flops(model, input_size):
    """Build a per-module FLOP / parameter table for ``model``.

    A random tensor of shape ``input_size`` is created on the same device as the
    model's parameters (CPU if the model has none) and handed to fvcore's
    ``FlopCountAnalysis``.

    :param model: The ``nn.Module`` to analyse.
    :param input_size: Shape of the dummy input, e.g. ``(1, 3, 32, 32)``.
    :return: The table string produced by ``flop_count_table``.
    """
    # Follow the model's device so the analysis also works after model.to("cuda").
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    inputs = torch.randn(input_size, device=device)
    flops = FlopCountAnalysis(model, inputs)
    return flop_count_table(flops)

# Instantiate the network for 3-channel inputs and 10 classes.
inception_model = Inception(3, 10)
# Reuse the notebook-wide INPUT_SHAPE constant, (1, 3, 32, 32), instead of
# repeating the literal: (batch size, channels, height, width) for CIFAR-10.
input_size = INPUT_SHAPE
flops_table = count_flops(inception_model, input_size)
print(flops_table)

| module                                  | #parameters or shape   | #flops     |
|:----------------------------------------|:-----------------------|:-----------|
| model                                   | 3.385M                 | 1.299G     |
|  initial_conv_layers                    |  5.76K                 |  6.291M    |
|   initial_conv_layers.0                 |   5.376K               |   5.308M   |
|    initial_conv_layers.0.weight         |    (192, 3, 3, 3)      |            |
|    initial_conv_layers.0.bias           |    (192,)              |            |
|   initial_conv_layers.1                 |   0.384K               |   0.983M   |
|    initial_conv_layers.1.weight         |    (192,)              |            |
|    initial_conv_layers.1.bias           |    (192,)              |            |
|  inception_block_3a                     |  0.164M                |  0.169G    |
|   inception_block_3a.conv1x1            |   12.48K               |   12.911M  |
|    inception_b

# Train and attribution functions

Training and evaluation functions

In [9]:
def train(epoch, model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader`` and print a summary line.

    :param epoch: Epoch number, used only in the printed message.
    :param model: Network to optimise; it is switched to training mode.
    :param train_loader: Iterable yielding ``(data, target)`` batches.
    :param optimizer: Optimiser whose ``zero_grad`` / ``step`` are called once per batch.
    :param criterion: Callable ``criterion(output, target)`` returning the batch loss.
    :param device: Device each batch is moved to before the forward pass.
    """
    model.train()

    running_loss = 0
    n_correct = 0
    n_seen = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Bookkeeping for the epoch summary.
        running_loss += batch_loss.item()
        predictions = logits.max(1)[1]
        n_seen += labels.size(0)
        n_correct += (predictions == labels).sum().item()

    train_accuracy = 100. * n_correct / n_seen
    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch}: Train Loss = {mean_loss:.4f}, Train Accuracy = {train_accuracy:.2f}%")


def test(epoch, model, test_loader, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            test_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

    test_accuracy = 100. * correct / total
    print(f"Epoch {epoch}: Test Loss = {test_loss / len(test_loader):.4f}, Test Accuracy = {test_accuracy:.2f}%")


Functions for calculating attributions

In [10]:
def print_ig(test_loader, model, device):
    """Print and collect summed LayerIntegratedGradients attributions per top-level child.

    NOTE: a later cell defines ``print_ig`` again (restricted to Conv2d layers);
    when the notebook is run top to bottom that later definition replaces this one.

    :param test_loader: Loader from which a single ``(inputs, targets)`` batch is drawn.
    :param model: Network to explain; it is moved to ``device`` and put in eval mode.
    :param device: Device on which attributions are computed.
    :return: Dict mapping each child module name to the sum of its attributions.
    """
    # Move the model to the device (CPU or CUDA) and switch to evaluation mode
    model.to(device)
    model.eval()

    # Get a single batch from the test loader; inputs AND targets must live on
    # the same device as the model output that the targets index into.
    inputs, target_class = next(iter(test_loader))
    inputs = inputs.to(device)
    target_class = target_class.to(device)

    ig_attributions = {}

    # Iterate through each direct child module and compute its attributions
    for layer_name, layer_module in model.named_children():
        layer_cond = LayerIntegratedGradients(model, layer_module)
        attr = layer_cond.attribute(inputs, target=target_class)

        # Reduce once and reuse the value for both printing and storage.
        attr_sum = attr.cpu().detach().numpy().sum()
        print(f'Layer: {layer_name}')
        print(f'Attribution: {attr_sum}')
        ig_attributions[layer_name] = attr_sum
        del attr
    return ig_attributions

In [11]:
import torch.nn as nn
from captum.attr import LayerIntegratedGradients

def print_ig(test_loader, model, device):
    """Print and collect summed LayerIntegratedGradients attributions for Conv2d layers.

    One batch is drawn from ``test_loader``. For every ``nn.Conv2d`` found by
    ``model.named_modules()`` that directly owns at least one trainable parameter,
    the layer attributions w.r.t. the batch targets are computed, summed and printed.
    A layer whose attribution raises is reported and skipped.

    :param test_loader: Loader from which a single ``(inputs, targets)`` batch is drawn.
    :param model: Network to explain; it is moved to ``device`` and put in eval mode.
    :param device: Device on which attributions are computed.
    :return: Dict mapping layer name to the sum of its attributions.
    """
    # Place the model on the requested device and disable training-only behaviour.
    model.to(device)
    model.eval()

    # A single batch is enough for this summary.
    first_batch = next(iter(test_loader))
    inputs, target_class = first_batch
    inputs = inputs.to(device)

    ig_attributions = {}

    for layer_name, layer_module in model.named_modules():
        # Only convolution layers are of interest ...
        if not isinstance(layer_module, nn.Conv2d):
            continue
        # ... and only those that directly own a trainable parameter.
        own_params = layer_module.parameters(recurse=False)
        if not any(p.requires_grad for p in own_params):
            continue

        lig = LayerIntegratedGradients(model, layer_module)

        try:
            attributions = lig.attribute(inputs, target=target_class.to(device))
        except Exception as e:
            # Best effort: report the failing layer and move on to the next one.
            print(f"Error computing attributions for layer {layer_name}: {e}")
            continue

        attribution_sum = attributions.cpu().detach().numpy().sum()

        print(f'Layer: {layer_name}')
        print(f'Attribution: {attribution_sum}')
        ig_attributions[layer_name] = attribution_sum

        # Drop the references so the tensors can be reclaimed before the next layer.
        del attributions, lig

    return ig_attributions

# Usage example:
# ig_attributions = print_ig(test_loader, model, device)


In [12]:
def print_deeplift(test_loader, model, device):
    """Print and collect summed LayerDeepLift attributions for Conv2d layers.

    One batch is drawn from ``test_loader``. For every ``nn.Conv2d`` found by
    ``model.named_modules()`` that directly owns at least one trainable parameter,
    the layer attributions w.r.t. the batch targets are computed, summed and printed.
    A layer whose attribution raises is reported and skipped.

    :param test_loader: Loader from which a single ``(inputs, targets)`` batch is drawn.
    :param model: Network to explain; it is moved to ``device`` and put in eval mode.
    :param device: Device on which attributions are computed.
    :return: Dict mapping layer name to the sum of its attributions.
    """
    # Move the model to the specified device and set it to evaluation mode
    model.to(device).eval()

    # Get a batch of data from the loader
    inputs, target_class = next(iter(test_loader))
    inputs, target_class = inputs.to(device), target_class.to(device)

    dl_attributions = {}

    # Now compute the attributions for Conv2d layers
    for layer_name, layer_module in model.named_modules():
        # Focus on Conv2d layers that directly own learnable parameters
        if isinstance(layer_module, nn.Conv2d) and any(p.requires_grad for p in layer_module.parameters(recurse=False)):
            # Initialize LayerDeepLift with the current layer
            ldl = LayerDeepLift(model, layer_module)

            # Compute the attributions for the current layer
            try:
                attributions_ldl = ldl.attribute(inputs, target=target_class)
            except Exception as e:
                print(f"Error computing attributions for layer {layer_name}: {e}")
                continue

            # .detach() is the supported way to leave the autograd graph
            # (.data is a legacy accessor); reduce once and reuse the value.
            attribution_sum = attributions_ldl.cpu().detach().numpy().sum()

            # Print out the attributions for the current layer
            print(f'Layer: {layer_name}')
            print(attribution_sum)

            dl_attributions[layer_name] = attribution_sum

            del attributions_ldl, ldl

    return dl_attributions

# Usage example:
# dl_attributions = print_deeplift(test_loader, model, device)


# Possible Hyperparameter grid search creation

In [13]:
def generate_hyperparameter_combinations(hyperparams):
    """
    Generate every combination of the given hyperparameter choices.

    :param hyperparams: A dictionary where keys are the names of hyperparameters,
                        and values are iterables of possible choices for each hyperparameter.
    :return: A list of dictionaries, each representing a unique combination of hyperparameters,
             ordered as ``itertools.product`` yields them (last key varies fastest).
             An empty ``hyperparams`` yields ``[{}]`` (the single empty combination);
             a hyperparameter with no choices yields ``[]``.
    """
    # Freeze the key order once so names and values stay aligned.
    names = list(hyperparams)

    # product() with no arguments yields one empty tuple, so an empty dict is
    # handled naturally instead of failing while unpacking zip(*items).
    return [
        dict(zip(names, choice))
        for choice in itertools.product(*(hyperparams[name] for name in names))
    ]

# Example Usage
# Three hyperparameters with three choices each -> 3 * 3 * 3 = 27 combinations.
# Note: `hyperparams` is a module-level name that a later cell reads as well.
hyperparams = {
    'learning_rate': [0.001, 0.01, 0.1],
    'batch_size': [16, 32, 64],
    'num_epochs': [10, 20, 30]
}

# Expand the grid and print one combination (a dict) per line.
combinations = generate_hyperparameter_combinations(hyperparams)
for combo in combinations:
    print(combo)

{'learning_rate': 0.001, 'batch_size': 16, 'num_epochs': 10}
{'learning_rate': 0.001, 'batch_size': 16, 'num_epochs': 20}
{'learning_rate': 0.001, 'batch_size': 16, 'num_epochs': 30}
{'learning_rate': 0.001, 'batch_size': 32, 'num_epochs': 10}
{'learning_rate': 0.001, 'batch_size': 32, 'num_epochs': 20}
{'learning_rate': 0.001, 'batch_size': 32, 'num_epochs': 30}
{'learning_rate': 0.001, 'batch_size': 64, 'num_epochs': 10}
{'learning_rate': 0.001, 'batch_size': 64, 'num_epochs': 20}
{'learning_rate': 0.001, 'batch_size': 64, 'num_epochs': 30}
{'learning_rate': 0.01, 'batch_size': 16, 'num_epochs': 10}
{'learning_rate': 0.01, 'batch_size': 16, 'num_epochs': 20}
{'learning_rate': 0.01, 'batch_size': 16, 'num_epochs': 30}
{'learning_rate': 0.01, 'batch_size': 32, 'num_epochs': 10}
{'learning_rate': 0.01, 'batch_size': 32, 'num_epochs': 20}
{'learning_rate': 0.01, 'batch_size': 32, 'num_epochs': 30}
{'learning_rate': 0.01, 'batch_size': 64, 'num_epochs': 10}
{'learning_rate': 0.01, 'batch_

# Functions for saving attribution

In [12]:
import pandas as pd
import os
import random
import torch
import numpy as np
from itertools import product

def run_experiments_and_save(hyperparams_combinations, attribution_function, csv_file):
    """
    Run experiments for each combination of hyperparameters, get feature layer attributions for DeepLift and Integrated Gradients,
    save the results to a CSV file, and skip any combinations that have already been run.

    Each combination is evaluated for run indices 0..9 and for both methods. The run
    index is also used as the seed for ``random``, NumPy and PyTorch before the
    attribution function is called.

    Results finished before an exception or interruption are still written to
    ``csv_file`` so that a later call can resume where this one stopped.

    :param hyperparams_combinations: List of dictionaries with hyperparameter combinations.
    :param attribution_function: Callable ``f(combo, run_index, method)`` returning a dict of
                                 values to store alongside the hyperparameters.
    :param csv_file: Path to the CSV file for saving results.
    """

    # Check if the CSV file exists and load existing data
    if os.path.exists(csv_file):
        try:
            existing_data = pd.read_csv(csv_file)
        except pd.errors.EmptyDataError:
            # A zero-content file left by an earlier run: treat it as "no results yet".
            existing_data = pd.DataFrame()
    else:
        existing_data = pd.DataFrame()

    new_records = []  # full result rows produced by this call
    new_keys = []     # identifying (hyperparameters + method + run) dicts of those rows

    def _already_processed(key):
        """Return True if ``key`` matches a row loaded from disk or produced in this call."""
        if key in new_keys:
            return True
        # If the file has no rows, or lacks one of the key columns, nothing can match.
        if existing_data.empty or not set(key).issubset(existing_data.columns):
            return False
        mask = pd.Series(True, index=existing_data.index)
        for column, value in key.items():
            mask &= existing_data[column] == value
        return bool(mask.any())

    finished = False
    try:
        for combo in hyperparams_combinations:
            for i in range(10):  # For each run index
                for method in ['deeplift', 'integrated_gradients']:  # For each method
                    # Identify this experiment by its hyperparameters, method and run index
                    combo_check = combo.copy()
                    combo_check['method'] = method
                    combo_check['run'] = i

                    if _already_processed(combo_check):
                        continue  # Skip if combination is already processed

                    # Set seed for reproducibility
                    random.seed(i)
                    np.random.seed(i)
                    torch.manual_seed(i)
                    if torch.cuda.is_available():
                        torch.cuda.manual_seed_all(i)

                    # Compute attributions
                    attr = attribution_function(combo, i, method)

                    # Prepare data for saving
                    combo_results = combo.copy()
                    combo_results.update(attr)
                    combo_results['method'] = method
                    combo_results['run'] = i

                    # Collect rows in a list; DataFrame.append no longer exists in
                    # pandas 2.x and growing a frame row by row is quadratic anyway.
                    new_records.append(combo_results)
                    new_keys.append(combo_check)
        finished = True
    finally:
        if new_records:
            new_data = pd.DataFrame(new_records)
            if existing_data.empty:
                existing_data = new_data
            else:
                existing_data = pd.concat([existing_data, new_data], ignore_index=True)

        # Save the data to CSV. After a failure, only write when there is something
        # new to keep, so an error on the very first run does not create an empty file.
        if finished or new_records:
            existing_data.to_csv(csv_file, index=False)

# Example Usage
# Expand the grid with the notebook's own helper instead of repeating the
# itertools.product expression inline.
hyperparams_combinations = generate_hyperparameter_combinations(hyperparams)

def mock_attribution_function(combo, seed, method):
    """Placeholder for a real attribution function; ignores its arguments.

    :return: A fixed example dict of attribution values.
    """
    return {'con1': 0.9, 'con2': 0.1, 'con3': 0}  # Example attribution

run_experiments_and_save(hyperparams_combinations, mock_attribution_function, 'experiment_results.csv')


# Automated Experiments

In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, SequentialSampler

# Devices used by run_experiments below. Prefer the GPU but fall back to the CPU
# so the cell also runs on machines without CUDA.
# Note: this rebinds the Training_Device / Feature_Attribution_Device names set
# in the configuration cell near the top of the notebook.
Training_Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Feature_Attribution_Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def run_experiments(num_epochs, num_classes, in_channels, num_experiments, hyperparams):
    """
    Train several fresh Inception models on CIFAR-10 and compute layer attributions for each.

    :param num_epochs: Number of epochs for training.
    :param num_classes: Number of classes in the classification task.
    :param in_channels: Number of input channels (e.g., 3 for RGB images).
    :param num_experiments: Number of experiments (independently initialised models) to run.
    :param hyperparams: Dictionary with the keys ``'batch_size'``, ``'initial_lr'``,
                        ``'optimizer'`` (optimizer class), ``'criterion'`` (loss callable) and
                        ``'trainloader_sampler'`` (callable mapping a dataset to a sampler).
    :return: A list with one dict per experiment:
             ``{'experiment': <1-based index>, 'integrated_gradients': {...}, 'deeplift': {...}}``,
             where the inner dicts are the values returned by ``print_ig`` / ``print_deeplift``.
    """
    transform = transforms.Compose([transforms.ToTensor()])

    train_subset = datasets.CIFAR10('.', train=True, download=True, transform=transform)
    test_subset = datasets.CIFAR10('.', train=False, download=True, transform=transform)

    # Use the sampler from hyperparameters for the training data loader
    train_loader = DataLoader(train_subset, batch_size=hyperparams['batch_size'], sampler=hyperparams['trainloader_sampler'](train_subset))
    # Sequential sampler for the test data loader
    test_loader = DataLoader(test_subset, batch_size=64, sampler=SequentialSampler(test_subset))

    # Keep the attributions instead of discarding them after printing.
    results = []

    for exp_num in range(num_experiments):
        print(f"Model {exp_num + 1}")

        # Initialize a fresh model for this experiment
        model = Inception(in_channels, num_classes).to(Training_Device)

        # Use the optimizer from hyperparameters
        optimizer = hyperparams['optimizer'](model.parameters(), lr=hyperparams['initial_lr'])
        criterion = hyperparams['criterion']

        for epoch in range(num_epochs):
            train(epoch, model, train_loader, optimizer, criterion, Training_Device)
            test(epoch, model, test_loader, Training_Device)

        print("Integrated Gradient")
        ig_attributions = print_ig(test_loader, model, Feature_Attribution_Device)
        print("\n")

        print("DeepLift")
        dl_attributions = print_deeplift(test_loader, model, Feature_Attribution_Device)
        print("\n")

        results.append({
            'experiment': exp_num + 1,
            'integrated_gradients': ig_attributions,
            'deeplift': dl_attributions,
        })

    return results

# Example Usage
# NOTE: this rebinds the module-level name `hyperparams`, which an earlier cell
# uses for the grid-search dictionary (lists of choices per hyperparameter).
# Here it instead holds the single configuration that run_experiments reads.
hyperparams = {
    'batch_size': 64,
    'initial_lr': 0.001,
    'optimizer': torch.optim.Adam,  # Example optimizer (the class; instantiated inside run_experiments)
    'criterion': torch.nn.CrossEntropyLoss(),
    # Factory: called with the training dataset to build its sampler.
    'trainloader_sampler': lambda dataset: torch.utils.data.RandomSampler(dataset)
}

# The call is left disabled; arguments are (num_epochs, num_classes, in_channels, num_experiments, hyperparams).
#run_experiments(10, 10, 3, 2, hyperparams)


# Experiments

Get the data loaders, optimizer, and loss function

In [14]:
# Images are only converted to tensors; no augmentation or normalisation is applied.
transform = transforms.Compose([
    transforms.ToTensor(),
])

train_subset = datasets.CIFAR10('.', train=True, download=True, transform=transform)
test_subset = datasets.CIFAR10('.', train=False, download=True, transform=transform)

# Dedicated, seeded generator so the sampled indices are identical on every run
# and independent of whatever consumed the global RNG earlier in the notebook.
index_generator = torch.Generator().manual_seed(0)

# Random subset of `train_size` training images; the whole test set in a fixed random order.
train_indices = torch.randperm(len(train_subset), generator=index_generator)[:train_size]
test_indices = torch.randperm(len(test_subset), generator=index_generator)

# Training batches are re-shuffled each epoch within the chosen subset.
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=64, sampler=torch.utils.data.SubsetRandomSampler(train_indices))
# A plain index list is iterated in order, so every `next(iter(test_loader))`
# returns the same first batch. The attribution functions each draw one batch this
# way; with a random sampler they would be looking at different images.
test_loader = torch.utils.data.DataLoader(test_subset, batch_size=16, sampler=test_indices.tolist())

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29500385.84it/s]


Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified


In [15]:
# Number of training epochs per model. It is 0 here, so the models below are
# left at their random initialisation before attributions are computed.
NUM_EPOCHS = 0

in_channels = 3  # Input channels (e.g., for RGB images)
num_classes = 10  # Number of classes in your classification task

for i in range(2):
    print("model"+str(i+1))

    # Fresh model, optimizer and loss for every run
    model = Inception(in_channels, num_classes).to(Training_Device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    for epoch in range(NUM_EPOCHS):
        # Pass the full argument lists that train() and test() are defined with.
        train(epoch, model, train_loader, optimizer, criterion, Training_Device)
        test(epoch, model, test_loader, Training_Device)

    print("intergrated gradient")
    ig_attributions = print_ig(test_loader, model, Feature_Attribution_Device)
    print("\n")

    print("deeplift")
    dl_attributions = print_deeplift(test_loader, model, Feature_Attribution_Device)
    print("\n")

model1


KeyboardInterrupt: ignored