In [1]:
from pathlib import Path
import sys, os

# Project layout, derived from the first entry of sys.path
# (taken to be the notebook's directory -- confirm for your kernel).
# `.absolute()` matters when that entry is '' or relative: Path('') is '.',
# whose parent is again '.', so the sibling folders would not be found.
scriptPath = Path(sys.path[0]).absolute()
parentPath = scriptPath.parent
dataPath = parentPath / 'data'
srcPath = parentPath / 'src'
modelPath = parentPath / 'models'
savePath = parentPath / 'results'

# Make the modules in src/ importable. The membership test keeps the cell
# idempotent: re-running it does not append the same entry again.
if srcPath.as_posix() not in sys.path:
    sys.path.append(srcPath.as_posix())

# The CIFAR-10 dataset

In [2]:
import torch
from torchvision import datasets, transforms

# Per-channel mean / standard deviation handed to transforms.Normalize below.
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2471, 0.2435, 0.2616)

# Test split (train=False), downloaded into dataPath if it is not there yet.
# Each image is converted to a tensor and then normalised per channel.
testset = datasets.CIFAR10(
    root=dataPath,
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cifar10_mean, cifar10_std),
    ]),
)

# Fixed-order mini-batches over the test split.
batch_size = 100
test_loader = torch.utils.data.DataLoader(
    testset,
    shuffle=False,
    batch_size=batch_size,
)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# Load the trained models

In [3]:
from SPDNN_Conv import PDConvNet

# Loaded networks, keyed by (channel tuple, linear activation name).
cifar_models = {}
# Channel configurations for which at least one model was actually loaded.
nc_list = []

# Hyper-parameters passed to every PDConvNet built below.
strides = [1,1,1,1]
kernel_sizes = [5,5,5,5]
n_linear = 400

# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# printed listing reproducible between runs and machines.
for ptf in sorted(os.listdir(modelPath)):
    # Checkpoint names look like "Cifar10_C[32, 64, 128, 128]_PD400.pth".
    if not (ptf.startswith("Cifar10") and ptf.endswith('.pth')):
        continue

    # The channel counts are the comma-separated integers between "C[" and "]_".
    try:
        nc = tuple(int(num) for num in ptf.split('C[')[1].split(']_')[0].split(','))
    except (IndexError, ValueError):
        # Name does not follow the pattern above: report it and move on
        # instead of aborting the whole cell.
        print('skipping (cannot parse channel list):', ptf)
        continue
    print(nc,'\t',ptf)

    # The activation of the linear layer is encoded in the file-name suffix.
    for linear_act in ('PD', 'ReLU'):
        if not ptf.endswith(f'{linear_act}{n_linear}.pth'):
            continue
        model = PDConvNet(linear_act=linear_act, d_input=(3,32,32), n_linear=n_linear, n_output=10,
                          n_chan=nc, kss=kernel_sizes, ss=strides, last_layer_bias=True)
        model.load_state_dict(torch.load(modelPath / ptf, map_location=device))
        model.to(device)
        cifar_models[nc,linear_act] = model
        # Register the configuration only once a model exists for it, so that
        # checkpoints with a different n_linear cannot add entries to nc_list
        # that have no counterpart in cifar_models.
        if nc not in nc_list:
            nc_list.append(nc)
        break
nc_list.sort()

(128, 256, 256, 256) 	 Cifar10_C[128, 256, 256, 256]_ReLU400.pth
(128, 128, 128, 128) 	 Cifar10_C[128, 128, 128, 128]_PD400.pth
(64, 128, 128, 128) 	 Cifar10_C[64, 128, 128, 128]_ReLU400.pth
(64, 128, 128, 128) 	 Cifar10_C[64, 128, 128, 128]_PD400.pth
(32, 64, 128, 128) 	 Cifar10_C[32, 64, 128, 128]_PD400.pth
(128, 128, 128, 128) 	 Cifar10_C[128, 128, 128, 128]_ReLU400.pth
(32, 64, 128, 128) 	 Cifar10_C[32, 64, 128, 128]_ReLU400.pth
(128, 256, 256, 256) 	 Cifar10_C[128, 256, 256, 256]_PD400.pth


# Compute the test accuracies

In [4]:
%%time
import pandas as pd
import numpy as np
import torch

# Empty frame with the result columns. It is not filled in by this cell; the
# name is kept only so that any later cell referring to it still finds it.
cifar_results_df = pd.DataFrame(columns=['n_channels', 'linear_act', 'mean_accuracy', 'std_dev'])
torch.manual_seed(0)

# Number of times the whole test set is classified with each model.
nr = 100

# One dict per (channel configuration, activation) pair.
temp_results = []

# NOTE(review): the models are used in whatever train/eval mode they were left
# in after loading -- confirm whether PDConvNet needs `model.eval()` here.
with torch.no_grad():  # inference only, no autograd graph is needed
    for nc in nc_list:
        for linear_act in ['PD', 'ReLU']:
            model = cifar_models[nc,linear_act]
            # n_correct[j]: correct predictions of repetition j, summed over all
            # batches. A fresh array per model is essential: sharing one buffer
            # between the two activations leaks the first model's (already
            # normalised) accuracies into the second model's counts.
            n_correct = np.zeros(nr)

            for data, target in test_loader:
                data = data.to(device)
                target = target.to(device)

                # Repetitions form the inner loop, so each batch is moved to
                # the device only once.
                for j in range(nr):
                    output = model(data)
                    pred = output.max(1, keepdim=True)[1]
                    n_correct[j] += pred.eq(target.view_as(pred)).sum().item()

            # Fraction of correctly classified test images for every repetition.
            acc = n_correct / float(len(testset))
            mean_acc = 100. * acc.mean()
            std_dev = 100. * acc.std()

            temp_results.append({'n_channels': nc, 'linear_act': linear_act, 'mean_accuracy': mean_acc, 'std_dev': std_dev})

# Build the frame once from the list of records.
results_df = pd.DataFrame(temp_results)


# Pivot so that the activation types are the columns and the channel configurations
# are the rows, with 'mean_accuracy' and 'std_dev' together in the same cell
results_df['results'] = results_df.apply(lambda row: f"{row['mean_accuracy']:.2f} ± {row['std_dev']:.2f}%", axis=1)
results_df_pivot = results_df.pivot(index='n_channels', columns='linear_act', values='results')

results_df_pivot

CPU times: user 10min 55s, sys: 3min, total: 13min 55s
Wall time: 13min 55s


linear_act,PD,ReLU
n_channels,Unnamed: 1_level_1,Unnamed: 2_level_1
"(32, 64, 128, 128)",83.26 ± 0.23%,84.10 ± 0.25%
"(64, 128, 128, 128)",85.56 ± 0.24%,85.85 ± 0.21%
"(128, 128, 128, 128)",85.91 ± 0.22%,86.37 ± 0.23%
"(128, 256, 256, 256)",86.97 ± 0.19%,87.53 ± 0.20%


## Save the computed results

In [5]:
# Create the results directory if it is missing; to_pickle does not create it.
savePath.mkdir(parents=True, exist_ok=True)
# Store the per-model accuracy table as a pickle file.
results_df.to_pickle(savePath / 'Cifar10_4convs_N400_simacc')

## View the number of operations in different parts of the SPDNN models

In [7]:
# Number of neurons in the first linear layer, i.e. the layer right after the conv layers.
# Same value as the `n_linear` used when the models were loaded.
n_linear = 400

def cal_ndp(nc, d_input=(3,32,32)):
    """Count the dot products computed in the conv layers.

    Layer ``k`` (0-based) contributes ``c_in * c_out * prod(d_input[1:]) // 4**k``,
    where ``c_out`` is ``nc[k]`` and ``c_in`` is the previous entry of ``nc``
    (``d_input[0]`` for the first layer). The division by ``4**k`` presumably
    reflects a halving of both spatial sides after every layer -- confirm
    against the network definition.

    Args:
        nc: sequence with the number of output channels of each conv layer.
        d_input: input shape; the first entry is used as the channel count of
            the first layer, the product of the rest as the spatial size.

    Returns:
        The sum of the per-layer counts (0 for an empty ``nc``).
    """
    in_channels = [d_input[0], *nc]
    spatial_size = np.prod(d_input[1:])

    total = 0
    for layer, (c_in, c_out) in enumerate(zip(in_channels, nc)):
        shrink = 2**(2*layer)
        total += c_in*c_out*spatial_size//shrink
    return total

# NOTE: this binds a second name to the same DataFrame (no copy), so every
# column added below is also added to `results_df`.
cifar_results = results_df

# Quantities that enter the operation counts below.
cifar_d_input = (3,32,32)  # input shape handed to cal_ndp
kernel_area = 5**2         # MACs per dot product of a 5x5 kernel
n_classes = 10             # size of the output layer

n_chan = cifar_results['n_channels']

# Output dimension of the last convolutional layer: its channel count times the
# squared spatial side, the side being taken as 32 // 2**(number of conv layers)
cifar_results['conv_out_d'] = n_chan.map(lambda nc: nc[-1]*(cifar_d_input[1]//2**len(nc))**2)

# Number of dot products (n_dp) and MAC operations (n_MAC) computed in the convolution layers
cifar_results['n_dp_convs'] = n_chan.map(lambda nc: cal_ndp(nc, cifar_d_input))
cifar_results['n_MAC_convs'] = cifar_results['n_dp_convs']*kernel_area

# Number of dot products (n_dp) and MAC operations (n_MAC) computed in the linear layers
# (scalars are broadcast to every row)
cifar_results['n_dp_lin1'] = n_linear
cifar_results['n_MAC_lin1'] = cifar_results['conv_out_d']*n_linear
cifar_results['n_dp_lin2'] = n_classes
cifar_results['n_MAC_lin2'] = n_classes*n_linear

# Number of dot products (n_dp) and MAC operations (n_MAC) computed in the entire model
cifar_results['n_dp_tot'] = cifar_results['n_dp_convs'] + cifar_results['n_dp_lin1'] + cifar_results['n_dp_lin2']
cifar_results['n_MAC_tot'] = cifar_results['n_MAC_convs'] + cifar_results['n_MAC_lin1'] + cifar_results['n_MAC_lin2']

# Portion of non-SPD layers of the entire model based on n_dp / n_MAC:
# for 'PD' models only the last linear layer is counted, for the other models
# both linear layers are counted.
is_pd = cifar_results['linear_act'] == 'PD'

non_spd_dp = cifar_results['n_dp_lin2'] + cifar_results['n_dp_lin1'].mask(is_pd, 0)
cifar_results['p_dp'] = non_spd_dp / cifar_results['n_dp_tot']

non_spd_MAC = cifar_results['n_MAC_lin2'] + cifar_results['n_MAC_lin1'].mask(is_pd, 0)
cifar_results['p_MAC'] = non_spd_MAC / cifar_results['n_MAC_tot']

cifar_results[["n_channels", "linear_act", "n_MAC_convs", "n_MAC_lin1", "n_MAC_lin2", "p_MAC","n_dp_convs", "n_dp_lin1", "n_dp_lin2", "p_dp", "results"]]


Unnamed: 0,n_channels,linear_act,n_MAC_convs,n_MAC_lin1,n_MAC_lin2,p_MAC,n_dp_convs,n_dp_lin1,n_dp_lin2,p_dp,results
0,"(32, 64, 128, 128)",PD,35225600,204800,4000,0.000113,1409024,400,10,7.095047e-06,83.26 ± 0.23%
1,"(32, 64, 128, 128)",ReLU,35225600,204800,4000,0.005893,1409024,400,10,0.0002908969,84.10 ± 0.25%
2,"(64, 128, 128, 128)",PD,90112000,204800,4000,4.4e-05,3604480,400,10,2.77401e-06,85.56 ± 0.24%
3,"(64, 128, 128, 128)",ReLU,90112000,204800,4000,0.002312,3604480,400,10,0.0001137344,85.85 ± 0.21%
4,"(128, 128, 128, 128)",PD,147456000,204800,4000,2.7e-05,5898240,400,10,1.695303e-06,85.91 ± 0.22%
5,"(128, 128, 128, 128)",ReLU,147456000,204800,4000,0.001414,5898240,400,10,6.950743e-05,86.37 ± 0.23%
6,"(128, 256, 256, 256)",PD,350617600,409600,4000,1.1e-05,14024704,400,10,7.130067e-07,86.97 ± 0.19%
7,"(128, 256, 256, 256)",ReLU,350617600,409600,4000,0.001178,14024704,400,10,2.923327e-05,87.53 ± 0.20%
