# MobileNetV2

## Preprocess: Caculate influence metrics for each layer.

Step 1: Preprocess the performance data of the quantized model obtained from tests.

In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv('database/MobilenetV2_SingleLayer.csv')
layers = df['Layer'].values
num_variable = len(layers)

df['Influence_fillna'] = df['Influence'].fillna(100)

delta_accs = [1.442,0.518,1.214,12.57,0.156,3.648,41.426,0.122,0.206,0.832,0.572,4.912,17.802,0.182,0.066,1.136,0.056,5.398,66.144,0.21,0.016,0.112,0.104,2.15,5.764,0.088,0.194,0.716,0.094,0.424,3.742,0.136,0.068,0.212,0.002,0.436,68.704,0.158,0.03,0.132,0.078,3.168,6.462,0.012,0.09,0.076,0.082,0.136,1.478,0.13,0.06,0.02,0.102,0.324,1.69,0.022,0.096,0.112,0.064,0.27,59.946,0.232,0.052,0.708,0.162,4.89,7.36,0.138,0.108,0.218,0.06,0.452,3.918,0.044,0.14,0.27,0.14,0.71,67.342,0.174,0.11,0.056,0.146,8.766,7.414,0.102,0.096,0.202,0.26,0.956,6.404,0.132,0.118,0.33,0.128,2.406,38.546,0.38,0.084,0.032,0.172,4.076]
quantile = np.quantile(delta_accs, q=0.75)

pairwise_df = pd.read_csv('database/MobilenetV2_PairwiseLayers.csv', index_col=0)
pairwise_df = pairwise_df.sort_index()
pairwise_df = pairwise_df.dropna()
pairwise_df['XY-X-Y'] = pairwise_df['Delta_acc'] - pairwise_df['Delta_acc1'] - pairwise_df['Delta_acc2']

df_filtered = pairwise_df[(pairwise_df['Delta_acc1'] < quantile) & (pairwise_df['Delta_acc2'] < quantile)]

multiple_df = pd.read_csv('database/MobilenetV2_MultipleLayers.csv')
multiple_df = multiple_df.dropna()
multiple_df['XY-X-Y'] = multiple_df['Delta Acc'] - multiple_df['F1']

layer_num_dict = {'Layer_num':[0, 1, 2, ], 'Avg_corr': [0, 0, df_filtered['XY-X-Y'].values.mean()]}

for layer_num in sorted(np.unique(multiple_df['Layer_num'].values)):
    avg_corr = multiple_df.loc[multiple_df['Layer_num'] == layer_num, 'XY-X-Y'].mean()
    layer_num_dict['Layer_num'].append(layer_num)
    layer_num_dict['Avg_corr'].append(avg_corr)

max_layer = max(layer_num_dict['Layer_num'])
for layer_num in range(2, max_layer):
    if layer_num not in layer_num_dict['Layer_num']:
        avg_corr = (layer_num_dict['Avg_corr'][layer_num_dict['Layer_num'].index(layer_num - 1)] + layer_num_dict['Avg_corr'][layer_num_dict['Layer_num'].index(layer_num + 1)]) / 2
        layer_num_dict['Layer_num'].append(layer_num)
        layer_num_dict['Avg_corr'].append(avg_corr)

for layer_num in range(max_layer + 1, len(layers) + 1):
    layer_num_dict['Layer_num'].append(layer_num)
    layer_num_dict['Avg_corr'].append(layer_num_dict['Avg_corr'][layer_num_dict['Layer_num'].index(max_layer)] )

layer_num_df = pd.DataFrame(layer_num_dict)

Step 2: Based on the processed performance data, the corresponding influence metric for each layer is computed.

In [2]:
import numpy as np
from bit_config import bit_config
from sklearn.linear_model import LinearRegression

mobilenetv2 = bit_config['mobilenetv2']
layers = []
for key, _ in mobilenetv2.items():
    if "stage" in key and "int32" not in key and "identity" not in key:
        layers.append(key)

delta_acc_dict = dict(zip(layers, delta_accs))
filtered_layers = [layer for layer in delta_acc_dict if delta_acc_dict[layer] < quantile]

X = np.zeros((multiple_df.shape[0], len(filtered_layers)))

for i in multiple_df.index:
    selected_layers = np.array(layers)[np.array(multiple_df.loc[i, 'Config'][1:-1].split(', ')) == '0']
    for layer in selected_layers:
        X[i][filtered_layers.index(layer)] = 1

y = multiple_df['XY-X-Y'].copy().values.reshape((multiple_df.shape[0], 1))
for i in multiple_df.index:
    layer_num = multiple_df.loc[i, 'Layer_num']
    y[i, 0] -= layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].values[0]

reg = LinearRegression().fit(X, y)

multiple_sensitivity_dict = dict(zip(filtered_layers, reg.coef_[0]))
pd.DataFrame({'layer': filtered_layers, 'influence': reg.coef_[0]})

Unnamed: 0,layer,influence
0,features.stage1.unit1.quant_act,-0.380101
1,features.stage1.unit1.conv1,-0.818005
2,features.stage1.unit1.quant_act1,-0.047065
3,features.stage1.unit1.quant_act2,-0.043368
4,features.stage2.unit1.conv1,0.157707
...,...,...
71,features.stage5.unit3.quant_act2,0.201796
72,features.stage5.unit4.conv1,-0.110581
73,features.stage5.unit4.quant_act1,-0.122189
74,features.stage5.unit4.conv2,-0.105210


Step 3: Based on the computed metrics for each layer, predict the corresponding accuracy of any mixed-precision quantized model.

In [3]:
import scipy.stats

df['MultipleInfluence'] = 0
for index, row in df.iterrows():
    if row['Layer'] in filtered_layers:
        df.loc[index, 'MultipleInfluence'] = multiple_sensitivity_dict[row['Layer']]
    else:
        df.loc[index, 'MultipleInfluence'] = float('nan')

multiple_df['prediction'] = multiple_df['F1']
for index, row in multiple_df.iterrows():
    layer_num = row['Layer_num']
    selected_layers = np.array(layers)[np.array(multiple_df.loc[index, 'Config'][1:-1].split(', ')) == '0']
    multiple_df.loc[index, 'prediction'] += df.loc[df['Layer'].isin(selected_layers), 'MultipleInfluence'].sum() + layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].sum()

print(scipy.stats.pearsonr(multiple_df['Delta Acc'], multiple_df['prediction']))

  df.loc[index, 'MultipleInfluence'] = multiple_sensitivity_dict[row['Layer']]


PearsonRResult(statistic=0.9877183787468592, pvalue=0.0)


Step 4: Verify the correlation between the predicted and true accuracies of the mixed-precision quantized models.

In [4]:
import altair as alt

qq = alt.Chart(multiple_df.sample(n=1000, random_state=0)).mark_square(size=100, opacity=1, angle=45, stroke='black').encode(
    x=alt.X('prediction:Q', scale=alt.Scale(domain=[0, 20]), title='Predicted Accuracy Loss (%)'),
    y=alt.Y('Delta Acc:Q', scale=alt.Scale(domain=[0, 20]), title='True Accuracy Loss (%)'),
)

x = np.arange(100)
source = pd.DataFrame({
  'x': x,
  'f(x)': x
})

line = alt.Chart(source).mark_line(color='red', clip=True, size=5).encode(
    x=alt.X('x', title='Predicted Accuracy Loss (%)'),
    y=alt.Y('f(x)', title='True Accuracy Loss (%)')
)

(line + qq).configure_axis(
    labelFontSize=24,
    titleFontSize=28,
    titleFontWeight='normal',
    domainWidth=3,
    gridWidth=3,
    gridDash=[3,3],
    tickWidth=3,
    domainColor='black',
    tickColor='black',
).configure_view(
    strokeOpacity=0
)

## Algorithm: Find the optimal mixed-precision quantization scheme.

Step 1: Define the functions to calculate *BOPs* and *Model Size*.

In [5]:
import numpy as np

def MAC(c_i, k, hw, c_o):
    if k == 3:
        flot = c_i * 0.5 * (2 * k * k - 1) * hw
    else:
        flot = 0.5 * (2 * c_i * k * k - 1) * hw * c_o
    return flot

c_i = [3,32,32,32,16,96,96,24,144,144,24,144,144,32,192,192,32,192,192,32,192,192,64,384,384,64,384,384,64,384,384,64,384,384,96,576,576,96,576,576,96,576,576,160,960,960,160,960,960,160,960,960,320,1280]
c_o = [32,32,32,16,96,96,24,144,144,24,144,144,32,192,192,32,192,192,32,192,192,64,384,384,64,384,384,64,384,384,64,384,384,96,576,576,96,576,576,96,576,576,160,960,960,160,960,960,160,960,960,320,1280,1000]
hw = [50176,12544,12544,12544,12544,12544,12544,12544,12544,12544,3136,3136,3136,3136,3136,3136,3136,3136,3136,784,784,784,784,784,784,784,784,784,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,49,49,49,1,1]
k_size = [3,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,3,1,1,1]

# Calculate BOPs
def calculate_bops(ba, bw):
    sum = 0
    for k in range(54):
        if k in [0, 52, 53]:
            bops = 8 * 8 * MAC(c_i[k], k_size[k], hw[k] ,c_o[k])
        else:
            bops = ba[k - 1] * bw[k - 1] * MAC(c_i[k], k_size[k], hw[k] ,c_o[k])
        sum += bops
    sum /= 1e9
    return sum

in_channel = [3, 32, 32, 32, 16, 96, 96, 24, 144, 144, 24, 144, 144, 32, 192, 192, 32, 192, 192, 32, 192, 192, 64, 384, 384, 64, 384, 384, 64, 384, 384, 64, 384, 384, 96, 576, 576, 96, 576, 576, 96, 576, 576, 160, 960, 960, 160, 960, 960, 160, 960, 960, 320, 1280]
out_channel = [32, 32, 32, 16, 96, 96, 24, 144, 144, 24, 144, 144, 32, 192, 192, 32, 192, 192, 32, 192, 192, 64, 384, 384, 64, 384, 384, 64, 384, 384, 64, 384, 384, 96, 576, 576, 96, 576, 576, 96, 576, 576, 160, 960, 960, 160, 960, 960, 160, 960, 960, 320, 1280, 1000]
kernel = [3, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1]
group = [1, 1, 32, 1, 1, 96, 1, 1, 144, 1, 1, 144, 1, 1, 192, 1, 1, 192, 1, 1, 192, 1, 1, 384, 1, 1, 384, 1, 1, 384, 1, 1, 384, 1, 1, 576, 1, 1, 576, 1, 1, 576, 1, 1, 960, 1, 1, 960, 1, 1, 960, 1, 1, 1]

# Calculate Model Size
def calculate_size(ba, bw):
    sum = 0
    for i in range(54):
        if i in [0, 52, 53]:
            size = kernel[i] ** 2 * (in_channel[i] / group[i]) * out_channel[i]
        else:
            size = kernel[i] ** 2 * (in_channel[i] / group[i]) * out_channel[i] / 8 * bw[i - 1]
        sum += size
    sum /= 1024 * 1024
    return sum

bops_8bit = calculate_bops(np.zeros((51)) + 8, np.zeros((51)) + 8) # BOPs when all the layers are 8-bit
bops_4bit = calculate_bops(np.zeros((51)) + 4, np.zeros((51)) + 4) # BOPs when all the layers are 4-bit
size_8bit = calculate_size(np.zeros((51)) + 8, np.zeros((51)) + 8) # Model Size when all the layers are 8-bit
size_4bit = calculate_size(np.zeros((51)) + 4, np.zeros((51)) + 4) # Model Size when all the layers are 4-bit

Step 2: Solve the problem using the NSGA-II algorithm.

### Model Size Limit

In [6]:
import numpy as np
from pymoo.core.problem import Problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.operators.sampling.rnd import IntegerRandomSampling
from pymoo.operators.repair.rounding import RoundingRepair
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.optimize import minimize

class MPQProblem(Problem):
    def __init__(self):
        super().__init__(
                         n_var=num_variable // 2,
                         n_obj=2,
                         n_ieq_constr=1, 
                         n_eq_constr=0,
                         xl=np.array([4] * (num_variable // 2)),
                         xu=np.array([8] * (num_variable // 2)),
                         vtype=int)

    def _evaluate(self, X, out, *args, **kwargs):
        f1 = np.sum(df['DeltaAcc'].values[1::2] * (8 - X) / 4, axis=1)
        f2 = np.sum(df['DeltaAcc'].values[1::2] * (8 - X) / 4, axis=1) + np.sum(df['Influence_fillna'].values[1::2] * (8 - X) / 4, axis=1) + [layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].values[0] for layer_num in np.sum(X != 8, axis=1)]

        g1 = [calculate_size(np.zeros((num_variable // 2)) + 8, X[i, :]) - size_limit for i in range(X.shape[0])]

        out["F"] = np.column_stack([f1, f2])
        out["G"] = np.column_stack([g1])

nsga_dict = {'Size_limit': [],
             'Size':[],
             'Config_name': [],
             'Config': []}

size_limit = 2.83
config_name = 'size' + str(size_limit)

problem = MPQProblem()

algorithm = NSGA2(pop_size=100,
                sampling=IntegerRandomSampling(),
                crossover=SBX(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                mutation=PM(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                eliminate_duplicates=True)
result = minimize(problem, 
                algorithm, 
                termination=('n_gen', 100), 
                seed=1,
                verbose=False)

if result.X is None:
    print(f'Model size should be limited to between {int(size_4bit)} and {int(size_8bit)}')
    
for i in range(len(result.X)):
    nsga_dict['Size_limit'].append(size_limit)
    nsga_dict['Size'].append(calculate_size(np.zeros((num_variable // 2)) + 8, result.X[i]))
    nsga_dict['Config_name'].append(config_name + '_' + str(i + 1))
    nsga_dict['Config'].append(result.X[i])

nsga_df = pd.DataFrame(nsga_dict)
nsga_df


Compiled modules for significant speedup can not be used!
https://pymoo.org/installation.html#installation

from pymoo.config import Config



Unnamed: 0,Size_limit,Size,Config_name,Config
0,2.83,2.829071,size2.83_1,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, ..."
1,2.83,2.829414,size2.83_2,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, ..."
2,2.83,2.829803,size2.83_3,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, ..."
3,2.83,2.829918,size2.83_4,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, ..."
4,2.83,2.82785,size2.83_5,"[4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, ..."
5,2.83,2.828339,size2.83_6,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, ..."
6,2.83,2.829712,size2.83_7,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, ..."
7,2.83,2.829826,size2.83_8,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, ..."
8,2.83,2.829369,size2.83_9,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, ..."


### BOPs limit

In [7]:
import numpy as np
from pymoo.core.problem import Problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.operators.sampling.rnd import IntegerRandomSampling
from pymoo.operators.repair.rounding import RoundingRepair
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.optimize import minimize

class MPQProblem(Problem):
    def __init__(self):
        super().__init__(
                         n_var=num_variable,
                         n_obj=2,
                         n_ieq_constr=1, 
                         n_eq_constr=0,
                         xl=np.array([4] * num_variable),
                         xu=np.array([8] * num_variable),
                         vtype=int)

    def _evaluate(self, X, out, *args, **kwargs):
        f1 = np.sum(df['DeltaAcc'].values * (8 - X) / 4, axis=1)
        f2 = np.sum(df['DeltaAcc'].values * (8 - X) / 4, axis=1) + np.sum(df['Influence_fillna'].values * (8 - X) / 4, axis=1) + [layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].values[0] for layer_num in np.sum(X != 8, axis=1)]

        g1 = [calculate_bops(X[i, ::2], X[i, 1::2]) - bops_limit for i in range(X.shape[0])]

        out["F"] = np.column_stack([f1, f2])
        out["G"] = np.column_stack([g1])

nsga_dict = {'BOPs_limit': [],
             'BOPs': [], 
             'Config_name': [],
             'Config': []}

bops_limit = 30
config_name = 'bops' + str(bops_limit)

problem = MPQProblem()

algorithm = NSGA2(pop_size=100,
                sampling=IntegerRandomSampling(),
                crossover=SBX(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                mutation=PM(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                eliminate_duplicates=True)
result = minimize(problem, 
                algorithm, 
                termination=('n_gen', 100), 
                seed=1,
                verbose=False)

if result.X is None:
    print(f'BOPs should be limited to between {int(bops_4bit)} and {int(bops_8bit)}')

for i in range(len(result.X)):
    nsga_dict['BOPs_limit'].append(bops_limit)
    nsga_dict['BOPs'].append(calculate_bops(result.X[i][::2], result.X[i][1::2]))
    nsga_dict['Config_name'].append(config_name + '_' + str(i + 1))
    nsga_dict['Config'].append(result.X[i])

nsga_df = pd.DataFrame(nsga_dict)
nsga_df

Unnamed: 0,BOPs_limit,BOPs,Config_name,Config
0,30,29.941971,bops30_1,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, ..."
1,30,29.999924,bops30_2,"[8, 7, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 4, 7, ..."
2,30,29.796247,bops30_3,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, ..."
3,30,29.847778,bops30_4,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, ..."
4,30,29.992674,bops30_5,"[8, 7, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 4, 7, ..."
5,30,29.985812,bops30_6,"[8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, ..."
6,30,29.937167,bops30_7,"[8, 4, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 4, 7, ..."
7,30,29.967661,bops30_8,"[8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, ..."
8,30,29.907927,bops30_9,"[8, 6, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 4, 7, ..."
9,30,29.942247,bops30_10,"[8, 5, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, ..."


Step 3: Output the resulting mixed-precision scheme.

In [8]:
from bit_config import bit_config

mobilenetv2 = bit_config['mobilenetv2']

changed_layers = {}

for i, row in nsga_df.iterrows():
    config = [8 if i % 2 == 0 else row['Config'][index//2] for index in range(num_variable)]
    changed_layers['bit_config_mobilenetv2_w1_' + row['Config_name']] = {}
    for index, bit in enumerate(config):
        if bit != 8:
            changed_layers['bit_config_mobilenetv2_w1_' + row['Config_name']][layers[index]] = bit

output_bit_config = {}

for model, paras in changed_layers.items():
    output_bit_config[model] = mobilenetv2.copy()
    for para in paras:
        output_bit_config[model][para] = paras[para]

output_bit_config

{'bit_config_mobilenetv2_w1_bops30_1': {'init_block': 8,
  'quant_act_int32': 16,
  'features.stage1.unit1.quant_act': 8,
  'features.stage1.unit1.conv1': 8,
  'features.stage1.unit1.quant_act1': 8,
  'features.stage1.unit1.conv2': 8,
  'features.stage1.unit1.quant_act2': 8,
  'features.stage1.unit1.conv3': 8,
  'features.stage1.unit1.quant_act_int32': 16,
  'features.stage2.unit1.quant_act': 8,
  'features.stage2.unit1.conv1': 8,
  'features.stage2.unit1.quant_act1': 8,
  'features.stage2.unit1.conv2': 8,
  'features.stage2.unit1.quant_act2': 8,
  'features.stage2.unit1.conv3': 8,
  'features.stage2.unit1.quant_act_int32': 16,
  'features.stage2.unit2.quant_act': 8,
  'features.stage2.unit2.conv1': 8,
  'features.stage2.unit2.quant_act1': 8,
  'features.stage2.unit2.conv2': 8,
  'features.stage2.unit2.quant_act2': 8,
  'features.stage2.unit2.conv3': 8,
  'features.stage2.unit2.quant_act_int32': 16,
  'features.stage3.unit1.quant_act': 8,
  'features.stage3.unit1.conv1': 8,
  'features

# ResNet50

## Preprocess: Caculate influence metrics for each layer.

Step 1: Preprocess the performance data of the quantized model obtained from tests.

In [9]:
import pandas as pd
import numpy as np

df = pd.read_csv('database/Resnet50_SingleLayer.csv')
layers = df['Layer'].values
num_variable = len(layers)

df['Influence_fillna'] = df['Influence'].fillna(100)

delta_accs = [14.946, 2.546, 0.512, 0.66, 0.514, 0.992, 0.306, 0.276, 0.538, 0.194, 0.124, 0.226, 0.19, 0.298, 0.078, 0.15, 0.146, 0.168, 0.616, 0.196, 0.082, 0.154, 0.104, 0.344, 0.086, 0.094, 0.126, 0.21, 0.04, 0.018, 0.11, 0.152, 0.116, 0.102, 0.132, 0.07, 0.734, 0.314, 0.406, 0.158, 0.324, 0.162, 2.506, 0.044, 1.498, 0.146, 6.106, 0.168, 0.324, 0.038, 1.442, 0.162, 0.53, 0.106, 0.246, 0.25, 1.5, 0.174, 6.144, 0.176, 0.43, -0.036, 0.708, 0.156, 1.102, 0.194, 0.308, 0.17, 1.986, 0.14, 3.05, 0.096, 0.422, 0.18, 1.024, 0.116, 4.312, 0.116, 12.264, 0.15, 0.256, 0.066, 0.15, 0.048, 1.294, 0.216, 0.122, 0.024, 0.052, 0.148, 1.1, 0.308, 0.222, 0.252, 0.512, 0.146]
quantile = np.quantile(delta_accs, q=0.75)

pairwise_df = pd.read_csv('database/Resnet50_PairwiseLayers.csv', index_col=0)
pairwise_df = pairwise_df.sort_index()
pairwise_df = pairwise_df.dropna()
pairwise_df['XY-X-Y'] = pairwise_df['Delta_acc'] - pairwise_df['Delta_acc1'] - pairwise_df['Delta_acc2']

df_filtered = pairwise_df[(pairwise_df['Delta_acc1'] < quantile) & (pairwise_df['Delta_acc2'] < quantile)]

multiple_df = pd.read_csv('database/Resnet50_MultipleLayers.csv')
multiple_df = multiple_df.dropna()
multiple_df['XY-X-Y'] = multiple_df['Delta Acc'] - multiple_df['F1']

layer_num_dict = {'Layer_num':[0, 1, 2, ], 'Avg_corr': [0, 0, df_filtered['XY-X-Y'].values.mean()]}

for layer_num in sorted(np.unique(multiple_df['Layer_num'].values)):
    avg_corr = multiple_df.loc[multiple_df['Layer_num'] == layer_num, 'XY-X-Y'].mean()
    layer_num_dict['Layer_num'].append(layer_num)
    layer_num_dict['Avg_corr'].append(avg_corr)

max_layer = max(layer_num_dict['Layer_num'])
for layer_num in range(2, max_layer):
    if layer_num not in layer_num_dict['Layer_num']:
        avg_corr = (layer_num_dict['Avg_corr'][layer_num_dict['Layer_num'].index(layer_num - 1)] + layer_num_dict['Avg_corr'][layer_num_dict['Layer_num'].index(layer_num + 1)]) / 2
        layer_num_dict['Layer_num'].append(layer_num)
        layer_num_dict['Avg_corr'].append(avg_corr)

for layer_num in range(max_layer + 1, len(layers) + 1):
    layer_num_dict['Layer_num'].append(layer_num)
    layer_num_dict['Avg_corr'].append(layer_num_dict['Avg_corr'][layer_num_dict['Layer_num'].index(max_layer)] )

layer_num_df = pd.DataFrame(layer_num_dict)

Step 2: Based on the processed performance data, the corresponding influence metric for each layer is computed.

In [10]:
import numpy as np
from bit_config import bit_config
from sklearn.linear_model import LinearRegression

resnet50 = bit_config['resnet50']
layers = []
for key, _ in resnet50.items():
    if "stage" in key and "int32" not in key and "identity" not in key:
        layers.append(key)

delta_acc_dict = dict(zip(layers, delta_accs))
filtered_layers = [layer for layer in delta_acc_dict if delta_acc_dict[layer] < quantile]

X = np.zeros((multiple_df.shape[0], len(filtered_layers)))

for i in multiple_df.index:
    selected_layers = np.array(layers)[np.array(multiple_df.loc[i, 'Config'][1:-1].split(', ')) == '0']
    for layer in selected_layers:
        X[i][filtered_layers.index(layer)] = 1

y = multiple_df['XY-X-Y'].copy().values.reshape((multiple_df.shape[0], 1))
for i in multiple_df.index:
    layer_num = multiple_df.loc[i, 'Layer_num']
    y[i, 0] -= layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].values[0]

reg = LinearRegression().fit(X, y)

multiple_sensitivity_dict = dict(zip(filtered_layers, reg.coef_[0]))
pd.DataFrame({'layer': filtered_layers, 'influence': reg.coef_[0]})

Unnamed: 0,layer,influence
0,stage1.unit1.quant_act1,0.141299
1,stage1.unit2.quant_act,0.111668
2,stage1.unit2.quant_convbn1,0.011332
3,stage1.unit2.quant_convbn2,-0.070038
4,stage1.unit2.quant_act2,0.061393
...,...,...
67,stage4.unit3.quant_convbn1,0.146776
68,stage4.unit3.quant_act1,-0.219226
69,stage4.unit3.quant_convbn2,0.035189
70,stage4.unit3.quant_act2,0.182710


Step 3: Based on the computed metrics for each layer, predict the corresponding accuracy of any mixed-precision quantized model.

In [11]:
import scipy.stats

df['MultipleInfluence'] = 0
for index, row in df.iterrows():
    if row['Layer'] in filtered_layers:
        df.loc[index, 'MultipleInfluence'] = multiple_sensitivity_dict[row['Layer']]
    else:
        df.loc[index, 'MultipleInfluence'] = float('nan')

multiple_df['prediction'] = multiple_df['F1']
for index, row in multiple_df.iterrows():
    layer_num = row['Layer_num']
    selected_layers = np.array(layers)[np.array(multiple_df.loc[index, 'Config'][1:-1].split(', ')) == '0']
    multiple_df.loc[index, 'prediction'] += df.loc[df['Layer'].isin(selected_layers), 'MultipleInfluence'].sum() + layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].sum()

print(scipy.stats.pearsonr(multiple_df['Delta Acc'], multiple_df['prediction']))

PearsonRResult(statistic=0.993247930206348, pvalue=0.0)


Step 4: Verify the correlation between the predicted and true accuracies of the mixed-precision quantized models.

In [12]:
import altair as alt

qq = alt.Chart(multiple_df.sample(n=1000, random_state=2)).mark_square(size=100, opacity=1, angle=45, stroke='black').encode(
    x=alt.X('prediction:Q', scale=alt.Scale(domain=[0, 10]), title='Predicted Accuracy Loss (%)'),
    y=alt.Y('Delta Acc:Q', scale=alt.Scale(domain=[0, 10]), title='True Accuracy Loss (%)'),
)

x = np.arange(100)
source = pd.DataFrame({
  'x': x,
  'f(x)': x
})

line = alt.Chart(source).mark_line(color='red', clip=True, size=5).encode(
    x=alt.X('x', title='Predicted Accuracy Loss (%)'),
    y=alt.Y('f(x)', title='True Accuracy Loss (%)')
)

(line + qq).configure_axis(
    labelFontSize=24,
    titleFontSize=28,
    titleFontWeight='normal',
    domainWidth=3,
    gridWidth=3,
    gridDash=[3,3],
    tickWidth=3,
    domainColor='black',
    tickColor='black',
).configure_view(
    strokeOpacity=0
)

## Algorithm: Find the optimal mixed-precision quantization scheme.

Step 1: Define the functions to calculate *BOPs* and *Model Size*.

In [13]:
import numpy as np

def MAC(c_i, k, hw, c_o):
    flot = 0.5 * (2 * c_i * k * k - 1) * hw * c_o
    return flot

c_i = [3, 64, 64, 64, 64, 256, 64, 64, 256, 64, 64, 256, 128, 128, 256, 512, 128, 128, 512, 128, 128, 512, 128, 128, 512, 256, 256, 512, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 512, 512, 1024, 2048, 512, 512, 2048, 512, 512]
c_o = [64, 64, 64, 256, 256, 64, 64, 256, 64, 64, 256, 128, 128, 512, 512, 128, 128, 512, 128, 128, 512, 128, 128, 512, 256, 256, 1024, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 512, 512, 2048, 2048, 512, 512, 2048, 512, 512, 2048]
hw = [12544, 3136, 3136, 3136, 3136, 3136, 3136, 3136, 3136, 3136, 3136, 784, 784, 784, 784, 784, 784, 784, 784, 784, 784, 784, 784, 784, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49]
k_size = [7, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1]

# Calculate BOPs
def calculate_bops(ba, bw):
    sum = 0
    index = 0
    for k in range(53):
        if k in [0]:
            bops = 8 * 8 * MAC(c_i[k], k_size[k], hw[k] ,c_o[k])
        elif k in [4, 14, 27, 46]:
            bops = 8 * 8 * MAC(c_i[k], k_size[k], hw[k] ,c_o[k])
        else:
            bops = ba[index] * bw[index] * MAC(c_i[k], k_size[k], hw[k] ,c_o[k])
            index += 1
        sum += bops
    sum /= 1e9
    return sum

in_channel = [3, 64, 64, 64, 64, 256, 64, 64, 256, 64, 64, 256, 128, 128, 256, 512, 128, 128, 512, 128, 128, 512, 128, 128, 512, 256, 256, 512, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 512, 512, 1024, 2048, 512, 512, 2048, 512, 512]
out_channel = [64, 64, 64, 256, 256, 64, 64, 256, 64, 64, 256, 128, 128, 512, 512, 128, 128, 512, 128, 128, 512, 128, 128, 512, 256, 256, 1024, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 256, 256, 1024, 512, 512, 2048, 2048, 512, 512, 2048, 512, 512, 2048]
kernel = [7, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1]

# Calculate Model Size
def calculate_size(ba, bw):
    sum = 0
    index = 0
    for i in range(53):
        if i in [0]:
            size = kernel[i] ** 2 * in_channel[i] * out_channel[i]
        elif i in [4, 14, 27, 46]:
            size = kernel[i] ** 2 * in_channel[i] * out_channel[i]
        else:
            size = kernel[i] ** 2 * in_channel[i] * out_channel[i] / 8 * bw[index]
            index += 1
        sum += size
    sum += out_channel[-1] * 1000
    sum /= 1024 * 1024
    return sum

bops_8bit = calculate_bops(np.zeros((48)) + 8, np.zeros((48)) + 8) # BOPs when all the layers are 8-bit
bops_4bit = calculate_bops(np.zeros((48)) + 4, np.zeros((51)) + 4) # BOPs when all the layers are 4-bit
size_8bit = calculate_size(np.zeros((48)) + 8, np.zeros((48)) + 8) # Model Size when all the layers are 8-bit
size_4bit = calculate_size(np.zeros((48)) + 4, np.zeros((48)) + 4) # Model Size when all the layers are 4-bit

Step 2: Solve the problem using the NSGA-II algorithm.

### Model Size Limit

In [14]:
import numpy as np
from pymoo.core.problem import Problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.operators.sampling.rnd import IntegerRandomSampling
from pymoo.operators.repair.rounding import RoundingRepair
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.optimize import minimize

class MPQProblem(Problem):
    def __init__(self):
        super().__init__(
                         n_var=num_variable // 2,
                         n_obj=2,
                         n_ieq_constr=1, 
                         n_eq_constr=0,
                         xl=np.array([4] * (num_variable // 2)),
                         xu=np.array([8] * (num_variable // 2)),
                         vtype=int)

    def _evaluate(self, X, out, *args, **kwargs):
        f1 = np.sum(df['DeltaAcc'].values[1::2] * (8 - X) / 4, axis=1)
        f2 = np.sum(df['DeltaAcc'].values[1::2] * (8 - X) / 4, axis=1) + np.sum(df['Influence_fillna'].values[1::2] * (8 - X) / 4, axis=1) + [layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].values[0] for layer_num in np.sum(X != 8, axis=1)]

        g1 = [calculate_size(np.zeros((num_variable // 2)) + 8, X[i, :]) - size_limit for i in range(X.shape[0])]

        out["F"] = np.column_stack([f1, f2])
        out["G"] = np.column_stack([g1])

nsga_dict = {'Size_limit': [],
             'Size':[],
             'Config_name': [],
             'Config': []}

size_limit = 14.5
config_name = 'size' + str(size_limit)

problem = MPQProblem()

algorithm = NSGA2(pop_size=100,
                sampling=IntegerRandomSampling(),
                crossover=SBX(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                mutation=PM(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                eliminate_duplicates=True)
result = minimize(problem, 
                algorithm, 
                termination=('n_gen', 100), 
                seed=1,
                verbose=False)

if result.X is None:
    print(f'Model size should be limited to between {int(size_4bit)} and {int(size_8bit)}')

for i in range(len(result.X)):
    nsga_dict['Size_limit'].append(size_limit)
    nsga_dict['Size'].append(calculate_size(np.zeros((num_variable // 2)) + 8, result.X[i]))
    nsga_dict['Config_name'].append(config_name + '_' + str(i + 1))
    nsga_dict['Config'].append(result.X[i])

nsga_df = pd.DataFrame(nsga_dict)
nsga_df

Unnamed: 0,Size_limit,Size,Config_name,Config
0,14.5,14.499207,size14.5_1,"[8, 8, 8, 7, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, ..."
1,14.5,14.499207,size14.5_2,"[8, 8, 8, 7, 4, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, ..."
2,14.5,14.499207,size14.5_3,"[8, 8, 8, 5, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, ..."
3,14.5,14.499207,size14.5_4,"[8, 8, 8, 6, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 4, ..."
4,14.5,14.499207,size14.5_5,"[8, 8, 8, 6, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, ..."
5,14.5,14.499207,size14.5_6,"[8, 8, 8, 6, 4, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, ..."
6,14.5,14.499207,size14.5_7,"[8, 8, 8, 7, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, ..."
7,14.5,14.499207,size14.5_8,"[8, 8, 8, 6, 4, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, ..."
8,14.5,14.499207,size14.5_9,"[8, 8, 8, 5, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, ..."


### BOPs limit

In [15]:
import numpy as np
from pymoo.core.problem import Problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.operators.sampling.rnd import IntegerRandomSampling
from pymoo.operators.repair.rounding import RoundingRepair
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.optimize import minimize

class MPQProblem(Problem):
    def __init__(self):
        super().__init__(
                         n_var=num_variable,
                         n_obj=2,
                         n_ieq_constr=1, 
                         n_eq_constr=0,
                         xl=np.array([4] * num_variable),
                         xu=np.array([8] * num_variable),
                         vtype=int)

    def _evaluate(self, X, out, *args, **kwargs):
        f1 = np.sum(df['DeltaAcc'].values * (8 - X) / 4, axis=1)
        f2 = np.sum(df['DeltaAcc'].values * (8 - X) / 4, axis=1) + np.sum(df['Influence_fillna'].values * (8 - X) / 4, axis=1) + [layer_num_df.loc[layer_num_df['Layer_num'] == layer_num, 'Avg_corr'].values[0] for layer_num in np.sum(X != 8, axis=1)]

        g1 = [calculate_bops(X[i, ::2], X[i, 1::2]) - bops_limit for i in range(X.shape[0])]

        out["F"] = np.column_stack([f1, f2])
        out["G"] = np.column_stack([g1])

nsga_dict = {'BOPs_limit': [],
             'BOPs': [], 
             'Config_name': [],
             'Config': []}

bops_limit = 200
config_name = 'bops' + str(bops_limit)

problem = MPQProblem()

algorithm = NSGA2(pop_size=100,
                sampling=IntegerRandomSampling(),
                crossover=SBX(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                mutation=PM(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                eliminate_duplicates=True)
result = minimize(problem, 
                algorithm, 
                termination=('n_gen', 100), 
                seed=1,
                verbose=False)

if result.X is None:
    print(f'BOPs should be limited to between {int(bops_4bit)} and {int(bops_8bit)}')

for i in range(len(result.X)):
    nsga_dict['BOPs_limit'].append(bops_limit)
    nsga_dict['BOPs'].append(calculate_bops(result.X[i][::2], result.X[i][1::2]))
    nsga_dict['Config_name'].append(config_name + '_' + str(i + 1))
    nsga_dict['Config'].append(result.X[i])

nsga_df = pd.DataFrame(nsga_dict)
nsga_df

Unnamed: 0,BOPs_limit,BOPs,Config_name,Config
0,200,199.661794,bops200_1,"[8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 6, ..."


Step 3: Output the resulting mixed-precision scheme.

In [16]:
from bit_config import bit_config

mobilenetv2 = bit_config['mobilenetv2']

changed_layers = {}

for i, row in nsga_df.iterrows():
    config = [8 if i % 2 == 0 else row['Config'][index//2] for index in range(num_variable)]
    changed_layers['bit_config_mobilenetv2_w1_' + row['Config_name']] = {}
    for index, bit in enumerate(config):
        if bit != 8:
            changed_layers['bit_config_mobilenetv2_w1_' + row['Config_name']][layers[index]] = bit

output_bit_config = {}

for model, paras in changed_layers.items():
    output_bit_config[model] = mobilenetv2.copy()
    for para in paras:
        output_bit_config[model][para] = paras[para]

output_bit_config

{'bit_config_mobilenetv2_w1_bops200_1': {'init_block': 8,
  'quant_act_int32': 16,
  'features.stage1.unit1.quant_act': 8,
  'features.stage1.unit1.conv1': 8,
  'features.stage1.unit1.quant_act1': 8,
  'features.stage1.unit1.conv2': 8,
  'features.stage1.unit1.quant_act2': 8,
  'features.stage1.unit1.conv3': 8,
  'features.stage1.unit1.quant_act_int32': 16,
  'features.stage2.unit1.quant_act': 8,
  'features.stage2.unit1.conv1': 8,
  'features.stage2.unit1.quant_act1': 8,
  'features.stage2.unit1.conv2': 8,
  'features.stage2.unit1.quant_act2': 8,
  'features.stage2.unit1.conv3': 8,
  'features.stage2.unit1.quant_act_int32': 16,
  'features.stage2.unit2.quant_act': 8,
  'features.stage2.unit2.conv1': 8,
  'features.stage2.unit2.quant_act1': 8,
  'features.stage2.unit2.conv2': 8,
  'features.stage2.unit2.quant_act2': 8,
  'features.stage2.unit2.conv3': 8,
  'features.stage2.unit2.quant_act_int32': 16,
  'features.stage3.unit1.quant_act': 8,
  'features.stage3.unit1.conv1': 8,
  'feature