In [2]:
import re

def calculate_bops(model_name, bit_width, sparsities, input_size=10000):
    """Estimate the total bit operations (BOPs) of a named phi/rho model.

    Args:
        model_name: Key into the layout table: 'Large', 'Medium', 'Small'
            or 'Tiny'.
        bit_width: Factor applied to every layer's operation count.
        sparsities: One value per rho layer; a layer keeps
            ``int(in_features * out_features * (1 - sparsity))`` weights.
        input_size: Factor applied to the phi layers only.

    Returns:
        The sum over all layers. Each phi layer contributes
        ``in * out * input_size * 2 * bit_width``; each rho layer
        contributes ``kept_weights * 2 * bit_width``.

    Raises:
        ValueError: If ``model_name`` is not in the table, or if
            ``len(sparsities)`` differs from the number of rho layers.
    """
    # (in_features, out_features) of every linear layer, per sub-network.
    layouts = {
        'Large': {
            'phi': [(3, 32), (32, 32)],
            'rho': [(32, 32), (32, 64), (64, 5)],
        },
        'Medium': {
            'phi': [(3, 32), (32, 16)],
            'rho': [(16, 64), (64, 8), (8, 32), (32, 5)],
        },
        'Small': {
            'phi': [(3, 8), (8, 8)],
            'rho': [(8, 16), (16, 16), (16, 5)],
        },
        'Tiny': {
            'phi': [(3, 16)],
            'rho': [(16, 8), (8, 8), (8, 4), (4, 5)],
        },
    }

    layout = layouts.get(model_name)
    if layout is None:
        raise ValueError(f"Unknown model name: {model_name}")

    phi_layers, rho_layers = layout['phi'], layout['rho']

    # Every rho layer needs its own sparsity value.
    if len(rho_layers) != len(sparsities):
        raise ValueError(f"Number of rho layers ({len(rho_layers)}) does not match number of sparsities ({len(sparsities)})")

    total_bops = 0

    # phi layers are counted dense and scaled by input_size.
    for fan_in, fan_out in phi_layers:
        total_bops += fan_in * fan_out * input_size * 2 * bit_width

    # rho layers count only the weights left after pruning (truncated to int)
    # and are not scaled by input_size.
    for (fan_in, fan_out), pruned_fraction in zip(rho_layers, sparsities):
        kept_weights = int(fan_in * fan_out * (1 - pruned_fraction))
        total_bops += kept_weights * 2 * bit_width

    return total_bops

def parse_log_line(line):
    """Pull model name, bit width, prune iteration and sparsities from a log line.

    The line must start with ``Deepsets <name> Model <bits>-Bit QAT Model
    Prune Iter: <n>,`` and later contain ``Sparsities: (...)``. Inside the
    outer parentheses the entries are split on ``'), '``; for each entry the
    text between its first ``(`` and the following ``,`` is converted to a
    float.

    Returns:
        ``(model_name, bit_width, prune_iter, sparsities)`` with the two
        middle items as ints and ``sparsities`` as a list of floats, or
        ``None`` when the line does not match the pattern.
    """
    found = re.match(r"Deepsets (\w+) Model (\d+)-Bit QAT Model Prune Iter: (\d+), .* Sparsities: \((.*)\)", line)
    if found is None:
        return None

    name, bits, iteration, raw_entries = found.groups()

    sparsities = []
    for entry in raw_entries.split('), '):
        # Keep what follows the opening parenthesis, then cut at the first comma.
        after_paren = entry.split('(')[1]
        sparsities.append(float(after_paren.split(',')[0]))

    return name, int(bits), int(iteration), sparsities

# Example usage: compute BOPs for every recognised line of the compression log.
log_file_path = "NAC_Compress.txt"
results = []

with open(log_file_path, 'r') as file:
    # parse_log_line yields None for lines that do not match; filter(None, ...)
    # drops those lazily, so lines are still parsed and evaluated one at a time.
    for model_name, bit_width, prune_iter, sparsities in filter(None, map(parse_log_line, file)):
        results.append(
            (model_name, bit_width, prune_iter, calculate_bops(model_name, bit_width, sparsities))
        )

# Report one line per parsed log entry.
for model_name, bit_width, prune_iter, bops in results:
    print(f"{model_name} Model, {bit_width}-bit, Prune Iter {prune_iter}: {bops} BOPs")

ValueError: Number of rho layers (4) does not match number of sparsities (3)

In [None]:
import math
import os
import re

import pandas as pd

def extract_data(file_path):
    """Read a pruning log and group its per-iteration records by model name.

    The whole file is scanned for entries of the form
    ``<word> <word> Model <word>-Bit QAT Model Prune Iter: <n>, Test Accuracy:
    <x>, Val Accuracy: <x>, Val Loss: <x>, Sparsities: (<...>)``.

    Args:
        file_path: Path of the text log to read.

    Returns:
        A dict mapping each model name to a list of record dicts, in file
        order, with keys ``"iter"`` (int), ``"test_accuracy"``,
        ``"val_accuracy"``, ``"val_loss"`` (floats) and ``"sparsities"``
        (tuple of floats built from every run of digits/dots found inside
        the captured parentheses).
    """
    with open(file_path, 'r') as file:
        content = file.read()

    pattern = r"(\w+ \w+ Model \w+-Bit QAT Model) Prune Iter: (\d+), Test Accuracy: ([\d.]+), Val Accuracy: ([\d.]+), Val Loss: ([\d.]+), Sparsities: \((.*?)\)"

    data = {}
    for name, prune_iter, test_acc, val_acc, val_loss, raw_sparsities in re.findall(pattern, content):
        # Sparsities are converted first, then the scalar fields.
        sparsities = tuple(float(token) for token in re.findall(r"[\d.]+", raw_sparsities))
        record = {
            "iter": int(prune_iter),
            "test_accuracy": float(test_acc),
            "val_accuracy": float(val_acc),
            "val_loss": float(val_loss),
            "sparsities": sparsities,
        }
        data.setdefault(name, []).append(record)

    return data

def get_linear_bops(in_features, out_features, sparsity, bit_width):
    """Return the BOPs estimate for one linear layer.

    Computes ``out_features * in_features * ((1 - sparsity) * bit_width**2
    + 2 * bit_width + log2(in_features))``. Only the ``bit_width**2`` term
    is scaled by the unpruned fraction ``1 - sparsity``; the
    ``2 * bit_width`` and ``log2(in_features)`` terms are counted in full.

    Raises:
        ValueError: From ``math.log2`` when ``in_features`` is not positive.
    """
    weight_count = out_features * in_features
    # Kept in the same left-to-right order so float results are unchanged.
    per_weight_cost = (1 - sparsity) * bit_width**2 + 2 * bit_width + math.log2(in_features)
    return weight_count * per_weight_cost

def calculate_model_bops(model_name, iteration):
    """Sum ``get_linear_bops`` over the layers of the model named in ``model_name``.

    Args:
        model_name: String containing ``<digits>-Bit`` (used as the bit width)
            and one of 'Large', 'Medium', 'Small' or 'Tiny' (used to pick the
            layer layout; the first of these found, in that order, wins).
        iteration: Mapping with a ``'sparsities'`` sequence. Each layer reads
            the entry at its assigned index; an index past the end of the
            sequence is treated as sparsity 0.

    Returns:
        The accumulated BOPs, or 0 when the name contains none of the four
        size keywords.

    Raises:
        AttributeError: If ``model_name`` has no ``<digits>-Bit`` part.
    """
    # Per model: (in_features, out_features, index into sparsities) for each
    # linear layer, phi layers first, then rho layers.
    # NOTE(review): the 'Large' layout here (three phi, two rho layers) differs
    # from the 'Large' entry in calculate_bops' table -- confirm which is right.
    layouts = (
        ('Large', (
            (3, 32, 0), (32, 32, 0), (32, 32, 0),                # phi
            (32, 32, 1), (32, 5, 2),                             # rho
        )),
        ('Medium', (
            (3, 32, 0), (32, 16, 1),                             # phi
            (16, 64, 2), (64, 8, 2), (8, 32, 3), (32, 5, 3),     # rho
        )),
        ('Small', (
            (3, 8, 0), (8, 8, 1),                                # phi
            (8, 16, 2), (16, 16, 2), (16, 5, 2),                 # rho
        )),
        ('Tiny', (
            (3, 16, 0),                                          # phi
            (16, 8, 1), (8, 8, 2), (8, 4, 2), (4, 5, 2),         # rho
        )),
    )

    bit_width = int(re.search(r'(\d+)-Bit', model_name).group(1))
    sparsities = iteration['sparsities']

    # First keyword contained in the name decides; no keyword means no layers.
    layers = next((spec for keyword, spec in layouts if keyword in model_name), ())

    total_bops = 0
    for fan_in, fan_out, index in layers:
        sparsity = sparsities[index] if index < len(sparsities) else 0
        total_bops += get_linear_bops(fan_in, fan_out, sparsity, bit_width)

    return total_bops


In [None]:

file_path = 'NAC_Compress.txt'  # Update this with the actual path to your file
output_dir = "./plots/Pareto_BOPs"  # One CSV per model is written here
data = extract_data(file_path)

# Build one column-oriented table per model: a row per pruning iteration.
results = {}
for model_name, iterations in data.items():
    results[model_name] = {
        "Prune Iter": [],
        "Test Accuracy": [],
        "Val Accuracy": [],
        "Val Loss": [],
        "BOPs": [],
        "Global Sparsity": []
    }
    print(f"\nModel: {model_name}")
    for iteration in iterations:
        bops = calculate_model_bops(model_name, iteration)
        # Derived from the iteration number alone, not from the logged
        # per-layer sparsities.
        global_sparsity = 1 - (0.8 ** iteration['iter'])

        results[model_name]["Prune Iter"].append(iteration['iter'])
        results[model_name]["Test Accuracy"].append(iteration['test_accuracy'])
        results[model_name]["Val Accuracy"].append(iteration['val_accuracy'])
        results[model_name]["Val Loss"].append(iteration['val_loss'])
        results[model_name]["BOPs"].append(bops)
        results[model_name]["Global Sparsity"].append(global_sparsity)

        print(f"Iteration {iteration['iter']}: {bops:.2e} BOPs, Global Sparsity: {global_sparsity:.4f}")
    
    # NOTE(review): this stops after the first model, so only one CSV is
    # written below. Looks like a debugging leftover -- remove to export
    # every model in the log.
    break

# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs(output_dir, exist_ok=True)

for model_name, values in results.items():
    df = pd.DataFrame(values)
    filename = f"{output_dir}/{model_name.replace(' ', '_')}_BOPs.csv"
    df.to_csv(filename, index=False)
    print(f"Results for {model_name} saved to {filename}")
