In [135]:
# import torch.nn as nn
# from model20C_2F import MNIST_CNN
# # Device configuration
# device = 'cpu'  # 'cuda' if torch.cuda.is_available() else 'cpu'
# 
# def compute_cost(layer, input_shape):
#     """
#     Compute the computation cost C_i for a given layer.
#     
#     Parameters:
#     - layer: The layer to compute the cost for.
#     - input_shape: Tuple representing the shape (batch_size, channels, height, width) for conv layers,
#                    (batch_size, features) for dense layers.
#     
#     Returns:
#     - cost: Computation cost C_i for the layer.
#     """
#     if isinstance(layer, nn.Conv2d):
#         # Conv2D layers
#         output_channels = layer.out_channels
#         input_channels = layer.in_channels
#         kernel_height, kernel_width = layer.kernel_size
#         
#         # Assuming stride=1 and padding keeps input and output size the same
#         output_height, output_width = input_shape[2], input_shape[3]
#         print("Conv2d layer:", output_channels, kernel_height, kernel_width)
#         # Calculate cost for convolutional layer
#         cost = (output_height * output_width * output_channels *
#                 kernel_height * kernel_width * input_channels)
#     
#     elif isinstance(layer, nn.Linear):
#         # Dense (Fully Connected) layers
#         print("Dense", layer.in_features, layer.out_features)
#         input_features = layer.in_features
#         output_features = layer.out_features
#         
#         # Calculate cost for dense layer
#         cost = input_features * output_features
# 
#     else:
#         # No cost for other layers like ReLU or Flatten
#         cost = 0
#     print(f"Computation Cost:", cost)
#     return cost
# 
# # Function to calculate total cost for the entire model
# def compute_total_cost(model, input_shape):
#     total_cost = 0
#     for layer in model.children():
#         if isinstance(layer, nn.Sequential):
#             for sublayer in layer:
#                 input_shape = (input_shape[0], layer[0].out_channels, input_shape[2], input_shape[3]) # Update input shape
#                 layer_cost = compute_cost(sublayer, input_shape)
#                 total_cost += layer_cost
#         else:
#             layer_cost = compute_cost(layer, input_shape)
#             total_cost += layer_cost
#             if isinstance(layer, nn.Linear):
#                 input_shape = (input_shape[0], layer.out_features)  # Update for next layer if dense layer
# 
#     return total_cost
# 
# # Initialize model and compute total cost
# input_shape = (1, 1, 28, 28)  # Example input shape (batch_size=1, channels=1, height=28, width=28)
# model = MNIST_CNN(input_shape=1, hidden_units=10, output_shape=10).to(device)
# total_cost = compute_total_cost(model, input_shape)
# 
# print("Total Computation Cost:", total_cost)


In [24]:
from torch import nn
from model20C_2F import MNIST_CNN
device = 'cpu'  

# Static description of the network, keyed by 1-based layer number:
# layers 1-20 are identical 3x3 convolutions (10 -> 10 channels on a 28x28
# map); layers 21 and 22 are dense layers.
layer_map = {
    idx: dict(type="conv", in_channels=10, out_channels=10, kernel_size=3, height=28, width=28)
    for idx in range(1, 21)
}
layer_map[21] = dict(type="dense", in_features=10 * 28 * 28, out_features=10)
layer_map[22] = dict(type="dense", in_features=10, out_features=10)

def layer_memory_req(layer_info):
    """
    Estimate the memory footprint of one layer described by a spec dictionary.

    The estimate is (output activations + weights) at 4 bytes per value.
    Biases are not counted.

    Parameters:
    - layer_info: Dictionary with a "type" key. For "conv" it must also hold
                  "in_channels", "out_channels", "kernel_size" (square kernel
                  side), "height" and "width"; for "dense" it must hold
                  "in_features" and "out_features".

    Returns:
    - The estimate in kilobytes (bytes / 1024); 0.0 for any other layer type.
    """
    kind = layer_info["type"]

    if kind == "conv":
        c_in = layer_info["in_channels"]
        c_out = layer_info["out_channels"]
        side = layer_info["kernel_size"]
        n_outputs = layer_info["height"] * layer_info["width"] * c_out
        n_weights = side * side * c_in * c_out
    elif kind == "dense":
        fan_in = layer_info["in_features"]
        n_outputs = layer_info["out_features"]
        n_weights = fan_in * n_outputs
    else:
        # Unsupported or parameter-free layer types cost nothing
        return 0.0

    # 4 bytes per float value, then bytes -> KB
    total_bytes = n_outputs * 4 + n_weights * 4
    return total_bytes / 1024

def max_segment_mem_req(segment):
    """
    Find the largest single-layer memory requirement among the given layers.

    Each layer index is looked up in the module-level ``layer_map``; the
    per-layer requirement is printed as it is computed. Indices missing from
    the map only produce a warning and are otherwise ignored.

    Parameters:
    - segment: List of layer indices (e.g., [1, 3, 7, 12]).

    Returns:
    - The largest value returned by layer_memory_req for the listed layers,
      or 0 if none of them is in the map.
    """
    peak = 0

    for idx in segment:
        spec = layer_map.get(idx)
        if spec is None:
            print(f"Warning: Layer {idx} not found in layer map.")
            continue

        required = layer_memory_req(spec)
        print(required)
        peak = max(peak, required)

    return peak
# Example usage: largest per-layer memory requirement over a hand-picked set
# of layers (conv layers 1, 2, 7, 12 and both dense layers).
segment = [1, 2, 7, 12, 21, 22]
max_memory = max_segment_mem_req(segment)
print("Maximum Memory Requirement for Segment:", max_memory)


34.140625
34.140625
34.140625
34.140625
306.2890625
0.4296875
Maximum Memory Requirement for Segment: 306.2890625


In [None]:
from torch import mm
device = 'cpu'

# Same 22-layer description as in the memory cell above: twenty identical
# 3x3 conv layers (keys 1-20) followed by two dense layers (keys 21, 22).
layer_map = {
    idx: {"type": "conv", "in_channels": 10, "out_channels": 10, "kernel_size": 3, "height": 28, "width": 28}
    for idx in range(1, 21)
}
layer_map.update({
    21: {"type": "dense", "in_features": 10 * 28 * 28, "out_features": 10},
    22: {"type": "dense", "in_features": 10, "out_features": 10},
})

## Compute and store the computation cost C_i of every layer


In [24]:
import torch.nn as nn
from model5C_2F import MNIST_CNN

# Device configuration
device = 'cpu'  # 'cuda' if torch.cuda.is_available() else 'cpu'

def compute_cost(layer, input_shape):
    """
    Compute the computation cost C_i for a given layer.

    The cost is the number of weight/input products needed for one sample:
    - nn.Conv2d: H_out * W_out * out_channels * kH * kW * (in_channels / groups)
    - nn.Linear: in_features * out_features
    - anything else (ReLU, Flatten, pooling, ...): 0
    Bias terms are not counted.

    Parameters:
    - layer: The layer to compute the cost for.
    - input_shape: Tuple representing the shape (batch_size, channels, height, width) for conv layers,
                   (batch_size, features) for dense layers. Only the height and
                   width entries are read, and only for conv layers.

    Returns:
    - cost: Computation cost C_i for the layer (int).
    """
    if isinstance(layer, nn.Conv2d):
        kernel_height, kernel_width = layer.kernel_size
        # Assumes stride 1 and "same" padding, i.e. the output feature map has
        # the same height/width as the input.
        output_height, output_width = input_shape[2], input_shape[3]
        # With grouped convolutions each output channel only sees
        # in_channels / groups input channels (groups == 1 is the plain case).
        channels_per_group = layer.in_channels // layer.groups

        return (output_height * output_width * layer.out_channels *
                kernel_height * kernel_width * channels_per_group)

    if isinstance(layer, nn.Linear):
        return layer.in_features * layer.out_features

    return 0

def compute_layer_costs(model, input_shape):
    """
    Compute the computation cost C_i of every costed layer in the model.

    The model's direct children are visited in order; nn.Sequential children
    are expanded one level. Layers whose cost is 0 according to compute_cost
    (activations, flatten, ...) are skipped, so the position in the returned
    list is the layer index used by the rest of the notebook.

    Parameters:
    - model: The neural network model.
    - input_shape: The input shape for the model, (batch_size, channels, height, width).

    Returns:
    - layer_costs: A list of computation costs, one per costed layer, in
                   traversal order.
    """
    layer_costs = []

    for layer in model.children():
        if isinstance(layer, nn.Sequential):
            for sublayer in layer:
                # Channel count follows the block's first sublayer, which is
                # therefore expected to expose `out_channels` (e.g. a Conv2d).
                # Height and width are carried over unchanged.
                input_shape = (input_shape[0], layer[0].out_channels, input_shape[2], input_shape[3])
                layer_cost = compute_cost(sublayer, input_shape)
                if layer_cost == 0:
                    continue
                layer_costs.append(layer_cost)
        else:
            layer_cost = compute_cost(layer, input_shape)
            if layer_cost == 0:
                continue
            layer_costs.append(layer_cost)
            if isinstance(layer, nn.Linear):
                # A dense layer turns the running shape into (batch, features)
                input_shape = (input_shape[0], layer.out_features)

    return layer_costs

# Build the model and collect the per-layer computation costs C_i
input_shape = (1, 1, 28, 28)  # (batch_size=1, channels=1, height=28, width=28)
model = MNIST_CNN(input_shape=1, hidden_units=10, output_shape=10).to(device)
layer_costs = compute_layer_costs(model, input_shape)

# Report every C_i together with its 0-based position in the list
print("List of all Ci values (computation costs for each layer):")
for i, cost in enumerate(layer_costs):
    print(f"Layer{i}: Computation Cost = {cost}")


List of all Ci values (computation costs for each layer):
Layer0: Computation Cost = 70560
Layer1: Computation Cost = 705600
Layer2: Computation Cost = 705600
Layer3: Computation Cost = 705600
Layer4: Computation Cost = 705600
Layer5: Computation Cost = 78400
Layer6: Computation Cost = 100


In [25]:
import torch.nn as nn
from model5C_2F import MNIST_CNN

# Device configuration
device = 'cpu'  # 'cuda' if torch.cuda.is_available() else 'cpu'

def layer_memory_req(layer, input_shape):
    """
    Calculate memory requirement for a single layer based on its type and parameters.

    The estimate is (output activations + weights) at 4 bytes per value.
    Biases are not counted.

    Parameters:
    - layer: The layer object (either Conv2d or Linear).
    - input_shape: Tuple representing the shape (batch_size, channels, height, width) for conv layers,
                   (batch_size, features) for dense layers. Only the height and
                   width entries are read, and only for conv layers.

    Returns:
    - memory_req_kb: Memory requirement for the layer in kilobytes (KB);
                     0.0 for any layer type other than Conv2d / Linear.
    """
    if isinstance(layer, nn.Conv2d):
        # Use both kernel dimensions so non-square kernels are sized correctly
        kernel_height, kernel_width = layer.kernel_size
        out_channels = layer.out_channels
        # With grouped convolutions each filter spans in_channels / groups
        # input channels (groups == 1 is the plain case).
        channels_per_group = layer.in_channels // layer.groups
        # Assumes the output feature map keeps the input height/width
        output_height, output_width = input_shape[2], input_shape[3]

        # Memory for activations (output size) + parameters (weights)
        activations_mem = output_height * output_width * out_channels * 4  # 4 bytes per float
        weights_mem = (kernel_height * kernel_width * channels_per_group * out_channels) * 4
        memory_req = activations_mem + weights_mem

    elif isinstance(layer, nn.Linear):
        in_features = layer.in_features
        out_features = layer.out_features

        # Memory for activations (output size) + parameters (weights)
        activations_mem = out_features * 4  # 4 bytes per float
        weights_mem = in_features * out_features * 4
        memory_req = activations_mem + weights_mem

    else:
        memory_req = 0  # Unsupported layer types

    # Convert bytes to kilobytes
    memory_req_kb = memory_req / 1024
    return memory_req_kb

def layer_memory_list(model, input_shape):
    """
    Collect the memory requirement of every layer that has one.

    The model's direct children are visited in order; nn.Sequential children
    are expanded one level. Layers for which layer_memory_req returns 0 are
    left out, so list positions line up with the costed layers only.

    Parameters:
    - model: The neural network model.
    - input_shape: The input shape for the model; passed unchanged to
                   layer_memory_req for every layer.

    Returns:
    - List of per-layer memory values derived from layer_memory_req (KB).
      Values for layers inside an nn.Sequential are stored as returned;
      values for layers that are direct children of the model are stored
      divided by 4 (see the note in the code).
    """
    memory_per_layer = []

    for child in model.children():
        if isinstance(child, nn.Sequential):
            for sublayer in child:
                required = layer_memory_req(sublayer, input_shape)
                if required != 0:
                    memory_per_layer.append(required)
            continue

        required = layer_memory_req(child, input_shape)
        if required != 0:
            # NOTE(review): direct children are scaled by 1/4 while layers
            # inside a Sequential are not. layer_memory_req already returns KB
            # at 4 bytes per value, so this looks inconsistent -- confirm
            # whether the scaling is intentional before relying on it.
            memory_per_layer.append(required / 4)

    return memory_per_layer

# Build the model
input_shape = (1, 1, 28, 28)  # (batch_size=1, channels=1, height=28, width=28)
model = MNIST_CNN(input_shape=1, hidden_units=10, output_shape=10).to(device)

# Per-layer memory values, reported with their 0-based list position
layer_memory = layer_memory_list(model, input_shape)
print("\nList of Memory Requirements for Each Layer (in KB):")
for i, memory in enumerate(layer_memory):
    print(f"{i}: {memory} KB")



List of Memory Requirements for Each Layer (in KB):
0: 30.9765625 KB
1: 34.140625 KB
2: 34.140625 KB
3: 34.140625 KB
4: 34.140625 KB
5: 76.572265625 KB
6: 0.107421875 KB


In [41]:
def Rkp(Ci, F, L):
    """
    Weighted computation cost of the layers between two boundaries.

    Each layer's cost is multiplied by its 1-based position inside the
    segment. The end boundary L is never included. The start boundary F is
    excluded as well, except when F == 0, in which case layer 0 itself is the
    first layer of the segment.

    Parameters:
    - Ci: List of computation costs for each layer.
    - F: Start boundary. Layers F+1 .. L-1 are summed (layers 0 .. L-1 when F == 0).
    - L: End boundary (exclusive).

    Returns:
    - Weighted sum of the costs in the segment; 0 when the segment is empty.
    """
    if F == 0:
        # First segment: layer 0 is part of it and gets weight 1
        first, shift = F, 1
    else:
        # Later segments start right after the boundary layer F
        first, shift = F + 1, 0

    return sum(Ci[i] * (i - F + shift) for i in range(first, L))
def computation_cost(Ci, seg):
    """
    Total weighted computation cost of a segmentation.

    The boundaries in `seg` split the layers into consecutive segments; the
    last index of Ci is always added as the final boundary. The cost of each
    segment is Rkp(Ci, start, end), where `start` is the previous boundary
    (0 for the first segment).

    Parameters:
    - Ci: List of computation costs for each layer.
    - seg: List of boundary layer indices, in increasing order. Not modified.

    Returns:
    - Sum of Rkp over all segments.
    """
    boundaries = seg.copy()
    boundaries.append(len(Ci) - 1)  # the final segment always ends at the last index

    total = 0
    start = 0
    for end in boundaries:
        total += Rkp(Ci, start, end)
        start = end

    return total


# Sanity check: boundaries 2 and 3 are adjacent, so no layer lies strictly
# between them and the weighted cost is 0.
Rkp(layer_costs ,2,3)


0

In [42]:
# Baseline: with no boundaries the model is one segment running from layer 0
# up to (but not including) the last index of layer_costs.
segment = []
computation_cost(layer_costs,segment )

10419360

In [43]:
def max_mem_req(Mi, curr_seg):
    """
    Memory needed by a segmentation: the sum of Mi over its boundary layers.

    Despite the name, this is a sum, not a maximum.

    Parameters:
    - Mi: List of per-layer memory requirements.
    - curr_seg: List of boundary layer indices into Mi.

    Returns:
    - Sum of Mi[idx] for every idx in curr_seg; 0 for an empty list.
    """
    return sum(Mi[idx] for idx in curr_seg)

# Sanity check: a single boundary at index 2 needs exactly layer_memory[2].
max_mem_req(layer_memory ,[2])

34.140625

In [44]:
# Number of layers with a non-zero memory entry
len(layer_memory)

7

In [49]:
def optimalSegFinder(Ci, Mi, M, n):
    """
    Exhaustively search for the cheapest segmentation that fits in memory.

    Every increasing list of boundary indices drawn from 1 .. n-1 (including
    the empty list) is tried. A candidate is feasible when
    max_mem_req(Mi, candidate) <= M; among feasible candidates the one with
    the smallest computation_cost(Ci, candidate) wins. Ties keep the
    candidate that was visited first (candidates are visited in
    lexicographic order: [], [1], [1, 2], ...).

    Parameters:
    - Ci: List of computation costs for each layer.
    - Mi: List of memory requirements for each layer.
    - M: Available memory size, in the same unit as Mi.
    - n: Exclusive upper bound for boundary indices.

    Returns:
    - optimal_seg: Boundary list with the minimal computation cost
                   ([] if nothing feasible was found).
    - min_cost: Its computation cost (float('inf') if nothing feasible was found).
    """
    min_cost = float('inf')
    optimal_seg = []

    # Depth-first enumeration with an explicit stack. Each entry is
    # (boundaries chosen so far, smallest index that may be added next).
    pending = [([], 1)]
    while pending:
        boundaries, next_index = pending.pop()

        # Only memory-feasible candidates compete on cost
        if max_mem_req(Mi, boundaries) <= M:
            cost = computation_cost(Ci, boundaries)
            if cost < min_cost:
                min_cost = cost
                optimal_seg = boundaries[:]

        # Push extensions in reverse so the smallest index is explored first
        for i in reversed(range(next_index, n)):
            pending.append((boundaries + [i], i + 1))

    return optimal_seg, min_cost


In [50]:
# Search inputs: per-layer costs and memory values from the cells above
Ci, Mi = layer_costs, layer_memory
M = 200  # memory budget, compared against sums of Mi entries
# Boundary candidates are 1 .. n-1; the last index of Ci is excluded here
# because computation_cost always appends it as the final boundary.
n = len(layer_costs) - 1

optimal_seg, min_cost = optimalSegFinder(Ci, Mi, M, n)
print("Optimal Segmentation:", optimal_seg)
print("Minimum Computation Cost:", min_cost)

Optimal Segmentation: [1, 2, 3, 4]
Minimum Computation Cost: 148960
