In [24]:
import torch
import torchvision
# Record the library versions this notebook was run with, one per line.
print(torch.__version__, torchvision.__version__, sep="\n")

2.3.0+cu121
0.18.0+cu121


In [30]:
import time

import torch
import torch.nn as nn
import torchvision


In [31]:
def get_clean_leaf_layers(model):
    """Return ``(dotted_name, module)`` pairs for every leaf submodule of ``model``.

    A leaf is a submodule that has no children of its own. Pairs are produced
    in depth-first order following ``named_children()``, and each name is the
    dot-joined path from ``model`` down to the leaf. ``model`` itself is never
    included, so a model without children yields an empty list.
    """
    def _walk(parent, path):
        for child_name, submodule in parent.named_children():
            qualified = child_name if not path else f"{path}.{child_name}"
            if any(True for _ in submodule.children()):
                # Container: descend and report its leaves instead of itself.
                yield from _walk(submodule, qualified)
            else:
                yield qualified, submodule

    return list(_walk(model, ''))

In [None]:
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights
from torchvision.models import mnasnet1_0, MNASNet1_0_Weights
from torchvision.models import googlenet, GoogLeNet_Weights
from torchvision.models import squeezenet1_0, SqueezeNet1_0_Weights

# Build each network with its DEFAULT pretrained weights and switch it to
# evaluation mode. ``Module.eval()`` returns the module itself, so it can be
# chained directly onto the constructor call.
weights = SqueezeNet1_0_Weights.DEFAULT
model_squeezenet = squeezenet1_0(weights=weights).eval()

weights = GoogLeNet_Weights.DEFAULT
model_googlenet = googlenet(weights=weights).eval()

weights = MobileNet_V2_Weights.DEFAULT
model_mobilenet = mobilenet_v2(weights=weights).eval()

weights = MNASNet1_0_Weights.DEFAULT
model_mnasnet = mnasnet1_0(weights=weights).eval()

# Registry of the models under test, keyed by a short display name.
models = dict(
    squeezenet=model_squeezenet,
    googlenet=model_googlenet,
    mobilenet=model_mobilenet,
    mnasnet=model_mnasnet,
)

# Summarise how many modules and how many leaf layers each model contains.
for name, model in models.items():
    print(f"Model: {name}")
    print(f"Number of modules: {len(list(model.modules()))}")
    print(f"number of leaf layers: {len(get_clean_leaf_layers(model))} ")
    print("-" * 40)


In [51]:
class SafeLayerDeviceController:
    """Run a model with every leaf layer pinned to CPU or GPU and time each layer.

    Forward hooks are attached to each leaf layer. The pre-hook moves the layer
    and its tensor inputs to the configured device; the post-hook records the
    elapsed time and moves the output to a common "exchange" device (CUDA when
    it is available, otherwise the CPU) so that code running between layers
    always sees tensors on one device.

    The recorded time for a layer starts before the layer and its inputs are
    moved, so it includes that transfer as well as the layer's own forward
    computation. Moving the output to the exchange device is not included.
    Timings are stored under one key per layer, so a layer invoked more than
    once in a single forward pass keeps only its last measurement.
    """

    def __init__(self, model, device_config):
        """Attach timing/placement hooks to every leaf layer of ``model``.

        Args:
            model: PyTorch model to instrument.
            device_config: one entry per leaf layer, in the order produced by
                ``_get_leaf_layers``; 0 selects the CPU, any other value the
                GPU. Example: [1, 1, 0, 0, 1] -> layers 1, 2 and 5 run on the
                GPU, layers 3 and 4 on the CPU.

        Raises:
            ValueError: if ``device_config`` does not have exactly one entry
                per leaf layer.
            RuntimeError: if any layer is assigned to the GPU while CUDA is
                not available.
        """
        self.model = model
        self.device_config = device_config
        self.leaf_layers = self._get_leaf_layers()
        self.execution_times = {}
        self.hooks = []
        self.original_devices = {}  # layer name -> device it lived on before hooking
        self._start_times = {}  # layer index -> timestamp taken in the pre-hook
        # Device on which tensors travel between layers.
        self._exchange_device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # The config must describe every leaf layer exactly once.
        if len(device_config) != len(self.leaf_layers):
            raise ValueError(f"Device config length ({len(device_config)}) must match number of leaf layers ({len(self.leaf_layers)})")

        # Fail early with a clear message instead of deep inside a forward pass.
        if self._exchange_device == 'cpu' and any(flag != 0 for flag in device_config):
            raise RuntimeError("device_config assigns layers to the GPU, but CUDA is not available")

        self._setup_safe_hooks()

    def _get_leaf_layers(self):
        """Return ``(dotted_name, module)`` for every childless submodule, depth-first."""
        leaves = []

        def collect_leaves(module, prefix=''):
            for name, child in module.named_children():
                full_name = f"{prefix}.{name}" if prefix else name

                if len(list(child.children())) == 0:
                    leaves.append((full_name, child))
                else:
                    collect_leaves(child, prefix=full_name)

        collect_leaves(self.model)
        return leaves

    @staticmethod
    def _sync_cuda():
        """Wait for queued CUDA work so wall-clock timestamps are meaningful."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    @staticmethod
    def _module_device(layer):
        """Return the device of the layer's first parameter or buffer, or None if it has neither."""
        for tensor in layer.parameters():
            return tensor.device
        for tensor in layer.buffers():
            return tensor.device
        return None

    @staticmethod
    def _to_device(value, device):
        """Move a tensor, or the tensors inside plain lists/tuples, to ``device``; return anything else unchanged."""
        if torch.is_tensor(value):
            return value.to(device)
        if type(value) in (list, tuple):
            return type(value)(
                SafeLayerDeviceController._to_device(item, device) for item in value
            )
        return value

    def _make_pre_hook(self, idx, on_cpu):
        """Build the pre-hook that starts the timer and co-locates layer and inputs."""
        target_device = 'cpu' if on_cpu else 'cuda'

        def pre_hook(module, inputs):
            # Start timing only after previously queued GPU work has finished.
            self._sync_cuda()
            self._start_times[idx] = time.perf_counter()

            # Move the layer, then its tensor inputs, to the configured device.
            module.to(target_device)
            if not isinstance(inputs, tuple):
                inputs = (inputs,)
            return tuple(
                item.to(target_device) if torch.is_tensor(item) else item
                for item in inputs
            )

        return pre_hook

    def _make_post_hook(self, idx, name, on_cpu):
        """Build the post-hook that records elapsed time and relocates the output."""
        device_name = "CPU" if on_cpu else "GPU"

        def post_hook(module, inputs, output):
            # Make sure the layer's GPU kernels are done before stopping the clock.
            self._sync_cuda()
            elapsed_ms = (time.perf_counter() - self._start_times.pop(idx)) * 1000

            self.execution_times[f"Layer_{idx:02d}_{name}"] = {
                'time_ms': elapsed_ms,
                'device': device_name
            }

            # Hand the result on via the exchange device rather than a hard-coded "cuda".
            return self._to_device(output, self._exchange_device)

        return post_hook

    def _setup_safe_hooks(self):
        """Register a pre-hook and a post-hook on every leaf layer."""
        for idx, (layer_name, layer) in enumerate(self.leaf_layers):
            on_cpu = self.device_config[idx] == 0

            # Remember where the layer lived so restore_model() can undo the moves.
            device = self._module_device(layer)
            if device is not None:
                self.original_devices[layer_name] = device

            pre_handle = layer.register_forward_pre_hook(self._make_pre_hook(idx, on_cpu))
            post_handle = layer.register_forward_hook(self._make_post_hook(idx, layer_name, on_cpu))
            self.hooks.extend([pre_handle, post_handle])

    def forward(self, x):
        """Run the model on ``x`` in eval mode under ``torch.inference_mode``.

        Clears timings from any previous run, moves ``x`` to the first leaf
        layer's configured device (skipped when there are no leaf layers) and
        returns the model output.
        """
        self.execution_times.clear()
        self._start_times.clear()

        if len(self.device_config) > 0:
            first_device = 'cpu' if self.device_config[0] == 0 else 'cuda'
            x = x.to(first_device)

        self.model.eval()
        with torch.inference_mode():
            output = self.model(x)

        return output

    def print_execution_report(self):
        """Print a per-layer timing table followed by total, CPU and GPU sums."""
        # Widen the name column when needed so long layer names stay aligned.
        name_width = max([30] + [len(key) for key in self.execution_times])
        rule_width = max(80, name_width + 27)

        print("\n" + "=" * rule_width)
        print("LAYER EXECUTION REPORT")
        print("=" * rule_width)
        print(f"{'Layer':<{name_width}} {'Device':<10} {'Time (ms)':<15}")
        print("-" * rule_width)

        total_time = 0
        cpu_time = 0
        gpu_time = 0

        for layer_name, info in self.execution_times.items():
            time_ms = info['time_ms']
            device = info['device']

            print(f"{layer_name:<{name_width}} {device:<10} {time_ms:<15.4f}")

            total_time += time_ms
            if device == "CPU":
                cpu_time += time_ms
            else:
                gpu_time += time_ms

        print("-" * rule_width)
        print(f"{'TOTAL':<{name_width}} {'MIXED':<10} {total_time:<15.4f}")
        print(f"{'CPU TOTAL':<{name_width}} {'CPU':<10} {cpu_time:<15.4f}")
        print(f"{'GPU TOTAL':<{name_width}} {'GPU':<10} {gpu_time:<15.4f}")
        print("=" * rule_width)

    def restore_model(self):
        """Move every recorded layer back to the device it was on before hooking."""
        print("Restoring model to original state...")
        layers_by_name = dict(self.leaf_layers)
        for layer_name, original_device in self.original_devices.items():
            layer = layers_by_name.get(layer_name)
            if layer is not None:
                layer.to(original_device)
        print("Model restored to original devices")

    def remove_hooks(self):
        """Detach all hooks, then restore the layers to their original devices."""
        for hook in self.hooks:
            hook.remove()
        self.hooks.clear()
        self._start_times.clear()
        self.restore_model()
        print("All hooks removed and model restored!")

In [52]:
import numpy as np

# For every model: time an all-GPU baseline, then re-run once per leaf layer
# with only that layer placed on the CPU, printing a report after each run.
for name, model_instance in models.items():
    print(f"\n\n🔍 Testing model: {name}")

    num_layers = len(get_clean_leaf_layers(model_instance))
    baseline_device_config = [1] * num_layers  # every layer on the GPU

    # Baseline measurement
    controller = SafeLayerDeviceController(model_instance, baseline_device_config)
    dummy_input = torch.randn(1, 3, 224, 224)  # ImageNet-sized input
    try:
        _ = controller.forward(dummy_input)
        controller.print_execution_report()
    finally:
        # Always detach the hooks: if a run fails they would otherwise stay on
        # the shared model and the next controller would stack a second set.
        controller.remove_hooks()

    # Move one layer at a time to the CPU and measure again
    for i in range(num_layers):
        print(f"\n[TEST] Moving layer {i} to CPU...")
        test_config = [1] * num_layers
        test_config[i] = 0  # only this layer runs on the CPU

        controller = SafeLayerDeviceController(model_instance, test_config)
        try:
            _ = controller.forward(dummy_input)
            controller.print_execution_report()
        finally:
            controller.remove_hooks()




🔍 Testing model: googlenet

LAYER EXECUTION REPORT
Layer                          Device     Time (ms)      
--------------------------------------------------------------------------------
Layer_00_conv1.conv            GPU        0.2673         
Layer_01_conv1.bn              GPU        0.2227         
Layer_02_maxpool1              GPU        0.1574         
Layer_03_conv2.conv            GPU        0.2007         
Layer_04_conv2.bn              GPU        0.1664         
Layer_05_conv3.conv            GPU        0.2542         
Layer_06_conv3.bn              GPU        0.1209         
Layer_07_maxpool2              GPU        0.0689         
Layer_08_inception3a.branch1.conv GPU        0.1342         
Layer_09_inception3a.branch1.bn GPU        0.3905         
Layer_10_inception3a.branch2.0.conv GPU        0.1409         
Layer_11_inception3a.branch2.0.bn GPU        0.1411         
Layer_12_inception3a.branch2.1.conv GPU        0.1678         
Layer_13_inception3a.branch2.1.bn GPU