In [1]:
import torch
import torch.nn as nn

In [2]:
import torchvision

In [3]:
import random
import time
import os
import sys

In [6]:
network = 'alexnet'
batch_size = 512
iterations = 10

net = torchvision.models.alexnet()
is_gpu_available = False

In [7]:
if torch.cuda.is_available():
    print ("INFO: GPU is available, hence switching to gpu computation.")
    is_gpu_available = True
else:
    print ("INFO: GPU is not available, using CPU..")

INFO: GPU is available, hence switching to gpu computation.


In [8]:
inp = torch.randn(batch_size, 3, 224, 224)
if is_gpu_available:
    inp = inp.cuda()
    net = net.cuda()
    
target = torch.arange(batch_size)
if is_gpu_available:
    target = target.to('cuda:0')
    
param_copy = net.parameters()

In [9]:
print (net)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_feature

In [15]:

sub_modules = net.__dict__['_modules']
print (sub_modules)

OrderedDict([('features', Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)), ('classifier', Sequential(
  (0): Dropout(p=0.5)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace)
  (3): Dropout(p=0.5)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inp

In [33]:
layer_info = []
def getLayers(module):
    sub_modules = module.__dict__['_modules']
    count = 0
    for name, sub_module in sub_modules.items():
        if sub_module is None or isinstance(sub_module, nn.Module) is False:
            break
        if isinstance(sub_module, nn.Container) or isinstance(sub_module, nn.Sequential):
            getLayers(sub_module)
        else:
            layer_info.append(sub_module)
            print (sub_module.__class__)

getLayers(net)
print (len(layer_info))

<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.pooling.MaxPool2d'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.pooling.MaxPool2d'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.pooling.MaxPool2d'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.linear.Linear'>
20


In [53]:
for i in range(1):
    layer = layer_info[0]
    print(layer.__class__)
    x = torch.randn(1, 3, 224, 224).to('cuda:0')
    print(layer)
    output = layer(x)
    output_size = output.size()
    grad_output = torch.randn(output_size[0], output_size[1], output_size[2], output_size[3]).cuda()
    output.backward(grad_output)

<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))


In [59]:
def generate_time(layer, x, iter=10):
    torch.cuda.synchronize()
    start_time = time.time()
    for j in range(1):
        output = layer(x)
        output_size = output.size()
        grad_output = torch.randn(output_size[0], output_size[1], output_size[2], output_size[3]).cuda()
        output.backward(grad_output)
    torch.cuda.synchronize()
    elapsed_time = time.time() - start_time
    print(elapsed_time)
    return elapsed_time, output_size

In [60]:
x = torch.randn(batch_size, 3, 224, 224).cuda()
layer_data = {}
for i in range(1):
    layer_data['layer_num'] = i
    layer = layer_info[i]
    elapsed_time, output_size = generate_time(layer, x)
    layer_data['elapsed_time'] = elapsed_time
    layer_data['input_size'] = x.size()
    layer_data['output_size'] = output_size
    print(elapsed_time)

RuntimeError: CUDA out of memory. Tried to allocate 294.00 MiB (GPU 0; 4.00 GiB total capacity; 2.86 GiB already allocated; 213.87 MiB free; 3.41 MiB cached)