In [84]:
import torch
from torch import nn
from torch import cuda
from time import time 

%matplotlib inline

In [10]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [98]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layers = nn.Sequential(*[nn.Linear(1, 500), 
                                     nn.ReLU(), 
                                     nn.Linear(500, 500),
                                      nn.ReLU(), 
                                     nn.Linear(500, 500),
                                      nn.ReLU(), 
                                     nn.Linear(500, 500),
                                      nn.ReLU(), 
                                     nn.Linear(500, 500),
                                      nn.ReLU(), 
                                     nn.Linear(500, 500),
                                     nn.ReLU(), 
                                     nn.Linear(500, 10), 
                                     nn.Softmax(dim=1)])
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')
    
    def regularizer(self):
        reg = torch.tensor(0.).to(device)
        for m in self.modules():
            if hasattr(m, 'weight') and m.weight != None:
                reg += m.weight.norm()**2
        return reg
            
    def lossfn(self, ypred, ybatch):
        cent = self.cross_entropy(ypred, ybatch.argmax(1)).mean()
        loss = cent + self.regularizer()
        return loss
        
    def forward(self, x):
        return self.layers(x)

In [99]:
cases = [torch.device('cpu'), torch.device('cuda:0')]
for device in cases:
    N = 10000
    M = 256
    x = torch.randn(N, 1).to(device)
    y = np.random.randint(0, 10, size=(N,))
    y = np.eye(10)[y]
    y = torch.tensor(y, dtype=torch.float32).to(device)

    net = Net().to(device)
    optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)

    start = time()
    for i in range(1000):
        idx = np.random.randint(0, 100, M)
        xbatch, ybatch = x[idx], y[idx]
        optimizer.zero_grad()
        ypred = net(xbatch)
        loss = net.lossfn(ypred, ybatch)
        loss.backward()
        optimizer.step()
        if (i+1)%100 == 0:
            print("Iteration : %4d"%(i+1))
    stop = time()
    total = (stop - start)/60.
    print("Total time taken on %s = %.5f minutes"%(device.type, total))

Iteration :  100
Iteration :  200
Iteration :  300
Iteration :  400
Iteration :  500
Iteration :  600
Iteration :  700
Iteration :  800
Iteration :  900
Iteration : 1000
Total time taken on cpu = 0.72449 minutes
Iteration :  100
Iteration :  200
Iteration :  300
Iteration :  400
Iteration :  500
Iteration :  600
Iteration :  700
Iteration :  800
Iteration :  900
Iteration : 1000
Total time taken on cuda = 0.16357 minutes


In [89]:
torch.device('cuda:0')

device(type='cuda', index=0)

In [72]:
net.cross_entropy(net(xbatch), ybatch.argmax(1))

tensor([2.3193, 2.2896, 2.3170, 2.2531, 2.3002, 2.3144, 2.2820, 2.3348, 2.3258,
        2.3194, 2.3288, 2.3074, 2.3115, 2.3194, 2.3115], device='cuda:0',
       grad_fn=<NllLossBackward>)

torch.Size([15])