cuda runtime error (59) : device-side assert triggered at /pytorch/aten/src/THC/generic/THCTensorCopy.c:20 #9585

@ninjakx

Description

I am running this code on Google Colab:

from __future__ import print_function
import os
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from ssd import SSD300
from multibox_loss import MultiBoxLoss
from datagen import ListDataset  # missing in the original snippet; module name assumed from the SSD repo layout
from torch.autograd import Variable

lr = 1e-3
resume = ''

use_cuda = torch.cuda.is_available()
best_loss = float('inf')  # best test loss
start_epoch = 0  # start from epoch 0 or last epoch

# Data
print('==> Preparing data..')
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])

trainset = ListDataset(root='Images', list_file='itrain.txt', train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)

testset = ListDataset(root='Images', list_file='itest.txt', train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=4)


# Model
net = SSD300()
if resume:
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load('checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']
else:
    # Convert from pretrained VGG model.
    net.load_state_dict(torch.load('ssd.pth'))

criterion = MultiBoxLoss()
if use_cuda:
    #net = torch.nn.DataParallel(net, device_ids=[0,1,2,3,4,5,6,7])
    net.cuda()
    cudnn.benchmark = True

optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)


This is the error I am getting:

RuntimeError                              Traceback (most recent call last)
<ipython-input-45-1d68494f6580> in <module>()
     62 if use_cuda:
     63     #net = torch.nn.DataParallel(net, device_ids=[0,1,2,3,4,5,6,7])
---> 64     net.cuda()
     65     cudnn.benchmark = True
     66 

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in cuda(self, device)
    247             Module: self
    248         """
--> 249         return self._apply(lambda t: t.cuda(device))
    250 
    251     def cpu(self):

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
    174     def _apply(self, fn):
    175         for module in self.children():
--> 176             module._apply(fn)
    177 
    178         for param in self._parameters.values():

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
    174     def _apply(self, fn):
    175         for module in self.children():
--> 176             module._apply(fn)
    177 
    178         for param in self._parameters.values():

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
    180                 # Tensors stored in modules are graph leaves, and we don't
    181                 # want to create copy nodes, so we have to unpack the data.
--> 182                 param.data = fn(param.data)
    183                 if param._grad is not None:
    184                     param._grad.data = fn(param._grad.data)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in <lambda>(t)
    247             Module: self
    248         """
--> 249         return self._apply(lambda t: t.cuda(device))
    250 
    251     def cpu(self):

RuntimeError: cuda runtime error (59) : device-side assert triggered at /pytorch/aten/src/THC/generic/THCTensorCopy.c:20
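
Note: device-side asserts are reported asynchronously, so the line flagged above (net.cuda()) is not necessarily where the assert actually fired. A minimal diagnostic sketch, assuming the same script is re-run from a fresh Colab runtime, is to force synchronous kernel launches so the traceback points at the real failure site:

# Set CUDA_LAUNCH_BLOCKING before the first CUDA operation so kernel launches
# run synchronously and the device-side assert surfaces at the call that triggered it.
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import torch
from ssd import SSD300

net = SSD300()
net.cuda()
# ... re-run the training step; the assert will now be raised at the kernel
# that actually triggered it instead of at a later, unrelated call.

Once a device-side assert has triggered, the CUDA context is left in a failed state, so on Colab the runtime has to be restarted before re-running the cell.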
