In [1]:
import os
from pathlib import Path
import numpy as np
import torch
from torch.utils.data import Dataset
import torchvision
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import re

import matplotlib.pyplot as plt



## Dataset

In [6]:
# Root of the on-disk ImageNet copy; the three sub-folders below all hang off it.
root = Path('/home/data/Imagenet')
train_path, val_path, label_path = (root.joinpath(sub) for sub in ('train', 'val', 'label'))

def ImgNetData(train_path, val_path, distributed=True):
    """Build the training and validation ``ImageFolder`` datasets.

    Args:
        train_path: Directory passed to ``ImageFolder`` as the training root.
        val_path: Directory passed to ``ImageFolder`` as the validation root.
        distributed: Not read by this function; kept so existing callers that
            pass it keep working.

    Returns:
        tuple: ``(train_data, val_data)``.
    """
    # One Normalize instance shared by both pipelines so the statistics cannot drift apart.
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

    train_transforms = transforms.Compose([
        # Scale augmentation: randomly pick one of two resize targets, then take a random 224 crop.
        transforms.RandomChoice([transforms.Resize(256), transforms.Resize(480)]),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        normalize,

        ### Open question from the author: could this pipeline be used instead?
        # transforms.RandomResizedCrop((224,224)),
        # transforms.Resize((256,256)),
        # transforms.CenterCrop((224,224)),

        transforms.RandomHorizontalFlip(0.5),
    ])
    val_transforms = transforms.Compose([
        transforms.Resize(256),
        # Must match the 224x224 training crop; the previous value (244) produced
        # inputs of a different size than the network was trained on.
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    train_data = ImageFolder(root=train_path, transform=train_transforms)
    val_data = ImageFolder(root=val_path, transform=val_transforms)

    return train_data, val_data
    

In [11]:
synset_path = label_path / 'synset_words.txt'

def synset2word(synset_path=synset_path, full_name=False):
    """Map every synset id in a ``synset_words.txt``-style file to a label.

    Each useful line is expected to look like ``<synset-id> <description>``.
    Lines that are blank or carry only an id are skipped instead of raising
    ``IndexError``.

    Args:
        synset_path: Path of the file to read.
        full_name: When False (default, the historical behaviour) the label is
            the first whitespace-separated token of the description with every
            non-ASCII-letter character removed. Distinct classes can then share
            a label (e.g. several start with "great" or "common"). When True
            the label is the text up to the first comma, stripped.

    Returns:
        dict: synset id -> label string.
    """
    label_dict = {}
    with open(synset_path, 'r') as f:
        for line in f:
            # Split once into "<id>" and "<rest of line>".
            fields = line.split(None, 1)
            if len(fields) < 2:
                continue  # blank line, or an id without any description
            synset, description = fields
            if full_name:
                word = description.split(',')[0].strip()
            else:
                word = re.sub(r'[^a-zA-Z]', '', description.split()[0])
            label_dict[synset] = word

    return label_dict

# Build the synset-id -> label lookup from the default file and show it as the cell output.
label_dict = synset2word()
label_dict

{'n01440764': 'tench',
 'n01443537': 'goldfish',
 'n01484850': 'great',
 'n01491361': 'tiger',
 'n01494475': 'hammerhead',
 'n01496331': 'electric',
 'n01498041': 'stingray',
 'n01514668': 'cock',
 'n01514859': 'hen',
 'n01518878': 'ostrich',
 'n01530575': 'brambling',
 'n01531178': 'goldfinch',
 'n01532829': 'house',
 'n01534433': 'junco',
 'n01537544': 'indigo',
 'n01558993': 'robin',
 'n01560419': 'bulbul',
 'n01580077': 'jay',
 'n01582220': 'magpie',
 'n01592084': 'chickadee',
 'n01601694': 'water',
 'n01608432': 'kite',
 'n01614925': 'bald',
 'n01616318': 'vulture',
 'n01622779': 'great',
 'n01629819': 'European',
 'n01630670': 'common',
 'n01631663': 'eft',
 'n01632458': 'spotted',
 'n01632777': 'axolotl',
 'n01641577': 'bullfrog',
 'n01644373': 'tree',
 'n01644900': 'tailed',
 'n01664065': 'loggerhead',
 'n01665541': 'leatherback',
 'n01667114': 'mud',
 'n01667778': 'terrapin',
 'n01669191': 'box',
 'n01675722': 'banded',
 'n01677366': 'common',
 'n01682714': 'American',
 'n0168

## Utils

In [None]:
import sys

class Logger(object):
    """Minimal tee-style logger: terminal plus an optional log file.

    Only the process whose ``local_rank`` is 0 emits anything, so multi-process
    runs do not duplicate output. With ``no_save=True`` nothing is written to
    disk.
    """

    def __init__(self, local_rank=0, no_save=False):
        self.terminal = sys.stdout
        self.file = None  # set by open(); stays None when saving is disabled
        self.local_rank = local_rank
        self.no_save = no_save

    def open(self, fp, mode=None):
        """Open ``fp`` as the log file (``mode`` defaults to ``'w'``).

        The file is opened only on rank 0 and only when saving is enabled.
        The previous condition was inverted and never opened a file on rank 0,
        so the first ``write`` crashed on ``None``.
        """
        if mode is None:
            mode = 'w'
        if self.local_rank == 0 and not self.no_save:
            if self.file is not None:
                self.file.close()  # do not leak a previously opened log file
            self.file = open(fp, mode)

    def write(self, msg, is_terminal=1, is_file=1):
        """Write ``msg`` (newline-terminated) to the terminal and/or the log file.

        Messages containing a carriage return (progress-bar style updates) are
        never written to the file.
        """
        # endswith() also copes with the empty string, unlike msg[-1].
        if not msg.endswith("\n"):
            msg = msg + "\n"
        if self.local_rank != 0:
            return
        if '\r' in msg:
            is_file = 0
        if is_terminal == 1:
            self.terminal.write(msg)
            self.terminal.flush()
        # Skip the file silently when open() has not been called.
        if is_file == 1 and not self.no_save and self.file is not None:
            self.file.write(msg)
            self.file.flush()

    def flush(self):
        """No-op; present so the object can stand in for a stream."""
        pass

    def close(self):
        """Close the log file if one is open."""
        if self.file is not None:
            self.file.close()
            self.file = None
    
def print_args(args, logger=None):
    """Dump every attribute of ``args`` as ``name: value`` lines.

    Without a logger the lines go to ``print``. With a logger they are sent
    through ``logger.write`` between a header and a footer banner.
    """
    if logger is None:
        for name, value in vars(args).items():
            print('{}: {}'.format(name, value))
        return

    logger.write("#### configurations ####")
    for name, value in vars(args).items():
        logger.write('{}: {}\n'.format(name, value))
    logger.write("########################")
      
import argparse
import json
def save_args(args, to_path):
    """Write the attributes of ``args`` to ``to_path`` as 2-space-indented JSON."""
    with open(to_path, "w") as handle:
        json.dump(vars(args), handle, indent=2)
def load_args(from_path):
    """Rebuild an ``argparse.Namespace`` from a JSON file of saved attributes.

    The namespace is constructed directly from the stored mapping. The earlier
    version obtained it by parsing ``sys.argv`` with an empty parser, which
    aborts with ``SystemExit`` whenever the process was started with any
    command-line flag (as a notebook kernel is).

    Args:
        from_path: Path of the JSON file to read.

    Returns:
        argparse.Namespace: one attribute per top-level JSON key.
    """
    with open(from_path, "r") as f:
        stored = json.load(f)
    return argparse.Namespace(**stored)

class AverageMeter(object):
    """Keeps the latest value, weighted sum, sample count and running mean of a metric."""

    def __init__(self):
        # A fresh meter starts from the zeroed state.
        self.reset()

    def reset(self):
        """Zero every tracked statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, counted as ``n`` samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Running mean over everything seen since the last reset.
        self.avg = self.sum / self.count
        
def Accuracy(output, target, topk=(1,)):
    """Top-k accuracy, in percent, for each k in ``topk``.

    Returns a list with one single-element tensor per requested k, in the
    same order as ``topk``.
    """
    with torch.no_grad():
        largest_k = max(topk)
        n_samples = target.size(0)

        # Indices of the `largest_k` highest scores per row, best first,
        # transposed so that row r holds every sample's rank-r prediction.
        top_indices = output.topk(largest_k, dim=1, largest=True, sorted=True)[1]
        ranked = top_indices.t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))

        scale = 100.0 / n_samples
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(scale)
            for k in topk
        ]

## Model

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BottleNeck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The block outputs ``out_channels * expansion`` channels. ``expansion`` is a
    class attribute and is read consistently for both the residual branch and
    the shortcut. It is no longer overwritten per instance, which previously
    gave a subclass with a different ``expansion`` mismatched branch widths.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Width of the two inner convolutions.
        stride: Stride of the 3x3 convolution (and of the projection shortcut).
    """
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super(BottleNeck, self).__init__()
        expanded_channels = out_channels * self.expansion

        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, expanded_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(expanded_channels),
        )
        # Identity shortcut by default (an empty Sequential returns its input).
        self.shortcut = nn.Sequential()
        self.relu = nn.ReLU()

        # When the block downsamples (stride != 1) or changes the channel count,
        # the input no longer matches the residual output, so project it with a
        # strided 1x1 convolution to make the skip connection addable.
        if stride != 1 or in_channels != expanded_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, expanded_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(expanded_channels)
            )

    def forward(self, x):
        """Return ``relu(residual_function(x) + shortcut(x))``."""
        out = self.residual_function(x)
        out = out + self.shortcut(x)
        return self.relu(out)
        
    

In [18]:
class ResNet(nn.Module):
    """ResNet built from a stem, four residual stages, global pooling and a linear head.

    Args:
        block: Residual block class. It must expose a class attribute
            ``expansion`` and accept ``(in_channels, out_channels, stride)``.
        layers: Number of blocks in each of the four stages.
        in_channels: Channels of the input image.
        num_classes: Size of the classifier output.
    """

    def __init__(self, block=BottleNeck, layers=(3, 4, 6, 3), in_channels=3, num_classes=1000):
        # Initialise nn.Module before assigning any attribute.
        super().__init__()
        self.num_classes = num_classes
        # Running channel count fed to the next block; updated by _make_layer.
        self.in_channels = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.conv2 = self._make_layer(block, 64, layers[0])
        self.conv3 = self._make_layer(block, 128, layers[1], downsampling=True)
        self.conv4 = self._make_layer(block, 256, layers[2], downsampling=True)
        self.conv5 = self._make_layer(block, 512, layers[3], downsampling=True)
        # Global average pooling to 1x1. For the 7x7 map produced by a 224x224
        # input this averages the same 49 values as AvgPool2d(7), but it also
        # accepts other input resolutions. The layer has no parameters, so
        # existing state dicts still load.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Derive the head width from the block type instead of hard-coding 2048.
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def forward(self, x):
        """Run the stem, the four stages, pooling and the classifier; return logits."""
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.conv5(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)

        return out

    def _make_layer(self, block, out_channels, num_blocks, downsampling=False):
        """Stack ``num_blocks`` blocks; only the first one may downsample (stride 2)."""
        first_stride = 2 if downsampling is True else 1
        strides = [first_stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_channels, out_channels, block_stride))
            # Every following block receives the expanded output of this one.
            self.in_channels = out_channels * block.expansion

        return nn.Sequential(*layers)
    

import torchsummary
# Instantiate the network with its defaults and print a per-layer summary
# for a 3x224x224 input, computed on the CPU.
model = ResNet()
torchsummary.summary(model, (3,224,224), device='cpu')
# print(model)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [21]:
import argparse
import os
import random
import shutil
import time
import warnings
from enum import Enum

import torch
import torch.backends.cudnn as cudnn # cudnn.benchmark = True 최적의 backend 연산을 찾는 flag를 True로 함. ex)입력크기가 고정된 모델 등에 유효

# for DDP
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn.parallel
import torch.utils.data.distributed

import torch.nn as nn
import torch.optim
import torchvision.datasets as datasets
import torchvision.models as models
from torch.optim.lr_scheduler import StepLR
from model.resnet50 import BottleNeck, ResNet
from data.data import ImgNetData
from utils import AverageMeter, Logger, print_args, save_args, load_args, Accuracy

# Sorted names of the lower-case, non-dunder callables found in torchvision.models.
model_names = sorted(
    name
    for name, member in vars(models).items()
    if name.islower() and not name.startswith("__") and callable(member)
)

# Command-line interface of the training script.
parser = argparse.ArgumentParser(description='ImageNet classification')
# metavar: name shown for the argument in help; nargs='?': the value is optional.
parser.add_argument('data', metavar='DIR', nargs='?', default='/home/data/Imagenet',
                    help='path to dataset')
parser.add_argument('-m','--model', default='resnet50', choices=model_names,
                    help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet50)')
# The training code reads args.dataset and args.expname; declare them so that
# those reads do not fail with AttributeError.
parser.add_argument('--dataset', default='imagenet', type=str, choices=['imagenet'],
                    help='dataset name, selects the number of classes (default: imagenet)')
parser.add_argument('--expname', default='default', type=str,
                    help='experiment name; checkpoints are stored under runs/<expname>/ (default: default)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs of run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b','--batch_size', default=256, type=int, metavar='N',
                    help='mini-batch size (default: 256), this is the total batch size of all GPUs on the current node when using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
# dest: attribute the value is stored under; passing '-e' sets args.evaluate.
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://10.201.134.133:8892', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
# Fake data, used to check that the pipeline runs end to end.
parser.add_argument('--dummy', action='store_true', help="use fake data to benchmark")

# Best top-1 validation accuracy seen so far; updated by main_worker.
best_acc1 = 0

def main(argv=None):
    """Parse the command line and start one or several training workers.

    Args:
        argv: Optional list of argument strings. ``None`` (default) keeps the
            original behaviour of reading ``sys.argv``. Pass an explicit list
            (e.g. ``[]``) when calling from a notebook, where the kernel's own
            command-line flags make the parser exit with ``SystemExit: 2``.

    Raises:
        ValueError: if multi-processing distributed training is requested
            without a positive ``--world-size``.
    """
    args = parser.parse_args(argv)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True  # a seed was requested, so ask cuDNN for deterministic kernels
        cudnn.benchmark = False  # and skip the per-configuration algorithm search
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely disable data parallelism')

    if args.dist_url == "env://" and args.world_size == -1:
        # The number of participating nodes is supplied through the environment.
        args.world_size = int(os.environ["WORLD_SIZE"])
        print(args.world_size)

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    if torch.cuda.is_available():
        # Two devices are held back (the author's note: only GPU 0 and 1 are used
        # on the shared machine). Clamp to 1 so that a machine with one or two
        # GPUs does not end up with zero or a negative number of processes.
        ngpus_per_node = max(1, torch.cuda.device_count() - 2)
    else:
        ngpus_per_node = 1

    if args.multiprocessing_distributed:
        if args.world_size < 1:
            # The default of -1 would otherwise turn into a negative world size below.
            raise ValueError('--multiprocessing-distributed requires --world-size >= 1 '
                             '(number of nodes), got {}'.format(args.world_size))
        # world_size arrives as the number of nodes; turn it into the total process count.
        args.world_size = ngpus_per_node * args.world_size
        # mp.spawn calls main_worker(i, ngpus_per_node, args) for i in range(nprocs).
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Single-process path.
        main_worker(args.gpu, ngpus_per_node, args)

def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point: build model, data and optimizer, then train or evaluate.

    Args:
        gpu: GPU index for this process (``None`` when no specific GPU is set).
        ngpus_per_node: Number of worker processes / GPUs used on this node.
        args: Parsed command-line namespace. It is modified in place
            (``gpu``, ``rank``, ``batch_size``, ``workers``, ``start_epoch``).

    Raises:
        ValueError: unknown dataset name.
        NotImplementedError: 'vit' model, or a CUDA run that is not distributed.
        Exception: unknown model name.
    """
    global best_acc1
    # GPU selection
    print(gpu, ngpus_per_node)
    args.gpu = gpu

    if args.gpu is not None:
        print("use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # gpu = 0, 1, ..., ngpus_per_node-1: turn the node rank into a global process rank.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank)

    # Model selection
    print("Creatin model '{}'".format(args.model))
    # Fall back to 'imagenet' when the namespace carries no dataset attribute.
    dataset_name = getattr(args, 'dataset', 'imagenet')
    if dataset_name == 'imagenet':
        num_classes = 1000
    else:
        raise ValueError('unknown dataset: {}'.format(dataset_name))
    if args.model == 'resnet50':
        model = ResNet(num_classes=num_classes)
    elif args.model == 'vit':
        # Previously a bare `pass`, which left `model` undefined further down.
        raise NotImplementedError("model 'vit' is not implemented yet")
    else:
        raise Exception('unknown model: {}'.format(args.model))

    # Device placement / parallelism
    if not torch.cuda.is_available() and not torch.backends.mps.is_available():
        print('using CPU, this will be slow')
    elif args.distributed:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # One process drives one GPU here. --batch_size and --workers are
            # per-node totals, so each process takes its share (workers rounded up).
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # NOTE(review): the author's note says that without device_ids DDP
            # spreads the batch over all available GPUs by itself - confirm
            # against the DistributedDataParallel documentation.
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        raise NotImplementedError("Only DistributedDataparallel is supported.")

    if torch.cuda.is_available():
        # `is not None`, not truthiness: GPU index 0 is a valid explicit choice.
        if args.gpu is not None:
            device = torch.device('cuda:{}'.format(args.gpu))
        else:
            device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    # Criterion and optimizer
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # Learning rate is multiplied by 0.1 every 30 scheduler steps (one step per epoch below).
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

    # Optional: resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint: '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the stored tensors onto this process's GPU.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict((checkpoint['state_dict']))
            optimizer.load_state_dict(checkpoint['optimizer'])
            # These two are written by the save call below; restore them when
            # present so the best score and the LR schedule continue where they stopped.
            best_acc1 = checkpoint.get('best_acc1', best_acc1)
            if 'scheduler' in checkpoint:
                scheduler.load_state_dict(checkpoint['scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})"
                    .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data: the real loader is only built when fake data was not requested
    # (it used to run unconditionally and overwrite the fake datasets).
    if args.dummy:
        print("=> Dummy data is used!")
        train_dataset = datasets.FakeData(1281167, (3, 224, 224), 1000, transforms.ToTensor())
        val_dataset = datasets.FakeData(50000, (3, 224, 224), 1000, transforms.ToTensor())
    else:
        train_path = os.path.join(args.data, 'train')
        val_path = os.path.join(args.data, 'val')
        train_dataset, val_dataset = ImgNetData(train_path, val_path)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, drop_last=True)
    else:
        train_sampler = None
        val_sampler = None

    # DataLoader rejects shuffle=True together with a sampler, hence the conditional shuffle.
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler)

    if args.evaluate:  # evaluation-only mode
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # DistributedSampler derives its shuffling from the epoch number, so
            # it must be told the epoch before the loader is iterated; otherwise
            # every epoch sees the same ordering.
            train_sampler.set_epoch(epoch)

        # The rest of the epoch runs in every mode (it used to be nested under
        # the `if args.distributed` above).
        train(train_loader, model, criterion, optimizer, epoch, device, args)

        # validate returns (top-1 average, loss average); only the accuracy is compared.
        acc1, _val_loss = validate(val_loader, model, criterion, args)

        scheduler.step()

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Only one process per node writes the checkpoint.
        if not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.model,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer' : optimizer.state_dict(),
                'scheduler' : scheduler.state_dict()
            }, is_best, args)

def train(train_loader, model, criterion, optimizer, epoch, device, args):
    """Run one training epoch and return the average loss.

    Args:
        train_loader: Iterable yielding ``(input, target)`` batches.
        model: Network to optimise; switched to train mode.
        criterion: Loss callable taking ``(output, target)``.
        optimizer: Optimizer stepping the model parameters.
        epoch: Current epoch index (used for logging only).
        device: Device the batches are moved to.
        args: Namespace providing ``print_freq`` and ``epochs``.

    Returns:
        Sample-weighted mean of the loss over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # Kept under its own name so the per-batch accuracy cannot overwrite the meter.
    top1 = AverageMeter()

    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # Time spent waiting for the loader.
        data_time.update(time.time() - end)

        # non_blocking=True allows an asynchronous host-to-device copy when the
        # source tensor lives in pinned memory.
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        output = model(images)
        loss = criterion(output, target)

        # Accuracy returns one tensor per requested k; take the top-1 entry.
        batch_acc1 = Accuracy(output.data, target, topk=(1,))[0]

        losses.update(loss.item(), images.size(0))
        top1.update(batch_acc1.item(), images.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        # Log every print_freq-th batch (a non-positive value disables the log).
        if args.print_freq > 0 and i % args.print_freq == 0:
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top 1-acc {top1.val:.4f} ({top1.avg:.4f})'.format(
                epoch, args.epochs, i, len(train_loader), batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1))

    # The value is an accuracy, so it is labelled as such (it used to say "err").
    print('* Epoch: [{0}/{1}]\t Top 1-acc {top1.avg:.3f} Train Loss {loss.avg:.3f}'.format(
        epoch, args.epochs, top1=top1, loss=losses))

    return losses.avg

def validate(val_loader, model, criterion, args):
    """Evaluate the model on ``val_loader``.

    Args:
        val_loader: Iterable yielding ``(input, target)`` batches.
        model: Network to evaluate; switched to eval mode.
        criterion: Loss callable taking ``(output, target)``.
        args: Namespace providing ``print_freq``.

    Returns:
        tuple: ``(average top-1 accuracy in percent, average loss)``.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    # Kept under its own name so the per-batch accuracy cannot overwrite the meter.
    top1 = AverageMeter()

    # Evaluate on whatever device the model already lives on instead of
    # assuming CUDA; a parameter-free model falls back to the CPU.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device('cpu')

    model.eval()

    end = time.time()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            output = model(images)
            loss = criterion(output, target)

            # Accuracy returns one tensor per requested k; take the top-1 entry.
            batch_acc1 = Accuracy(output, target)[0]
            losses.update(loss.item(), images.size(0))
            top1.update(batch_acc1.item(), images.size(0))

            batch_time.update(time.time() - end)
            end = time.time()

            # Log every print_freq-th batch (a non-positive value disables the log).
            if args.print_freq > 0 and i % args.print_freq == 0:
                print('Test (on val set): [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Top 1-acc {top1.val:.4f} ({top1.avg:.4f})'.format(
                    i, len(val_loader), batch_time=batch_time,
                    loss=losses, top1=top1))
    # The value is an accuracy, so it is labelled as such (it used to say "err").
    print('* Top 1-acc {top1.avg:.3f}  Test Loss {loss.avg:.3f}'.format(
        top1=top1, loss=losses))
    return top1.avg, losses.avg

def save_checkpoint(state, is_best,args, filename='checkpoint.pth.tar'):
    """Save ``state`` under ``runs/<expname>/`` and optionally mark it as best.

    Args:
        state: Object handed to ``torch.save``.
        is_best: When true, the saved file is also copied to
            ``model_best.pth.tar`` in the same directory.
        args: Namespace; ``args.expname`` names the run directory. A namespace
            without that attribute falls back to ``'default'``.
        filename: File name of the checkpoint inside the run directory.
    """
    expname = getattr(args, 'expname', 'default')
    directory = os.path.join('runs', str(expname))
    # exist_ok avoids the check-then-create race when several processes save at once.
    os.makedirs(directory, exist_ok=True)
    checkpoint_path = os.path.join(directory, filename)
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, os.path.join(directory, 'model_best.pth.tar'))

# Entry point: run main() when this code is executed as the top-level module.
# NOTE(review): the output recorded below this cell shows the argparse usage
# text followed by "SystemExit: 2", i.e. this call ran inside the notebook and
# the argument parser rejected the command line it was given.
if __name__ == '__main__':
    main()

usage: ipykernel_launcher.py [-h]
                             [-m {alexnet,convnext_base,convnext_large,convnext_small,convnext_tiny,densenet121,densenet161,densenet169,densenet201,efficientnet_b0,efficientnet_b1,efficientnet_b2,efficientnet_b3,efficientnet_b4,efficientnet_b5,efficientnet_b6,efficientnet_b7,efficientnet_v2_l,efficientnet_v2_m,efficientnet_v2_s,get_weight,googlenet,inception_v3,mnasnet0_5,mnasnet0_75,mnasnet1_0,mnasnet1_3,mobilenet_v2,mobilenet_v3_large,mobilenet_v3_small,regnet_x_16gf,regnet_x_1_6gf,regnet_x_32gf,regnet_x_3_2gf,regnet_x_400mf,regnet_x_800mf,regnet_x_8gf,regnet_y_128gf,regnet_y_16gf,regnet_y_1_6gf,regnet_y_32gf,regnet_y_3_2gf,regnet_y_400mf,regnet_y_800mf,regnet_y_8gf,resnet101,resnet152,resnet18,resnet34,resnet50,resnext101_32x8d,resnext101_64x4d,resnext50_32x4d,shufflenet_v2_x0_5,shufflenet_v2_x1_0,shufflenet_v2_x1_5,shufflenet_v2_x2_0,squeezenet1_0,squeezenet1_1,swin_b,swin_s,swin_t,vgg11,vgg11_bn,vgg13,vgg13_bn,vgg16,vgg16_bn,vgg19,vgg19_bn,vit_b_1

SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [16]:
# Number of CUDA devices PyTorch reports; shown as the cell output.
torch.cuda.device_count()

4

In [17]:
!nvidia-smi

Mon Jan 30 11:45:30 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  TITAN RTX           On   | 00000000:3F:00.0 Off |                  N/A |
| 41%   24C    P8    14W / 280W |  19314MiB / 24220MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           On   | 00000000:42:00.0 Off |                  N/A |
| 41%   28C    P8    12W / 280W |      3MiB / 24220MiB |      0%      Default |
|       