# ML functions

In [1]:
import torch

## Train

### TODO

- [ ] logging time

In [2]:
def train(
    net,
    criterion,
    optimizer,
    dataloader,
    epoch: int = 0,
    cuda: bool = False,
    log: bool = False,
    log_file = None
):
    """Run one optimisation pass over ``dataloader`` and report per-batch metrics.

    Args:
        net: module to optimise; it is put into training mode first.
        criterion: callable ``criterion(outputs, targets)`` returning the loss tensor.
        optimizer: optimizer whose ``zero_grad``/``step`` are called once per batch.
        dataloader: iterable of ``(inputs, targets)`` batches; must expose
            ``dataset`` (for the sample total) and support ``len()``.
        epoch: index of the current epoch, used as the integer part of the
            fractional epoch that is reported.
        cuda: when true, every batch is moved to the GPU with ``.cuda()``.
        log: together with ``log_file`` selects the output channel.
        log_file: writable text stream. When ``log`` is true and this is not
            ``None``, one ``partial_epoch,loss,error`` row is written and flushed
            per batch; in every other case a progress line is printed instead.

    Returns:
        None.
    """
    # put the network into training mode
    net.train()

    write_csv = log and (log_file is not None)
    total_samples = len(dataloader.dataset)
    seen_samples = 0

    for step, batch in enumerate(dataloader):
        inputs, targets = batch
        if cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, backward pass, parameter update
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # per-batch statistics
        batch_size = len(inputs)
        seen_samples += batch_size
        _, pred = outputs.data.max(dim=1)  # index of the largest score per sample
        mistakes = pred.ne(targets.data).cpu().sum()  # count of pred != target
        err = 100. * mistakes / batch_size
        n_batches = len(dataloader)
        partial_epoch = epoch + step / n_batches

        if write_csv:  # append a csv row
            log_file.write('{},{},{}\n'.format(partial_epoch, loss.item(), err))
            log_file.flush()
            continue

        # otherwise report on STDOUT
        progress = 100. * step / n_batches
        print('Train Epoch: {:.2f} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tError: {:.6f}'.format(
            partial_epoch, seen_samples, total_samples, progress, loss.item(), err
        ))

## Test

### TODO

- [ ] logging time

In [3]:
def test(
    net,
    criterion,
    dataloader,
    epoch: int = 0,
    cuda: bool = False,
    log: bool = False,
    log_file = None
):
    # tells net to do evaluating
    net.eval()

    # for log
    test_loss = 0.
    incorrect = 0.

    for inputs, targets in dataloader:
        if cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # eval
        with torch.no_grad():
            outputs = net(inputs)
            test_loss += criterion(outputs, targets).item()
            pred = outputs.data.max(dim=1)[1]  # get the index of the max log-probability
            incorrect += pred.ne(targets.data).cpu().sum()  # ne: not equal

    # log
    test_loss /= len(dataloader)  # loss function already averages over batch size
    nTotal = len(dataloader.dataset)
    err = 100. * incorrect / nTotal

    if log and (log_file is not None):  # saves at csv file
        log_file.write('{},{},{}\n'.format(epoch, test_loss, err))
        log_file.flush()
    else:  # print at STDOUT
        print('\nTest Set\tAverage Loss: {:.4f}\tError: {}/{} ({:.06f}%)\n'.format(
            test_loss, incorrect, nTotal, err
        ))

## Save

In [4]:
def save(
    epoch: int,
    net,
    optimizer,
    pth_path: str = 'latest.pth'
):
    """Write a training checkpoint to ``pth_path`` with ``torch.save``.

    The checkpoint is a dict with three entries: ``'epoch'`` (the value passed
    in), ``'state_dict'`` (``net.state_dict()``) and ``'optimizer'``
    (``optimizer.state_dict()``).

    Args:
        epoch: epoch number to record in the checkpoint.
        net: object providing ``state_dict()``.
        optimizer: object providing ``state_dict()``.
        pth_path: destination file path.

    Returns:
        None.
    """
    checkpoint = dict(
        epoch=epoch,
        state_dict=net.state_dict(),
        optimizer=optimizer.state_dict(),
    )
    torch.save(checkpoint, pth_path)

## Load

In [5]:
def load(
    pth_path: str = 'latest.pth',
    map_location = None
):
    """Read a checkpoint file with ``torch.load`` and return its content.

    Args:
        pth_path: path of the checkpoint file.
        map_location: forwarded to ``torch.load``. Leave as ``None`` to restore
            tensors to the device they were saved from, or pass e.g. ``'cpu'``
            to open a checkpoint written on a GPU on a machine without one.

    Returns:
        Whatever object was stored in the file.

    Use the return value as the following codes:

    checkpoint = load(pth_path=netPath)
    epoch = checkpoint['epoch'] + 1  # current epoch
    net.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    """
    # NOTE(security): torch.load unpickles the file, so only open checkpoints
    # that come from a trusted source.
    return torch.load(pth_path, map_location=map_location)

# main

In [6]:
if __name__ == "__main__":
    # Smoke test for the helpers above: trains `nets.resnet18` on CIFAR-10 for a
    # few epochs, logs metrics to csv files and checkpoints after every epoch so
    # that running the cell again resumes from the last saved epoch.
    import os

    import torch.nn as nn
    import torch.optim as optim

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    from torch.utils.data import DataLoader  # TODO: DistributedDataParallel

    import import_ipynb  # lets `nets.ipynb` be imported as a module
    import nets

    # --- Hyperparams ---
    numWorkers = 4
    # only use the GPU when one is present so the cell also runs on CPU-only hosts
    cuda = torch.cuda.is_available()

    base_path = './ml_test'
    os.makedirs(base_path, exist_ok=True)

    trainCsvPath = os.path.join(base_path, 'train.csv')
    testCsvPath = os.path.join(base_path, 'test.csv')
    netPath = os.path.join(base_path, 'net.pth')

    epochs = 2
    batchSz = 256

    # --- Datasets ---
    # Per-channel mean and std of the CIFAR-10 training images, precomputed with:
    #   transform = transforms.Compose([transforms.ToTensor()])
    #   dataset = dset.CIFAR10(root='cifar', train=True, download=True, transform=transform)
    #   normMean, normStd = utils.getNorm(dataset)
    normMean = [0.49139968, 0.48215841, 0.44653091]
    normStd = [0.24703223, 0.24348513, 0.26158784]
    normTransform = transforms.Normalize(normMean, normStd)

    # augmentation is applied to the training split only
    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    testTransform = transforms.Compose([
        transforms.ToTensor(),
        normTransform
    ])

    # num_workers: number of worker processes used for data loading
    # pin_memory: page-locked host memory speeds up host-to-device copies,
    #             so it is only enabled when training on the GPU
    kwargs = {'num_workers': numWorkers, 'pin_memory': cuda}

    # loaders
    trainLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=True, download=True, transform=trainTransform),
        batch_size=batchSz, shuffle=True, **kwargs
    )
    testLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=False, download=True, transform=testTransform),
        batch_size=batchSz, shuffle=False, **kwargs
    )

    # --- Nets ---
    num_classes = 10
    net = nets.resnet18(num_classes=num_classes)

    if cuda:
        # if multi-gpus
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)

        # use cuda
        net.cuda()

    criterion = nn.CrossEntropyLoss()
    # built after the module has been moved to its device, as the
    # `Module.cuda` documentation recommends
    optimizer = optim.SGD(net.parameters(), lr=1e-1, momentum=0.9)

    # --- Train & Test & Save ---
    epoch = 0

    # resume from the latest checkpoint if there is one
    resumed = os.path.isfile(netPath)
    if resumed:
        checkpoint = load(netPath)

        epoch = checkpoint['epoch'] + 1  # current epoch
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    # append when resuming so rows logged by earlier runs are not wiped;
    # the context manager closes both files even if training raises
    logMode = 'a' if resumed else 'w'
    with open(trainCsvPath, logMode) as trainFile, \
            open(testCsvPath, logMode) as testFile:
        while epoch < epochs:
            train(
                net, criterion, optimizer, trainLoader,
                epoch=epoch, cuda=cuda, log=True, log_file=trainFile
            )
            test(
                net, criterion, testLoader,
                epoch=epoch, cuda=cuda, log=True, log_file=testFile
            )

            # save
            save(epoch, net, optimizer, netPath)

            # current epoch
            epoch += 1

importing Jupyter notebook from nets.ipynb
Files already downloaded and verified
Files already downloaded and verified
