# Training with Intel-extension for PyTorch 

In [1]:
import torch

print(*torch.__config__.show().split("\n"), sep="\n")

PyTorch built with:
  - GCC 7.5
  - C++ Version: 201402
  - Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitia

In [None]:
import os
import sys
import time
from statistics import mean
from tqdm import trange
from tqdm import tqdm
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

In [12]:
use_ipex=True

if use_ipex: 
    import intel_pytorch_extension as ipex
    device = torch.device("xpu")
else: 
    device = torch.device("cpu")

1.2.0


#### Import dataset

In [13]:
import medmnist
from medmnist.models import ResNet18
from medmnist.dataset import PathMNIST, ChestMNIST, DermaMNIST, OCTMNIST, PneumoniaMNIST, RetinaMNIST, BreastMNIST, OrganMNISTAxial, OrganMNISTCoronal, OrganMNISTSagittal
from medmnist.evaluator import getAUC, getACC
from medmnist.info import INFO

#### Define learning hyperparameters

In [14]:
nb_epochs = 10
batch_size = 1024
lr = 0.001

#### Load and preprocess dataset into Dataloader

In [15]:
download = True
input_root = 'tmp_data/'
data_flag = 'breastmnist'

flag_to_class = {
    "pathmnist": PathMNIST,
    "chestmnist": ChestMNIST,
    "dermamnist": DermaMNIST,
    "octmnist": OCTMNIST,
    "pneumoniamnist": PneumoniaMNIST,
    "retinamnist": RetinaMNIST,
    "breastmnist": BreastMNIST,
    "organmnist_axial": OrganMNISTAxial,
    "organmnist_coronal": OrganMNISTCoronal,
    "organmnist_sagittal": OrganMNISTSagittal,
}

DataClass = flag_to_class[data_flag]

info = INFO[data_flag]
task = info['task']
n_channels = info['n_channels']
n_classes = len(info['label'])

In [16]:
# preprocessing
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5])
])

# load the data
train_dataset = DataClass(root=input_root, split='train', transform=data_transform, download=download)
test_dataset = DataClass(root=input_root, split='test', transform=data_transform, download=download)
val_dataset = DataClass(root=input_root,split='val',transform=data_transform, download=download)


# encapsulate data into dataloader form
train_loader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = data.DataLoader(dataset=val_dataset,batch_size=batch_size,shuffle=True)
test_loader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

Using downloaded and verified file: tmp_data/breastmnist.npz
Using downloaded and verified file: tmp_data/breastmnist.npz
Using downloaded and verified file: tmp_data/breastmnist.npz


#### Define models and optimizer

In [17]:
model = ResNet18(in_channels=n_channels, num_classes=n_classes).to(device)

if task == "multi-label, binary-class":
    criterion = nn.BCEWithLogitsLoss()
else:
    criterion = nn.CrossEntropyLoss()
    
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

#### Define training function

In [18]:
def train(model, optimizer, criterion, train_loader, device, task):
    ''' training function
    :param model: the model to train
    :param optimizer: optimizer used in training
    :param criterion: loss function
    :param train_loader: DataLoader of training set
    :param device: cpu, xpu
    :param task: task of current dataset, binary-class/multi-class/multi-label, binary-class

    '''
    model.train()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs.to(device))
        if task == 'multi-label, binary-class':
            targets = targets.to(torch.float32).to(device)
            loss = criterion(outputs, targets)
        else:
            targets = targets.squeeze().long().to(device)
            loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

#### Define validation function

In [19]:
def val(model, val_loader, device, val_auc_list, task, dir_path, epoch):
    ''' validation function
    :param model: the model to validate
    :param val_loader: DataLoader of validation set
    :param device: cpu or xpu
    :param val_auc_list: the list to save AUC score of each epoch
    :param task: task of current dataset, binary-class/multi-class/multi-label, binary-class
    :param dir_path: where to save model
    :param epoch: current epoch

    '''
    model.eval()
    y_true = torch.tensor([]).to(device)
    y_score = torch.tensor([]).to(device)
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            
            outputs = model(inputs.to(device))
            if task == 'multi-label, binary-class':
                targets = targets.to(torch.float32).to(device)
                m = nn.Sigmoid()
                outputs = m(outputs).to(device)
            else:
                targets = targets.squeeze().long().to(device)
                m = nn.Softmax(dim=1)
                outputs = m(outputs).to(device)
                targets = targets.float().resize_(len(targets), 1)
                
            y_true = torch.cat((y_true, targets), 0)
            y_score = torch.cat((y_score, outputs), 0)

        y_true = y_true.cpu().numpy()
        y_score = y_score.detach().cpu().numpy()
        auc = getAUC(y_true, y_score, task)
        val_auc_list.append(auc)

    state = {
        'net': model.state_dict(),
        'auc': auc,
        'epoch': epoch,
    }

    path = os.path.join(dir_path, 'ckpt_%d_auc_%.5f.pth' % (epoch, auc))
    torch.save(state, path)

#### Training

In [20]:
## save the checkpoints of all epochs
val_auc_list = []
dir_path = os.path.join("./output", '%s_checkpoints' % (data_flag))
if not os.path.exists(dir_path):
    os.makedirs(dir_path)
    
time_training=[]

## finally training 
for epoch in trange(0,nb_epochs):
        time1 = time.time()
        train(model, optimizer, criterion, train_loader, device, task)
        time_epoch = time.time()-time1 
        print("training "+str(time_epoch)+ "sec")
        
        if nb_epochs>0: # avoid any warm-up effect 
            time_training.append(time_epoch)
            
        val(model, val_loader, device, val_auc_list, task, dir_path, epoch)

average_training_time_per_epoch=mean(time_training)
print("Training per epoch took "+str(average_training_time_per_epoch) + "s") 

  0%|          | 0/10 [00:00<?, ?it/s]

training 1.4526829719543457sec


 10%|█         | 1/10 [00:02<00:18,  2.08s/it]

training 1.412468671798706sec


 20%|██        | 2/10 [00:04<00:16,  2.02s/it]

training 1.2222394943237305sec


 30%|███       | 3/10 [00:05<00:13,  1.89s/it]

training 1.1777422428131104sec


 40%|████      | 4/10 [00:07<00:10,  1.82s/it]

training 1.3112516403198242sec


 50%|█████     | 5/10 [00:09<00:09,  1.84s/it]

training 1.22499418258667sec


 60%|██████    | 6/10 [00:11<00:07,  1.81s/it]

training 1.1669392585754395sec


 70%|███████   | 7/10 [00:12<00:05,  1.80s/it]

training 1.43815279006958sec


 80%|████████  | 8/10 [00:14<00:03,  1.87s/it]

training 1.5158050060272217sec


 90%|█████████ | 9/10 [00:17<00:01,  1.94s/it]

training 1.3670127391815186sec


100%|██████████| 10/10 [00:18<00:00,  1.90s/it]

Training per epoch took 1.3289288997650146s





#### Define test function 

In [44]:
def test(model, split, data_loader, device, flag, task):
    ''' testing function
    :param model: the model to test
    :param split: the data to test, 'train/val/test'
    :param data_loader: DataLoader of data
    :param device: cpu or xpu
    :param flag: subset name
    :param task: task of current dataset, binary-class/multi-class/multi-label, binary-class

    '''
    model.eval()
    y_true = torch.tensor([]).to(device)
    y_score = torch.tensor([]).to(device)

    with torch.no_grad():
       
        for batch_idx, (inputs, targets) in enumerate(data_loader):
            
            outputs = model(inputs.to(device))
            
            if task == 'multi-label, binary-class':
                targets = targets.to(torch.float32).to(device)
                m = nn.Sigmoid()
                outputs = m(outputs).to(device)
            else:
                targets = targets.squeeze().long().to(device)
                m = nn.Softmax(dim=1)
                outputs = m(outputs).to(device)
                targets = targets.float().resize_(len(targets), 1)

            y_true = torch.cat((y_true, targets), 0)
            y_score = torch.cat((y_score, outputs), 0)

        y_true = y_true.cpu().numpy()
        y_score = y_score.detach().cpu().numpy()
        auc = getAUC(y_true, y_score, task)
        acc = getACC(y_true, y_score, task)
        print('%s AUC: %.5f ACC: %.5f' % (split, auc, acc))

#### Evaluate model on test dataset

In [33]:
# evaluation
auc_list = np.array(val_auc_list)
index = auc_list.argmax()
print('epoch %s is the best model' % (index))

print('==> Testing model...')
restore_model_path = os.path.join(dir_path, 'ckpt_%d_auc_%.5f.pth' % (index, auc_list[index]))
#restore_model_path = './output/pathmnist_checkpoints/ckpt_22_auc_0.99737.pth'

model.load_state_dict(torch.load(restore_model_path)['net'])
        
print('==> Evaluating ...')
test(model,'train',train_loader, device, data_flag, task)
test(model,'val', val_loader, device, data_flag, task)
test(model, 'test', test_loader,device, data_flag, task)


epoch 9 is the best model
==> Testing model...
==> Evaluating ...
train AUC: 0.67334 ACC: 0.45093
val AUC: 0.72270 ACC: 0.45000
test AUC: 0.66801 ACC: 0.43500
