# Quantize yolov7 model using Vitis AI

In [2]:
# ! conda activate vitis-ai-pytorch
! pip install -r requirements.txt

Collecting opencv-python>=4.1.1
  Downloading opencv_python-4.7.0.68-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (61.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.8/61.8 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m00:01[0mm00:01[0m
Collecting tensorboard>=2.4.1
  Downloading tensorboard-2.11.2-py3-none-any.whl (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting seaborn>=0.11.0
  Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.3/293.3 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting onnx>=1.9.0
  Downloading onnx-1.13.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.5/13.5 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting onnx-simplifier>=

In [None]:
import os
import re
import sys
import argparse
import time
import pdb
import random
from pytorch_nndct.apis import torch_quantizer
import torch
import torchvision
import torchvision.transforms as transforms
# from torchvision.models.resnet import resnet18
from hubconf import custom
from tqdm import tqdm

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
data_dir = "/workspace/yolov7/MTV2/"
# quant_mode = args.quant_mode # choices=["float", "calib", "test"]
finetune = False
deploy = False
batch_size = 32
subset_len = None # type: int
inspect = False
config_file = None # quantization configuration file
target = "DPUCZDX8G_ISA1_B4096"
path_or_model = "runs/train/detect/weights/best.pt"
model_name = "yolov7"

# if quant_mode != "test" and deploy:
#     deploy = False
#     print(r"Warning: Exporting xmodel needs to be done in quantization test mode, turn off it in this running!")
# if deploy and (batch_size != 1 or subset_len != 1):
#     print(
#         r"Warning: Exporting xmodel needs batch size to be 1 and only 1 iteration of inference, change them automatically!"
#     )
#     batch_size = 1
#     subset_len = 1

Create data loader.

In [None]:
def load_data(
    train=True,
    data_dir="dataset/imagenet",
    batch_size=128,
    subset_len=None,
    sample_method="random",
    distributed=False,
    model_name="yolov7",
    **kwargs
):
    # prepare data
    # random.seed(12345)
    # traindir = data_dir + '/train'
    traindir = data_dir + "/calib"
    # valdir = data_dir + '/val'
    valdir = data_dir + "/calib"
    train_sampler = None
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    if model_name == "inception_v3":
        size = 299
        resize = 299
    else:
        size = 224
        resize = 256
    if train:
        dataset = torchvision.datasets.ImageFolder(
            traindir,
            transforms.Compose(
                [
                    transforms.RandomResizedCrop(size),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                ]
            ),
        )
        if subset_len:
            assert subset_len <= len(dataset)
            if sample_method == "random":
                dataset = torch.utils.data.Subset(dataset, random.sample(range(0, len(dataset)), subset_len))
            else:
                dataset = torch.utils.data.Subset(dataset, list(range(subset_len)))
        if distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=(train_sampler is None), sampler=train_sampler, **kwargs
        )
    else:
        dataset = torchvision.datasets.ImageFolder(
            valdir,
            transforms.Compose(
                [
                    transforms.Resize(resize),
                    transforms.CenterCrop(size),
                    transforms.ToTensor(),
                    normalize,
                ]
            ),
        )
        if subset_len:
            assert subset_len <= len(dataset)
            if sample_method == "random":
                dataset = torch.utils.data.Subset(dataset, random.sample(range(0, len(dataset)), subset_len))
            else:
                dataset = torch.utils.data.Subset(dataset, list(range(subset_len)))
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, **kwargs)
    return data_loader, train_sampler

val_loader, _ = load_data(
    subset_len=subset_len,
    train=False,
    batch_size=batch_size,
    sample_method="random",
    data_dir=data_dir,
    model_name="yolov7",
)

Declare model, input and criterion.

In [None]:
model = custom(path_or_model=path_or_model)

loss_fn = torch.nn.CrossEntropyLoss().to(device)

In [None]:
model.modules

## Inspect

**Inspect Float Model Before Quantization**

Vai_q_pytorch provides a function called inspector to help you diagnose neural network (NN) models under different device architectures. The inspector can predict target device assignments based on hardware constraints. The generated inspection report can be used to guide  users to modify or optimize the NN model, greatly reducing the difficulty and time of deployment. It is recommended to inspect float models before quantization.

1. Import vai_q_pytorch module
    ``` python
    from pytorch_nndct.apis import Inspector
    ```
2. Create a inspector with target name or fingerprint
    ``` python
    inspector = Inspector("0x603000b16013831") # by target fingerprint
    inspector = Inspector("DPUCAHX8L_ISA0_SP") # by target name
    ```
3. Inspect float model

    ``` python
    input = torch.randn([batch_size, 3, 224, 224])
    inspector.inspect(model, input)
    ```

In [None]:
input = torch.randn([128, 3, 224, 224]) # batch size 128, 3 channels, 224, 224
if not target:
    raise RuntimeError("A target should be specified for inspector.")

from pytorch_nndct.apis import Inspector

# create inspector
inspector = Inspector(target)  # by name
# start to inspect
inspector.inspect(model, (input,), device=device, image_format="svg")

## Calib

When calibrating forward, borrow the float evaluation flow to minimize code change from float script. If you encounter loss and accuracy messages displayed in the end, you can ignore them.

It is important to control iteration numbers during quantization and evaluation. Generally, 100-1000 images are enough for quantization and the whole validation set is required for evaluation. The iteration numbers can be controlled in the data loading part. In this case, the subset_len argument controls the number of images that are used for network forwarding. If the float evaluation script does not have an argument with a similar role, you must add one.

If this quantization command runs successfully, two important files are generated in the output directory `./quantize_result`.

- {model_name}.py

    Converted vai_q_pytorch format model.

- Quant_info.json

    Quantization steps of tensors. Retain this file for evaluating quantized models.


In [None]:
input = torch.randn([batch_size, 3, 224, 224]) # batch_size = 128
quantizer = torch_quantizer(quant_mode="calib", module=model, input_args=(input), output_dir="quantize_result", device=device, quant_config_file=config_file, target=target)

In [None]:
quant_model = quantizer.quant_model
quant_model.eval()
quant_model = quant_model.to(device)
outputs = quant_model(input)
quantizer.export_quant_config()

## Test

Generate xmodel to deploy the model.

In [None]:
input = torch.randn([1, 3, 224, 224]) # batch_size, 3, 224, 224

val_loader, _ = load_data(
    subset_len=subset_len,
    train=False,
    batch_size=1, # batch_size must be 1 for testing
    sample_method="random",
    data_dir=data_dir,
    model_name="yolov7",
)
quantizer = torch_quantizer(quant_mode="test", module=model, input_args=(input), output_dir="quantize_result", device=device, quant_config_file=config_file, target=target)


In [None]:
quant_model = quantizer.quant_model
quant_model.eval()
quant_model = quant_model.to(device)
outputs = quant_model(input)

quantizer.export_torch_script()
quantizer.export_onnx_model()
quantizer.export_xmodel(deploy_check=False)


## Remove some modules

In [4]:
import argparse
import json
import os
from pathlib import Path
from threading import Thread
import sys

import numpy as np
import torch
import torch.nn as nn
import yaml
from tqdm import tqdm

from models.experimental import attempt_load
from utils.datasets import create_dataloader
from utils.general import (
    coco80_to_coco91_class,
    check_dataset,
    check_file,
    check_img_size,
    check_requirements,
    box_iou,
    non_max_suppression,
    scale_coords,
    xyxy2xywh,
    xywh2xyxy,
    set_logging,
    increment_path,
    colorstr,
)
from utils.metrics import ap_per_class, ConfusionMatrix
from utils.plots import plot_images, output_to_target, plot_study_txt
from utils.torch_utils import select_device, time_synchronized, TracedModel

from pytorch_nndct.apis import torch_quantizer


[0;32m[VAIQ_NOTE]: Loading NNDCT kernels...[0m


In [5]:
batch_size = 32
weights = "runs/train/detect/weights/best.pt"
input = torch.randn([batch_size, 3, 224, 224])
device = select_device("cpu", batch_size=batch_size)
model = attempt_load(weights=weights, map_location=device)

Fusing layers... 
count =  76


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [11]:
weights=None,
batch_size=32,
imgsz=640,
conf_thres=0.001,
iou_thres=0.6,  # for NMS
save_json=False,
single_cls=False,
augment=False,
verbose=False,
inspect=False,
deploy=False,
quant_mode="float",
target="DPUCZDX8G_ISA1_B4096",
config_file=None,
model=None,
dataloader=None,
save_dir=Path(""),  # for saving images
save_txt=False,  # for auto-labelling
save_hybrid=False,  # for hybrid auto-labelling
save_conf=False,  # save auto-label confidences
plots=False,
wandb_logger=None,
compute_loss=None,
half_precision=True,
trace=False,
is_coco=False,
v5_metric=False,

batch_size = 32
weights = "runs/train/detect/weights/best.pt"
input = torch.randn([batch_size, 3, 224, 224])
device = select_device("cpu", batch_size=batch_size)
model = attempt_load(weights=weights, map_location=device)
data = "data/odessa_docker.yaml"
name = "yolov7_tiny_odessa"
project = "runs/test"

parser = argparse.ArgumentParser(prog="test.py")
parser.add_argument("--single-cls", action="store_true", help="treat as single-class dataset")
opt = parser.parse_args()
opt.single_cls = False

# Initialize/load model and set device
# training = model is not None
# if training:  # called by train.py
#     device = next(model.parameters()).device  # get model device

# else:  # called directly
set_logging()


# Directories
save_dir = Path(increment_path(Path(project) / name, exist_ok=False))  # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

# Load model
model = attempt_load(weights, map_location=device)  # load FP32 model
gs = max(int(model.stride.max()), 32)  # grid size (max stride)
imgsz = check_img_size(imgsz, s=gs)  # check img_size

# ! inspect
input = torch.randn([batch_size, 3, 512, 672])  # ! 640 640?
# model.model[77] = torch.nn.Sequential()
layer_detect = model.model[-1]
model.model = nn.Sequential(*list(model.model.children())[:-1])

if quant_mode == "float":
    quant_model = model
    if inspect:
        if not target:
            raise RuntimeError("A target should be specified for inspector.")

        from pytorch_nndct.apis import Inspector

        # create inspector
        inspector = Inspector(target)  # by name
        # start to inspect
        inspector.inspect(quant_model, (input,), device=device, image_format="svg")
        # sys.exit()

else:
    ## new api
    ####################################################################################
    quantizer = torch_quantizer(
        quant_mode, model, (input), device=device, quant_config_file=config_file, target=target
    )

    quant_model = quantizer.quant_model
    #####################################################################################

# handle quantization result
# if quant_mode == "calib":
#     quantizer.export_quant_config()
#     sys.exit()
# if deploy:
#     quantizer.export_torch_script()
#     quantizer.export_onnx_model()
#     quantizer.export_xmodel(deploy_check=False)
#     sys.exit()

# print(quant_model)

# ! Add back the last layer
# quant_model.add_module("Detect", layer_detect)

# print(quant_model.module_0.names)
# sys.exit()

quant_model.eval()
if isinstance(data, str):
    is_coco = data.endswith("coco.yaml")
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.SafeLoader)
check_dataset(data)  # check
nc = 1 if single_cls else int(data["nc"])  # number of classes
iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
niou = iouv.numel()

# Logging
log_imgs = 0
if wandb_logger and wandb_logger.wandb:
    log_imgs = min(wandb_logger.log_imgs, 100)
# Dataloader
# if not training:
if device.type != "cpu":
    quant_model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(quant_model.parameters())))  # run once
task = "val"

dataloader = create_dataloader(
    data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True, prefix=colorstr(f"{task}: ")
)[0]

seen = 0
confusion_matrix = ConfusionMatrix(nc=nc)
# names = {k: v for k, v in enumerate(quant_model.names if hasattr(quant_model, "names") else quant_model.module.names)} # find names from model
names = {0: "Boat", 1: "Human"}
s = ("%20s" + "%12s" * 6) % ("Class", "Images", "Labels", "P", "R", "mAP@.5", "mAP@.5:.95")
p, r, f1, mp, mr, map50, map, t0, t1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
loss = torch.zeros(3, device=device)
jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
    img = img.to(device, non_blocking=True)
    # img = img.half() if half else img.float()  # uint8 to fp16/32
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    targets = targets.to(device)
    nb, _, height, width = img.shape  # batch size, channels, height, width

    with torch.no_grad():
        # Run model
        t = time_synchronized()
        # print("\n############ out ############\n")
        # print(quant_model(img, augment=augment))
        # print(f"len(out) = ", len(quant_model(img, augment=augment)))
        # sys.exit(0)
        out = quant_model(img, augment=augment)  # inference and training outputs
        print(out)
        # sys.exit(0)
        t0 += time_synchronized() - t

        # Compute loss
        # if compute_loss:
        #     loss += compute_loss([x.float() for x in train_out], targets)[1][:3]  # box, obj, cls

        # Run NMS
        targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
        t = time_synchronized()
        out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True)
        t1 += time_synchronized() - t

    # Statistics per image
    for si, pred in enumerate(out):
        labels = targets[targets[:, 0] == si, 1:]
        nl = len(labels)
        tcls = labels[:, 0].tolist() if nl else []  # target class
        path = Path(paths[si])
        seen += 1

        if len(pred) == 0:
            if nl:
                stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
            continue

        # Predictions
        predn = pred.clone()
        scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

        # Append to text file
        if save_txt:
            gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
            for *xyxy, conf, cls in predn.tolist():
                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                with open(save_dir / "labels" / (path.stem + ".txt"), "a") as f:
                    f.write(("%g " * len(line)).rstrip() % line + "\n")

        # W&B logging - Media Panel Plots
        if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0:  # Check for test operation
            if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                box_data = [
                    {
                        "position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                        "class_id": int(cls),
                        "box_caption": "%s %.3f" % (names[cls], conf),
                        "scores": {"class_score": conf},
                        "domain": "pixel",
                    }
                    for *xyxy, conf, cls in pred.tolist()
                ]
                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name))
        wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None

        # Assign all predictions as incorrect
        correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
        if nl:
            detected = []  # target indices
            tcls_tensor = labels[:, 0]

            # target boxes
            tbox = xywh2xyxy(labels[:, 1:5])
            scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
            if plots:
                confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))

            # Per target class
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # prediction indices
                pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # target indices

                # Search for detections
                if pi.shape[0]:
                    # Prediction to target ious
                    ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                    # Append detections
                    detected_set = set()
                    for j in (ious > iouv[0]).nonzero(as_tuple=False):
                        d = ti[i[j]]  # detected target
                        if d.item() not in detected_set:
                            detected_set.add(d.item())
                            detected.append(d)
                            correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                            if len(detected) == nl:  # all targets already located in image
                                break

        # Append statistics (correct, conf, pcls, tcls)
        stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

    # Plot images
    if plots and batch_i < 3:
        f = save_dir / f"test_batch{batch_i}_labels.jpg"  # labels
        Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
        f = save_dir / f"test_batch{batch_i}_pred.jpg"  # predictions
        Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()

# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
if len(stats) and stats[0].any():
    p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, v5_metric=v5_metric, save_dir=save_dir, names=names)
    ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
    mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
    nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
else:
    nt = torch.zeros(1)

# Print results
pf = "%20s" + "%12i" * 2 + "%12.3g" * 4  # print format
print(pf % ("all", seen, nt.sum(), mp, mr, map50, map))

# Print results per class
# if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
    print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

# Print speeds
t = tuple(x / seen * 1e3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
# if not training:
print("Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g" % t)

# Plots
if plots:
    confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
    if wandb_logger and wandb_logger.wandb:
        val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob("test*.jpg"))]
        wandb_logger.log({"Validation": val_batches})
if wandb_images:
    wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

# Return results
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
print(f"Results saved to {save_dir}{s}")
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
    maps[c] = ap[i]

# handle quantization result
if quant_mode == "calib":
    quantizer.export_quant_config()
    # sys.exit(0)
if deploy:
    quantizer.export_torch_script()
    quantizer.export_onnx_model()
    quantizer.export_xmodel(deploy_check=False)
    # sys.exit(0)

print((mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t)



YOLOR 🚀 2023-1-13 torch 1.12.1 CPU

Model Summary: 200 layers, 6009343 parameters, 0 gradients, 13.0 GFLOPS


Fusing layers... 
count =  76


usage: test.py [-h] [--single-cls]
test.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme="hmac-sha256" --Session.key=b"b30dbc22-3869-48e0-8165-1abf531651b3" --shell=9002 --transport="tcp" --iopub=9004 --f=/root/.local/share/jupyter/runtime/kernel-v2-77528OvInajbCbe.json


SystemExit: 2

In [None]:
quantizer = torch_quantizer(quant_mode="test", module=model, input_args=(input), output_dir="quantize_result", device=device, quant_config_file=config_file, target=target)

In [None]:
quant_model = quantizer.quant_model
quant_model.eval()
quant_model = quant_model.to(device)
outputs = quant_model(input)

quantizer.export_torch_script()
quantizer.export_onnx_model()
quantizer.export_xmodel(deploy_check=False)


# Bigger terminal

In [None]:
! ./test_quant.sh

In [None]:
! ./test_docker.sh

In [3]:
! python test_quant.py --data data/odessa_docker.yaml --img 640 --batch-size 1 --conf-thres 0.001 --iou 0.65 --device cpu --weights runs/train/detect/weights/best.pt --name yolov7_tiny_odessa --quant_mode calib


[0;32m[VAIQ_NOTE]: Loading NNDCT kernels...[0m
Namespace(augment=False, batch_size=1, conf_thres=0.001, config_file=None, data='data/odessa_docker.yaml', deploy=False, device='cpu', exist_ok=False, img_size=640, inspect=False, iou_thres=0.65, name='yolov7_tiny_odessa', no_trace=False, project='runs/test', quant_mode='calib', save_conf=False, save_hybrid=False, save_txt=False, single_cls=False, target='DPUCZDX8G_ISA1_B4096', task='val', verbose=False, weights=['runs/train/detect/weights/best.pt'])
YOLOR 🚀 2023-1-13 torch 1.12.1 CPU

Fusing layers... 
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Model Summary: 200 layers, 6009343 parameters, 0 gradients, 13.0 GFLOPS

[0;32m[VAIQ_NOTE]: OS and CPU information:
               system --- Linux
                 node --- adlabsticc-Precision-3650-Tower
              release --- 5.19.0-32-generic
              version --- #33~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jan 30 17:03:34 UTC 2
              machine --- x8

In [7]:
print(out)

NameError: name 'out' is not defined