From a6dc0cdb4b269719b194e779fa486e9262f60533 Mon Sep 17 00:00:00 2001 From: jiapli Date: Tue, 17 Oct 2023 22:06:01 +0800 Subject: [PATCH 1/6] Consistent dataloader for benchmark --- examples/resnet/user_script.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/resnet/user_script.py b/examples/resnet/user_script.py index 82f3ff3851..a379c82ac6 100644 --- a/examples/resnet/user_script.py +++ b/examples/resnet/user_script.py @@ -31,10 +31,19 @@ def __init__( def setup(self, stage: str): transform = transforms.Compose( - [transforms.Pad(4), transforms.RandomHorizontalFlip(), transforms.RandomCrop(32), transforms.ToTensor()] + [ + transforms.Pad(4), + # use CenterCrop to keep consistent dataset + transforms.CenterCrop(32), + # Ban RandomHorizontalFlip and RandomCrop to keep consistent dataset + # for real case, we can use them to augment dataset + # transforms.RandomHorizontalFlip(), + # transforms.RandomCrop(32), + transforms.ToTensor(), + ] ) self.train_dataset = CIFAR10(root=self.train_path, train=True, transform=transform, download=False) - self.val_dataset = CIFAR10(root=self.vld_path, train=True, transform=transform, download=False) + self.val_dataset = CIFAR10(root=self.vld_path, train=False, transform=transform, download=False) class PytorchResNetDataset(Dataset): @@ -42,6 +51,7 @@ def __init__(self, dataset): self.dataset = dataset def __len__(self): + # return 100 return len(self.dataset) def __getitem__(self, index): @@ -71,8 +81,7 @@ def post_process(output): def create_dataloader(data_dir, batch_size, *args, **kwargs): cifar10_dataset = CIFAR10DataSet(data_dir) - _, val_set = torch.utils.data.random_split(cifar10_dataset.val_dataset, [49000, 1000]) - return DataLoader(PytorchResNetDataset(val_set), batch_size=batch_size, drop_last=True) + return DataLoader(PytorchResNetDataset(cifar10_dataset.val_dataset), batch_size=batch_size, drop_last=True) # 
------------------------------------------------------------------------- @@ -161,8 +170,7 @@ def create_qat_config(): def create_train_dataloader(data_dir, batchsize, *args, **kwargs): cifar10_dataset = CIFAR10DataSet(data_dir) - train_dataset, _ = torch.utils.data.random_split(cifar10_dataset.train_dataset, [40000, 10000]) - return DataLoader(PytorchResNetDataset(train_dataset), batch_size=batchsize, drop_last=True) + return DataLoader(PytorchResNetDataset(cifar10_dataset.train_dataset), batch_size=batchsize, drop_last=True) # ------------------------------------------------------------------------- From d497451eefc7fb2a76df504fb2f85a6f9b9d1039 Mon Sep 17 00:00:00 2001 From: jiapli Date: Wed, 18 Oct 2023 12:11:26 +0800 Subject: [PATCH 2/6] Use train dataset and sampler 500 for calibration --- examples/resnet/user_script.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/resnet/user_script.py b/examples/resnet/user_script.py index a379c82ac6..2cae16a593 100644 --- a/examples/resnet/user_script.py +++ b/examples/resnet/user_script.py @@ -92,11 +92,17 @@ def create_dataloader(data_dir, batch_size, *args, **kwargs): class ResnetCalibrationDataReader(CalibrationDataReader): def __init__(self, data_dir: str, batch_size: int = 16): super().__init__() - self.iterator = iter(create_dataloader(data_dir, batch_size)) + self.iterator = iter(create_train_dataloader(data_dir, batch_size)) + self.sample_counter = 500 def get_next(self) -> dict: + if self.sample_counter <= 0: + return None + try: - return {"input": next(self.iterator)[0].numpy()} + item = {"input": next(self.iterator)[0].numpy()} + self.sample_counter -= 1 + return item except Exception: return None From 2a8247c2101abded2a2c4e19cdf386692887fbb2 Mon Sep 17 00:00:00 2001 From: jiapli Date: Wed, 18 Oct 2023 13:00:27 +0800 Subject: [PATCH 3/6] Use seed for reproducibility --- examples/resnet/resnet_ptq_cpu.json | 1 + examples/resnet/user_script.py | 19 +++++++++++++------ 2 files
changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/resnet/resnet_ptq_cpu.json b/examples/resnet/resnet_ptq_cpu.json index cf285151be..35a6077fc1 100644 --- a/examples/resnet/resnet_ptq_cpu.json +++ b/examples/resnet/resnet_ptq_cpu.json @@ -90,6 +90,7 @@ "evaluator": "common_evaluator", "execution_providers": ["CPUExecutionProvider"], "cache_dir": "cache", + "clean_cache": true, "output_dir": "models/resnet_ptq_cpu" } } diff --git a/examples/resnet/user_script.py b/examples/resnet/user_script.py index 2cae16a593..3e758a8105 100644 --- a/examples/resnet/user_script.py +++ b/examples/resnet/user_script.py @@ -2,6 +2,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- +import random + +import numpy as np import torch import torchmetrics from onnxruntime.quantization.calibrate import CalibrationDataReader @@ -17,6 +20,14 @@ # Common Dataset # ------------------------------------------------------------------------- +seed = 0 + +np.random.seed(seed) +random.seed(seed) +torch.manual_seed(seed) +torch.random.manual_seed(seed) +torch.cuda.manual_seed(seed) + class CIFAR10DataSet: def __init__( @@ -34,11 +45,8 @@ def setup(self, stage: str): [ transforms.Pad(4), # use CenterCrop to keep consistent dataset - transforms.CenterCrop(32), - # Ban RandomHorizontalFlip and RandomCrop to keep consistent dataset - # for real case, we can use them to augment dataset - # transforms.RandomHorizontalFlip(), - # transforms.RandomCrop(32), + transforms.RandomHorizontalFlip(), + transforms.RandomCrop(32), transforms.ToTensor(), ] ) @@ -51,7 +59,6 @@ def __init__(self, dataset): self.dataset = dataset def __len__(self): - # return 100 return len(self.dataset) def __getitem__(self, index): From 1518e5268a49a2eb64ca5888df2f35f48290d032 Mon Sep 17 00:00:00 2001 From: jiapli Date: Wed, 18 Oct 2023 13:37:45 +0800 Subject: [PATCH 4/6] remove clean cache --- 
examples/resnet/resnet_ptq_cpu.json | 1 - examples/resnet/user_script.py | 1 - 2 files changed, 2 deletions(-) diff --git a/examples/resnet/resnet_ptq_cpu.json b/examples/resnet/resnet_ptq_cpu.json index 35a6077fc1..cf285151be 100644 --- a/examples/resnet/resnet_ptq_cpu.json +++ b/examples/resnet/resnet_ptq_cpu.json @@ -90,7 +90,6 @@ "evaluator": "common_evaluator", "execution_providers": ["CPUExecutionProvider"], "cache_dir": "cache", - "clean_cache": true, "output_dir": "models/resnet_ptq_cpu" } } diff --git a/examples/resnet/user_script.py b/examples/resnet/user_script.py index 3e758a8105..74044c78c8 100644 --- a/examples/resnet/user_script.py +++ b/examples/resnet/user_script.py @@ -44,7 +44,6 @@ def setup(self, stage: str): transform = transforms.Compose( [ transforms.Pad(4), - # use CenterCrop to keep consistent dataset transforms.RandomHorizontalFlip(), transforms.RandomCrop(32), transforms.ToTensor(), From d49184a611dcb22a02ba54d9413cd2d37119aff0 Mon Sep 17 00:00:00 2001 From: jiapli Date: Wed, 18 Oct 2023 16:59:51 +0800 Subject: [PATCH 5/6] set seed --- examples/resnet/prepare_model_data.py | 9 +++++---- examples/resnet/user_script.py | 8 +++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/examples/resnet/prepare_model_data.py b/examples/resnet/prepare_model_data.py index 87581ee00e..1496148e1e 100644 --- a/examples/resnet/prepare_model_data.py +++ b/examples/resnet/prepare_model_data.py @@ -56,12 +56,13 @@ def update_lr(optimizer, lr): def prepare_model(num_epochs=1, models_dir="models", data_dir="data"): + seed = 0 # seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html - random.seed(0) - np.random.seed(0) - torch.manual_seed(0) + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) # the following are needed only for GPU - torch.cuda.manual_seed(0) + torch.cuda.manual_seed(seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False diff --git 
a/examples/resnet/user_script.py b/examples/resnet/user_script.py index 74044c78c8..82c1b03973 100644 --- a/examples/resnet/user_script.py +++ b/examples/resnet/user_script.py @@ -21,12 +21,14 @@ # ------------------------------------------------------------------------- seed = 0 - -np.random.seed(seed) +# seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html random.seed(seed) +np.random.seed(seed) torch.manual_seed(seed) -torch.random.manual_seed(seed) +# the following are needed only for GPU torch.cuda.manual_seed(seed) +torch.backends.cudnn.deterministic = True +torch.backends.cudnn.benchmark = False class CIFAR10DataSet: From 24bd3bb2faa8f08a8021dd9c5a65b74e5df91c1c Mon Sep 17 00:00:00 2001 From: jiapli Date: Wed, 18 Oct 2023 21:22:52 +0800 Subject: [PATCH 6/6] fix --- examples/resnet/user_script.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/examples/resnet/user_script.py b/examples/resnet/user_script.py index 82c1b03973..6278c9f5d2 100644 --- a/examples/resnet/user_script.py +++ b/examples/resnet/user_script.py @@ -2,9 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -import random -import numpy as np import torch import torchmetrics from onnxruntime.quantization.calibrate import CalibrationDataReader @@ -22,8 +20,7 @@ seed = 0 # seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html -random.seed(seed) -np.random.seed(seed) +# do not set random seed and np.random.seed for aml test, since it will cause aml job name conflict torch.manual_seed(seed) # the following are needed only for GPU torch.cuda.manual_seed(seed)