检查 gap 产生的原因

ERM：权重加载问题



In [1]:
import argparse
import datetime
import glob
from pathlib import Path
# from test import repeat_eval_ckpt, eval_single_ckpt
# from noise import add_noise_to_weights

import numba
import logging
import os

import torch
import torch.distributed as dist
import torch.nn as nn

from pcdet.config import cfg, cfg_from_list, cfg_from_yaml_file, log_config_to_file
from pcdet.datasets import build_dataloader
from pcdet.models import build_network, model_fn_decorator
from pcdet.utils import common_utils
from train_utils.optimization import build_optimizer, build_scheduler
from train_utils.train_utils import train_model, model_save
from eval_utils import eval_utils
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

from hardware_noise.weight_mapping_finalv import weight_mapping as add_noise_to_weights


# from datetime import datetime
print(datetime.datetime.now())

def parse_config():
    parser = argparse.ArgumentParser(description='arg parser')
    parser.add_argument('--cfg_file', type=str, default='cfgs/kitti_models/pointpillar.yaml', \
                        help='specify the config for training')

    parser.add_argument('--batch_size', type=int, default=None, required=False, help='batch size for training')
    parser.add_argument('--epochs', type=int, default=None, required=False, help='number of epochs to train for')
    parser.add_argument('--workers', type=int, default=32, help='number of workers for dataloader')
    parser.add_argument('--extra_tag', type=str, default='default', help='extra tag for this experiment')
    parser.add_argument('--ckpt', type=str, default='checkpoint_epoch_80.pth', \
                        help='checkpoint to start from')
    parser.add_argument('--pretrained_model', type=str, default=None, help='pretrained_model')
    parser.add_argument('--launcher', choices=['none', 'pytorch', 'slurm'], default='none')
    parser.add_argument('--tcp_port', type=int, default=29051, help='tcp port for distrbuted training')
    parser.add_argument('--sync_bn', action='store_true', default=False, help='whether to use sync bn')
    parser.add_argument('--fix_random_seed', action='store_true', default=True, help='')
    parser.add_argument('--ckpt_save_interval', type=int, default=1, help='number of training epochs')
    parser.add_argument('--local_rank', type=int, default=0, help='local rank for distributed training')
    parser.add_argument('--max_ckpt_save_num', type=int, default=30, help='max number of saved checkpoint')
    parser.add_argument('--merge_all_iters_to_one_epoch', action='store_true', default=False, help='')
    parser.add_argument('--set', dest='set_cfgs', default=None, nargs=argparse.REMAINDER,
                        help='set extra config keys if needed')

    parser.add_argument('--max_waiting_mins', type=int, default=0, help='max waiting minutes')
    parser.add_argument('--start_epoch', type=int, default=0, help='')
    parser.add_argument('--save_to_file', action='store_true', default=False, help='')
    
    # parser.add_argument('--local_rank', type=int, default=-1, metavar='N', help='Local process rank.')

    args = parser.parse_known_args()[0]

    cfg_from_yaml_file(args.cfg_file, cfg)
    cfg.TAG = Path(args.cfg_file).stem
    cfg.EXP_GROUP_PATH = '/'.join(args.cfg_file.split('/')[1:-1])  # remove 'cfgs' and 'xxxx.yaml'

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs, cfg)

    return args, cfg


args, cfg = parse_config()



if args.launcher == 'none':
    dist_train = False  # True
    total_gpus = 1
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp')
    memory_gpu = [int(x.split()[2]) for x in open('tmp', 'r').readlines()]
    print('Using GPU:' + str(np.argmax(memory_gpu)))
    os.environ["CUDA_VISIBLE_DEVICES"] = str(np.argmax(memory_gpu))
    os.system('rm tmp')
else:
    args.is_master = args.local_rank == 0
    args.device = torch.cuda.device(args.local_rank)
    torch.cuda.manual_seed_all(666)
    # dist.init_process_group(backend='nccl', init_method='env://', rank = 0, world_size = 1)
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '5678'
    # dist.init_process_group(backend='nccl', init_method='env://', rank = 0, world_size = 1)
    torch.cuda.set_device(1)
    device = torch.device('cuda', cfg.LOCAL_RANK)
    total_gpus, cfg.LOCAL_RANK = getattr(common_utils, 'init_dist_%s' % args.launcher)(
        args.tcp_port, args.local_rank, backend='nccl'
    )
    dist_train = True


if args.batch_size is None:
    args.batch_size = cfg.OPTIMIZATION.BATCH_SIZE_PER_GPU
else:
    assert args.batch_size % total_gpus == 0, 'Batch size should match the number of gpus'
    args.batch_size = args.batch_size // total_gpus

args.epochs = cfg.OPTIMIZATION.NUM_EPOCHS if args.epochs is None else args.epochs

if args.fix_random_seed:
    common_utils.set_random_seed(666)

output_dir = cfg.ROOT_DIR / 'output' / cfg.EXP_GROUP_PATH / cfg.TAG / args.extra_tag
ckpt_dir = output_dir / 'ckpt'
output_dir.mkdir(parents=True, exist_ok=True)
ckpt_dir.mkdir(parents=True, exist_ok=True)

log_file = output_dir / ('log_train_%s.txt' % datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
logger = common_utils.create_logger('./baseline/pointpillar/log.txt', rank=cfg.LOCAL_RANK)

file = open('./baseline/pointpillar/result.txt','w')
file.write('results\n')
file.close()




'''Test on Noises'''  
test_set, test_loader, sampler = build_dataloader(
                                dataset_cfg=cfg.DATA_CONFIG,
                                class_names=cfg.CLASS_NAMES,
                                batch_size=args.batch_size,
                                dist=dist_train, workers=args.workers, logger=logger, training=False
                            )


model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=test_set)
model.cuda()


logger.info('**********************Start evaluation %s/%s(%s)**********************' %
            (cfg.EXP_GROUP_PATH, cfg.TAG, args.extra_tag))


eval_output_dir = output_dir / 'eval' / 'eval_with_train'
eval_output_dir.mkdir(parents=True, exist_ok=True)
args.start_epoch = max(args.epochs - 10, 0)  # Only evaluate the last 10 epochs



logger.info('----------------Noise Experiment----------------')

save_path = './save_path/'
f = open(save_path+'3d.txt', "a+")
f.write(str(datetime.datetime.now())+'\n')
f.close()

with open(save_path+'3d.txt', "a+") as f:
    f.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                                        'file', 'usability', 'sigma', 'n', 'mthd',
                                        'Car/easy_R40',
                                        'Car/moderate_R40',
                                        'Car/hard_R40',
                                        'Pedestrian/easy_R40',
                                        'Pedestrian/moderate_R40',
                                        'Pedestrian/hard_R40',
                                        'Cyclist/easy_R40',
                                        'Cyclist/moderate_R40',
                                        'Cyclist/hard_R40',
                                        'easy_R40',
                                        'moderate_R40',
                                        'hard_R40',
                                        'avg', 'time'
                                            ))

hw_data_files = os.listdir('./hardware_noise/hardware_data/')

file2usability = np.load('./hardware_noise/file2usability.npy', allow_pickle=True).item()
file2sigma = np.load('./hardware_noise/file2sigma.npy', allow_pickle=True).item()

N = 1
file2ap_dict = {}

for sigma, f_name in sorted(zip(file2sigma.values(), file2sigma.keys())):
    print(sigma, f_name)
    usability = file2usability[f_name]
    if f_name.endswith('xlsx'):
        file2ap_dict[f_name] = {}
        print(f_name)
         
        usability = file2usability[f_name]
        sigma = file2sigma[f_name]

        for n in range(N):
            print('file:{}, evaluate-{}'.format(f_name, n))
            model.load_params_from_file(filename=args.ckpt, to_cpu=dist, logger=logger)
            # add_noise_to_weights('./hardware_noise/hardware_data/'+f_name, model, device='cuda')


            # acc1 = eval_utils.eval_simple(p1, p2, sigma, n, cfg, model, test_loader, logger, save_path, dist_test=dist_train, save_to_file=args.save_to_file, result_dir=eval_output_dir)
            acc1, ret_dict = eval_utils.eval_simple(f_name, usability, sigma, 
                            n, cfg, model, test_loader, logger, 
                            save_path, dist_test=False, 
                            save_to_file=False, result_dir=save_path)
            print(ret_dict)
            file2ap_dict[f_name][n] = ret_dict
    break

2023-07-20 19:06:39,153   INFO  Loading KITTI dataset
2023-07-20 19:06:39,248   INFO  Total samples for KITTI dataset: 3769


2023-07-20 19:06:39.092686
Using GPU:0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
2023-07-20 19:06:41,138   INFO  **********************Start evaluation kitti_models/pointpillar(default)**********************
2023-07-20 19:06:41,139   INFO  ----------------Noise Experiment----------------
2023-07-20 19:06:41,147   INFO  ==> Loading parameters from checkpoint checkpoint_epoch_80.pth to CPU
2023-07-20 19:06:41,220   INFO  ==> Checkpoint trained from version: pcdet+0.3.0+26a1612
2023-07-20 19:06:41,228   INFO  ==> Done (loaded 127/127)


1.3700022894417224e-16 I-V_data_1.5um_length_5nm_diameter_NA_third_etch_8min_Pb_ED_3h_180C_MAI_no_150nm_Ag_memory_6V_carbon_paste.xlsx
I-V_data_1.5um_length_5nm_diameter_NA_third_etch_8min_Pb_ED_3h_180C_MAI_no_150nm_Ag_memory_6V_carbon_paste.xlsx
file:I-V_data_1.5um_length_5nm_diameter_NA_third_etch_8min_Pb_ED_3h_180C_MAI_no_150nm_Ag_memory_6V_carbon_paste.xlsx, evaluate-0


eval: 100%|█████████▉| 942/943 [03:22<00:00, 25.34it/s]2023-07-20 19:10:03,521   INFO  Average predicted number of objects(3769 samples): 18.535
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
[1m
File "../pcdet/datasets/kitti/kitti_object_eval_python/eval.py", line 122:[0m
[1m@numba.jit(nopython=True, parallel=True)
[1mdef d3_box_overlap_kernel(boxes, qboxes, rinc, criterion=-1):
[0m[1m^[0m[0m
[0m
eval: 100%|██████████| 943/943 [03:44<00:00,  4.19it/s]

{'recall/roi_0.3': 0.0, 'recall/rcnn_0.3': 0.0, 'recall/roi_0.5': 0.0, 'recall/rcnn_0.5': 0.0, 'recall/roi_0.7': 0.0, 'recall/rcnn_0.7': 0.0, 'Car_aos/easy_R40': 95.61578509443258, 'Car_aos/moderate_R40': 91.4136648038295, 'Car_aos/hard_R40': 88.50518907162345, 'Car_3d/easy_R40': 86.54110558325874, 'Car_3d/moderate_R40': 75.60897640192752, 'Car_3d/hard_R40': 72.41026926582, 'Car_bev/easy_R40': 92.0385706885058, 'Car_bev/moderate_R40': 87.63963301298523, 'Car_bev/hard_R40': 84.94691163363035, 'Car_image/easy_R40': 95.63621396636988, 'Car_image/moderate_R40': 91.6757184242311, 'Car_image/hard_R40': 88.91199399291182, 'Pedestrian_aos/easy_R40': 46.16667362559813, 'Pedestrian_aos/moderate_R40': 41.82001902404614, 'Pedestrian_aos/hard_R40': 39.12522735353639, 'Pedestrian_3d/easy_R40': 50.889023762933824, 'Pedestrian_3d/moderate_R40': 44.355223336549074, 'Pedestrian_3d/hard_R40': 39.59111380724358, 'Pedestrian_bev/easy_R40': 56.96358295424485, 'Pedestrian_bev/moderate_R40': 50.51955197481440




In [5]:
acc1

63.158854192496364

In [2]:
file2ap_dict

{'I-V_data_1.5um_length_5nm_diameter_NA_third_etch_8min_Pb_ED_3h_180C_MAI_no_150nm_Ag_memory_6V_carbon_paste.xlsx': {0: {'recall/roi_0.3': 0.0,
   'recall/rcnn_0.3': 0.0,
   'recall/roi_0.5': 0.0,
   'recall/rcnn_0.5': 0.0,
   'recall/roi_0.7': 0.0,
   'recall/rcnn_0.7': 0.0,
   'Car_aos/easy_R40': 95.61578509443258,
   'Car_aos/moderate_R40': 91.4136648038295,
   'Car_aos/hard_R40': 88.50518907162345,
   'Car_3d/easy_R40': 86.54110558325874,
   'Car_3d/moderate_R40': 75.60897640192752,
   'Car_3d/hard_R40': 72.41026926582,
   'Car_bev/easy_R40': 92.0385706885058,
   'Car_bev/moderate_R40': 87.63963301298523,
   'Car_bev/hard_R40': 84.94691163363035,
   'Car_image/easy_R40': 95.63621396636988,
   'Car_image/moderate_R40': 91.6757184242311,
   'Car_image/hard_R40': 88.91199399291182,
   'Pedestrian_aos/easy_R40': 46.16667362559813,
   'Pedestrian_aos/moderate_R40': 41.82001902404614,
   'Pedestrian_aos/hard_R40': 39.12522735353639,
   'Pedestrian_3d/easy_R40': 50.889023762933824,
   'Pe

In [3]:
for f_name in sorted(hw_data_files):
    print(f_name)

.ipynb_checkpoints
I-V_data_0.7um_length_200nm_diameter_NA_third_etch_10min_Pb_ED_1h_180C_MAI_no_100nm_Ag_memory_1V_carbon_paste.xlsx
I-V_data_0.7um_length_200nm_diameter_NA_third_etch_10min_Pb_ED_1h_180C_MAI_no_100nm_Ag_memory_1V_silver_paste.xlsx
I-V_data_0.7um_length_200nm_diameter_NA_third_etch_10min_Pb_ED_1h_180C_MAI_no_100nm_Ag_memory_5V_silver_paste.xlsx
I-V_data_0.7um_length_200nm_diameter_NA_third_etch_25min_Pb_ED_1h_180C_MAI_no_100nm_Ag_memory_8V_carbon_paste.xlsx
I-V_data_1.2um_length_200nm_diameter_NA_third_etch_15min_Pb_ED_10h_180+210C_FAI_PMMA_200nm_Ag_memory_6V_probe.xlsx
I-V_data_1.2um_length_200nm_diameter_NA_third_etch_15min_Pb_ED_10h_180+210C_FAI_PMMA_200nm_Ag_memory_6V_silver_paste.xlsx
I-V_data_1.2um_length_200nm_diameter_NA_third_etch_15min_Pb_ED_10h_180+210C_FAI_no_200nm_Ag_memory_6V_probe.xlsx
I-V_data_1.2um_length_200nm_diameter_NA_third_etch_15min_Pb_ED_10h_210C_MABr_PMMA_200nm_Ag_memory_6V_silver_paste.xlsx
I-V_data_1.2um_length_200nm_diameter_NA_third_etch_1

In [4]:
new_dict

NameError: name 'new_dict' is not defined