In [1]:
import os
import argparse
import datetime
import time
import csv
import pandas as pd
import importlib

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
import torch.multiprocessing as mp
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader

from models import gan
from models.models import classifier32, classifier32ABN
from datasets.osr_dataloader import MNIST_OSR, CIFAR10_OSR, CIFAR100_OSR, SVHN_OSR, Tiny_ImageNet_OSR, VialyticsTrafficSigns_OSR
from utils import Logger, save_networks, load_networks
from core import train, train_cs, test
from class_grouping_traffic_signs import get_group, get_label_index

In [2]:
empty_training_classes = ['Vorgeschriebene_Fahrtrichtung_hier_links_211-10'] # folders, that do not contain training images
training_classes_with_subfolders = ['Verbot_fuer_Fahrzeuge_ueber_angegebene_tatsaechliche_Hoehe_265', 'Verbot_fuer_Fahrzeuge_ueber_angegebenes_tatsaechliches_Gewicht_262',
                                    'Verbot_fuer_Fahrzeuge_und_Zuege_ueber_angegebene_tatsaechliche_Laenge_266', 'Zusatzzeichen_weiss']


empty_test_classes = ['Sackgasse_fuer_Radverkehr_durchlaessig_357-52']  # folders, that do not contain test images
test_classes_with_subfolders = ['Verkehrszeichen_auf_anderem_Schild', 'Verbot_fuer_Fahrzeuge_ueber_angegebene_tatsaechliche_Breite_264']

In [3]:
options = {
    # Dataset
    'dataset': 'vialytics_traffic_signs',
    'dataroot': '/home/ec2-user/SageMaker/Traffic_Sign_Dataset_vialytics_and_GTSRB_2022_07_11',
    'num_classes': len(get_label_index(include_background_training_classes=False, include_unknown_classes=False)),
    'outf': '/home/ec2-user/SageMaker/ARPL/log',
    'out-num': 50, # help='For CIFAR100'

    # optimization
    'img-size': 128,
    'batch-size': 64,
    'lr': 0.1,
    'gan-lr': 0.0002,
    'max_epoch': 100,
    'stepsize': 30,
    'temp': 1.0,
    'num-centers': 1,

    # model
    'weight_pl': 0.1, #help="weight for center loss"
    'beta': 0.1, #help="weight for entropy loss"
    'model': 'classifier32',

    # misc
    'nz': 100,
    'ns': 1,
    'eval_freq': 1,
    'print_freq': 100,
    'gpu': '0',
    'use_cpu': False,
    'seed': 0,
    #parser.add_argument('--use-cpu', action='store_true')
    'save_dir': '../log',
    'loss': 'ARPLoss',
    'eval': False, # parser.add_argument('--eval', action='store_true', help="Eval", default=False)
    'cs': False #parser.add_argument('--cs', action='store_true', help="Confusing Sample", default=False)
}



In [4]:
dir_name = '{}_{}'.format(options['model'], options['loss'])
dir_path = os.path.join(options['outf'], 'results', dir_name)
if not os.path.exists(dir_path):
    os.makedirs(dir_path)

In [5]:
torch.manual_seed(options['seed'])
os.environ['CUDA_VISIBLE_DEVICES'] = options['gpu']
use_gpu = torch.cuda.is_available()
if options['use_cpu']: use_gpu = False
options['use_gpu'] = use_gpu
    
if use_gpu:
    print("Currently using GPU: {}".format(options['gpu']))
    cudnn.benchmark = True
    torch.cuda.manual_seed_all(options['seed'])
else:
    print("Currently using CPU")

Currently using GPU: 0


In [6]:
use_gpu

True

In [13]:
pin_memory = True if use_gpu else False
num_workers=4

traindataset = VialyticsTrafficSigns_OSR(dataroot=options['dataroot'], data_loader_type='train', empty_classes=empty_training_classes, 
                                         classes_with_subfolders=training_classes_with_subfolders, img_size=options['img-size'])
trainloader = DataLoader(traindataset, batch_size=options['batch-size'], shuffle=True, num_workers=num_workers, pin_memory=pin_memory)

testdataset = VialyticsTrafficSigns_OSR(dataroot=options['dataroot'], data_loader_type='test', empty_classes=[], classes_with_subfolders=[], img_size=options['img-size'])
testloader = DataLoader(testdataset, batch_size=options['batch-size'], shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

outdataset = VialyticsTrafficSigns_OSR(dataroot=options['dataroot'], data_loader_type='out', empty_classes=empty_test_classes, classes_with_subfolders=test_classes_with_subfolders, 
                                       img_size=options['img-size'])
outloader = DataLoader(testdataset, batch_size=options['batch-size'], shuffle=False, num_workers=num_workers, pin_memory=pin_memory)


In [14]:
# for imgs, labels in trainloader:
#     print(imgs)
#     print(labels)
#     break

In [15]:
# Model
print("Creating model: {}".format(options['model']))
if options['cs']:
    net = classifier32ABN(num_classes=options['num_classes'])
else:
    net = classifier32(num_classes=options['num_classes'])
feat_dim = 128

if options['cs']:
    print("Creating GAN")
    nz, ns = options['nz'], 1
    if 'tiny_imagenet' in options['dataset']:
        netG = gan.Generator(1, nz, 64, 3)
        netD = gan.Discriminator(1, 3, 64)
    else:
        netG = gan.Generator32(1, nz, 64, 3)
        netD = gan.Discriminator32(1, 3, 64)
    fixed_noise = torch.FloatTensor(64, nz, 1, 1).normal_(0, 1)
    criterionD = nn.BCELoss()

Creating model: classifier32


In [16]:
# Loss
options.update(
    {
        'feat_dim': feat_dim,
        'use_gpu':  use_gpu
    }
)

Loss = importlib.import_module('loss.'+options['loss'])
criterion = getattr(Loss, options['loss'])(**options)

if use_gpu:
    net = nn.DataParallel(net).cuda()
    criterion = criterion.cuda()
    if options['cs']:
        netG = nn.DataParallel(netG, device_ids=[i for i in range(len(options['gpu'].split(',')))]).cuda()
        netD = nn.DataParallel(netD, device_ids=[i for i in range(len(options['gpu'].split(',')))]).cuda()
        fixed_noise.cuda()

model_path = os.path.join(options['outf'], 'models', options['dataset'])
if not os.path.exists(model_path):
    os.makedirs(model_path)

In [17]:
file_name = '{}_{}_{}'.format(options['model'], options['loss'], options['cs'])

params_list = [{'params': net.parameters()},
            {'params': criterion.parameters()}]

if options['dataset'] in ['tiny_imagenet', 'vialytics_traffic_signs']:
    optimizer = torch.optim.Adam(params_list, lr=options['lr'])
else:
    optimizer = torch.optim.SGD(params_list, lr=options['lr'], momentum=0.9, weight_decay=1e-4)
if options['cs']:
    optimizerD = torch.optim.Adam(netD.parameters(), lr=options['gan_lr'], betas=(0.5, 0.999))
    optimizerG = torch.optim.Adam(netG.parameters(), lr=options['gan_lr'], betas=(0.5, 0.999))

if options['stepsize'] > 0:
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30,60,90,120])

In [None]:
training_progress = open(os.path.join(options['outf'], 'training_progress.txt'), 'w')

start_time = time.time()

for epoch in range(options['max_epoch']):
    print("==> Epoch {}/{}".format(epoch+1, options['max_epoch']))
    # paste training progress to file, because sagemaker often looses server connection, but still trains in the background
    with open(os.path.join(options['outf'], 'training_progress.txt'), 'a') as f:
        f.write("\n")
        f.write(f"==> Epoch {epoch+1}/{options['max_epoch']}")

    if options['cs']:
        train_cs(net, netD, netG, criterion, criterionD,
            optimizer, optimizerD, optimizerG,
            trainloader, epoch=epoch, **options)

    train(net, criterion, optimizer, trainloader, epoch=epoch, **options)

    if options['eval_freq'] > 0 and (epoch+1) % options['eval_freq'] == 0 or (epoch+1) == options['max_epoch']:
        print("==> Test", options['loss'])
        # paste training progress to file, because sagemaker often looses server connection, but still trains in the background
#         with open(os.path.join(options['outf'], 'training_progress.txt'), 'a') as f:
#             f.write("\n")
#             f.write(f"==> Test, {options['loss']}")
            
    
        results = test(net, criterion, testloader, outloader, epoch=epoch, **options)
        print("Acc (%): {:.3f}\t AUROC (%): {:.3f}\t OSCR (%): {:.3f}\t".format(results['ACC'], results['AUROC'], results['OSCR']))
        
        # paste training progress to file, because sagemaker often looses server connection, but still trains in the background
        with open(os.path.join(options['outf'], 'training_progress.txt'), 'a') as f:
            f.write("\n")
            f.write("Acc (%): {:.3f}\t AUROC (%): {:.3f}\t OSCR (%): {:.3f}\t".format(results['ACC'], results['AUROC'], results['OSCR']))

        save_networks(net, model_path, str(epoch)+'_'+str(round(results['ACC'], 3)), criterion=criterion)

    if options['stepsize'] > 0: scheduler.step()

elapsed = round(time.time() - start_time)
elapsed = str(datetime.timedelta(seconds=elapsed))
print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
# paste training progress to file, because sagemaker often looses server connection, but still trains in the background
with open(os.path.join(options['outf'], 'training_progress.txt'), 'a') as f:
    f.write("\n")
    f.write("Finished. Total elapsed time (h:m:s): {}".format(elapsed))

df = pd.DataFrame(results)
df.to_csv(os.path.join(dir_path, file_name))

==> Epoch 1/100
Batch 100/1482	 Loss 3.778719 (3.702163)
Batch 200/1482	 Loss 3.213026 (3.547456)
Batch 300/1482	 Loss 2.981953 (3.453630)
Batch 400/1482	 Loss 3.316528 (3.389215)
Batch 500/1482	 Loss 3.193722 (3.336328)
Batch 600/1482	 Loss 3.312529 (3.293527)
Batch 700/1482	 Loss 3.234801 (3.243996)
Batch 800/1482	 Loss 2.977823 (3.196497)
Batch 900/1482	 Loss 2.925632 (3.153650)
Batch 1000/1482	 Loss 2.685591 (3.116610)
Batch 1100/1482	 Loss 2.328896 (3.081428)
Batch 1200/1482	 Loss 2.038402 (3.040014)
Batch 1300/1482	 Loss 2.549214 (3.000491)
Batch 1400/1482	 Loss 2.408072 (2.963069)
==> Test ARPLoss
Acc: 40.32227
       TNR    AUROC  DTACC  AUIN   AUOUT 
Bas     4.987 49.995 50.000 49.987 49.987
Acc (%): 40.322	 AUROC (%): 49.995	 OSCR (%): 26.467	
==> Epoch 2/100
Batch 100/1482	 Loss 2.321947 (2.359645)
Batch 200/1482	 Loss 2.184645 (2.343234)
Batch 300/1482	 Loss 1.633160 (2.250832)
Batch 400/1482	 Loss 1.811486 (2.184373)
Batch 500/1482	 Loss 1.789421 (2.098077)
Batch 600/1482	

In [21]:
!zip -r '/home/ec2-user/SageMaker/ARPL/ARPL_models_1.zip' '/home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/'

updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/ (stored 0%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/96_98.169_.pth (deflated 7%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/53_97.988_.pth (deflated 7%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/10_74.733__criterion.pth (deflated 11%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/83_98.198_.pth (deflated 7%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/4_77.136_.pth (deflated 7%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/98_98.16__criterion.pth (deflated 15%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traffic_signs/checkpoints/13_92.553__criterion.pth (deflated 12%)
updating: home/ec2-user/SageMaker/ARPL/log/models/vialytics_traff

# Only evaluation

In [None]:
net, criterion = load_networks(net, model_path, file_name, criterion=criterion)
results = test(net, criterion, testloader, outloader, epoch=0, **options)
print("Acc (%): {:.3f}\t AUROC (%): {:.3f}\t OSCR (%): {:.3f}\t".format(results['ACC'], results['AUROC'], results['OSCR']))