In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] =  '0' #'3,2,1,0'
import sys
sys.path.append("..")
from common import *
from data   import *
from imgaug import augmenters as iaa


##----------------------------------------
from model_seresnext50 import *

TkAgg
@common.py:  
	set random seed
		SEED=35202
	set cuda environment
		torch.__version__              = 0.4.1
		torch.version.cuda             = 9.0
		torch.backends.cudnn.version() = 7005
		os['CUDA_VISIBLE_DEVICES']     = 0
		torch.cuda.device_count()      = 1



In [2]:

def test_augment(drawing,label,index, augment):
    """Render one drawing as a 96x96 image under a fixed flip/crop setting.

    Args:
        drawing: stroke data handed to ``drawing_to_image_with_color_aug``.
        label: returned unchanged.
        index: sample index; not used here.
        augment: indexable with five entries,
            ``[flip_prob, top, right, bottom, left]`` - the horizontal-flip
            probability followed by the crop fraction for each side.

    Returns:
        ``(image, label, None)``.
    """
    flip_prob, top_percent, right_percent, bottom_percent, left_percent = (
        augment[position] for position in range(5)
    )

    # imgaug takes the crop fractions in (top, right, bottom, left) order;
    # keep_size=True resizes the cropped image back to its input size.
    crop_percent = (top_percent, right_percent, bottom_percent, left_percent)
    flip_step = iaa.Fliplr(flip_prob)
    crop_step = iaa.Crop(percent=crop_percent, keep_size=True)
    seq = iaa.Sequential([flip_step, crop_step])

    image = drawing_to_image_with_color_aug(drawing, 96, 96, seq)
    return image, label, None




##############################################################################################

#generate prediction npy_file
def make_npy_file_from_model(checkpoint, mode, split, augment, out_test_dir, npy_file):
    """Run one test-time-augmentation pass of the network and save its probabilities.

    Builds a ``DoodleDataset`` for ``mode``/``split`` whose samples go through
    ``test_augment`` with the given ``augment`` setting, runs ``Net`` over it in
    sequential order, and saves the softmax output to ``npy_file`` after
    converting it with ``np_float32_to_uint8``.

    Args:
        checkpoint: path of the state dict loaded into ``Net``.
        mode: dataset mode passed to ``DoodleDataset``. When it is ``'train'``
            the loss and top-3 metrics are additionally computed and the
            metrics printed (debug only).
        split: dataset split passed to ``DoodleDataset``.
        augment: ``[flip_prob, top, right, bottom, left]`` setting forwarded to
            ``test_augment``.
        out_test_dir: directory that receives ``log.submit.txt``; it is created
            when it does not exist yet.
        npy_file: destination handed to ``np.save``.

    Raises:
        AssertionError: if the dataset holds fewer samples than one batch, or
            if the number of processed samples differs from the sampler length.
    """

    ## setup  -----------------
    # The log is opened inside out_test_dir, so the directory has to exist
    # before anything is written there.
    os.makedirs(out_test_dir, exist_ok=True)
    # os.makedirs(out_test_dir +'/backup', exist_ok=True)
    # backup_project_as_zip(PROJECT_PATH, out_dir +'/backup/code.test.%s.zip'%IDENTIFIER)

    log = Logger()
    log.open(out_test_dir +'/log.submit.txt',mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('\tSEED         = %u\n' % SEED)
    log.write('\tPROJECT_PATH = %s\n' % PROJECT_PATH)
    log.write('\tout_test_dir = %s\n' % out_test_dir)
    log.write('\n')


    ## dataset ----------------------------------------
    log.write('** dataset setting **\n')
    batch_size  = 512 #256 #512

    test_dataset = DoodleDataset(mode, split,
                              lambda drawing, label, index : test_augment(drawing, label, index, augment),)
    # SequentialSampler + drop_last=False: every sample is visited once, in
    # dataset order, so row i of the saved array belongs to sample i.
    test_loader  = DataLoader(
                        test_dataset,
                        sampler     = SequentialSampler(test_dataset),
                        batch_size  = batch_size,
                        drop_last   = False,
                        pin_memory  = True,
                        collate_fn  = null_collate)

    # requires at least one full batch of samples
    assert(len(test_dataset)>=batch_size)
    log.write('test_dataset : \n%s\n'%(test_dataset))
    log.write('\n')


    ## net ----------------------------------------
    log.write('** net setting **\n')
    net = Net().cuda()

    log.write('%s\n\n'%(type(net)))
    log.write('\n')

    log.write('\tcheckpoint = %s\n' % checkpoint)
    # map_location keeps the loaded storages where torch.load deserialises them
    # (CPU); load_state_dict then copies the values into the CUDA parameters.
    net.load_state_dict(torch.load(checkpoint, map_location=lambda storage, loc: storage))


    ## predict ----------------------------------------
    criterion = softmax_cross_entropy_criterion
    test_num  = 0
    probs    = []
    truths   = []
    losses   = []
    corrects = []

    net.set_mode('test')
    for images, truth, cache in test_loader:
        # progress counter, rewritten in place on a single line
        print('\r\t',test_num, end='', flush=True)
        test_num += len(truth)

        with torch.no_grad():
            images     = images.cuda()
            logit      = data_parallel(net,images)
            batch_prob = F.softmax(logit,1)
            probs.append(batch_prob.data.cpu().numpy())

            if mode=='train': # debug only
                truth   = truth.cuda()
                loss    = criterion(logit, truth, False)
                correct = metric(logit, truth, False)

                losses.append(loss.data.cpu().numpy())
                corrects.append(correct.data.cpu().numpy())
                truths.append(truth.data.cpu().numpy())

    # every sample of the sampler must have been processed exactly once
    assert(test_num == len(test_loader.sampler))
    print('\r\t',test_num, end='\n', flush=True)
    prob = np.concatenate(probs)

    if mode=='train': # debug only
        correct = np.concatenate(corrects)
        # truth and loss are aggregated here but not used further below
        truth   = np.concatenate(truths).astype(np.int32).reshape(-1,1)
        loss    = np.concatenate(losses)
        loss    = loss.mean()
        correct = correct.mean(0)
        # cumulative hit rate for the first 1, 2 and 3 guesses
        top = [correct[0], correct[0]+correct[1], correct[0]+correct[1]+correct[2]]
        # a hit at rank k is weighted by 1/k
        precision = correct[0]/1 + correct[1]/2 + correct[2]/3
        print('top      ', top)
        print('precision', precision)
        print('')
    #-------------------------------------------


    np.save(npy_file, np_float32_to_uint8(prob))
    print(prob.shape)
    log.write('\n')






def prob_to_csv(prob, key_id, csv_file):
    """Write the three highest-scoring class names per row to a gzip CSV.

    Args:
        prob: 2-D score array, one row per sample and one column per class.
        key_id: sample ids, one per row of ``prob``.
        csv_file: output path; the file is written gzip-compressed with the
            columns ``key_id`` and ``word``.
    """
    # negate so that the ascending argsort lists the best class first
    best_three = np.argsort(-prob, 1)[:, :3]

    word = [
        ' '.join((CLASS_NAME[first], CLASS_NAME[second], CLASS_NAME[third]))
        for first, second, third in best_three
    ]

    submission = pd.DataFrame({ 'key_id' : key_id , 'word' : word})
    submission = submission.astype(str)
    submission.to_csv(csv_file, index=False, columns=['key_id', 'word'], compression='gzip')



def npy_file_to_sbmit_csv(mode, split, npy_file, csv_file):
    """Convert a saved probability array into a submission CSV.

    Loads ``npy_file``, converts it with ``np_uint8_to_float32`` and passes it,
    together with the test-set ``key_id`` column, to ``prob_to_csv``.

    Args:
        mode: only ``'test'`` is supported.
        split: not used by this function.
        npy_file: path of the array to load with ``np.load``.
        csv_file: output path forwarded to ``prob_to_csv``.

    Raises:
        NotImplementedError: if ``mode`` is ``'train'``.
        ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``.
        AssertionError: if ``NUM_CLASS`` is not 340.
    """
    global TEST_DF

    print('NUM_CLASS', NUM_CLASS)
    complexity='simplified'

    if mode=='train':
        raise NotImplementedError

    # Any other mode used to fall through with key_id unbound; fail clearly
    # before loading anything.
    if mode!='test':
        raise ValueError('unsupported mode: %r' % (mode,))

    assert(NUM_CLASS==340)

    # TEST_DF caches the test csv across calls. Comparing a DataFrame with []
    # does not give a usable boolean, so the "not loaded yet" state is detected
    # by type instead.
    if not isinstance(TEST_DF, pd.DataFrame):
        TEST_DF = pd.read_csv(DATA_DIR + '/csv/test_%s.csv'%(complexity))
    key_id = TEST_DF['key_id'].values


    prob = np_uint8_to_float32(np.load(npy_file))
    print(prob.shape)

    prob_to_csv(prob, key_id, csv_file)

### TTA setting

In [3]:
mode = 'test'  # 'train'

# One entry per checkpoint to run: the dataset split, the directory that
# receives the outputs, and the weights file to load.
configures = [
    Struct(
        split        = '<NIL>',  # 'valid_0'
        out_test_dir = '../split/test',
        checkpoint   = '../../output/backup/887_crop.pth',
    ),
]
# Every TTA entry is [flip_prob, top, right, bottom, left]: the flip
# probability followed by the crop fraction taken from each side.
augments = []
for flip_prob in [0, 1]:
    # four corner crops: 10% is removed in total along each axis
    augments.extend(
        [flip_prob, top, right, 0.1-top, 0.1-right]
        for top in [0, 0.1]
        for right in [0, 0.1]
    )
    # centre crop: 5% removed from every side
    augments.append([flip_prob, 0.05, 0.05, 0.05, 0.05])

In [5]:
# Produce one probability file per (configure, augment) pair. `step` keeps
# counting across configures and is the index used in the file name.
step = 0
for configure in configures:
    split, out_test_dir, checkpoint = (
        configure.split,
        configure.out_test_dir,
        configure.checkpoint,
    )

    for augment in augments:
        print('produce augment: ', step)
        npy_name = '/%s-%s.prob.uint8.npy'%(mode,str(step))
        npy_file = out_test_dir + npy_name
        make_npy_file_from_model(checkpoint, mode, split, augment, out_test_dir, npy_file)
        step += 1

produce augment:  0

--- [START 2018-11-15_22-56-53] ----------------------------------------------------------------

	SEED         = 35202
	PROJECT_PATH = C:\Users\Admin\Desktop\doodle\code
	out_test_dir = ../split/test

** dataset setting **

test_dataset : 
	split        = <NIL>
	mode         = test
	complexity   = simplified
	len(self.id) = 112199



** net setting **
Load imagenet pretrained SERESNEXT50
<class 'model_seresnext50.Net'>


	checkpoint = ../../output/backup/887_crop.pth
	 0 512 1024 1536 2048 2560 3072 3584 4096 4608 5120 5632 6144 6656 7168 7680 8192 8704 9216 9728 10240 10752 11264 11776 12288 12800 13312 13824 14336 14848 15360 15872 16384 16896 17408 17920 18432 18944 19456 19968 20480 20992 21504 22016 22528 23040 23552 24064 24576 25088 25600 26112 26624 27136 27648 28160 28672 29184 29696 30208 30720 31232 31744 32256 32768 33280 33792 34304 34816 35328 35840 36352 36864 37376 37888 38400 38912 39424 39936 40448 40960 41472 41984 42496 43008 43520 44032 44544 

	 0 512 1024 1536 2048 2560 3072 3584 4096 4608 5120 5632 6144 6656 7168 7680 8192 8704 9216 9728 10240 10752 11264 11776 12288 12800 13312 13824 14336 14848 15360 15872 16384 16896 17408 17920 18432 18944 19456 19968 20480 20992 21504 22016 22528 23040 23552 24064 24576 25088 25600 26112 26624 27136 27648 28160 28672 29184 29696 30208 30720 3123231744 32256 32768 33280 33792 34304 34816 35328 35840 36352 36864 37376 37888 38400 38912 39424 39936 40448 40960 41472 41984 42496 43008 43520 44032 44544 45056 45568 46080 46592 47104 47616 48128 48640 49152 49664 50176 50688 51200 51712 52224 52736 53248 53760 54272 54784 55296 55808 56320 56832 57344 57856 58368 58880 59392 59904 60416 60928 61440 61952 62464 62976 63488 64000 64512 65024 65536 66048 66560 67072 67584 68096 68608 69120 69632 70144 70656 71168 71680 72192 72704 73216 73728 74240 74752 75264 75776 76288 76800 77312 77824 78336 78848 79360 79872 80384 80896 81408 81920 82432 82944 83456 83968 84480 84992 85504 86016 86528 870

	 0 512 1024 1536 2048 2560 3072 3584 4096 4608 5120 5632 6144 6656 7168 7680 8192 8704 9216 9728 10240 10752 11264 11776 12288 12800 13312 13824 14336 14848 15360 15872 16384 16896 17408 17920 18432 18944 19456 19968 20480 20992 21504 22016 22528 23040 23552 24064 24576 25088 25600 26112 26624 27136 27648 28160 28672 29184 29696 30208 30720 31232 31744 32256 32768 33280 33792 34304 34816 35328 35840 36352 36864 37376 37888 38400 38912 39424 39936 40448 40960 41472 41984 42496 43008 43520 44032 44544 45056 45568 46080 46592 47104 47616 48128 48640 49152 49664 50176 50688 51200 51712 52224 52736 53248 53760 54272 54784 55296 55808 56320 56832 57344 57856 58368 58880 59392 59904 60416 60928 61440 61952 62464 62976 63488 64000 64512 65024 65536 66048 66560 67072 67584 68096 68608 69120 69632 70144 70656 71168 71680 72192 72704 73216 73728 74240 74752 75264 75776 76288 76800 77312 77824 78336 78848 79360 79872 80384 80896 81408 81920 82432 82944 83456 83968 84480 84992 85504 86016 86528 87

### Read all TTA-related npy files and average them

In [58]:
# Average the saved predictions of all TTA passes.
# The sum is accumulated in float32: float16 cannot represent every integer
# above 2048, while the sum of len(augments) uint8-range arrays can exceed that.
num_tta = len(augments)
assert num_tta > 0, 'augments is empty: there are no TTA predictions to average'

result = np.load(out_test_dir + '/%s-%s.prob.uint8.npy'%(mode,str(0))).astype(np.float32)
for t in range(1, num_tta):
    result += np.load(out_test_dir + '/%s-%s.prob.uint8.npy'%(mode,str(t)))

# divide by the actual number of passes rather than a hard-coded 10
result /= num_tta

### Output results

In [64]:
# Pair the averaged TTA prediction with the test-set ids and write the
# gzip-compressed submission file.
TEST_DF = pd.read_csv(DATA_DIR + '/csv/test_%s.csv'%('simplified'))
key_id  = TEST_DF['key_id'].values

prob = np_uint8_to_float32(result)
submit_file = out_test_dir + '/%s-%s.submit_887.csv.gz'%(mode,'tta')
prob_to_csv(prob, key_id, submit_file)