In [1]:
import numpy as np, pandas as pd
import os
import time

In [2]:
import glob
files = glob.glob('input/stage_2_images/*.jpg')
imageid = list(map(lambda x: x.split('/')[-1][:-4], files))
sub2 = pd.DataFrame({'image_id':imageid, 'labels':' '.join(['/m/01g317', '/m/05s2s'])})
sub2.head()

Unnamed: 0,image_id,labels
0,6e4c794d707761744877453d,/m/01g317 /m/05s2s
1,44683973366a3546784d773d,/m/01g317 /m/05s2s
2,45526a6d51764c4e4633383d,/m/01g317 /m/05s2s
3,66563932463636774a786b3d,/m/01g317 /m/05s2s
4,4845534e6a546a76704f383d,/m/01g317 /m/05s2s


In [3]:
sub2.to_csv('input/stage_2_sample_submission2.csv', index=False)

In [4]:
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))

In [5]:
import torch
import torchvision.models as models
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter

In [6]:
import random
seed = 34
random.seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed) 
torch.cuda.manual_seed_all(seed) 
torch.backends.cudnn.deterministic=True

In [7]:
from helpers import Imagefolder_inference as myImagefolder_inf

In [8]:
def load_checkpoint(load_path, model, optimizer=None, warmup=False):
    if os.path.isfile(load_path):
        print("-> Loading checkpoint '{}'".format(load_path))
        checkpoint = torch.load(load_path)
        epoch = checkpoint['epoch'] if not warmup else -1
        acc_valid = checkpoint['acc_valid']
        acc_train = checkpoint['acc_train']
        loss_valid = checkpoint['loss_valid']
        loss_train = checkpoint['loss_train']
        state_dict = checkpoint['state_dict']
        itrn_chkpt = checkpoint['step'] if not warmup else 0
        
        model.load_state_dict(state_dict)
#         from collections import OrderedDict
#         new_state_dict = OrderedDict()
#         for k, v in state_dict.items():
#             name = k[7:] # remove 'module.' of dataparallel
#             new_state_dict[name]=v
#         model.load_state_dict(new_state_dict)
            
        print("-> Loaded checkpoint at epoch {} step {} ".format(epoch, itrn_chkpt))
        if warmup:
            return epoch, acc_valid, acc_train, loss_train, loss_valid, itrn_chkpt
        else:
            if optimizer != None:
                optimizer.load_state_dict(checkpoint['optimizer'])
                for state in optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()
            return epoch-1, acc_valid, acc_train, loss_train, loss_valid, itrn_chkpt
    else:
        print("-> No checkpoint found at '{}'".format(load_path))
        return None

def labels_loop(list_class, label):
    for ilabel in label:
        for j in ilabel:
            list_class[j] += 1
    return list_class
    
class SoftF2Loss(torch.nn.Module):

    def __init__(self):
        super(SoftF2Loss,self).__init__()
        
    def forward(self, logits, labels):
        __small_value=1e-6
        beta = 2
        batch_size = logits.size()[0]
        p = torch.nn.functional.sigmoid(logits)
        l = labels
        num_pos = torch.sum(p, 1) + __small_value
        num_pos_hat = torch.sum(l, 1) + __small_value
        tp = torch.sum(l * p, 1)
        precise = tp / num_pos
        recall = tp / num_pos_hat
        fs = (1 + beta * beta) * precise * recall / (beta * beta * precise + recall + __small_value)
        loss = fs.sum() / batch_size
        return (1 - loss)
    
def add_to_result(result, valid, img, path, pred, labels_val):
    batch_size = pred.shape[0]
    for ii in range(batch_size):
        # result is dict, keys are classes, values are probs, pred is probs
        img.append(path[ii].split('/')[-1][:-4])
        for kk, p in enumerate(pred[ii]):
            result[kk].append(float(p)) # result is probs
        if labels_val is not None:
            for kk, l in enumerate(labels_val[ii]):
                valid[kk].append(int(l))

In [9]:
image_resize = (224,224)
batch_size_valid = 64*2
num_workers_valid = 12*2

chkpt = True
warmup = False
chkpt_file = 'accval-0.1413_lossval-56.2363_epoch-9_step-10440_checkpoint.pth'

In [10]:
img_transform_valid = transforms.Compose([transforms.Resize(image_resize),transforms.ToTensor()])

In [11]:
imgset_valid = myImagefolder_inf.DatasetFolder(root='input/stage_2_images/', label_file='input/stage_2_sample_submission2.csv', desc_file='input/label_hmn_mch_desc.csv', transform=img_transform_valid)
loader_valid = torch.utils.data.DataLoader(imgset_valid, batch_size=batch_size_valid, num_workers=num_workers_valid, shuffle=False)
n_class = len(imgset_valid.classes)

In [12]:
from senet import se_resnext101_32x4d
device = torch.device('cuda')
model = se_resnext101_32x4d(pretrained=None, num_classes=553 if (chkpt&warmup) else n_class, bn0=True)
model.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
model = nn.DataParallel(model)

In [13]:
# criterion = nn.BCEWithLogitsLoss().to(device)
criterion = SoftF2Loss().to(device)

In [14]:
if chkpt:
    epoch, acc_valid, acc_train, loss_valid, loss_train, itrn_chkpt = load_checkpoint('chkpt/'+chkpt_file, 
                                                   model, None, warmup=warmup)
    if warmup:
        model.module.last_linear = nn.Linear(model.module.last_linear.in_features, n_class).to(device)
    print(epoch, acc_valid, acc_train, loss_valid, loss_train, itrn_chkpt)

-> Loading checkpoint 'chkpt/accval-0.1413_lossval-56.2363_epoch-9_step-10440_checkpoint.pth'
-> Loaded checkpoint at epoch 8 step 10440 
7 0.14126008805833443 0.1410138576230247 tensor(119.0210, device='cuda:0', requires_grad=True) tensor(56.2363, device='cuda:0') 10440


In [16]:
model = model.to(device)
writer = SummaryWriter()

# Train the model
total_step = len(loader_valid)
# Evaluate validation
with torch.no_grad():
    correct_val = 0
    total_val = 0
    loss_valid = 0
    icnt_val = 0
    correct_class_val = torch.zeros(n_class)
    correct_class_prob_val = torch.zeros(n_class)
    total_class_val = torch.zeros(n_class)
    result = {}
    img = []
    for ii in range(n_class):
        result[ii] = []

    model.eval()
    t77 = time.time()
    for ival, (images_val, _, path) in enumerate(loader_valid):
        print("Val_Step:{}".format(ival))
        t7 = time.time()
        print("Loader:{}".format(t7-t77))
        images_val = images_val.to(device)
        outputs_val = model(images_val)
        probs_val = torch.nn.Sigmoid()(outputs_val).cpu()

        t8 = time.time()
        t77 = t8
        print("Inference:{}".format(t8-t7))
        
        pred = probs_val.data

        add_to_result(result, _, img, path, pred, None)


Val_Step:0
Loader:6.296557426452637
Inference:6.846416711807251
Val_Step:1
Loader:4.656213760375977
Inference:0.2714226245880127
Val_Step:2
Loader:5.1169281005859375
Inference:0.26366209983825684
Val_Step:3
Loader:5.0422279834747314
Inference:0.25800108909606934
Val_Step:4
Loader:4.90475058555603
Inference:0.5064373016357422
Val_Step:5
Loader:4.674800157546997
Inference:0.2530026435852051
Val_Step:6
Loader:4.805131196975708
Inference:0.2553749084472656
Val_Step:7
Loader:4.677137136459351
Inference:0.25246524810791016
Val_Step:8
Loader:4.757432460784912
Inference:0.25661802291870117
Val_Step:9
Loader:4.830447196960449
Inference:0.2548055648803711
Val_Step:10
Loader:4.9616029262542725
Inference:0.2722620964050293
Val_Step:11
Loader:4.776740074157715
Inference:0.25931572914123535
Val_Step:12
Loader:4.481338262557983
Inference:0.2522876262664795
Val_Step:13
Loader:4.82720160484314
Inference:0.2558867931365967
Val_Step:14
Loader:4.668629169464111
Inference:0.2531008720397949
Val_Step:15
Loa

Inference:0.24918103218078613
Val_Step:124
Loader:5.088399887084961
Inference:0.2693824768066406
Val_Step:125
Loader:5.097487449645996
Inference:0.25815892219543457
Val_Step:126
Loader:5.1825761795043945
Inference:0.25518012046813965
Val_Step:127
Loader:5.170257806777954
Inference:0.2548253536224365
Val_Step:128
Loader:5.360436677932739
Inference:0.2519054412841797
Val_Step:129
Loader:5.344872713088989
Inference:0.24959635734558105
Val_Step:130
Loader:5.70245361328125
Inference:0.25064992904663086
Val_Step:131
Loader:5.196382999420166
Inference:0.2515723705291748
Val_Step:132
Loader:5.138046979904175
Inference:0.25333404541015625
Val_Step:133
Loader:5.167848825454712
Inference:0.26823902130126953
Val_Step:134
Loader:5.186892032623291
Inference:0.24983525276184082
Val_Step:135
Loader:5.208659410476685
Inference:0.25077271461486816
Val_Step:136
Loader:5.240781545639038
Inference:0.2595536708831787
Val_Step:137
Loader:5.0702526569366455
Inference:0.24988484382629395
Val_Step:138
Loader:5.

Inference:0.2520151138305664
Val_Step:246
Loader:5.051320314407349
Inference:0.26018548011779785
Val_Step:247
Loader:5.220085382461548
Inference:0.2517220973968506
Val_Step:248
Loader:5.701671123504639
Inference:0.2532482147216797
Val_Step:249
Loader:5.13836669921875
Inference:0.25077080726623535
Val_Step:250
Loader:4.956829786300659
Inference:0.2507350444793701
Val_Step:251
Loader:5.085843086242676
Inference:0.26666712760925293
Val_Step:252
Loader:5.097516298294067
Inference:0.25754523277282715
Val_Step:253
Loader:4.995311737060547
Inference:0.2500636577606201
Val_Step:254
Loader:5.384106397628784
Inference:0.2508726119995117
Val_Step:255
Loader:5.00697660446167
Inference:0.24884676933288574
Val_Step:256
Loader:4.974262475967407
Inference:0.2529337406158447
Val_Step:257
Loader:5.000724792480469
Inference:0.2524595260620117
Val_Step:258
Loader:5.048516273498535
Inference:13.546960830688477
Val_Step:259
Loader:5.120457887649536
Inference:0.2587716579437256
Val_Step:260
Loader:5.06961774

Inference:0.2507014274597168
Val_Step:368
Loader:4.9191412925720215
Inference:0.25563597679138184
Val_Step:369
Loader:4.892214775085449
Inference:0.2526838779449463
Val_Step:370
Loader:4.837423801422119
Inference:0.25170063972473145
Val_Step:371
Loader:4.839480876922607
Inference:0.24966835975646973
Val_Step:372
Loader:4.884212017059326
Inference:0.25185227394104004
Val_Step:373
Loader:4.905927658081055
Inference:0.2664604187011719
Val_Step:374
Loader:5.225936651229858
Inference:0.2506523132324219
Val_Step:375
Loader:5.084890127182007
Inference:0.2537956237792969
Val_Step:376
Loader:5.096700668334961
Inference:0.25197863578796387
Val_Step:377
Loader:4.905029296875
Inference:18.01394510269165
Val_Step:378
Loader:4.856716156005859
Inference:0.26459288597106934
Val_Step:379
Loader:4.906259536743164
Inference:0.27141618728637695
Val_Step:380
Loader:4.882983922958374
Inference:0.2571108341217041
Val_Step:381
Loader:4.878129720687866
Inference:0.2508981227874756
Val_Step:382
Loader:4.8744449

Inference:0.24988722801208496
Val_Step:490
Loader:4.931384086608887
Inference:0.2676708698272705
Val_Step:491
Loader:4.936732769012451
Inference:0.2569265365600586
Val_Step:492
Loader:4.81702733039856
Inference:0.2507779598236084
Val_Step:493
Loader:4.947946548461914
Inference:0.25211143493652344
Val_Step:494
Loader:4.914796352386475
Inference:0.2523982524871826
Val_Step:495
Loader:4.903579235076904
Inference:0.2512972354888916
Val_Step:496
Loader:4.838625431060791
Inference:23.75042986869812
Val_Step:497
Loader:4.893056631088257
Inference:0.25409388542175293
Val_Step:498
Loader:4.656244993209839
Inference:0.2520418167114258
Val_Step:499
Loader:4.85783576965332
Inference:0.25234055519104004
Val_Step:500
Loader:4.891924858093262
Inference:0.2506062984466553
Val_Step:501
Loader:4.901354074478149
Inference:0.25101161003112793
Val_Step:502
Loader:4.67458701133728
Inference:0.25429749488830566
Val_Step:503
Loader:5.818671464920044
Inference:0.26688361167907715
Val_Step:504
Loader:4.93502163

Inference:0.26123952865600586
Val_Step:612
Loader:5.0500648021698
Inference:0.2512784004211426
Val_Step:613
Loader:5.161428451538086
Inference:0.24970221519470215
Val_Step:614
Loader:5.128931760787964
Inference:0.2512683868408203
Val_Step:615
Loader:4.900631904602051
Inference:29.40719723701477
Val_Step:616
Loader:4.845097780227661
Inference:0.2555675506591797
Val_Step:617
Loader:4.718364477157593
Inference:0.2503623962402344
Val_Step:618
Loader:4.847153663635254
Inference:0.2524731159210205
Val_Step:619
Loader:5.109896183013916
Inference:0.24843883514404297
Val_Step:620
Loader:4.902796030044556
Inference:0.2514781951904297
Val_Step:621
Loader:4.860464334487915
Inference:0.2689037322998047
Val_Step:622
Loader:4.849769830703735
Inference:0.26706743240356445
Val_Step:623
Loader:4.83315372467041
Inference:0.25158238410949707
Val_Step:624
Loader:4.652759075164795
Inference:0.25423765182495117
Val_Step:625
Loader:4.807819604873657
Inference:0.24798250198364258
Val_Step:626
Loader:5.01163220

Inference:0.2572026252746582
Val_Step:734
Loader:5.168977499008179
Inference:39.477030754089355
Val_Step:735
Loader:5.191528558731079
Inference:0.2578737735748291
Val_Step:736
Loader:5.608321905136108
Inference:0.25001001358032227
Val_Step:737
Loader:5.160480499267578
Inference:0.25140976905822754
Val_Step:738
Loader:5.069202661514282
Inference:0.2568984031677246
Val_Step:739
Loader:5.261093616485596
Inference:0.2483656406402588
Val_Step:740
Loader:5.149172782897949
Inference:0.26535654067993164
Val_Step:741
Loader:5.243167161941528
Inference:0.25617027282714844
Val_Step:742
Loader:5.205844163894653
Inference:0.28212714195251465
Val_Step:743
Loader:5.434542417526245
Inference:0.25869321823120117
Val_Step:744
Loader:5.283932447433472
Inference:0.24956226348876953
Val_Step:745
Loader:5.446509122848511
Inference:0.2493584156036377
Val_Step:746
Loader:5.214984178543091
Inference:0.25081849098205566
Val_Step:747
Loader:5.14432692527771
Inference:0.24810409545898438
Val_Step:748
Loader:5.196

In [17]:
df_prob = pd.DataFrame(result)
df_img = pd.DataFrame({'image_id': img})

In [18]:
df_prob.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7163,7164,7165,7166,7167,7168,7169,7170,7171,7172
0,1.9e-05,1.9e-05,2e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,...,1.9e-05,2.1e-05,1.9e-05,1.9e-05,1.9e-05,2e-05,1.9e-05,1.9e-05,1.9e-05,2e-05
1,1.8e-05,1.8e-05,1.9e-05,1.8e-05,1.8e-05,1.9e-05,1.8e-05,1.8e-05,1.8e-05,1.9e-05,...,1.8e-05,2.3e-05,1.8e-05,1.9e-05,1.8e-05,2e-05,1.8e-05,1.8e-05,1.8e-05,1.9e-05
2,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.5e-05,...,1.4e-05,1.9e-05,1.4e-05,1.4e-05,1.4e-05,1.6e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05
3,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,...,1.3e-05,1.5e-05,1.3e-05,1.3e-05,1.3e-05,1.4e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05
4,3.8e-05,3.8e-05,3.9e-05,3.8e-05,3.8e-05,3.9e-05,3.8e-05,3.8e-05,3.8e-05,4e-05,...,3.8e-05,4.9e-05,3.8e-05,3.9e-05,3.8e-05,4.2e-05,3.8e-05,3.8e-05,3.9e-05,4.1e-05


In [19]:
df_img.head()

Unnamed: 0,image_id
0,6e4c794d707761744877453d
1,44683973366a3546784d773d
2,45526a6d51764c4e4633383d
3,66563932463636774a786b3d
4,4845534e6a546a76704f383d


In [20]:
df = df_prob.join(other=df_img, how='left').set_index(keys='image_id')

In [None]:
# df.to_csv('stage_1_probs.csv')

In [22]:
df.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,7163,7164,7165,7166,7167,7168,7169,7170,7171,7172
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6e4c794d707761744877453d,1.9e-05,1.9e-05,2e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,1.9e-05,...,1.9e-05,2.1e-05,1.9e-05,1.9e-05,1.9e-05,2e-05,1.9e-05,1.9e-05,1.9e-05,2e-05
44683973366a3546784d773d,1.8e-05,1.8e-05,1.9e-05,1.8e-05,1.8e-05,1.9e-05,1.8e-05,1.8e-05,1.8e-05,1.9e-05,...,1.8e-05,2.3e-05,1.8e-05,1.9e-05,1.8e-05,2e-05,1.8e-05,1.8e-05,1.8e-05,1.9e-05
45526a6d51764c4e4633383d,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.5e-05,...,1.4e-05,1.9e-05,1.4e-05,1.4e-05,1.4e-05,1.6e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05
66563932463636774a786b3d,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,...,1.3e-05,1.5e-05,1.3e-05,1.3e-05,1.3e-05,1.4e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05
4845534e6a546a76704f383d,3.8e-05,3.8e-05,3.9e-05,3.8e-05,3.8e-05,3.9e-05,3.8e-05,3.8e-05,3.8e-05,4e-05,...,3.8e-05,4.9e-05,3.8e-05,3.9e-05,3.8e-05,4.2e-05,3.8e-05,3.8e-05,3.9e-05,4.1e-05


In [23]:
df_bool = df>=0.95

In [24]:
(df_bool.sum()!=0).sum()

148

In [25]:
sub = pd.read_csv('input/stage_2_sample_submission.csv')
sub.set_index('image_id', inplace=True)

In [26]:
sub.head()

Unnamed: 0_level_0,labels
image_id,Unnamed: 1_level_1
2b2b2f64445950756265493d,/m/0sgh53y /m/0g4cd0
2b2b30717675476653584d3d,/m/0sgh53y /m/0g4cd0
2b2b362f6b57704e4b31773d,/m/0sgh53y /m/0g4cd0
2b2b3950504f34493336733d,/m/0sgh53y /m/0g4cd0
2b2b43386a5769574d50513d,/m/0sgh53y /m/0g4cd0


In [27]:
for i in range(df_bool.shape[0]):
    tmp_labels = ' '.join([imgset_valid.idx_to_class[j] for j in np.where(df_bool.iloc[i,:])[0] ])
    sub.loc[df_bool.index[i], 'labels'] = tmp_labels

In [28]:
sub.head()

Unnamed: 0_level_0,labels
image_id,Unnamed: 1_level_1
2b2b2f64445950756265493d,/m/019sc6 /m/01cd9 /m/01g317 /m/02cwm /m/02rfd...
2b2b30717675476653584d3d,/m/01g317 /m/01lcwm /m/01prls /m/021mp2 /m/07y...
2b2b362f6b57704e4b31773d,/m/019sc6 /m/01cd9 /m/01g317 /m/01kr8f /m/02cs...
2b2b3950504f34493336733d,/m/01g317 /m/01ykh /m/0270h /m/02q08p0 /m/02wb...
2b2b43386a5769574d50513d,/m/015p6 /m/01g317 /m/02cqfm /m/03rbf6 /m/03vt...


In [29]:
sub.to_csv('submission.csv')

In [None]:
# imgset_valid.class_to_desc['/m/05s2s']

In [None]:
# label='/m/01g317 /m/09j2d /m/0dzct /m/07j7r /m/05s2s /m/07yv9'
# for i in range(sub.shape[0]):
#     sub.iloc[i, 0] = label

In [None]:
# sub.head()

In [None]:
# sub.to_csv('submission.csv')