In [12]:
#!pip install pretrainedmodels
#!pip install --upgrade pip
# !pip install -U torch_nightly -f https://download.pytorch.org/whl/nightly/cu90/torch_nightly.html
#!pip install -U --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
# !pip install --upgrade --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
!pip install efficientnet_pytorch
# !conda install -c conda-forge nvidia-apex --yes
# !conda install -c conda-forge/label/cf202003 nvidia-apex --yes
#!pip install numpy



In [13]:
%autosave 20
import os
import gc
gc.enable()
import time
from glob import glob
import random
from datetime import datetime

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'
from PIL import Image
from tqdm import tqdm 

import torch
import torchvision
# import pretrainedmodels
import efficientnet_pytorch
from torchvision import transforms, models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# from torch.cuda import amp
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import BatchSampler, SequentialSampler
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from albumentations import (
    Compose, HorizontalFlip, CLAHE, HueSaturationValue,
    RandomBrightness, RandomContrast, RandomGamma, OneOf, Resize,
    ToFloat, ShiftScaleRotate, GridDistortion, RandomRotate90, Cutout,
    RGBShift, RandomBrightness, RandomContrast, Blur, MotionBlur, MedianBlur, GaussNoise, CoarseDropout,
    IAAAdditiveGaussianNoise, GaussNoise, OpticalDistortion, RandomSizedCrop, VerticalFlip
)
from catalyst.data.sampler import DistributedSamplerWrapper, BalanceClassSampler
# from apex import amp

import sklearn
from sklearn import metrics
from sklearn.model_selection import GroupKFold

import warnings
warnings.simplefilter('ignore')
warnings.filterwarnings("ignore")

print(torch.__version__)
print("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))

Autosaving every 20 seconds
1.5.1

CUDNN VERSION: 7603



In [14]:
def fix_seed(seed=2020):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

fix_seed()
print('Seeding Completed.')

Seeding Completed.


In [15]:
dataset = []
for label, kind in enumerate(['Cover', 'JMiPOD', 'JUNIWARD', 'UERD']):
    for path in glob('../input/alaska2-image-steganalysis/Cover/*.jpg'):
        dataset.append({
            'kind':kind,
            'label':label,
            'image_id':path.split('/')[-1]
        })
        
dataset = pd.DataFrame(dataset)
dataset = dataset.sample(frac=1).reset_index(drop=True)

In [16]:
gkf = GroupKFold(n_splits=5)
dataset.loc[:, 'fold'] = 0
for fold_number, (train_index, val_index) in enumerate(gkf.split(X=dataset.index, y=dataset['label'], groups=dataset['image_id'])):
    dataset.loc[dataset.iloc[val_index].index, 'fold'] = fold_number

In [17]:
FLAGS = {}
FLAGS['batch_size'] = 16
FLAGS['epochs'] = 3
FLAGS['lr'] = 1e-3
FLAGS['num_workers'] = 4
FLAGS['verbose'] = True
FLAGS['log_step'] = 1
FLAGS['step_schedule'] = False
FLAGS['validation_schedule'] = True

In [18]:
def get_train_transforms():
    return A.Compose([
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=512, width=512, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)

def get_valid_transforms():
    return A.Compose([
            A.Resize(height=512, width=512, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)

In [19]:
def one_hot(size, target):
    vec = torch.zeros(size, dtype=torch.float32)
    vec[target] = 1.
    return vec

class Alaska2Dataset(Dataset):
    
    def __init__(self, kinds, labels, image_ids, transforms=None):
        super().__init__()
        self.kinds = kinds
        self.labels = labels 
        self.image_ids = image_ids
        self.transforms = transforms
        
    def __getitem__(self, index):
        kind = self.kinds[index]
        label = self.labels[index]
        target = one_hot(4, label)
        image_id = self.image_ids[index]
        image = cv2.cvtColor(cv2.imread(f'../input/alaska2-image-steganalysis/{kind}/{image_id}', cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms is not None:
            sample = {'image':image}
            sample = self.transforms(**sample)
            image = sample['image']
        return image, target
    
    def __len__(self):
        return self.image_ids.shape[0]
    
    def get_labels(self):
        return list(self.labels)

In [20]:
fold = 0
train_dataset = Alaska2Dataset(kinds = dataset[dataset['fold']!=fold].kind.values,
                               labels = dataset[dataset['fold']!=fold].label.values,
                               image_ids = dataset[dataset['fold']!=fold].image_id.values,
                               transforms = get_train_transforms()
                              )

valid_dataset = Alaska2Dataset(kinds = dataset[dataset['fold']==fold].kind.values,
                               labels = dataset[dataset['fold']==fold].label.values,
                               image_ids = dataset[dataset['fold']==fold].image_id.values,
                               transforms = get_train_transforms()
                              )

In [21]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
        
def alaska_weighted_auc(y_true, y_valid):
    """
    https://www.kaggle.com/anokas/weighted-auc-metric-updated
    """
    tpr_thresholds = [0.0, 0.4, 1.0]
    weights = [2, 1]

    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_valid, pos_label=1)

    # size of subsets
    areas = np.array(tpr_thresholds[1:]) - np.array(tpr_thresholds[:-1])

    # The total area is normalized by the sum of weights such that the final weighted AUC is between 0 and 1.
    normalization = np.dot(areas, weights)

    competition_metric = 0
    for idx, weight in enumerate(weights):
        y_min = tpr_thresholds[idx]
        y_max = tpr_thresholds[idx + 1]
        mask = (y_min < tpr) & (tpr < y_max)
        # pdb.set_trace()

        x_padding = np.linspace(fpr[mask][-1], 1, 100)

        x = np.concatenate([fpr[mask], x_padding])
        y = np.concatenate([tpr[mask], [y_max] * len(x_padding)])
        y = y - y_min  # normalize such that curve starts at y=0
        score = metrics.auc(x, y)
        submetric = score * weight
        best_subscore = (y_max - y_min) * weight
        competition_metric += submetric

    return competition_metric / normalization
        
class RocAucMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.y_true = np.array([0,1])
        self.y_pred = np.array([0.5,0.5])
        self.score = 0

    def update(self, y_true, y_pred):
        y_true = y_true.cpu().numpy().argmax(axis=1).clip(min=0, max=1).astype(int)
        y_pred = 1 - nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()[:,0]
        self.y_true = np.hstack((self.y_true, y_true))
        self.y_pred = np.hstack((self.y_pred, y_pred))
        self.score = alaska_weighted_auc(self.y_true, self.y_pred)
    
    @property
    def avg(self):
        return self.score

In [22]:
class LabelSmoothing(nn.Module):
    def __init__(self, smoothing = 0.05):
        super(LabelSmoothing, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        
    def forward(self, x, target):
        if self.training:
            x = x.float()
            target = target.float()
            logprobs = torch.nn.functional.log_softmax(x, dim = -1)
            nll_loss = -logprobs * target
            nll_loss = nll_loss.sum(-1)
            smooth_loss = -logprobs.mean(dim=-1)
            loss = self.confidence * nll_loss + self.smoothing * smooth_loss
            return loss.mean()
        else:
            return torch.nn.functional.cross_entropy(x, target)

In [32]:
class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        self.model = torchvision.models.resnet34(pretrained=True)
        modules=list(self.model.children())[:-1]
        self.resnet34=nn.Sequential(*modules)
        self.dropout = nn.Dropout(p=0.1)
        self.classifier = nn.Linear(in_features=512, out_features=4)
    def forward(self, images):
        batch_size, _, _, _ = images.shape
        features = self.resnet34(images)
        avg_pool = features.reshape(batch_size, -1)
#         Multi Sample Dropout
#         logits = torch.mean(
#             torch.stack(
#                 [self.classifier(self.dropout(avg_pool)) for _ in range(5)],
#                 dim=0,
#             ),
#             dim=0,
#         )
        logits = self.classifier(avg_pool)
        return logits

In [None]:
class EfficientNetB2(nn.Module):
    def __init__(self, pretrained=None):
        super(EfficientNetB2, self).__init__()
        self.model = efficientnet_pytorch.EfficientNet.from_name('efficientnet-b2')
        if pretrained is not None:
            self.model.load_state_dict(
                torch.load('../input/efficientnet-pytorch/efficientnet-b2-27687264.pth')
            )
        self.dropout = nn.Dropout(p=0.1)
        self.classifier = nn.Linear(in_features=1408, out_features=4)
    
    def forward(self, images):
        batch_size, _, _, _ = images.shape
        features = self.model.extract_features(images)
        avg_pool = F.adaptive_avg_pool2d(features, 1).reshape(batch_size, -1)
        
#         Multi Sample Dropout
#         logits = torch.mean(
#             torch.stack(
#                 [self.classifier(self.dropout(avg_pool)) for _ in range(5)],
#                 dim=0,
#             ),
#             dim=0,
#         )
        logits = self.classifier(avg_pool)
        return logits

In [35]:
class Runner:
    def __init__(self, model):
        self.device = torch.device('cuda:0')
        self.model = model.to(self.device)       
        self.criterion = LabelSmoothing().to(self.device)
#         self.criterion = torch.nn.CrossEntropyLoss().to(self.device)
        optimizer_parameters = list(self.model.named_parameters())
        no_decay = ['LayerNorm.bias', 'LayerNorm.weight', 'bias']
        optimizer_grouped_parameters = [
            {'params': [param for name, param in optimizer_parameters if any(nd in name for nd in no_decay) and 'classifier' not in name], 'weight_decay':0.00, 'lr':FLAGS['lr']},
            {'params': [param for name, param in optimizer_parameters if not any(nd in name for nd in no_decay) and 'classifier' not in name], 'weight_decay':0.001, 'lr':FLAGS['lr']},
            {'params': [param for name, param in optimizer_parameters if 'classifier' in name], 'weight_decay':0.001 ,'lr':FLAGS['lr']*5}
        ]
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=FLAGS['lr'])
#         Mixed Precision Training
#         self.model, self.optimizer = amp.initialize(self.model, 
#                                                     self.optimizer, 
#                                                     opt_level="O0",
#                                                     keep_batchnorm_fp32=True, 
#                                                     loss_scale="dynamic"
#                                                    )
        self.model = nn.DataParallel(self.model) 
        scheduler_params = dict(mode='min',
                                factor=0.5,
                                patience=1,
                                verbose=False, 
                                threshold=0.0001,
                                threshold_mode='abs',
                                cooldown=0, 
                                min_lr=1e-8,
                                eps=1e-08)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, **scheduler_params)
        self.epoch = 0
        self.base = './'
        self.log_path = f'{self.base}/log.txt'
        self.best_loss = 10**5
        self.log(f'Runner prepared. Device is {self.device}.')
        pytorch_total_params = sum(p.numel() for p in self.model.parameters())
        pytorch_total_trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        self.checkpoints = sorted(glob('../input/alaska-2-checkpoints/**.bin')[:-1])
        self.log(f'Total trainable parameters {pytorch_total_params}, Total model parameters {pytorch_total_trainable_params}.')
        
    def train_one_epoch(self, train_loader):
        self.model.train()
        total_score = RocAucMeter()
        total_loss = AverageMeter()
        start_time = time.time()
        for step, (images, targets) in enumerate(train_loader):
            batch_size, _, _, _ = images.shape
            images = torch.tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.tensor(targets, device=self.device, dtype=torch.float32)
            self.optimizer.zero_grad()
            logits = self.model(images)
            loss = self.criterion(logits, targets)
#             with amp.scale_loss(loss, self.optimizer) as scaled_loss:
#                 scaled_loss.backward()
            loss.backward()
            self.optimizer.step()
            total_score.update(targets, logits)
            total_loss.update(loss.detach().item(), batch_size)
            if FLAGS['verbose']:
                if step%FLAGS['log_step']==0:
                    print(f'Train step: {step}/{len(train_loader)}, \
                          Total Loss: {total_loss.avg:.3f}, \
                          RoC Auc Score: {total_score.avg:.3f}, \
                          Total Time: {time.time()-start_time:.2f}secs.', 
                          end='\r'
                         )
            if FLAGS['step_schedule']:
                self.scheduler.step(metrics=total_loss.avg)
        return total_loss, total_score
            
    def validate(self, val_loader):
        self.model.eval()
        total_score = RocAucMeter()
        total_loss = AverageMeter()
        start_time = time.time()
        for step, (images, targets) in enumerate(val_loader):
            batch_size, _, _, _ = images.shape
            images = torch.tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.tensor(targets, device=self.device, dtype=torch.float32)
            with torch.no_grad():
                logits = self.model(images)
                loss = self.criterion(logits, targets)
                total_loss.update(loss.detach().item(), batch_size)
                total_score.update(targets, logits)
            if FLAGS['verbose']:
                if step%FLAGS['log_step']==0:
                    print(f'Validation step: {step}/{len(val_loader)}, \
                          Total Loss: {total_loss.avg:.3f}, \
                          RoC Auc Score: {total_score.avg:.3f}, \
                          Total Time: {time.time()-start_time:.2f}secs.', 
                          end='\r'
                         )
        return total_loss, total_score
    
    def engine(self, train_loader, validation_loader):
        for n_epoch in range(FLAGS['epochs']):
            if FLAGS['verbose']:
                lr1 = self.optimizer.param_groups[0]['lr']
                lr2 = self.optimizer.param_groups[-1]['lr']
                timestamp = datetime.utcnow().isoformat()
                self.log(f'\n{timestamp}\nLR Backbone: {lr1}, LR Head: {lr2}')
#             if not self.checkpoints.__len__()==0:
#                 self.load(self.checkpoints[-1])
#                 self.log(f'Checkpoint {self.checkpoints[-1]} loaded sucessfully.!')
            # Train
            start_time = time.time()
            total_loss, total_score = self.train_one_epoch(train_loader)
            self.log(f'[RESULT]: Train. Epoch: {self.epoch}, \
                     Total Loss: {total_loss.avg:.3f}, \
                     RoC Auc Score: {total_score.avg:.3f}, \
                     Time: {(time.time() - start_time):.2f} secs.')
            self.save(f'{self.base}/last-checkpoint.bin')
            
            # Validation
            start_time = time.time()
            total_loss, total_score = self.validate(validation_loader)
            self.log(f'[RESULT]: Validation. Epoch: {self.epoch}, \
                     Total Loss: {total_loss.avg:.3f}, \
                     RoC Auc Score: {total_score.avg:.3f}, \
                     Time: {(time.time() - start_time):.2f} secs.')
            self.save(f'{self.base}/last-checkpoint.bin')
            
            if total_loss.avg < self.best_loss:
                self.best_loss = total_loss.avg
                self.model.eval()
                self.save(f'{self.base}/best-checkpoint-{str(self.epoch).zfill(3)}epoch.bin')
                for path in sorted(glob(f'{self.base}/best-checkpoint-*epoch.bin'))[:-3]:
                    os.remove(path)
            if FLAGS['validation_schedule']:
                self.scheduler.step(metrics=total_loss.avg)
            
            self.epoch +=1
            
    def save(self, path):
        self.model.eval()
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_summary_loss': self.best_loss,
            'epoch': self.epoch,
        }, path)

    def load(self, path):
        checkpoint = torch.load(path)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        self.loss = checkpoint['best_summary_loss']
        self.epoch = checkpoint['epoch'] + 1
        
    def log(self, message):
        if FLAGS['verbose']:
            print(message)
        with open(self.log_path, 'a+') as logger:
            logger.write(f'{message}\n')

In [38]:
def process():
    train_loader = DataLoader(train_dataset,
                              batch_size=FLAGS['batch_size'],
                              sampler=BalanceClassSampler(train_dataset.get_labels(), mode='downsampling'),
                              num_workers=FLAGS['num_workers'],
                              pin_memory=False,
                              drop_last=True
                             )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=FLAGS['batch_size'],
                              num_workers=FLAGS['num_workers'],
                              shuffle=False,
                              pin_memory=False,
                              sampler=SequentialSampler(valid_dataset)
                             )
    
    runner = Runner(model=ResNet())
    runner.engine(train_loader, valid_loader)

In [39]:
process()

Runner prepared. Device is cuda:0.
Total trainable parameters 21799724, Total model parameters 21799724.

2020-07-14T07:28:26.450541
LR Backbone: 0.001, LR Head: 0.001


RuntimeError: Error(s) in loading state_dict for DataParallel:
	Missing key(s) in state_dict: "module.model.conv1.weight", "module.model.bn1.weight", "module.model.bn1.bias", "module.model.bn1.running_mean", "module.model.bn1.running_var", "module.model.layer1.0.conv1.weight", "module.model.layer1.0.bn1.weight", "module.model.layer1.0.bn1.bias", "module.model.layer1.0.bn1.running_mean", "module.model.layer1.0.bn1.running_var", "module.model.layer1.0.conv2.weight", "module.model.layer1.0.bn2.weight", "module.model.layer1.0.bn2.bias", "module.model.layer1.0.bn2.running_mean", "module.model.layer1.0.bn2.running_var", "module.model.layer1.1.conv1.weight", "module.model.layer1.1.bn1.weight", "module.model.layer1.1.bn1.bias", "module.model.layer1.1.bn1.running_mean", "module.model.layer1.1.bn1.running_var", "module.model.layer1.1.conv2.weight", "module.model.layer1.1.bn2.weight", "module.model.layer1.1.bn2.bias", "module.model.layer1.1.bn2.running_mean", "module.model.layer1.1.bn2.running_var", "module.model.layer1.2.conv1.weight", "module.model.layer1.2.bn1.weight", "module.model.layer1.2.bn1.bias", "module.model.layer1.2.bn1.running_mean", "module.model.layer1.2.bn1.running_var", "module.model.layer1.2.conv2.weight", "module.model.layer1.2.bn2.weight", "module.model.layer1.2.bn2.bias", "module.model.layer1.2.bn2.running_mean", "module.model.layer1.2.bn2.running_var", "module.model.layer2.0.conv1.weight", "module.model.layer2.0.bn1.weight", "module.model.layer2.0.bn1.bias", "module.model.layer2.0.bn1.running_mean", "module.model.layer2.0.bn1.running_var", "module.model.layer2.0.conv2.weight", "module.model.layer2.0.bn2.weight", "module.model.layer2.0.bn2.bias", "module.model.layer2.0.bn2.running_mean", "module.model.layer2.0.bn2.running_var", "module.model.layer2.0.downsample.0.weight", "module.model.layer2.0.downsample.1.weight", "module.model.layer2.0.downsample.1.bias", "module.model.layer2.0.downsample.1.running_mean", "module.model.layer2.0.downsample.1.running_var", "module.model.layer2.1.conv1.weight", "module.model.layer2.1.bn1.weight", "module.model.layer2.1.bn1.bias", "module.model.layer2.1.bn1.running_mean", "module.model.layer2.1.bn1.running_var", "module.model.layer2.1.conv2.weight", "module.model.layer2.1.bn2.weight", "module.model.layer2.1.bn2.bias", "module.model.layer2.1.bn2.running_mean", "module.model.layer2.1.bn2.running_var", "module.model.layer2.2.conv1.weight", "module.model.layer2.2.bn1.weight", "module.model.layer2.2.bn1.bias", "module.model.layer2.2.bn1.running_mean", "module.model.layer2.2.bn1.running_var", "module.model.layer2.2.conv2.weight", "module.model.layer2.2.bn2.weight", "module.model.layer2.2.bn2.bias", "module.model.layer2.2.bn2.running_mean", "module.model.layer2.2.bn2.running_var", "module.model.layer2.3.conv1.weight", "module.model.layer2.3.bn1.weight", "module.model.layer2.3.bn1.bias", "module.model.layer2.3.bn1.running_mean", "module.model.layer2.3.bn1.running_var", "module.model.layer2.3.conv2.weight", "module.model.layer2.3.bn2.weight", "module.model.layer2.3.bn2.bias", "module.model.layer2.3.bn2.running_mean", "module.model.layer2.3.bn2.running_var", "module.model.layer3.0.conv1.weight", "module.model.layer3.0.bn1.weight", "module.model.layer3.0.bn1.bias", "module.model.layer3.0.bn1.running_mean", "module.model.layer3.0.bn1.running_var", "module.model.layer3.0.conv2.weight", "module.model.layer3.0.bn2.weight", "module.model.layer3.0.bn2.bias", "module.model.layer3.0.bn2.running_mean", "module.model.layer3.0.bn2.running_var", "module.model.layer3.0.downsample.0.weight", "module.model.layer3.0.downsample.1.weight", "module.model.layer3.0.downsample.1.bias", "module.model.layer3.0.downsample.1.running_mean", "module.model.layer3.0.downsample.1.running_var", "module.model.layer3.1.conv1.weight", "module.model.layer3.1.bn1.weight", "module.model.layer3.1.bn1.bias", "module.model.layer3.1.bn1.running_mean", "module.model.layer3.1.bn1.running_var", "module.model.layer3.1.conv2.weight", "module.model.layer3.1.bn2.weight", "module.model.layer3.1.bn2.bias", "module.model.layer3.1.bn2.running_mean", "module.model.layer3.1.bn2.running_var", "module.model.layer3.2.conv1.weight", "module.model.layer3.2.bn1.weight", "module.model.layer3.2.bn1.bias", "module.model.layer3.2.bn1.running_mean", "module.model.layer3.2.bn1.running_var", "module.model.layer3.2.conv2.weight", "module.model.layer3.2.bn2.weight", "module.model.layer3.2.bn2.bias", "module.model.layer3.2.bn2.running_mean", "module.model.layer3.2.bn2.running_var", "module.model.layer3.3.conv1.weight", "module.model.layer3.3.bn1.weight", "module.model.layer3.3.bn1.bias", "module.model.layer3.3.bn1.running_mean", "module.model.layer3.3.bn1.running_var", "module.model.layer3.3.conv2.weight", "module.model.layer3.3.bn2.weight", "module.model.layer3.3.bn2.bias", "module.model.layer3.3.bn2.running_mean", "module.model.layer3.3.bn2.running_var", "module.model.layer3.4.conv1.weight", "module.model.layer3.4.bn1.weight", "module.model.layer3.4.bn1.bias", "module.model.layer3.4.bn1.running_mean", "module.model.layer3.4.bn1.running_var", "module.model.layer3.4.conv2.weight", "module.model.layer3.4.bn2.weight", "module.model.layer3.4.bn2.bias", "module.model.layer3.4.bn2.running_mean", "module.model.layer3.4.bn2.running_var", "module.model.layer3.5.conv1.weight", "module.model.layer3.5.bn1.weight", "module.model.layer3.5.bn1.bias", "module.model.layer3.5.bn1.running_mean", "module.model.layer3.5.bn1.running_var", "module.model.layer3.5.conv2.weight", "module.model.layer3.5.bn2.weight", "module.model.layer3.5.bn2.bias", "module.model.layer3.5.bn2.running_mean", "module.model.layer3.5.bn2.running_var", "module.model.layer4.0.conv1.weight", "module.model.layer4.0.bn1.weight", "module.model.layer4.0.bn1.bias", "module.model.layer4.0.bn1.running_mean", "module.model.layer4.0.bn1.running_var", "module.model.layer4.0.conv2.weight", "module.model.layer4.0.bn2.weight", "module.model.layer4.0.bn2.bias", "module.model.layer4.0.bn2.running_mean", "module.model.layer4.0.bn2.running_var", "module.model.layer4.0.downsample.0.weight", "module.model.layer4.0.downsample.1.weight", "module.model.layer4.0.downsample.1.bias", "module.model.layer4.0.downsample.1.running_mean", "module.model.layer4.0.downsample.1.running_var", "module.model.layer4.1.conv1.weight", "module.model.layer4.1.bn1.weight", "module.model.layer4.1.bn1.bias", "module.model.layer4.1.bn1.running_mean", "module.model.layer4.1.bn1.running_var", "module.model.layer4.1.conv2.weight", "module.model.layer4.1.bn2.weight", "module.model.layer4.1.bn2.bias", "module.model.layer4.1.bn2.running_mean", "module.model.layer4.1.bn2.running_var", "module.model.layer4.2.conv1.weight", "module.model.layer4.2.bn1.weight", "module.model.layer4.2.bn1.bias", "module.model.layer4.2.bn1.running_mean", "module.model.layer4.2.bn1.running_var", "module.model.layer4.2.conv2.weight", "module.model.layer4.2.bn2.weight", "module.model.layer4.2.bn2.bias", "module.model.layer4.2.bn2.running_mean", "module.model.layer4.2.bn2.running_var", "module.model.fc.weight", "module.model.fc.bias", "module.resnet34.0.weight", "module.resnet34.1.weight", "module.resnet34.1.bias", "module.resnet34.1.running_mean", "module.resnet34.1.running_var", "module.resnet34.4.0.conv1.weight", "module.resnet34.4.0.bn1.weight", "module.resnet34.4.0.bn1.bias", "module.resnet34.4.0.bn1.running_mean", "module.resnet34.4.0.bn1.running_var", "module.resnet34.4.0.conv2.weight", "module.resnet34.4.0.bn2.weight", "module.resnet34.4.0.bn2.bias", "module.resnet34.4.0.bn2.running_mean", "module.resnet34.4.0.bn2.running_var", "module.resnet34.4.1.conv1.weight", "module.resnet34.4.1.bn1.weight", "module.resnet34.4.1.bn1.bias", "module.resnet34.4.1.bn1.running_mean", "module.resnet34.4.1.bn1.running_var", "module.resnet34.4.1.conv2.weight", "module.resnet34.4.1.bn2.weight", "module.resnet34.4.1.bn2.bias", "module.resnet34.4.1.bn2.running_mean", "module.resnet34.4.1.bn2.running_var", "module.resnet34.4.2.conv1.weight", "module.resnet34.4.2.bn1.weight", "module.resnet34.4.2.bn1.bias", "module.resnet34.4.2.bn1.running_mean", "module.resnet34.4.2.bn1.running_var", "module.resnet34.4.2.conv2.weight", "module.resnet34.4.2.bn2.weight", "module.resnet34.4.2.bn2.bias", "module.resnet34.4.2.bn2.running_mean", "module.resnet34.4.2.bn2.running_var", "module.resnet34.5.0.conv1.weight", "module.resnet34.5.0.bn1.weight", "module.resnet34.5.0.bn1.bias", "module.resnet34.5.0.bn1.running_mean", "module.resnet34.5.0.bn1.running_var", "module.resnet34.5.0.conv2.weight", "module.resnet34.5.0.bn2.weight", "module.resnet34.5.0.bn2.bias", "module.resnet34.5.0.bn2.running_mean", "module.resnet34.5.0.bn2.running_var", "module.resnet34.5.0.downsample.0.weight", "module.resnet34.5.0.downsample.1.weight", "module.resnet34.5.0.downsample.1.bias", "module.resnet34.5.0.downsample.1.running_mean", "module.resnet34.5.0.downsample.1.running_var", "module.resnet34.5.1.conv1.weight", "module.resnet34.5.1.bn1.weight", "module.resnet34.5.1.bn1.bias", "module.resnet34.5.1.bn1.running_mean", "module.resnet34.5.1.bn1.running_var", "module.resnet34.5.1.conv2.weight", "module.resnet34.5.1.bn2.weight", "module.resnet34.5.1.bn2.bias", "module.resnet34.5.1.bn2.running_mean", "module.resnet34.5.1.bn2.running_var", "module.resnet34.5.2.conv1.weight", "module.resnet34.5.2.bn1.weight", "module.resnet34.5.2.bn1.bias", "module.resnet34.5.2.bn1.running_mean", "module.resnet34.5.2.bn1.running_var", "module.resnet34.5.2.conv2.weight", "module.resnet34.5.2.bn2.weight", "module.resnet34.5.2.bn2.bias", "module.resnet34.5.2.bn2.running_mean", "module.resnet34.5.2.bn2.running_var", "module.resnet34.5.3.conv1.weight", "module.resnet34.5.3.bn1.weight", "module.resnet34.5.3.bn1.bias", "module.resnet34.5.3.bn1.running_mean", "module.resnet34.5.3.bn1.running_var", "module.resnet34.5.3.conv2.weight", "module.resnet34.5.3.bn2.weight", "module.resnet34.5.3.bn2.bias", "module.resnet34.5.3.bn2.running_mean", "module.resnet34.5.3.bn2.running_var", "module.resnet34.6.0.conv1.weight", "module.resnet34.6.0.bn1.weight", "module.resnet34.6.0.bn1.bias", "module.resnet34.6.0.bn1.running_mean", "module.resnet34.6.0.bn1.running_var", "module.resnet34.6.0.conv2.weight", "module.resnet34.6.0.bn2.weight", "module.resnet34.6.0.bn2.bias", "module.resnet34.6.0.bn2.running_mean", "module.resnet34.6.0.bn2.running_var", "module.resnet34.6.0.downsample.0.weight", "module.resnet34.6.0.downsample.1.weight", "module.resnet34.6.0.downsample.1.bias", "module.resnet34.6.0.downsample.1.running_mean", "module.resnet34.6.0.downsample.1.running_var", "module.resnet34.6.1.conv1.weight", "module.resnet34.6.1.bn1.weight", "module.resnet34.6.1.bn1.bias", "module.resnet34.6.1.bn1.running_mean", "module.resnet34.6.1.bn1.running_var", "module.resnet34.6.1.conv2.weight", "module.resnet34.6.1.bn2.weight", "module.resnet34.6.1.bn2.bias", "module.resnet34.6.1.bn2.running_mean", "module.resnet34.6.1.bn2.running_var", "module.resnet34.6.2.conv1.weight", "module.resnet34.6.2.bn1.weight", "module.resnet34.6.2.bn1.bias", "module.resnet34.6.2.bn1.running_mean", "module.resnet34.6.2.bn1.running_var", "module.resnet34.6.2.conv2.weight", "module.resnet34.6.2.bn2.weight", "module.resnet34.6.2.bn2.bias", "module.resnet34.6.2.bn2.running_mean", "module.resnet34.6.2.bn2.running_var", "module.resnet34.6.3.conv1.weight", "module.resnet34.6.3.bn1.weight", "module.resnet34.6.3.bn1.bias", "module.resnet34.6.3.bn1.running_mean", "module.resnet34.6.3.bn1.running_var", "module.resnet34.6.3.conv2.weight", "module.resnet34.6.3.bn2.weight", "module.resnet34.6.3.bn2.bias", "module.resnet34.6.3.bn2.running_mean", "module.resnet34.6.3.bn2.running_var", "module.resnet34.6.4.conv1.weight", "module.resnet34.6.4.bn1.weight", "module.resnet34.6.4.bn1.bias", "module.resnet34.6.4.bn1.running_mean", "module.resnet34.6.4.bn1.running_var", "module.resnet34.6.4.conv2.weight", "module.resnet34.6.4.bn2.weight", "module.resnet34.6.4.bn2.bias", "module.resnet34.6.4.bn2.running_mean", "module.resnet34.6.4.bn2.running_var", "module.resnet34.6.5.conv1.weight", "module.resnet34.6.5.bn1.weight", "module.resnet34.6.5.bn1.bias", "module.resnet34.6.5.bn1.running_mean", "module.resnet34.6.5.bn1.running_var", "module.resnet34.6.5.conv2.weight", "module.resnet34.6.5.bn2.weight", "module.resnet34.6.5.bn2.bias", "module.resnet34.6.5.bn2.running_mean", "module.resnet34.6.5.bn2.running_var", "module.resnet34.7.0.conv1.weight", "module.resnet34.7.0.bn1.weight", "module.resnet34.7.0.bn1.bias", "module.resnet34.7.0.bn1.running_mean", "module.resnet34.7.0.bn1.running_var", "module.resnet34.7.0.conv2.weight", "module.resnet34.7.0.bn2.weight", "module.resnet34.7.0.bn2.bias", "module.resnet34.7.0.bn2.running_mean", "module.resnet34.7.0.bn2.running_var", "module.resnet34.7.0.downsample.0.weight", "module.resnet34.7.0.downsample.1.weight", "module.resnet34.7.0.downsample.1.bias", "module.resnet34.7.0.downsample.1.running_mean", "module.resnet34.7.0.downsample.1.running_var", "module.resnet34.7.1.conv1.weight", "module.resnet34.7.1.bn1.weight", "module.resnet34.7.1.bn1.bias", "module.resnet34.7.1.bn1.running_mean", "module.resnet34.7.1.bn1.running_var", "module.resnet34.7.1.conv2.weight", "module.resnet34.7.1.bn2.weight", "module.resnet34.7.1.bn2.bias", "module.resnet34.7.1.bn2.running_mean", "module.resnet34.7.1.bn2.running_var", "module.resnet34.7.2.conv1.weight", "module.resnet34.7.2.bn1.weight", "module.resnet34.7.2.bn1.bias", "module.resnet34.7.2.bn1.running_mean", "module.resnet34.7.2.bn1.running_var", "module.resnet34.7.2.conv2.weight", "module.resnet34.7.2.bn2.weight", "module.resnet34.7.2.bn2.bias", "module.resnet34.7.2.bn2.running_mean", "module.resnet34.7.2.bn2.running_var". 
	Unexpected key(s) in state_dict: "module.model._conv_stem.weight", "module.model._bn0.weight", "module.model._bn0.bias", "module.model._bn0.running_mean", "module.model._bn0.running_var", "module.model._bn0.num_batches_tracked", "module.model._blocks.0._depthwise_conv.weight", "module.model._blocks.0._bn1.weight", "module.model._blocks.0._bn1.bias", "module.model._blocks.0._bn1.running_mean", "module.model._blocks.0._bn1.running_var", "module.model._blocks.0._bn1.num_batches_tracked", "module.model._blocks.0._se_reduce.weight", "module.model._blocks.0._se_reduce.bias", "module.model._blocks.0._se_expand.weight", "module.model._blocks.0._se_expand.bias", "module.model._blocks.0._project_conv.weight", "module.model._blocks.0._bn2.weight", "module.model._blocks.0._bn2.bias", "module.model._blocks.0._bn2.running_mean", "module.model._blocks.0._bn2.running_var", "module.model._blocks.0._bn2.num_batches_tracked", "module.model._blocks.1._depthwise_conv.weight", "module.model._blocks.1._bn1.weight", "module.model._blocks.1._bn1.bias", "module.model._blocks.1._bn1.running_mean", "module.model._blocks.1._bn1.running_var", "module.model._blocks.1._bn1.num_batches_tracked", "module.model._blocks.1._se_reduce.weight", "module.model._blocks.1._se_reduce.bias", "module.model._blocks.1._se_expand.weight", "module.model._blocks.1._se_expand.bias", "module.model._blocks.1._project_conv.weight", "module.model._blocks.1._bn2.weight", "module.model._blocks.1._bn2.bias", "module.model._blocks.1._bn2.running_mean", "module.model._blocks.1._bn2.running_var", "module.model._blocks.1._bn2.num_batches_tracked", "module.model._blocks.2._expand_conv.weight", "module.model._blocks.2._bn0.weight", "module.model._blocks.2._bn0.bias", "module.model._blocks.2._bn0.running_mean", "module.model._blocks.2._bn0.running_var", "module.model._blocks.2._bn0.num_batches_tracked", "module.model._blocks.2._depthwise_conv.weight", "module.model._blocks.2._bn1.weight", "module.model._blocks.2._bn1.bias", "module.model._blocks.2._bn1.running_mean", "module.model._blocks.2._bn1.running_var", "module.model._blocks.2._bn1.num_batches_tracked", "module.model._blocks.2._se_reduce.weight", "module.model._blocks.2._se_reduce.bias", "module.model._blocks.2._se_expand.weight", "module.model._blocks.2._se_expand.bias", "module.model._blocks.2._project_conv.weight", "module.model._blocks.2._bn2.weight", "module.model._blocks.2._bn2.bias", "module.model._blocks.2._bn2.running_mean", "module.model._blocks.2._bn2.running_var", "module.model._blocks.2._bn2.num_batches_tracked", "module.model._blocks.3._expand_conv.weight", "module.model._blocks.3._bn0.weight", "module.model._blocks.3._bn0.bias", "module.model._blocks.3._bn0.running_mean", "module.model._blocks.3._bn0.running_var", "module.model._blocks.3._bn0.num_batches_tracked", "module.model._blocks.3._depthwise_conv.weight", "module.model._blocks.3._bn1.weight", "module.model._blocks.3._bn1.bias", "module.model._blocks.3._bn1.running_mean", "module.model._blocks.3._bn1.running_var", "module.model._blocks.3._bn1.num_batches_tracked", "module.model._blocks.3._se_reduce.weight", "module.model._blocks.3._se_reduce.bias", "module.model._blocks.3._se_expand.weight", "module.model._blocks.3._se_expand.bias", "module.model._blocks.3._project_conv.weight", "module.model._blocks.3._bn2.weight", "module.model._blocks.3._bn2.bias", "module.model._blocks.3._bn2.running_mean", "module.model._blocks.3._bn2.running_var", "module.model._blocks.3._bn2.num_batches_tracked", "module.model._blocks.4._expand_conv.weight", "module.model._blocks.4._bn0.weight", "module.model._blocks.4._bn0.bias", "module.model._blocks.4._bn0.running_mean", "module.model._blocks.4._bn0.running_var", "module.model._blocks.4._bn0.num_batches_tracked", "module.model._blocks.4._depthwise_conv.weight", "module.model._blocks.4._bn1.weight", "module.model._blocks.4._bn1.bias", "module.model._blocks.4._bn1.running_mean", "module.model._blocks.4._bn1.running_var", "module.model._blocks.4._bn1.num_batches_tracked", "module.model._blocks.4._se_reduce.weight", "module.model._blocks.4._se_reduce.bias", "module.model._blocks.4._se_expand.weight", "module.model._blocks.4._se_expand.bias", "module.model._blocks.4._project_conv.weight", "module.model._blocks.4._bn2.weight", "module.model._blocks.4._bn2.bias", "module.model._blocks.4._bn2.running_mean", "module.model._blocks.4._bn2.running_var", "module.model._blocks.4._bn2.num_batches_tracked", "module.model._blocks.5._expand_conv.weight", "module.model._blocks.5._bn0.weight", "module.model._blocks.5._bn0.bias", "module.model._blocks.5._bn0.running_mean", "module.model._blocks.5._bn0.running_var", "module.model._blocks.5._bn0.num_batches_tracked", "module.model._blocks.5._depthwise_conv.weight", "module.model._blocks.5._bn1.weight", "module.model._blocks.5._bn1.bias", "module.model._blocks.5._bn1.running_mean", "module.model._blocks.5._bn1.running_var", "module.model._blocks.5._bn1.num_batches_tracked", "module.model._blocks.5._se_reduce.weight", "module.model._blocks.5._se_reduce.bias", "module.model._blocks.5._se_expand.weight", "module.model._blocks.5._se_expand.bias", "module.model._blocks.5._project_conv.weight", "module.model._blocks.5._bn2.weight", "module.model._blocks.5._bn2.bias", "module.model._blocks.5._bn2.running_mean", "module.model._blocks.5._bn2.running_var", "module.model._blocks.5._bn2.num_batches_tracked", "module.model._blocks.6._expand_conv.weight", "module.model._blocks.6._bn0.weight", "module.model._blocks.6._bn0.bias", "module.model._blocks.6._bn0.running_mean", "module.model._blocks.6._bn0.running_var", "module.model._blocks.6._bn0.num_batches_tracked", "module.model._blocks.6._depthwise_conv.weight", "module.model._blocks.6._bn1.weight", "module.model._blocks.6._bn1.bias", "module.model._blocks.6._bn1.running_mean", "module.model._blocks.6._bn1.running_var", "module.model._blocks.6._bn1.num_batches_tracked", "module.model._blocks.6._se_reduce.weight", "module.model._blocks.6._se_reduce.bias", "module.model._blocks.6._se_expand.weight", "module.model._blocks.6._se_expand.bias", "module.model._blocks.6._project_conv.weight", "module.model._blocks.6._bn2.weight", "module.model._blocks.6._bn2.bias", "module.model._blocks.6._bn2.running_mean", "module.model._blocks.6._bn2.running_var", "module.model._blocks.6._bn2.num_batches_tracked", "module.model._blocks.7._expand_conv.weight", "module.model._blocks.7._bn0.weight", "module.model._blocks.7._bn0.bias", "module.model._blocks.7._bn0.running_mean", "module.model._blocks.7._bn0.running_var", "module.model._blocks.7._bn0.num_batches_tracked", "module.model._blocks.7._depthwise_conv.weight", "module.model._blocks.7._bn1.weight", "module.model._blocks.7._bn1.bias", "module.model._blocks.7._bn1.running_mean", "module.model._blocks.7._bn1.running_var", "module.model._blocks.7._bn1.num_batches_tracked", "module.model._blocks.7._se_reduce.weight", "module.model._blocks.7._se_reduce.bias", "module.model._blocks.7._se_expand.weight", "module.model._blocks.7._se_expand.bias", "module.model._blocks.7._project_conv.weight", "module.model._blocks.7._bn2.weight", "module.model._blocks.7._bn2.bias", "module.model._blocks.7._bn2.running_mean", "module.model._blocks.7._bn2.running_var", "module.model._blocks.7._bn2.num_batches_tracked", "module.model._blocks.8._expand_conv.weight", "module.model._blocks.8._bn0.weight", "module.model._blocks.8._bn0.bias", "module.model._blocks.8._bn0.running_mean", "module.model._blocks.8._bn0.running_var", "module.model._blocks.8._bn0.num_batches_tracked", "module.model._blocks.8._depthwise_conv.weight", "module.model._blocks.8._bn1.weight", "module.model._blocks.8._bn1.bias", "module.model._blocks.8._bn1.running_mean", "module.model._blocks.8._bn1.running_var", "module.model._blocks.8._bn1.num_batches_tracked", "module.model._blocks.8._se_reduce.weight", "module.model._blocks.8._se_reduce.bias", "module.model._blocks.8._se_expand.weight", "module.model._blocks.8._se_expand.bias", "module.model._blocks.8._project_conv.weight", "module.model._blocks.8._bn2.weight", "module.model._blocks.8._bn2.bias", "module.model._blocks.8._bn2.running_mean", "module.model._blocks.8._bn2.running_var", "module.model._blocks.8._bn2.num_batches_tracked", "module.model._blocks.9._expand_conv.weight", "module.model._blocks.9._bn0.weight", "module.model._blocks.9._bn0.bias", "module.model._blocks.9._bn0.running_mean", "module.model._blocks.9._bn0.running_var", "module.model._blocks.9._bn0.num_batches_tracked", "module.model._blocks.9._depthwise_conv.weight", "module.model._blocks.9._bn1.weight", "module.model._blocks.9._bn1.bias", "module.model._blocks.9._bn1.running_mean", "module.model._blocks.9._bn1.running_var", "module.model._blocks.9._bn1.num_batches_tracked", "module.model._blocks.9._se_reduce.weight", "module.model._blocks.9._se_reduce.bias", "module.model._blocks.9._se_expand.weight", "module.model._blocks.9._se_expand.bias", "module.model._blocks.9._project_conv.weight", "module.model._blocks.9._bn2.weight", "module.model._blocks.9._bn2.bias", "module.model._blocks.9._bn2.running_mean", "module.model._blocks.9._bn2.running_var", "module.model._blocks.9._bn2.num_batches_tracked", "module.model._blocks.10._expand_conv.weight", "module.model._blocks.10._bn0.weight", "module.model._blocks.10._bn0.bias", "module.model._blocks.10._bn0.running_mean", "module.model._blocks.10._bn0.running_var", "module.model._blocks.10._bn0.num_batches_tracked", "module.model._blocks.10._depthwise_conv.weight", "module.model._blocks.10._bn1.weight", "module.model._blocks.10._bn1.bias", "module.model._blocks.10._bn1.running_mean", "module.model._blocks.10._bn1.running_var", "module.model._blocks.10._bn1.num_batches_tracked", "module.model._blocks.10._se_reduce.weight", "module.model._blocks.10._se_reduce.bias", "module.model._blocks.10._se_expand.weight", "module.model._blocks.10._se_expand.bias", "module.model._blocks.10._project_conv.weight", "module.model._blocks.10._bn2.weight", "module.model._blocks.10._bn2.bias", "module.model._blocks.10._bn2.running_mean", "module.model._blocks.10._bn2.running_var", "module.model._blocks.10._bn2.num_batches_tracked", "module.model._blocks.11._expand_conv.weight", "module.model._blocks.11._bn0.weight", "module.model._blocks.11._bn0.bias", "module.model._blocks.11._bn0.running_mean", "module.model._blocks.11._bn0.running_var", "module.model._blocks.11._bn0.num_batches_tracked", "module.model._blocks.11._depthwise_conv.weight", "module.model._blocks.11._bn1.weight", "module.model._blocks.11._bn1.bias", "module.model._blocks.11._bn1.running_mean", "module.model._blocks.11._bn1.running_var", "module.model._blocks.11._bn1.num_batches_tracked", "module.model._blocks.11._se_reduce.weight", "module.model._blocks.11._se_reduce.bias", "module.model._blocks.11._se_expand.weight", "module.model._blocks.11._se_expand.bias", "module.model._blocks.11._project_conv.weight", "module.model._blocks.11._bn2.weight", "module.model._blocks.11._bn2.bias", "module.model._blocks.11._bn2.running_mean", "module.model._blocks.11._bn2.running_var", "module.model._blocks.11._bn2.num_batches_tracked", "module.model._blocks.12._expand_conv.weight", "module.model._blocks.12._bn0.weight", "module.model._blocks.12._bn0.bias", "module.model._blocks.12._bn0.running_mean", "module.model._blocks.12._bn0.running_var", "module.model._blocks.12._bn0.num_batches_tracked", "module.model._blocks.12._depthwise_conv.weight", "module.model._blocks.12._bn1.weight", "module.model._blocks.12._bn1.bias", "module.model._blocks.12._bn1.running_mean", "module.model._blocks.12._bn1.running_var", "module.model._blocks.12._bn1.num_batches_tracked", "module.model._blocks.12._se_reduce.weight", "module.model._blocks.12._se_reduce.bias", "module.model._blocks.12._se_expand.weight", "module.model._blocks.12._se_expand.bias", "module.model._blocks.12._project_conv.weight", "module.model._blocks.12._bn2.weight", "module.model._blocks.12._bn2.bias", "module.model._blocks.12._bn2.running_mean", "module.model._blocks.12._bn2.running_var", "module.model._blocks.12._bn2.num_batches_tracked", "module.model._blocks.13._expand_conv.weight", "module.model._blocks.13._bn0.weight", "module.model._blocks.13._bn0.bias", "module.model._blocks.13._bn0.running_mean", "module.model._blocks.13._bn0.running_var", "module.model._blocks.13._bn0.num_batches_tracked", "module.model._blocks.13._depthwise_conv.weight", "module.model._blocks.13._bn1.weight", "module.model._blocks.13._bn1.bias", "module.model._blocks.13._bn1.running_mean", "module.model._blocks.13._bn1.running_var", "module.model._blocks.13._bn1.num_batches_tracked", "module.model._blocks.13._se_reduce.weight", "module.model._blocks.13._se_reduce.bias", "module.model._blocks.13._se_expand.weight", "module.model._blocks.13._se_expand.bias", "module.model._blocks.13._project_conv.weight", "module.model._blocks.13._bn2.weight", "module.model._blocks.13._bn2.bias", "module.model._blocks.13._bn2.running_mean", "module.model._blocks.13._bn2.running_var", "module.model._blocks.13._bn2.num_batches_tracked", "module.model._blocks.14._expand_conv.weight", "module.model._blocks.14._bn0.weight", "module.model._blocks.14._bn0.bias", "module.model._blocks.14._bn0.running_mean", "module.model._blocks.14._bn0.running_var", "module.model._blocks.14._bn0.num_batches_tracked", "module.model._blocks.14._depthwise_conv.weight", "module.model._blocks.14._bn1.weight", "module.model._blocks.14._bn1.bias", "module.model._blocks.14._bn1.running_mean", "module.model._blocks.14._bn1.running_var", "module.model._blocks.14._bn1.num_batches_tracked", "module.model._blocks.14._se_reduce.weight", "module.model._blocks.14._se_reduce.bias", "module.model._blocks.14._se_expand.weight", "module.model._blocks.14._se_expand.bias", "module.model._blocks.14._project_conv.weight", "module.model._blocks.14._bn2.weight", "module.model._blocks.14._bn2.bias", "module.model._blocks.14._bn2.running_mean", "module.model._blocks.14._bn2.running_var", "module.model._blocks.14._bn2.num_batches_tracked", "module.model._blocks.15._expand_conv.weight", "module.model._blocks.15._bn0.weight", "module.model._blocks.15._bn0.bias", "module.model._blocks.15._bn0.running_mean", "module.model._blocks.15._bn0.running_var", "module.model._blocks.15._bn0.num_batches_tracked", "module.model._blocks.15._depthwise_conv.weight", "module.model._blocks.15._bn1.weight", "module.model._blocks.15._bn1.bias", "module.model._blocks.15._bn1.running_mean", "module.model._blocks.15._bn1.running_var", "module.model._blocks.15._bn1.num_batches_tracked", "module.model._blocks.15._se_reduce.weight", "module.model._blocks.15._se_reduce.bias", "module.model._blocks.15._se_expand.weight", "module.model._blocks.15._se_expand.bias", "module.model._blocks.15._project_conv.weight", "module.model._blocks.15._bn2.weight", "module.model._blocks.15._bn2.bias", "module.model._blocks.15._bn2.running_mean", "module.model._blocks.15._bn2.running_var", "module.model._blocks.15._bn2.num_batches_tracked", "module.model._blocks.16._expand_conv.weight", "module.model._blocks.16._bn0.weight", "module.model._blocks.16._bn0.bias", "module.model._blocks.16._bn0.running_mean", "module.model._blocks.16._bn0.running_var", "module.model._blocks.16._bn0.num_batches_tracked", "module.model._blocks.16._depthwise_conv.weight", "module.model._blocks.16._bn1.weight", "module.model._blocks.16._bn1.bias", "module.model._blocks.16._bn1.running_mean", "module.model._blocks.16._bn1.running_var", "module.model._blocks.16._bn1.num_batches_tracked", "module.model._blocks.16._se_reduce.weight", "module.model._blocks.16._se_reduce.bias", "module.model._blocks.16._se_expand.weight", "module.model._blocks.16._se_expand.bias", "module.model._blocks.16._project_conv.weight", "module.model._blocks.16._bn2.weight", "module.model._blocks.16._bn2.bias", "module.model._blocks.16._bn2.running_mean", "module.model._blocks.16._bn2.running_var", "module.model._blocks.16._bn2.num_batches_tracked", "module.model._blocks.17._expand_conv.weight", "module.model._blocks.17._bn0.weight", "module.model._blocks.17._bn0.bias", "module.model._blocks.17._bn0.running_mean", "module.model._blocks.17._bn0.running_var", "module.model._blocks.17._bn0.num_batches_tracked", "module.model._blocks.17._depthwise_conv.weight", "module.model._blocks.17._bn1.weight", "module.model._blocks.17._bn1.bias", "module.model._blocks.17._bn1.running_mean", "module.model._blocks.17._bn1.running_var", "module.model._blocks.17._bn1.num_batches_tracked", "module.model._blocks.17._se_reduce.weight", "module.model._blocks.17._se_reduce.bias", "module.model._blocks.17._se_expand.weight", "module.model._blocks.17._se_expand.bias", "module.model._blocks.17._project_conv.weight", "module.model._blocks.17._bn2.weight", "module.model._blocks.17._bn2.bias", "module.model._blocks.17._bn2.running_mean", "module.model._blocks.17._bn2.running_var", "module.model._blocks.17._bn2.num_batches_tracked", "module.model._blocks.18._expand_conv.weight", "module.model._blocks.18._bn0.weight", "module.model._blocks.18._bn0.bias", "module.model._blocks.18._bn0.running_mean", "module.model._blocks.18._bn0.running_var", "module.model._blocks.18._bn0.num_batches_tracked", "module.model._blocks.18._depthwise_conv.weight", "module.model._blocks.18._bn1.weight", "module.model._blocks.18._bn1.bias", "module.model._blocks.18._bn1.running_mean", "module.model._blocks.18._bn1.running_var", "module.model._blocks.18._bn1.num_batches_tracked", "module.model._blocks.18._se_reduce.weight", "module.model._blocks.18._se_reduce.bias", "module.model._blocks.18._se_expand.weight", "module.model._blocks.18._se_expand.bias", "module.model._blocks.18._project_conv.weight", "module.model._blocks.18._bn2.weight", "module.model._blocks.18._bn2.bias", "module.model._blocks.18._bn2.running_mean", "module.model._blocks.18._bn2.running_var", "module.model._blocks.18._bn2.num_batches_tracked", "module.model._blocks.19._expand_conv.weight", "module.model._blocks.19._bn0.weight", "module.model._blocks.19._bn0.bias", "module.model._blocks.19._bn0.running_mean", "module.model._blocks.19._bn0.running_var", "module.model._blocks.19._bn0.num_batches_tracked", "module.model._blocks.19._depthwise_conv.weight", "module.model._blocks.19._bn1.weight", "module.model._blocks.19._bn1.bias", "module.model._blocks.19._bn1.running_mean", "module.model._blocks.19._bn1.running_var", "module.model._blocks.19._bn1.num_batches_tracked", "module.model._blocks.19._se_reduce.weight", "module.model._blocks.19._se_reduce.bias", "module.model._blocks.19._se_expand.weight", "module.model._blocks.19._se_expand.bias", "module.model._blocks.19._project_conv.weight", "module.model._blocks.19._bn2.weight", "module.model._blocks.19._bn2.bias", "module.model._blocks.19._bn2.running_mean", "module.model._blocks.19._bn2.running_var", "module.model._blocks.19._bn2.num_batches_tracked", "module.model._blocks.20._expand_conv.weight", "module.model._blocks.20._bn0.weight", "module.model._blocks.20._bn0.bias", "module.model._blocks.20._bn0.running_mean", "module.model._blocks.20._bn0.running_var", "module.model._blocks.20._bn0.num_batches_tracked", "module.model._blocks.20._depthwise_conv.weight", "module.model._blocks.20._bn1.weight", "module.model._blocks.20._bn1.bias", "module.model._blocks.20._bn1.running_mean", "module.model._blocks.20._bn1.running_var", "module.model._blocks.20._bn1.num_batches_tracked", "module.model._blocks.20._se_reduce.weight", "module.model._blocks.20._se_reduce.bias", "module.model._blocks.20._se_expand.weight", "module.model._blocks.20._se_expand.bias", "module.model._blocks.20._project_conv.weight", "module.model._blocks.20._bn2.weight", "module.model._blocks.20._bn2.bias", "module.model._blocks.20._bn2.running_mean", "module.model._blocks.20._bn2.running_var", "module.model._blocks.20._bn2.num_batches_tracked", "module.model._blocks.21._expand_conv.weight", "module.model._blocks.21._bn0.weight", "module.model._blocks.21._bn0.bias", "module.model._blocks.21._bn0.running_mean", "module.model._blocks.21._bn0.running_var", "module.model._blocks.21._bn0.num_batches_tracked", "module.model._blocks.21._depthwise_conv.weight", "module.model._blocks.21._bn1.weight", "module.model._blocks.21._bn1.bias", "module.model._blocks.21._bn1.running_mean", "module.model._blocks.21._bn1.running_var", "module.model._blocks.21._bn1.num_batches_tracked", "module.model._blocks.21._se_reduce.weight", "module.model._blocks.21._se_reduce.bias", "module.model._blocks.21._se_expand.weight", "module.model._blocks.21._se_expand.bias", "module.model._blocks.21._project_conv.weight", "module.model._blocks.21._bn2.weight", "module.model._blocks.21._bn2.bias", "module.model._blocks.21._bn2.running_mean", "module.model._blocks.21._bn2.running_var", "module.model._blocks.21._bn2.num_batches_tracked", "module.model._blocks.22._expand_conv.weight", "module.model._blocks.22._bn0.weight", "module.model._blocks.22._bn0.bias", "module.model._blocks.22._bn0.running_mean", "module.model._blocks.22._bn0.running_var", "module.model._blocks.22._bn0.num_batches_tracked", "module.model._blocks.22._depthwise_conv.weight", "module.model._blocks.22._bn1.weight", "module.model._blocks.22._bn1.bias", "module.model._blocks.22._bn1.running_mean", "module.model._blocks.22._bn1.running_var", "module.model._blocks.22._bn1.num_batches_tracked", "module.model._blocks.22._se_reduce.weight", "module.model._blocks.22._se_reduce.bias", "module.model._blocks.22._se_expand.weight", "module.model._blocks.22._se_expand.bias", "module.model._blocks.22._project_conv.weight", "module.model._blocks.22._bn2.weight", "module.model._blocks.22._bn2.bias", "module.model._blocks.22._bn2.running_mean", "module.model._blocks.22._bn2.running_var", "module.model._blocks.22._bn2.num_batches_tracked", "module.model._conv_head.weight", "module.model._bn1.weight", "module.model._bn1.bias", "module.model._bn1.running_mean", "module.model._bn1.running_var", "module.model._bn1.num_batches_tracked", "module.model._fc.weight", "module.model._fc.bias". 
	size mismatch for module.classifier.weight: copying a param with shape torch.Size([4, 1408]) from checkpoint, the shape in current model is torch.Size([4, 512]).

In [None]:
# multi sample dropout - x
# mixed precision training - x
# differential learning rate - x