In [37]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from mpl_toolkits.axes_grid1 import ImageGrid
plt.ion()
from torch.utils.data.sampler import SubsetRandomSampler
#%matplotlib inline

In [85]:
# Data augmentation and normalization for training 
# Just normalization for validation

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224), 
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
    ]),
    'val':transforms.Compose([
        transforms.Resize(320),
        transforms.CenterCrop(280),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229, 0.224, 0.225])
    ])    
}

In [86]:
#image_dataset Attributes:
# classes (list): List of the class names.
# class_to_idx (dict): Dict with items (class_name, class_index).
# imgs (list): List of (image path, class_index) tuples
            
#data_dir='/home/mnt/jupyter_dir/hymenoptera_data'
data_dir='/home/mnt/jupyter_dir/imagewoof'
#data_dir='/home/mnt/jupyter_dir/tiny-imagenet-200'
#data_dir='/home/mnt/jupyter_dir/stanford-cars'
image_datasets = { x: datasets.ImageFolder(os.path.join(data_dir, x), 
                        data_transforms[x]) for x in ['train', 'val'] }

image_datasets

{'train': Dataset ImageFolder
     Number of datapoints: 12454
     Root location: /home/mnt/jupyter_dir/imagewoof/train,
 'val': Dataset ImageFolder
     Number of datapoints: 500
     Root location: /home/mnt/jupyter_dir/imagewoof/val}

In [87]:
dataloaders= {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=16, 
                                             shuffle=True, num_workers=0)
              for x in ['train', 'val'] }
dataset_sizes = {x: len(image_datasets[x]) for x in ['train','val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [88]:
# Get a batch of training data
inputs, classes = next(iter(dataloaders['val']))

inputs.size()

torch.Size([16, 3, 280, 280])

In [None]:
#plot each image using grid axis 
def imshow(axis, inp):
    """Denormalize and show"""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    axis.imshow((inp * 255).astype(np.uint8))

In [None]:
train = dataloaders['train']
# Get a batch of training data with labels
img, label = next(iter(train))
print(img.size(), label.size())
fig = plt.figure(1, figsize=(16, 4))
#make a grid of (2,8)
grid = ImageGrid(fig, 111, nrows_ncols=(2, 8), axes_pad=0.05)    
for i in range(img.size()[0]):
    ax = grid[i]
    imshow(ax, img[i])

In [16]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, flag_Load=True):
    
    since= time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    
    
    for epoch in range(num_epochs-1):
        print('Epoch {}/{}'.format(epoch, num_epochs-1))
        print('-'* 10)
        # load the model from disk if flag_Load is True
        if flag_Load==True:
            print('Loading model from disk')
            flag_Load=False
            checkpoint = torch.load('/home/mnt/jupyter_dir/saved_model_dog/resnet101_28.pth.tar')
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            epoch = checkpoint['epoch']
            loss = checkpoint['loss']
            best_acc = checkpoint['best_prec1']
            
        # Each epoch has a training and validation phase
        for phase in ['train','val']:
            
            if phase == 'train':
                
                scheduler.step()
                model.train() # set model to training mode
            else:
                
                model.eval() # Set model to evaluate mode
            
            running_loss = 0.0
            running_corrects = 0
            
            #Iterate over data
            for inputs, labels in dataloaders[phase]:
                
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                #zero the parameter gradients
                optimizer.zero_grad()
                
                #forward
                # track history if only in train
                
                with torch.set_grad_enabled(phase=='train'):
                    outputs = model(inputs)
                    out = torch.squeeze(outputs, dim=2)
                    out2 = torch.squeeze(out, dim=2)
                    #print(labels.shape)
                    _, preds = torch.max(out2,1)
                    loss = criterion(out2, labels)
                    
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item()*inputs.size(0)
                running_corrects += torch.sum(preds==labels.data)
                
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            
            if phase == 'val' and epoch_acc> best_acc:
                #print('input shape for validation', inputs.shape)
                best_acc= epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        
            print()
            #save number of epochs, model weights, optimization hyper-parameters, loss and best_acc each epoch 
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
                'best_prec1': best_acc}, '/home/mnt/jupyter_dir/saved_model_dog/resnet101_'+str(epoch)+'.pth.tar')
        
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed//60, time_elapsed % 60))
    print('Best val Acc : {:4f}'.format(best_acc))
    
    #save entire model
    torch.save(model,'/home/mnt/jupyter_dir/saved_model_dog/resnet101.pth.tar')
    
    # load best weights
    model.load_state_dict(best_model_wts)
    
    return model

In [46]:
class ResNet50Bottom(nn.Module):
    def __init__(self, original_model):
        super(ResNet50Bottom, self).__init__()
        self.features = nn.Sequential(*list(original_model.children())[:-2])
        self.conv = nn.Conv2d(2048, 10, kernel_size=(7, 7), stride=(1, 1), bias=False)
        
    def forward(self, x):
        x = self.features(x)
        x = self.conv(x)
        return x

res50_model = torchvision.models.resnet101(pretrained=True)
for param in res50_model.parameters():
    param.requires_grad = False
res_fcn = ResNet50Bottom(res50_model)
inputs, classes = next(iter(dataloaders['train']))
res_fcn = res_fcn.to(device)
inputs  = inputs.to(device)
classes = classes.to(device)
outputs = res_fcn(inputs)
outputs.data.shape  

torch.Size([16, 10, 1, 1])

In [47]:
criterion = nn.CrossEntropyLoss()
optimizer_conv = optim.RMSprop(res_fcn.conv.parameters(), lr=5e-5, momentum=0.9, weight_decay=2e-4)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=20, gamma=0.5)

In [32]:
res_conv2 = train_model(res_conv2, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=100, flag_Load=False)

Epoch 0/29
----------
train Loss: 510.4882 Acc: 0.8323

val Loss: 276.2523 Acc: 0.9220

Epoch 1/29
----------
train Loss: 527.1313 Acc: 0.8338

val Loss: 388.4131 Acc: 0.9100

Epoch 2/29
----------
train Loss: 554.1853 Acc: 0.8376

val Loss: 241.1769 Acc: 0.9140

Epoch 3/29
----------
train Loss: 526.0953 Acc: 0.8442

val Loss: 660.0433 Acc: 0.8880

Epoch 4/29
----------
train Loss: 573.8999 Acc: 0.8408

val Loss: 269.2233 Acc: 0.9200

Epoch 5/29
----------
train Loss: 565.9997 Acc: 0.8393

val Loss: 371.0456 Acc: 0.9080

Epoch 6/29
----------
train Loss: 425.9030 Acc: 0.8676

val Loss: 319.7437 Acc: 0.9260

Epoch 7/29
----------
train Loss: 391.7818 Acc: 0.8760

val Loss: 239.3853 Acc: 0.9360

Epoch 8/29
----------
train Loss: 383.0352 Acc: 0.8716

val Loss: 218.2293 Acc: 0.9400

Epoch 9/29
----------
train Loss: 363.0457 Acc: 0.8763

val Loss: 243.4358 Acc: 0.9360

Epoch 10/29
----------
train Loss: 365.0151 Acc: 0.8722

val Loss: 220.4575 Acc: 0.9380

Epoch 11/29
----------
train Lo

  "type " + obj.__name__ + ". It won't be checked "


In [48]:
res_fcn= train_model(res_fcn, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=100, flag_Load=False)

Epoch 0/99
----------
train Loss: 4.9282 Acc: 0.7879

val Loss: 4.2401 Acc: 0.8860

Epoch 1/99
----------
train Loss: 5.1532 Acc: 0.8165

val Loss: 3.0396 Acc: 0.9160

Epoch 2/99
----------
train Loss: 5.5796 Acc: 0.8258

val Loss: 2.7954 Acc: 0.9040

Epoch 3/99
----------
train Loss: 5.5537 Acc: 0.8373

val Loss: 2.9296 Acc: 0.9280

Epoch 4/99
----------
train Loss: 5.7970 Acc: 0.8356

val Loss: 4.6166 Acc: 0.9060

Epoch 5/99
----------
train Loss: 5.8081 Acc: 0.8383

val Loss: 3.7867 Acc: 0.9120

Epoch 6/99
----------
train Loss: 5.9344 Acc: 0.8421

val Loss: 4.0190 Acc: 0.9220

Epoch 7/99
----------
train Loss: 5.7662 Acc: 0.8465

val Loss: 4.9635 Acc: 0.9080

Epoch 8/99
----------
train Loss: 6.1165 Acc: 0.8426

val Loss: 3.4955 Acc: 0.9140

Epoch 9/99
----------
train Loss: 5.9119 Acc: 0.8478

val Loss: 3.9294 Acc: 0.9180

Epoch 10/99
----------
train Loss: 6.2463 Acc: 0.8444

val Loss: 3.0142 Acc: 0.9240

Epoch 11/99
----------
train Loss: 5.8789 Acc: 0.8507

val Loss: 2.9462 Acc

val Loss: 1.2864 Acc: 0.9100

Epoch 97/99
----------
train Loss: 1.2720 Acc: 0.9020

val Loss: 1.0633 Acc: 0.9200

Epoch 98/99
----------
train Loss: 1.2892 Acc: 0.8976

val Loss: 0.8843 Acc: 0.9360

Training complete in 385m 47s
Best val Acc : 0.940000


  "type " + obj.__name__ + ". It won't be checked "


In [None]:
optimizer = optim.RMSprop(fcn_model.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay)
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)  # decay LR by a factor of 0.5 every 30 epochs

In [None]:
n_class    = 20

batch_size = 6
epochs     = 500
lr         = 1e-4
momentum   = 0
w_decay    = 1e-5
step_size  = 50
gamma      = 0.5

In [None]:
# hyperparmas:
seed = 42
workers = 8
epochs = 2  # 100
crop_size = 512
lr = 5e-5
weight_decay = 2e-4
batch_size = 2

In [33]:
from torchvision.models.resnet import BasicBlock, ResNet

In [None]:
checkpoint = torch.load('/home/mnt/jupyter_dir/saved_model_dog/resnet101_28.pth.tar')
            model.load_state_dict(checkpoint['model_state_dict'])

In [163]:
res_fcn.eval()
running_corrects = 0.0
since = time.time()
for inputs, labels in dataloaders['val']:
    
    inputs = inputs.to(device)
    labels = labels.to(device)
    outputs = res_fcn(inputs)
    out = torch.squeeze(outputs, dim=2)
    out2 = torch.squeeze(out, dim=2)
    _, preds = torch.max(out2,1)
    print(preds)
    # statistics
    #preds =torch.mean(preds.float(), dim=2,keepdim=True)
    #preds =torch.mean(preds.float(), dim=1,keepdim=True)
    #print(preds)
    running_corrects += torch.sum(preds.long()==labels.data)

acc = running_corrects.double() / dataset_sizes['val']
print('{} Acc: {:.4f}'.format("Testing", acc))
print()
time_elapsed = time.time() - since
print('Testing complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))    

tensor([[[8, 8, 8],
         [8, 8, 8],
         [8, 8, 8]],

        [[9, 9, 9],
         [9, 9, 9],
         [9, 9, 9]],

        [[4, 4, 4],
         [4, 4, 4],
         [4, 4, 4]],

        [[5, 5, 5],
         [5, 5, 5],
         [5, 5, 5]],

        [[6, 6, 6],
         [6, 6, 6],
         [6, 6, 6]],

        [[4, 4, 4],
         [0, 1, 1],
         [0, 8, 8]],

        [[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]],

        [[4, 4, 4],
         [4, 4, 4],
         [4, 4, 4]],

        [[0, 0, 7],
         [0, 0, 5],
         [0, 0, 7]],

        [[2, 2, 2],
         [2, 2, 2],
         [0, 2, 2]],

        [[5, 5, 5],
         [5, 5, 5],
         [5, 5, 5]],

        [[6, 6, 6],
         [6, 6, 6],
         [6, 6, 6]],

        [[6, 6, 6],
         [6, 6, 6],
         [6, 6, 6]],

        [[6, 6, 6],
         [6, 6, 6],
         [6, 6, 6]],

        [[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]],

        [[6, 6, 6],
         [6, 6, 6],
         [6, 6, 6]]], device='

RuntimeError: The size of tensor a (3) must match the size of tensor b (16) at non-singleton dimension 2

In [186]:
a = np.random.randint(low = 0,high = 9, size =(16, 3,3))

In [191]:
for key in range(a.shape[0]):
    print(a[key,:,:])
    unique, counts = numpy.unique(a[key,:,:], return_counts=True)
    print(dict(zip(unique, counts)))

[[8 8 0]
 [3 2 0]
 [8 7 7]]
{0: 2, 2: 1, 3: 1, 7: 2, 8: 3}
[[3 2 8]
 [3 1 8]
 [4 1 5]]
{1: 2, 2: 1, 3: 2, 4: 1, 5: 1, 8: 2}
[[6 0 7]
 [3 8 2]
 [2 6 0]]
{0: 2, 2: 2, 3: 1, 6: 2, 7: 1, 8: 1}
[[0 3 6]
 [2 4 1]
 [3 4 4]]
{0: 1, 1: 1, 2: 1, 3: 2, 4: 3, 6: 1}
[[8 1 8]
 [0 4 4]
 [1 0 5]]
{0: 2, 1: 2, 4: 2, 5: 1, 8: 2}
[[2 4 1]
 [1 1 0]
 [4 1 8]]
{0: 1, 1: 4, 2: 1, 4: 2, 8: 1}
[[1 1 5]
 [4 4 2]
 [8 5 5]]
{1: 2, 2: 1, 4: 2, 5: 3, 8: 1}
[[8 1 7]
 [2 5 0]
 [4 6 8]]
{0: 1, 1: 1, 2: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 2}
[[5 5 5]
 [7 8 2]
 [7 8 7]]
{2: 1, 5: 3, 7: 3, 8: 2}
[[3 0 2]
 [5 6 4]
 [0 5 1]]
{0: 2, 1: 1, 2: 1, 3: 1, 4: 1, 5: 2, 6: 1}
[[7 5 4]
 [6 0 1]
 [8 5 1]]
{0: 1, 1: 2, 4: 1, 5: 2, 6: 1, 7: 1, 8: 1}
[[6 7 6]
 [0 8 0]
 [2 1 4]]
{0: 2, 1: 1, 2: 1, 4: 1, 6: 2, 7: 1, 8: 1}
[[8 4 1]
 [3 0 4]
 [5 1 3]]
{0: 1, 1: 2, 3: 2, 4: 2, 5: 1, 8: 1}
[[2 6 4]
 [4 0 6]
 [2 6 6]]
{0: 1, 2: 2, 4: 2, 6: 4}
[[1 7 3]
 [4 0 4]
 [4 7 8]]
{0: 1, 1: 1, 3: 1, 4: 3, 7: 2, 8: 1}
[[7 1 4]
 [7 1 1]
 [6 2 1]]
{1: 4, 2: 1, 4:

In [188]:
 unique, counts = numpy.unique(a, return_counts=True)

In [189]:
counts

array([17, 23, 14, 10, 22, 14, 13, 13, 18])

In [176]:
import collections, numpy
collections.Counter(a[0,:,:])
Counter({0: 7, 1: 4, 3: 2, 2: 1, 4: 1})

TypeError: unhashable type: 'numpy.ndarray'

In [192]:
res_fcn

ResNet50Bottom(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (downsample): Sequential(
          (0): Conv2d(64, 25