In [62]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

from sklearn.model_selection import KFold

In [56]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = os.path.join("C:\\", "Hackathons", "Oracle", "Reto1", "hackathon-nuwe-oracle")
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x))
                  for x in ['train']}
# image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
#                                           data_transforms[x])
#                   for x in ['train']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [57]:
type(image_datasets['train'])

torchvision.datasets.folder.ImageFolder

In [55]:
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)

imshow(out, title=[class_names[x] for x in classes])

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\locoxsoco\anaconda3\lib\site-packages\torch\utils\data\_utils\worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "C:\Users\locoxsoco\anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py", line 54, in fetch
    return self.collate_fn(data)
  File "C:\Users\locoxsoco\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py", line 264, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
  File "C:\Users\locoxsoco\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py", line 142, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
  File "C:\Users\locoxsoco\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py", line 142, in <listcomp>
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
  File "C:\Users\locoxsoco\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py", line 150, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>


In [84]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    k_folds = 5
    kfold = KFold(n_splits=k_folds, shuffle=True)
    
    for fold, (train_ids, valid_ids) in enumerate(kfold.split(image_datasets['train'])):
        print(f'FOLD {fold}')
        print('--------------------------------')
        train_subsampler = torch.utils.data.Subset(image_datasets['train'],indices=train_ids)
        valid_subsampler = torch.utils.data.Subset(image_datasets['train'],indices=valid_ids)
        
        print(type(train_subsampler))
        train_subsampler.dataset.transform = data_transforms['train']
        valid_subsampler.dataset.transform = data_transforms['val']
        
        train_loader = torch.utils.data.DataLoader(train_subsampler, batch_size=4, shuffle=True, num_workers=4)
        valid_loader = torch.utils.data.DataLoader(valid_subsampler, batch_size=4, shuffle=True, num_workers=4)
        
        kfold_dataloaders = {'train': train_loader,'val': valid_loader}
        dataset_sizes = {'train': len(train_ids),'val': len(valid_ids)}
    
        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in kfold_dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

            print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [85]:
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'predicted: {class_names[preds[j]]}')
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

In [86]:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model_ft.fc = nn.Linear(num_ftrs, 8)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [87]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=25)

FOLD 0
--------------------------------
<class 'torch.utils.data.dataset.Subset'>
Epoch 0/24
----------
train Loss: 1.0865 Acc: 0.6562
val Loss: 0.5738 Acc: 0.8289

Epoch 1/24
----------
train Loss: 0.7944 Acc: 0.7523
val Loss: 0.5742 Acc: 0.8387

Epoch 2/24
----------
train Loss: 0.5951 Acc: 0.8168
val Loss: 0.6495 Acc: 0.8155

Epoch 3/24
----------
train Loss: 0.4940 Acc: 0.8455
val Loss: 0.5086 Acc: 0.8566

Epoch 4/24
----------
train Loss: 0.3758 Acc: 0.8847
val Loss: 0.5061 Acc: 0.8668

Epoch 5/24
----------
train Loss: 0.3755 Acc: 0.8904
val Loss: 0.4739 Acc: 0.8664

Epoch 6/24
----------
train Loss: 0.2661 Acc: 0.9205
val Loss: 0.4611 Acc: 0.8740

Epoch 7/24
----------
train Loss: 0.1466 Acc: 0.9590
val Loss: 0.3995 Acc: 0.8945

Epoch 8/24
----------
train Loss: 0.0978 Acc: 0.9724
val Loss: 0.3875 Acc: 0.8968

Epoch 9/24
----------
train Loss: 0.0822 Acc: 0.9783
val Loss: 0.3860 Acc: 0.8932

Epoch 10/24
----------
train Loss: 0.0604 Acc: 0.9841
val Loss: 0.3789 Acc: 0.8981

Epoc

train Loss: 0.1858 Acc: 0.9542
val Loss: 0.0033 Acc: 0.9991

Epoch 20/24
----------
train Loss: 0.1813 Acc: 0.9534
val Loss: 0.0028 Acc: 0.9991

Epoch 21/24
----------
train Loss: 0.1790 Acc: 0.9520
val Loss: 0.0033 Acc: 0.9991

Epoch 22/24
----------
train Loss: 0.1913 Acc: 0.9534
val Loss: 0.0035 Acc: 0.9991

Epoch 23/24
----------
train Loss: 0.1750 Acc: 0.9548
val Loss: 0.0034 Acc: 0.9991

Epoch 24/24
----------
train Loss: 0.1863 Acc: 0.9539
val Loss: 0.0032 Acc: 0.9991

FOLD 4
--------------------------------
<class 'torch.utils.data.dataset.Subset'>
Epoch 0/24
----------
train Loss: 0.1725 Acc: 0.9562
val Loss: 0.0028 Acc: 0.9996

Epoch 1/24
----------
train Loss: 0.1809 Acc: 0.9533
val Loss: 0.0023 Acc: 0.9996

Epoch 2/24
----------
train Loss: 0.1864 Acc: 0.9535
val Loss: 0.0022 Acc: 0.9996

Epoch 3/24
----------
train Loss: 0.1728 Acc: 0.9551
val Loss: 0.0025 Acc: 0.9996

Epoch 4/24
----------
train Loss: 0.1795 Acc: 0.9550
val Loss: 0.0022 Acc: 0.9996

Epoch 5/24
----------


In [90]:
torch.save(model_ft.state_dict(), os.path.join(data_dir,'model_resnet18_25epoch_per_5fold.pth'))

In [128]:
def test_model(model):
    was_training = model.training
    model.eval()

    total_preds = torch.empty(0)
    total_preds = total_preds.to(device)
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader_test):
            inputs = inputs.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            total_preds = torch.cat((total_preds,preds),0)

        model.train(mode=was_training)
        return total_preds

In [129]:
image_dataset_test = datasets.ImageFolder(os.path.join(data_dir, 'test'), data_transforms['val'])
dataloader_test = torch.utils.data.DataLoader(image_dataset_test, batch_size=4,shuffle=False, num_workers=4)

In [130]:
preds = test_model(model_ft)

In [131]:
preds = preds.cpu()
preds = preds.numpy()

In [132]:
preds

array([0., 7., 1., ..., 7., 4., 2.], dtype=float32)

In [161]:
preds[0]

0.0

In [138]:
import pandas as pd

In [139]:
df_test = pd.read_csv('test.csv')
df_test.head()

Unnamed: 0,idx_test,path_img
0,0,ec4c75ba-e064-4ed0-a7b1-3ed899424110.jpeg
1,1,114fb94d-3223-4d95-b6d1-f6a6ac206ea2.jpeg
2,2,a80e7e27-a25a-40aa-a65c-a29bfa172b3d.jpeg
3,3,7848d24e-207a-4236-9cd7-df78122d8787.jpeg
4,4,57f726bc-d141-42fb-88fa-60d554499277.jpeg


In [195]:
dict_preds = dict()
for i, a in enumerate(image_dataset_test):
    contain_values = df_test[df_test['path_img'].str.contains(image_dataset_test.imgs[i][0][56:])]
    #print(contain_values['idx_test'].index[0] + " " + preds[i])
    dict_preds[contain_values['idx_test'].index[0]] = int(preds[i])



In [196]:
dict_preds

{3949: 0,
 313: 7,
 3455: 1,
 3611: 7,
 3329: 4,
 4158: 3,
 4151: 2,
 217: 3,
 518: 5,
 975: 2,
 1597: 6,
 698: 3,
 2952: 6,
 527: 3,
 4072: 5,
 132: 5,
 1152: 0,
 4739: 5,
 2550: 7,
 2900: 4,
 1592: 0,
 4785: 1,
 2961: 4,
 3200: 3,
 3541: 2,
 112: 3,
 2530: 2,
 299: 3,
 2939: 7,
 2980: 7,
 973: 7,
 1972: 5,
 3110: 7,
 1875: 0,
 1942: 3,
 3238: 5,
 3236: 2,
 1765: 3,
 931: 2,
 2977: 7,
 368: 7,
 4722: 3,
 869: 2,
 3632: 3,
 2984: 4,
 3420: 4,
 2311: 1,
 878: 5,
 826: 4,
 801: 2,
 2073: 5,
 735: 7,
 3613: 7,
 2990: 4,
 397: 6,
 2263: 1,
 1923: 6,
 1011: 0,
 3598: 2,
 1174: 5,
 812: 4,
 2710: 2,
 391: 1,
 3049: 7,
 405: 6,
 3246: 0,
 4309: 5,
 1932: 6,
 3701: 7,
 1046: 7,
 3843: 2,
 345: 7,
 1388: 1,
 382: 1,
 4434: 4,
 909: 2,
 3722: 2,
 4259: 0,
 4715: 5,
 2221: 1,
 304: 3,
 362: 3,
 862: 7,
 2083: 1,
 3784: 7,
 889: 1,
 1084: 3,
 3459: 4,
 89: 1,
 2130: 0,
 4103: 0,
 4125: 7,
 2266: 2,
 1490: 0,
 1966: 1,
 3716: 5,
 2291: 7,
 2066: 4,
 1952: 5,
 220: 5,
 3272: 3,
 1676: 4,
 4443: 4,
 

In [197]:
dict_preds_sorted = dict(sorted(dict_preds.items()))

In [198]:
dict_preds_sorted

{0: 2,
 1: 2,
 2: 6,
 3: 6,
 4: 5,
 5: 5,
 6: 7,
 7: 4,
 8: 3,
 9: 0,
 10: 0,
 11: 5,
 12: 5,
 13: 7,
 14: 2,
 15: 6,
 16: 2,
 17: 5,
 18: 1,
 19: 7,
 20: 3,
 21: 2,
 22: 7,
 23: 2,
 24: 1,
 25: 4,
 26: 5,
 27: 4,
 28: 0,
 29: 6,
 30: 4,
 31: 3,
 32: 4,
 33: 5,
 34: 4,
 35: 6,
 36: 5,
 37: 7,
 38: 3,
 39: 6,
 40: 4,
 41: 7,
 42: 7,
 43: 5,
 44: 5,
 45: 7,
 46: 0,
 47: 1,
 48: 6,
 49: 4,
 50: 4,
 51: 5,
 52: 7,
 53: 0,
 54: 1,
 55: 7,
 56: 3,
 57: 1,
 58: 6,
 59: 6,
 60: 6,
 61: 4,
 62: 7,
 63: 6,
 64: 7,
 65: 4,
 66: 4,
 67: 6,
 68: 4,
 69: 3,
 70: 4,
 71: 4,
 72: 7,
 73: 3,
 74: 3,
 75: 7,
 76: 2,
 77: 7,
 78: 3,
 79: 5,
 80: 7,
 81: 0,
 82: 1,
 83: 6,
 84: 6,
 85: 2,
 86: 4,
 87: 3,
 88: 5,
 89: 1,
 90: 5,
 91: 2,
 92: 0,
 93: 4,
 94: 0,
 95: 0,
 96: 5,
 97: 2,
 98: 7,
 99: 7,
 100: 4,
 101: 6,
 102: 5,
 103: 0,
 104: 4,
 105: 3,
 106: 2,
 107: 6,
 108: 0,
 109: 0,
 110: 7,
 111: 2,
 112: 3,
 113: 7,
 114: 5,
 115: 4,
 116: 3,
 117: 7,
 118: 6,
 119: 7,
 120: 6,
 121: 7,
 122: 1,
 12

In [202]:
dict_preds_sorted_str = dict()
for key in dict_preds_sorted.keys():
    dict_preds_sorted_str[str(key)] = dict_preds_sorted[key]

In [203]:
dict_preds_sorted_str

{'0': 2,
 '1': 2,
 '2': 6,
 '3': 6,
 '4': 5,
 '5': 5,
 '6': 7,
 '7': 4,
 '8': 3,
 '9': 0,
 '10': 0,
 '11': 5,
 '12': 5,
 '13': 7,
 '14': 2,
 '15': 6,
 '16': 2,
 '17': 5,
 '18': 1,
 '19': 7,
 '20': 3,
 '21': 2,
 '22': 7,
 '23': 2,
 '24': 1,
 '25': 4,
 '26': 5,
 '27': 4,
 '28': 0,
 '29': 6,
 '30': 4,
 '31': 3,
 '32': 4,
 '33': 5,
 '34': 4,
 '35': 6,
 '36': 5,
 '37': 7,
 '38': 3,
 '39': 6,
 '40': 4,
 '41': 7,
 '42': 7,
 '43': 5,
 '44': 5,
 '45': 7,
 '46': 0,
 '47': 1,
 '48': 6,
 '49': 4,
 '50': 4,
 '51': 5,
 '52': 7,
 '53': 0,
 '54': 1,
 '55': 7,
 '56': 3,
 '57': 1,
 '58': 6,
 '59': 6,
 '60': 6,
 '61': 4,
 '62': 7,
 '63': 6,
 '64': 7,
 '65': 4,
 '66': 4,
 '67': 6,
 '68': 4,
 '69': 3,
 '70': 4,
 '71': 4,
 '72': 7,
 '73': 3,
 '74': 3,
 '75': 7,
 '76': 2,
 '77': 7,
 '78': 3,
 '79': 5,
 '80': 7,
 '81': 0,
 '82': 1,
 '83': 6,
 '84': 6,
 '85': 2,
 '86': 4,
 '87': 3,
 '88': 5,
 '89': 1,
 '90': 5,
 '91': 2,
 '92': 0,
 '93': 4,
 '94': 0,
 '95': 0,
 '96': 5,
 '97': 2,
 '98': 7,
 '99': 7,
 '100': 4,

In [204]:
import json 
      
# Data to be written 
dictionary ={ "target" : dict_preds_sorted_str} 
      
# Serializing json  
json_object = json.dumps(dictionary, indent = 4) 
print(json_object)

{
    "target": {
        "0": 2,
        "1": 2,
        "2": 6,
        "3": 6,
        "4": 5,
        "5": 5,
        "6": 7,
        "7": 4,
        "8": 3,
        "9": 0,
        "10": 0,
        "11": 5,
        "12": 5,
        "13": 7,
        "14": 2,
        "15": 6,
        "16": 2,
        "17": 5,
        "18": 1,
        "19": 7,
        "20": 3,
        "21": 2,
        "22": 7,
        "23": 2,
        "24": 1,
        "25": 4,
        "26": 5,
        "27": 4,
        "28": 0,
        "29": 6,
        "30": 4,
        "31": 3,
        "32": 4,
        "33": 5,
        "34": 4,
        "35": 6,
        "36": 5,
        "37": 7,
        "38": 3,
        "39": 6,
        "40": 4,
        "41": 7,
        "42": 7,
        "43": 5,
        "44": 5,
        "45": 7,
        "46": 0,
        "47": 1,
        "48": 6,
        "49": 4,
        "50": 4,
        "51": 5,
        "52": 7,
        "53": 0,
        "54": 1,
        "55": 7,
        "56": 3,
        "57": 1,
      

In [187]:
dictionary

{'target': {217: 3,
  313: 7,
  518: 5,
  975: 2,
  3329: 4,
  3455: 1,
  3611: 7,
  3949: 0,
  4151: 2,
  4158: 3}}

In [22]:
base_model = torchvision.models.efficientnet_b5(weights='IMAGENET1K_V1')

In [20]:
base_model.requires_grad_ = False
for layer in base_model.parameters()[-18:]:
    print(layer)
    #if not isinstance(layer, nn.BatchNorm2d):
    #    layer.trainable = True

TypeError: 'generator' object is not subscriptable

In [23]:
print(dir(base_model))

['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_backward_pre_hooks', '_buffers', '_call_impl', '_forward_hooks', '_forward_hooks_with_kwargs', '_forward_impl', '_forward_pre_hooks', '_forward_pre_hooks_with_kwargs', '_get_backward_hooks', '_get_backward_pre_hooks', '_get_name', '_is_full_backward_hook', '_load_from_state_dict', '_load_state_dict_post_hooks', '_load_state_dict_pre_hooks', '_maybe_warn_non_full_backward_hook', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_load_state_dict_pre_hook', '_register_state_dict_hook', '_replicate_for_data_

In [28]:
print(base_model.parameters())

<generator object Module.parameters at 0x00000227FC99A0B0>
