 I ran the model on 25 samples and it works fine, but I could not run it on the whole dataset because that takes too long.

In [None]:
import numpy as np
import shutil
import seaborn as sns
from tqdm import tqdm
import torch
import torchvision
from torch import nn
import torch.optim as optim
from torch.utils import data
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix
from torch.utils.data import Subset
from torchvision import models
from collections import OrderedDict
from torchvision import transforms

In [None]:
# Root folder of the dataset; ImageFolder-style layout with one sub-folder per class.
train_data_path = '../input/animals10/raw-img/'

# Italian folder name -> English class label.
translate = dict(
    cane="dog",
    cavallo="horse",
    elefante="elephant",
    farfalla="butterfly",
    gallina="chicken",
    gatto="cat",
    mucca="cow",
    pecora="sheep",
    ragno="spider",
    scoiattolo="squirrel",
)

In [None]:
# Prepare transforms.
#
# The random crop is produced directly at the 224x224 network input size.
# Cropping to 1080x1080 and then resizing to 512 and to 224 yields the same
# kind of sample but spends most of the data-loading time on resampling.
#
# The final Normalize applies the ImageNet channel statistics that the
# pretrained torchvision weights were trained with.
my_transform = transforms.Compose([
    transforms.RandomRotation(45),
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomRotation(45),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# train_data is the whole dataset for now.
# NOTE(review): every subset taken from this dataset shares `my_transform`,
# so any held-out split is randomly augmented too -- confirm this is intended.
train_data = torchvision.datasets.ImageFolder(root=train_data_path,
                                              transform=my_transform)


def create_subsets(train_set):
    """Randomly split ``train_set`` into a training and a test subset.

    A shuffled list of all sample indices is cut at the 4/5 mark: the first
    part becomes the training subset, the remainder the test subset.

    Args:
        train_set: Any sized, indexable dataset (it must support ``len()``
            and integer indexing).

    Returns:
        A ``(subset_train, subset_test)`` tuple of ``torch.utils.data.Subset``
        objects that both wrap ``train_set``.
    """
    n_samples = len(train_set)

    # Shuffle the sample indices in place; seed NumPy's global RNG beforehand
    # if a reproducible split is needed.
    indices = np.arange(n_samples)
    np.random.shuffle(indices)

    # Integer arithmetic avoids float rounding at the 80% boundary.
    split_at = (n_samples * 4) // 5

    subset_train = Subset(train_set, indices[:split_at])  # train set
    subset_test = Subset(train_set, indices[split_at:])  # test set

    return subset_train, subset_test


def create_data_loaders(mini_batch_size, subset_trian, subset_test):
    """Wrap the train and test datasets in shuffling ``DataLoader`` objects.

    Args:
        mini_batch_size: Number of samples per batch for both loaders.
        subset_trian: Dataset used for training. The misspelled name is kept
            so that existing keyword callers keep working.
        subset_test: Dataset used for testing/validation.

    Returns:
        A ``(train_data_loader, test_data_loader)`` tuple.
    """
    # Use the datasets that were passed in, not same-named module globals.
    train_data_loader = data.DataLoader(subset_trian, shuffle=True, batch_size=mini_batch_size)
    test_data_loader = data.DataLoader(subset_test, shuffle=True, batch_size=mini_batch_size)

    return train_data_loader, test_data_loader

In [None]:
def create_model(unfreeze_all_layers):
    """Build a pretrained VGG19 with a fresh 10-class classifier head.

    Args:
        unfreeze_all_layers: If truthy, every parameter of the network is
            trainable. Otherwise only the new classifier parameters
            (``classifier.fc1`` / ``classifier.fc2``) are trainable and all
            other parameters are frozen.

    Returns:
        The configured model. Its forward pass returns raw class scores
        (logits) of shape ``(batch, 10)``; apply ``softmax`` to the output
        if probabilities are needed.
    """
    model = torchvision.models.vgg19(pretrained=True)

    # Replace the stock classifier. No Softmax layer at the end:
    # CrossEntropyLoss applies log-softmax itself, and feeding it
    # probabilities instead of logits flattens the gradients.
    model.classifier = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(25088, 6000)),  # fully connected 1
        ('relu', nn.ReLU()),
        ('dropout', nn.Dropout(.5)),
        ('fc2', nn.Linear(6000, 10)),  # fully connected 2 -> class logits
    ]))

    # Parameters require gradients by default, so the frozen case must turn
    # them off explicitly for everything outside the classifier.
    train_everything = bool(unfreeze_all_layers)
    for name, param in model.named_parameters():
        param.requires_grad = train_everything or name.startswith('classifier.')

    return model

In [None]:
def gpu_optimizer(model):
    """Create the training utilities for ``model``.

    Returns a 5-tuple, in this order:
        * whether CUDA is available,
        * an Adam optimizer (lr=0.008, AMSGrad variant) over all parameters
          of ``model``,
        * a cross-entropy loss function,
        * two new, independent empty lists (returned empty; this function
          does not populate them).
    """
    criterion = torch.nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), amsgrad=True, lr=0.008)
    cuda_ready = torch.cuda.is_available()

    train_history = []
    validation_history = []

    return cuda_ready, adam, criterion, train_history, validation_history

In [None]:
def seq(model, df, name, optimizer):
    """Run one pass over ``df`` and collect the loss and per-class hit counts.

    Reads two module-level globals: ``train_on_gpu`` (move model and batches
    to CUDA when true) and ``loss_fn`` (called as ``loss_fn(output, target)``).

    Args:
        model: Network called as ``model(batch)``; its output is indexed by
            class along dimension 1.
        df: Iterable of ``(data, target)`` batches, e.g. a ``DataLoader``.
        name: ``'train'`` to back-propagate and update the weights; any other
            value runs a gradient-free evaluation pass that leaves the
            weights untouched.
        optimizer: Optimizer used for the weight updates in training mode.

    Returns:
        ``(class_correct, class_total, train_loss)``: two 10-element lists
        with the per-class correct/seen counts, and the sum of the batch
        losses.
    """
    is_training = name == 'train'
    train_loss = 0.0
    class_correct = [0.] * 10
    class_total = [0.] * 10

    # Moving the model is loop-invariant; do it once, not once per batch.
    if train_on_gpu:
        model.cuda()

    # Evaluation builds no autograd graph, which saves memory and time.
    with torch.set_grad_enabled(is_training):
        for inputs, target in df:
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                inputs, target = inputs.cuda(), target.cuda()

            if is_training:
                # clear the gradients of all optimized variables
                optimizer.zero_grad()

            # forward pass, then the batch loss
            output = model(inputs)
            loss = loss_fn(output, target)

            if is_training:
                # Backward pass and parameter update happen only while
                # training. Stepping during validation can still move the
                # weights through the optimizer's running moments.
                loss.backward()
                optimizer.step()

            train_loss += loss.item()

            # compare predictions to the true labels
            _, pred = torch.max(output, 1)
            hits = pred.eq(target.view_as(pred)).cpu().tolist()
            labels = target.cpu().tolist()
            # Plain Python lists keep this correct for a batch of size 1,
            # where a squeezed array could no longer be indexed.
            for label, hit in zip(labels, hits):
                class_correct[label] += float(hit)
                class_total[label] += 1

    return class_correct, class_total, train_loss

In [None]:
def trainModel(model, train_loader,valid_loader, optimizer, num_epochs, save_path='model.pt'):
    """Train ``model`` for ``num_epochs`` epochs and save its weights.

    Each epoch runs ``seq`` over ``train_loader`` with the model in train
    mode, then over ``valid_loader`` with the model in eval mode, and prints
    a summary line after each pass via ``printdata``.

    Args:
        model: Network to train.
        train_loader: Batches used for the ``'train'`` pass.
        valid_loader: Batches used for the ``'validation'`` pass.
        optimizer: Optimizer forwarded to ``seq``.
        num_epochs: Number of epochs to run.
        save_path: Destination for the saved ``state_dict``. Defaults to
            ``'model.pt'``; pass a different path per experiment to avoid
            overwriting an earlier checkpoint.
    """
    print('started')
    for epoch in range(1, num_epochs + 1):
        ###################
        # train the model #
        ###################
        model.train()
        class_correct, class_total, train_loss = seq(model, train_loader, 'train', optimizer)
        printdata(class_correct, class_total, train_loss, epoch, 'train', train_loader)

        ######################
        # validate the model #
        ######################
        model.eval()
        class_correct, class_total, valid_loss = seq(model, valid_loader, 'validation', optimizer)
        printdata(class_correct, class_total, valid_loss, epoch, 'validation', valid_loader)

    torch.save(model.state_dict(), save_path)
    print('model saved ')

In [None]:
def printdata(class_correct, class_total, train_loss, epoch, name, df ):
    """Print the mean batch loss and the overall accuracy of one pass.

    Args:
        class_correct: Per-class counts of correct predictions.
        class_total: Per-class counts of samples seen.
        train_loss: Sum of the batch losses of the pass.
        epoch: Epoch number shown in the message.
        name: Label of the pass (e.g. ``'train'``), inserted verbatim.
        df: The batches that were iterated; only ``len(df)`` is used, to
            average the loss.

    Empty inputs (no batches or no samples) are reported as zero loss or
    zero accuracy instead of raising.
    """
    n_correct = np.sum(class_correct)
    n_seen = np.sum(class_total)
    n_batches = len(df)

    mean_loss = train_loss / n_batches if n_batches else 0.0
    accuracy = 100. * n_correct / n_seen if n_seen else 0.0

    # A single f-string: `name` is never re-interpreted as a %-format
    # template, so a literal '%' in it is printed as-is.
    print(f'Epoch {int(epoch):d}, loss: {mean_loss:.8f} \t{name} '
          f'Accuracy (Overall): {int(accuracy):2d}% ({int(n_correct):2d}/{int(n_seen):2d})')

In [None]:
# Split the full dataset into a train and a test subset, then wrap both in
# data loaders that yield mini-batches of 128 samples.
subset_train, subset_test = create_subsets(train_set=train_data)
train_data_loader, test_data_loader = create_data_loaders(
    128, subset_train, subset_test
)

In [None]:
# CASE 1: model created with unfreeze_all_layers=True, trained for 20 epochs.
model1 = create_model(unfreeze_all_layers=True)
print(model1)

# train_on_gpu and loss_fn are module-level names that seq() reads directly,
# so they must be (re)bound here before training starts.
train_on_gpu, optimizer, loss_fn, loss_lst, loss_val_lst = gpu_optimizer(model=model1)
trainModel(model1, train_data_loader, test_data_loader, optimizer, num_epochs=20)

In [None]:
# CASE 2: model created with unfreeze_all_layers=False (only the fully
# connected layers are meant to be trained), trained for 20 epochs.
# NOTE: trainModel saves to 'model.pt' by default, so this run overwrites the
# checkpoint written by an earlier run that used the same default.
model2 = create_model(unfreeze_all_layers=False)
print(model2)

# Rebind the globals used by seq() to a fresh optimizer / loss for model2.
train_on_gpu, optimizer, loss_fn, loss_lst, loss_val_lst = gpu_optimizer(model=model2)
trainModel(model2, train_data_loader, test_data_loader, optimizer, num_epochs=20)