In [20]:
#--Pytorch 
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.models import resnet50
from torch import nn
import torch
#--other
from imutils import paths
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm import tqdm 
import time
import os
import cv2

### The dataset is already built. You can check the "Transfer_learning_via_feature_extraction" file to get more details on how to build it

### Set the configurations

In [3]:
class Config:
    """Paths, preprocessing constants and hyper-parameters shared by the notebook cells."""

    # --- dataset locations ---------------------------------------------------
    DATA_PATH = "flower_photos"  # folder containing the original pictures
    BASE_PATH = "dataset"  # root of the rebuilt dataset, e.g. dataset/flower_class/picture1.jpg
    TRAIN = os.path.join(BASE_PATH, "train")
    VAL = os.path.join(BASE_PATH, "val")
    TEST = os.path.join(BASE_PATH, "test")

    # --- split fractions -----------------------------------------------------
    VAL_SPLIT = 0.1
    TEST_SPLIT = 0.1

    # --- preprocessing -------------------------------------------------------
    # Per-channel (RGB) mean and standard deviation that were used to train the
    # resnet50 model; the inputs are normalized with the same values so the data
    # fed to the model matches what it saw during its original training.
    MEAN = [0.485, 0.456, 0.406]
    STD = [0.229, 0.224, 0.225]
    IMAGE_SIZE = 224

    # --- hardware ------------------------------------------------------------
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    # --- training hyper-parameters -------------------------------------------
    FEATURE_EXTRACTION_BATCH_SIZE = 256
    FINETUNE_BATCH_SIZE = 64
    PRED_BATCH_SIZE = 4
    EPOCHS = 20
    LR = 0.001
    LR_FINETUNE = 0.0005

    # --- output artifacts ----------------------------------------------------
    WARMUP_PLOT = os.path.join("output", "warmup.png")
    FINETUNE_PLOT = os.path.join("output", "finetune.png")
    WARMUP_MODEL = os.path.join("output", "warmup_model.pth")
    FINETUNE_MODEL = os.path.join("output", "finetune_model.pth")

# Instantiate the class so every setting is reachable as an attribute (config.<NAME>)
config = Config()

### Quickly check some images

In [4]:
# Preview a handful of random training pictures, each in a window titled with its class label.
imagePaths = list(paths.list_images(config.TRAIN))
# sample() draws distinct pictures (no duplicates) and min() keeps the call valid
# when the folder holds fewer than 5 images.
random_i = random.sample(imagePaths, k=min(5, len(imagePaths)))

for path in random_i:
    im = cv2.imread(path)
    if im is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print("[WARN] could not read image: {}".format(path))
        continue
    # The label is the name of the folder that directly contains the picture.
    # Taking it from the path components works for any OS separator and any
    # nesting depth of config.TRAIN, unlike splitting on "/" at a fixed index.
    label = os.path.basename(os.path.dirname(path))
    # imread yields BGR, which is what imshow expects, so no colour conversion is done
    cv2.imshow(label, im)
    cv2.waitKey()

cv2.destroyAllWindows()

### Define a function that builds the dataset and its DataLoader with the given transformations

In [4]:
def get_data_loader(rootDir, transforms, batch_size, shuffle=True):
    """Create an ImageFolder dataset and a DataLoader that iterates over it.

    Args:
        rootDir: directory passed to ``datasets.ImageFolder``.
        transforms: transformation pipeline applied to every loaded image.
        batch_size: number of samples per batch.
        shuffle: whether the loader shuffles the samples (default ``True``).

    Returns:
        A ``(dataset, loader)`` tuple.
    """
    ds = datasets.ImageFolder(rootDir, transform=transforms)
    loader = DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to 0 workers (loading in the main process) in that case.
        num_workers=os.cpu_count() or 0,
        # pinned host memory is only requested when the configured device is CUDA
        pin_memory=config.DEVICE == "cuda",
    )
    return ds, loader

### Define transformations

In [5]:
# Training pipeline: random crop / flip / rotation as augmentation, then tensor
# conversion and normalization with the configured per-channel statistics.
trainTransform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=config.IMAGE_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=90),
        transforms.ToTensor(),
        transforms.Normalize(config.MEAN, config.STD),
    ]
)

# Validation pipeline: no augmentation, only a fixed-size resize before
# tensor conversion and normalization.
valTransform = transforms.Compose(
    [
        transforms.Resize(size=(config.IMAGE_SIZE, config.IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(config.MEAN, config.STD),
    ]
)

# Test pipeline: same steps as the validation pipeline.
testTransform = transforms.Compose(
    [
        transforms.Resize(size=(config.IMAGE_SIZE, config.IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(config.MEAN, config.STD),
    ]
)

### create the dataloaders

In [6]:
# Training split: augmented pipeline, shuffled (the helper's default)
trainDS, trainLoader = get_data_loader(
    config.TRAIN,
    transforms=trainTransform,
    batch_size=config.FINETUNE_BATCH_SIZE,
)

# Validation split: fixed order
valDS, valLoader = get_data_loader(
    config.VAL,
    transforms=valTransform,
    batch_size=config.FINETUNE_BATCH_SIZE,
    shuffle=False,
)

# Test split: fixed order
testDS, testLoader = get_data_loader(
    config.TEST,
    transforms=testTransform,
    batch_size=config.FINETUNE_BATCH_SIZE,
    shuffle=False,
)

### Fine tuning

In [9]:
# Load ResNet50 with its ImageNet pre-trained weights. The weight set is named
# explicitly: passing a boolean to `weights` only works through the deprecated
# legacy (`pretrained=True`) path.
model = resnet50(weights="IMAGENET1K_V1")
# Number of input features of the original classification layer; the new head
# attached later must accept the same size.
numfeatures = model.fc.in_features



In [10]:
# Mark the learnable parameters of every batch-normalization layer as not trainable.
# Each BatchNorm2d module is asked for its own parameters: modules and parameters
# are two independent sequences, so zipping them would pair unrelated entries and
# freeze the wrong tensors.
for module in model.modules():
    if isinstance(module, nn.BatchNorm2d):
        for param in module.parameters():
            param.requires_grad = False

In [12]:
# Replace the network's final layer with a new classification head:
# numfeatures -> 512 -> 256 -> number of classes found in the training set,
# with ReLU activations and dropout between the linear layers.
model.fc = nn.Sequential(
    nn.Linear(in_features=numfeatures, out_features=512),  # connects to the backbone's features
    nn.ReLU(),
    nn.Dropout(p=0.25),
    nn.Linear(in_features=512, out_features=256),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=256, out_features=len(trainDS.classes)),
)
# Keep a direct handle on the new head
headModel = model.fc

In [13]:
# Move the model to the device selected in the configuration ("cuda" or "cpu")
model =model.to(config.DEVICE)

In [14]:
# Loss function and optimizer
lossFunc = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=config.LR)

# Steps (batches) per epoch. Taken from the loaders themselves: they also yield
# the final, smaller batch, so `len(dataset) // batch_size` undercounts whenever
# the dataset size is not a multiple of the batch size and is 0 for a split
# smaller than one batch.
trainSteps = len(trainLoader)
valSteps = len(valLoader)

In [15]:
# Training history: one (initially empty) list of per-epoch values for each metric
H = {metric: [] for metric in ("train_loss", "train_acc", "val_loss", "val_acc")}

In [22]:
startTime = time.time()

# Gradients are accumulated over this many batches before each optimizer update
ACCUM_STEPS = 2
# Number of batches each loader yields per epoch (includes a final partial batch)
numTrainBatches = len(trainLoader)
numValBatches = len(valLoader)

for e in tqdm(range(config.EPOCHS)):
    # ---- training phase ----
    model.train()

    # Running totals are plain Python numbers: summing the loss tensors themselves
    # would keep autograd history alive across iterations and grow GPU memory.
    totalTrainLoss = 0.0
    totalValloss = 0.0

    trainCorrect = 0
    valCorrect = 0

    # start every epoch from clean gradients
    opt.zero_grad()

    for (i, (x, y)) in enumerate(trainLoader):
        (x, y) = (x.to(config.DEVICE), y.to(config.DEVICE))

        pred = model(x)
        loss = lossFunc(pred, y)

        # Scale the loss so the accumulated gradient is an average over the
        # accumulated batches rather than their sum.
        (loss / ACCUM_STEPS).backward()

        # Update after every ACCUM_STEPS batches, and also on the last batch so
        # no accumulated gradient is carried over into the next epoch.
        if (i + 1) % ACCUM_STEPS == 0 or (i + 1) == numTrainBatches:
            opt.step()
            opt.zero_grad()

        totalTrainLoss += loss.item()
        trainCorrect += (pred.argmax(1) == y).type(torch.float).sum().item()

    # ---- validation phase ----
    model.eval()
    with torch.no_grad():
        for (x, y) in valLoader:
            (x, y) = (x.to(config.DEVICE), y.to(config.DEVICE))

            pred = model(x)
            totalValloss += lossFunc(pred, y).item()

            valCorrect += (pred.argmax(1) == y).type(torch.float).sum().item()

    # average loss per batch, accuracy per sample
    avgTrainLoss = totalTrainLoss / numTrainBatches
    avgValLoss = totalValloss / numValBatches

    trainCorrect = trainCorrect / len(trainDS)
    valCorrect = valCorrect / len(valDS)

    # record each metric under its own key
    H["train_loss"].append(avgTrainLoss)
    H["train_acc"].append(trainCorrect)
    H["val_loss"].append(avgValLoss)
    H["val_acc"].append(valCorrect)

    print("Epoch: {}/{}".format(e+1, config.EPOCHS))
    print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(avgTrainLoss, trainCorrect))
    print("Val loss {:.6f}, Val accuracy: {:.4f}".format(avgValLoss, valCorrect))

endTime = time.time()

print("[INFO] total time taken to train the model: {:.2f}s".format(endTime - startTime))

  0%|          | 0/20 [00:09<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 38.00 MiB (GPU 0; 5.94 GiB total capacity; 5.24 GiB already allocated; 3.75 MiB free; 5.31 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# make sure the destination folders exist; savefig and torch.save do not create them
for outPath in (config.FINETUNE_PLOT, config.FINETUNE_MODEL):
    outDir = os.path.dirname(outPath)
    if outDir:
        os.makedirs(outDir, exist_ok=True)

# plot the training loss and accuracy
plt.style.use("ggplot")
plt.figure()
plt.plot(H["train_loss"], label="train_loss")
plt.plot(H["val_loss"], label="val_loss")
plt.plot(H["train_acc"], label="train_acc")
plt.plot(H["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig(config.FINETUNE_PLOT)

# serialize the model to disk
# NOTE: this stores the whole model object, not only its state_dict
torch.save(model, config.FINETUNE_MODEL)

In [17]:
# Display the configured number of training epochs
config.EPOCHS

20