In [1]:
#---Pytorch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.models import resnet50
from torch import nn
import torch
#---Others
from imutils import paths
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import shutil
import os
import time 

### Define the configuration parameters to use

In [2]:
class Config:

    DATA_PATH = "flower_photos" #where the pictures are
    BASE_PATH = "dataset" #the new path of the dataset. Ex: dataset/flower_class/picture1.jpg

    VAL_SPLIT = 0.1
    TEST_SPLIT = 0.1
    TRAIN = os.path.join(BASE_PATH, "train")
    VAL = os.path.join(BASE_PATH, "val")
    TEST = os.path.join(BASE_PATH, "test")

    #This mean and standard deviation values were used to train the resnet50 model. 
    # they are defined for the each of the RGB channels
    # We need them to be the same so we don't encounter problems when normalizing the data and then feeding it to the model
    MEAN = [0.485, 0.456, 0.406]
    STD = [0.229, 0.224, 0.225]
    IMAGE_SIZE = 224

    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    FEATURE_EXTRACTION_BATCH_SIZE = 256
    FINETUNE_BATCH_SIZE = 64
    PRED_BATCH_SIZE = 4
    EPOCHS = 20
    LR = 0.001
    LR_FINETUNE = 0.0005

    WARMUP_PLOT = os.path.join("output", "warmup.png")
    FINETUNE_PLOT = os.path.join("output", "finetune.png")
    WARMUP_MODEL = os.path.join("output", "warmup_model.pth")
    FINETUNE_MODEL = os.path.join("output", "finetune_model.pth")

#Instantiate the class (so we can access al the parameters as methods)
config = Config()

### First and foremost: get the data and organize it in train, validation and test

In [3]:
imagePaths = list(paths.list_images(Config.DATA_PATH))
np.random.shuffle(imagePaths)

In [4]:
#check 
str.split(imagePaths[0], '/')[1]

'roses'

In [5]:
def copy_images(imagePath, folder):
    #for a folder (train, val or test) that doesn't exist, create it
    if not os.path.exists(folder):
        os.makedirs(folder)

    for path in imagePath:
        imagename = os.path.split(os.path.sep)[-1]
        label = str.split(path, "/")[1]
        new_path = os.path.join(folder, label)
        print(new_path)

        if not os.path.exists(new_path):
            os.makedirs(new_path)
        
        #Copy each image to the new destination
        destination = os.path.join(new_path, imagename)
        shutil.copy(path, destination)

In [6]:
#Obtain the proportions of how much data goes to each folder
testPathlen = int(len(imagePaths) * config.TEST_SPLIT)
difference_test = int(len(imagePaths) - testPathlen)
trainPaths = imagePaths[:difference_test]
testPaths = imagePaths[difference_test:]

#Now validation
valPathlen = int(len(trainPaths)*config.VAL_SPLIT)
difference_val = int(len(trainPaths) - valPathlen)
trainpaths = trainPaths[:difference_val]
valPaths = trainPaths[difference_val:]

In [7]:
# copy_images(trainPaths, config.TRAIN)
# copy_images(trainPaths, config.VAL)
# copy_images(trainPaths, config.TEST)

### Load the data, define DataLoader and pass transformations

In [8]:
def get_data_loader(rootDir, transforms, batchSize, shuffle=True):
    ds = datasets.ImageFolder(rootDir, transform=transforms) #get the data and make transformations(ex: normalize, resize, augmentations)
    loader = DataLoader(ds, batch_size=batchSize, shuffle=shuffle, num_workers=os.cpu_count(), pin_memory= True if config.DEVICE == "cuda" else False) #move the dataset to dataloader
    return(ds, loader)

In [21]:
#Define the tranformations to do

#For training data
trainTransform = transforms.Compose([
    transforms.RandomResizedCrop(config.IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(90),
    transforms.ToTensor(), #necessary for all3 datasets
    transforms.Normalize(mean=config.MEAN, std=config.STD)]) #necessary for all 3 datasets

#For validation
valTransform = transforms.Compose([
    transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=config.MEAN, std=config.STD),
    ])

#For test
testTransform =  transforms.Compose([
    transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=config.MEAN, std = config.STD)
])

In [22]:
#Use the function for data loader defined previously and pass the datasets

#For training
(trainDS, trainLoader) = get_data_loader(config.TRAIN, transforms=trainTransform, batchSize=config.FINETUNE_BATCH_SIZE, shuffle=True) 
#For validation
(valDS, valLoader) = get_data_loader(config.VAL, transforms= valTransform, batchSize= config.FINETUNE_BATCH_SIZE, shuffle=False) #remember to not shuffle
#For test
(testDS, testLoader) = get_data_loader(config.TEST, transforms= testTransform, batchSize= config.FINETUNE_BATCH_SIZE, shuffle= False) #same here

### Transfer learning via feature extraction

In [23]:
#load model
model = resnet50(weights=True)

#Due to this being feature extraction, we set the parameters as non trainable. This is done in order to use the already trained filters/parameters to analyze the new data
for param in model.parameters():
    param.requires_grad = False



In [24]:
#Now, we append a new fully connected layer(s) on top of the model
modelOutputFeats = model.fc.in_features #get the output
model.fc = nn.Linear(modelOutputFeats, len(trainDS.classes))
model = model.to(config.DEVICE)

In [25]:
#Initialize the loss function and optimizer
lossFunc = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.fc.parameters(), lr=config.LR) #use only the parameters of the top part of the NN. the one we are defining

In [26]:
#Calculate steps per epoch 
trainSteps = len(trainDS)//config.FEATURE_EXTRACTION_BATCH_SIZE
valSteps = len(valDS)//config.FEATURE_EXTRACTION_BATCH_SIZE
testSteps = len(testDS)//config.FEATURE_EXTRACTION_BATCH_SIZE

### Train the model 

In [27]:
startTime = time.time()

for e in tqdm(range(config.EPOCHS)):
    model.train()

    totalTrainloss = 0
    totalValloss = 0

    trainCorrect = 0
    valCorrect = 0

    for (i, (x, y)) in enumerate(trainLoader):
        (x,y) = (x.to(config.DEVICE), y.to(config.DEVICE))

        pred = model(x)
        loss = lossFunc(pred, y)

        #Code the 3 important steps
        loss.backward()

        # check if we are updating the model parameters and if so
		# update them
        if (i+2)%2==0:
            opt.step()
            opt.zero_grad() 
         
        totalTrainloss += loss
        trainCorrect += (pred.argmax(1)==y).type(torch.float).sum().item()
    
    with torch.no_grad():
        model.eval()

        for (x,y) in valLoader:
            (x,y) = (x.to(config.DEVICE), y.to(config.DEVICE))
            
            pred = model(x)
            totalValloss += lossFunc(pred, y)

            valCorrect += (pred.argmax(1) == y).type(torch.float).sum().item()

    # calculate the average training and validation loss
    avgTrainLoss = totalTrainloss / trainSteps
    avgValLoss = totalValloss / valSteps

	# calculate the training and validation accuracy
    trainCorrect = trainCorrect / len(trainDS)
    valCorrect = valCorrect / len(valDS)


	# print the model training and validation information
    print("[INFO] EPOCH: {}/{}".format(e + 1, config.EPOCHS))
    print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(	avgTrainLoss, trainCorrect))
    print("Val loss: {:.6f}, Val accuracy: {:.4f}".format(avgValLoss, valCorrect))

# display the total time needed to perform the training
endTime = time.time()
print("[INFO] total time taken to train the model: {:.2f}s".format(endTime - startTime))

torch.save(model, config.FINETUNE_MODEL)

  5%|▌         | 1/20 [02:42<51:32, 162.78s/it]

[INFO] EPOCH: 1/20
Train loss: 4.762776, Train accuracy: 0.6076
Val loss: 2.927257, Val accuracy: 0.7829


 10%|█         | 2/20 [03:14<25:43, 85.76s/it] 

[INFO] EPOCH: 2/20
Train loss: 3.042628, Train accuracy: 0.7620
Val loss: 2.340154, Val accuracy: 0.8093
