I used DenseNet-169, and got 92% test accuracy.

Since PyTorch is easy to read, you would be able to understand what I did reading the following code. So let me explain Pros/Cons of my implementation/results.

Pros:

* Simple training loop, with unified training/validation/test code.
* Basic data augmentation (random rotation and flip).
    * Considering the characteristic of the images, I thought rotation can be applied in addition to flipping.

Cons/Possible improvements:

* Tried only DenseNet-169 this time
    * If we have much resources at inference stage, we may be able to use ensemble too.
* Somehow accuracy drops after 20 epochs or so
    * Maybe able to solve by dropping learning rate after loss converges (`ReduceLROnPlateau`)

I wasn't able to do these because I ran the code below in Google Colab, which limits training hour.

In [0]:
import copy
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.data as data
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold
from torchvision import datasets, models, transforms
from torch.utils.data.sampler import SubsetRandomSampler
from PIL import Image
from tqdm import tqdm

In [0]:
def get_train_val_loader(root_dir, batch_size, num_workers, pin_memory, n_splits=100, shuffle=True):
    # compose transform
    train_dataset_transform = transforms.Compose([
        #  Random resized crop may be beneficial for real data, whose sizes of the star images are different.
        #  transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(180),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    val_dataset_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    train_dataset = datasets.ImageFolder(root_dir, transform=train_dataset_transform)
    val_dataset = datasets.ImageFolder(root_dir, transform=val_dataset_transform)
    test_dataset = datasets.ImageFolder(root_dir, transform=val_dataset_transform)

    indices = list(range(len(train_dataset)))
    if shuffle:
        np.random.seed(42)
        np.random.shuffle(indices)

    train_idx, val_idx, test_idx = indices[:-n_splits*2], indices[:n_splits], indices[n_splits:n_splits*2]

    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)
    test_sampler = SubsetRandomSampler(test_idx)
    print("train:", len(train_sampler), 'items')
    print("val:", len(val_sampler), 'items')
    print("val:", len(test_sampler), 'items')

    # define dataloaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, sampler=val_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, sampler=test_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    return train_loader, val_loader, test_loader

In [11]:
train_loader, val_loader, test_loader= get_train_val_loader("/content/drive/My Drive/images",batch_size=32,num_workers=4,pin_memory=True,n_splits=500)
dataloaders = {'train':train_loader, 'val':val_loader, 'test': test_loader}

train: 9000 items
val: 500 items
val: 500 items


In [0]:
# create a 2-class densenet classifier
CNN_model = models.densenet169(pretrained=True, progress=True)
CNN_model.classifier = torch.nn.Linear(CNN_model.classifier.in_features, 2)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
CNN_model.to(device)
optimizer = torch.optim.SGD(CNN_model.parameters(), lr=0.007, weight_decay=0.03, momentum=0.9)
# I didn't peek test score when deciding these hyperparameters

In [0]:
best_loss = 300000
num_epochs=30
criterion=torch.nn.CrossEntropyLoss()
running_loss=0
score=0
best_loss = 1000000

In [14]:
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch+1, num_epochs))
    print('-' * 10)
    for phase in ['train', 'val', 'test']:
        if phase == 'train':
            CNN_model.train()
        else:
            CNN_model.eval()

        # Iterate over each dataset
        #  for inputs, labels in tqdm(dataloaders[phase]):
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)
            # zero out the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if in train phase
            with torch.set_grad_enabled(phase == 'train'):
                outputs = CNN_model(inputs)
                loss = criterion(outputs, labels)

                # backward + optimize in train phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # predict
            _, pred = outputs.max(dim=1)
            score += sum(pred.eq(labels).float())

            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(dataloaders[phase].sampler)
        print('{} accuracy: {:.4f}'.format(phase, score/len(dataloaders[phase].sampler)))
        print('{} loss: {:.4f}'.format(phase, epoch_loss))
        running_loss=0
        score=0
        # keep the weight if highest val_acc is achieved
        if phase == 'val' and epoch_loss < best_loss:
            best_model_wts = copy.deepcopy(CNN_model.state_dict())
            best_loss = epoch_loss

Epoch 1/30
----------
train accuracy: 0.6920
train loss: 0.5512
val accuracy: 0.8560
val loss: 0.3662
test accuracy: 0.8660
test loss: 0.3919
Epoch 2/30
----------
train accuracy: 0.8553
train loss: 0.3043
val accuracy: 0.5480
val loss: 0.8124
test accuracy: 0.5620
test loss: 0.8241
Epoch 3/30
----------
train accuracy: 0.8640
train loss: 0.2990
val accuracy: 0.5060
val loss: 0.7765
test accuracy: 0.5120
test loss: 0.7143
Epoch 4/30
----------
train accuracy: 0.8412
train loss: 0.3436
val accuracy: 0.5100
val loss: 4.7954
test accuracy: 0.5280
test loss: 4.3630
Epoch 5/30
----------
train accuracy: 0.8472
train loss: 0.3305
val accuracy: 0.8900
val loss: 0.3160
test accuracy: 0.8900
test loss: 0.3173
Epoch 6/30
----------
train accuracy: 0.8284
train loss: 0.3749
val accuracy: 0.5100
val loss: 1.4582
test accuracy: 0.5100
test loss: 1.4451
Epoch 7/30
----------
train accuracy: 0.8198
train loss: 0.3863
val accuracy: 0.5060
val loss: 1.9719
test accuracy: 0.5100
test loss: 1.8772
Epoch 

The highest validation accuracy is achieved in epoch 9 (89.20%). The test accuracy of the epoch is 92.00%.