In [16]:
import random
from pandas.core.common import flatten

In [50]:
import optuna

import torch
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler

In [18]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

from TomatoGrayscaleDataset import TomatoGrayscaleDataset
import glob

In [52]:
# this ensures that the current MacOS version is at least 12.3+
print(torch.backends.mps.is_available())
# this ensures that the current current PyTorch installation was built with MPS activated.
print(torch.backends.mps.is_built())

True
True


In [20]:
train_transforms = A.Compose(
    [
        A.SmallestMaxSize(max_size=350),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=360, p=0.5),
        A.RandomCrop(height=256, width=256),
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.MultiplicativeNoise(multiplier=[0.5,2], per_channel=True, p=0.2),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        ToTensorV2(),
    ]
)

test_transforms = A.Compose(
    [
        A.SmallestMaxSize(max_size=350),
        A.CenterCrop(height=256, width=256),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

In [21]:
train_data_path = './data/grayscale/train'
test_data_path = './data/grayscale/test'

train_image_paths = []
classes = []

for data_path in glob.glob(train_data_path + '/*'):
    classes.append(data_path.split('/')[-1])
    train_image_paths.append(glob.glob(data_path + '/*'))

train_image_paths = list(flatten(train_image_paths))
random.shuffle(train_image_paths)

print('train_image_path example: ', train_image_paths[0])
print('class example: ', classes[0])

test_image_paths = []
for data_path in glob.glob(test_data_path + '/*'):
    test_image_paths.append(glob.glob(data_path + '/*'))

test_image_paths = list(flatten(test_image_paths))

print("Train size: {}\nTest size: {}".format(len(train_image_paths), len(test_image_paths)))

In [23]:
class TomatoGrayscaleDataset(Dataset):
    def __init__(self, image_paths, transform=False):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_filepath = self.image_paths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        label = image_filepath.split('/')[-2]
        label = class_to_idx[label]
        if self.transform is not None:
            image = self.transform(image=image)["image"]

        return image, label

train_dataset = TomatoGrayscaleDataset(train_image_paths,train_transforms)
test_dataset = TomatoGrayscaleDataset(test_image_paths,test_transforms)

In [53]:
def get_data_loaders(batch_size):

    trainloader = DataLoader(train_dataset, batch_size=batch_size,
                              num_workers=2, shuffle=True)

    testloader = DataLoader(test_dataset, batch_size=batch_size,
                             num_workers=2, shuffle=True)

    return trainloader, testloader

In [54]:
def train(model, dataloader, optimizer, criterion, device):
    model.train()
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

In [55]:
def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    test_loss = running_loss / len(dataloader.dataset)
    test_acc = running_corrects.double() / len(dataloader.dataset)
    return test_loss, test_acc.item()

In [56]:
def objective(trial):
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    num_epochs = trial.suggest_int('num_epochs', 10, 100)
    dropout_rate = trial.suggest_uniform('dropout_rate', 0, 0.5)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)

    device = torch.device("cpu")
    model = models.squeezenet1_1(pretrained=True)
    model.classifier[1] = torch.nn.Conv2d(512, 10, kernel_size=(1,1), stride=(1,1))
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = torch.nn.CrossEntropyLoss()
    trainloader, testloader = get_data_loaders(batch_size)

    for epoch in range(num_epochs):
        train(model, trainloader, optimizer, criterion, device)
        test_loss, test_acc = evaluate(model, testloader, criterion, device)
        trial.report(test_acc, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return test_acc

In [57]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[32m[I 2023-04-15 05:52:14,643][0m A new study created in memory with name: no-name-fae62cac-e7f0-41a2-9f24-10820b23f8d2[0m
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0, 0.5)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)


train_image_path example:  ./data/grayscale/train/yellowleafcurlvirus/40120ef4-16d4-4178-a76c-2de54651c37c___YLCV_NREC 2722.JPG
class example:  lateblight
Train size: 14468
Test size: 3692
train_image_path example:  ./data/grayscale/train/yellowleafcurlvirus/5ab6543c-e9ec-4b47-b8c5-ab41e27dffb1___YLCV_NREC 2139.JPG
class example:  lateblight
Train size: 14468
Test size: 3692
train_image_path example:  ./data/grayscale/train/yellowleafcurlvirus/4d69dd8a-409d-4f7f-841d-a07fdcb718ac___YLCV_GCREC 2071.JPG
class example:  lateblight
Train size: 14468
Test size: 3692
train_image_path example:  ./data/grayscale/train/bacterialspot/bc87927b-7858-4d02-ada3-d8dc075bd82b___UF.GRC_BS_Lab Leaf 9048.JPG
class example:  lateblight
Train size: 14468
Test size: 3692
train_image_path example:  ./data/grayscale/train/spidermites/51e4eabc-fe2b-44cc-88c8-bc95dafb5a57___Com.G_SpM_FL 8528.JPG
class example:  lateblight
Train size: 14468
Test size: 3692
train_image_path example:  ./data/grayscale/train/latebl

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x169eaaa60>
Traceback (most recent call last):
  File "/Users/aritrar/miniforge3/envs/torch-gpu/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1475, in __del__
    self._shutdown_workers()
  File "/Users/aritrar/miniforge3/envs/torch-gpu/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1439, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Users/aritrar/miniforge3/envs/torch-gpu/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/Users/aritrar/miniforge3/envs/torch-gpu/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/Users/aritrar/miniforge3/envs/torch-gpu/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/Users/aritrar/miniforge3/envs/torch-gpu/lib/python3.8/selectors

KeyboardInterrupt: 

In [None]:
print("Best trial:")
best_trial = study.best_trial
print(f"  Value: {best_trial.value}")
print("  Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

In [None]:
best_lr = best_trial.params['lr']
best_batch_size = best_trial.params['batch_size']
best_num_epochs = best_trial.params['num_epochs']
best_dropout_rate = best_trial.params['dropout_rate']
best_weight_decay = best_trial.params['weight_decay']

In [None]:
model = models.squeezenet1_1(pretrained=True)
model.classifier[1] = torch.nn.Conv2d(512, 10, kernel_size=(1,1), stride=(1,1))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=best_lr, weight_decay=best_weight_decay)
criterion = torch.nn.CrossEntropyLoss()
trainloader, testloader = get_data_loaders(best_batch_size)

In [None]:
for epoch in range(best_num_epochs):
    train(model, trainloader, optimizer, criterion, device)
    test_loss, test_acc = evaluate(model, testloader, criterion, device)
    print(f"Epoch {epoch+1}: Test accuracy = {test_acc:.4f}")