In [1]:
import copy

from tqdm import tqdm

import torch
from torch import nn
from torch.utils.data import DataLoader, random_split

from torchvision import transforms, datasets
from sklearn.metrics import accuracy_score

import PIL
import PIL.Image

In [2]:
# --- Input geometry ---------------------------------------------------------
# NOTE(review): not referenced by any other visible cell; the transform
# pipeline performs no resize, so images are presumably already this size.
IMAGE_SIZE = 100

# --- Optimisation hyper-parameters ------------------------------------------
EPOCHES, BATCH_SIZE = 5, 256
LEARNING_RATE = 1e-6

# --- Dataset locations (ImageFolder roots, relative to the notebook) --------
PATH_TRAIN, PATH_TESTS = 'Train_data', 'Tests_data'

In [3]:
class CNN2(nn.Module):
    """Compact three-stage convolutional classifier emitting 131 raw logits.

    Each stage is an unpadded convolution, a ReLU and a 2x2 max-pool. The
    flattened feature size of 64*10*10 matches 100x100 RGB inputs
    (100 -> 96 -> 48 -> 46 -> 23 -> 21 -> 10 per spatial side).
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size) of every convolution stage.
        conv_specs = ((3, 16, 5), (16, 32, 3), (32, 64, 3))

        stack = []
        for c_in, c_out, k in conv_specs:
            stack.extend((
                nn.Conv2d(c_in, c_out, kernel_size=k),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ))

        # Fully connected head; dropout sits right before the output layer.
        stack.extend((
            nn.Flatten(),
            nn.Linear(64 * 10 * 10, 1000),
            nn.ReLU(),
            nn.Linear(1000, 500),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(500, 131),
        ))

        self.network = nn.Sequential(*stack)

    def forward(self, xb):
        """Run the batch `xb` through the sequential stack and return the logits."""
        logits = self.network(xb)
        return logits

In [22]:
class CNN(nn.Module):
    """Six-stage convolutional classifier emitting 131 raw logits.

    Every stage is a 3x3 convolution (stride 1, padding 1, so the spatial size
    is kept), a ReLU and a 2x2 max-pool that halves the spatial size. For
    100x100 inputs the map shrinks 100 -> 50 -> 25 -> 12 -> 6 -> 3 -> 1, which
    is why the first linear layer takes exactly 250 features.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the network followed by the width of each stage.
        widths = (3, 64, 128, 150, 200, 250, 250)

        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Classifier head on top of the flattened 250-dimensional feature vector.
        layers += [
            nn.Flatten(),
            nn.Linear(250, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024, 131),
        ]

        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Run the batch `xb` through the sequential stack and return the logits."""
        logits = self.network(xb)
        return logits

In [23]:
def random_split_ratio(dataset, test_size=.2, random_state=None):
    """Randomly split `dataset` into two subsets by ratio.

    Args:
        dataset: Any sized, indexable dataset accepted by
            `torch.utils.data.random_split`.
        test_size: Fraction of the samples that goes into the second subset.
            The second subset gets `int(len(dataset) * test_size)` samples and
            the first subset gets the remainder.
        random_state: Optional integer seed. When given (including `0`), the
            split is drawn from a dedicated, seeded generator and is therefore
            reproducible. When `None`, the global torch RNG is used.

    Returns:
        Tuple `(first_split, second_split)` of `torch.utils.data.Subset`.
    """
    total = len(dataset)
    second_part = int(total * test_size)
    first_part = total - second_part

    split_kwargs = {}
    # Compare against None explicitly: a seed of 0 is a perfectly valid seed
    # and must not silently fall back to the unseeded global generator.
    if random_state is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(random_state)

    first_split, second_split = random_split(
        dataset, lengths=[first_part, second_part], **split_kwargs)

    return first_split, second_split

In [24]:
def verify_image(fp):
    """Return True when `fp` can be opened and verified by PIL, else False.

    Intended as the `is_valid_file` predicate of `datasets.ImageFolder`, so it
    must never raise for an unreadable or corrupt file.

    Args:
        fp: Path (or file object) handed to `PIL.Image.open`.

    Returns:
        bool: True if `Image.verify()` completed without error.
    """
    try:
        # The context manager guarantees the underlying file handle is closed
        # even when verification fails part-way through.
        with PIL.Image.open(fp) as image:
            image.verify()
        return True
    except Exception:
        # Best-effort check: any failure means "not a usable image".
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long directory scan can be interrupted.
        return False

In [25]:
# Prefer Apple's Metal (MPS) backend when it is available, otherwise use the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print('CNN training on {}'.format(device))

CNN training on mps


In [26]:
# Place the model on the target device right away, so that it already lives
# there when the optimizer is constructed from `model.parameters()` (the
# ordering recommended by the torch.optim documentation).
model = CNN().to(device)

In [27]:
# Augmentation pipeline: random flip, rotation of up to +/-120 degrees and a
# mild colour jitter, followed by conversion to a tensor.
transform = transforms.Compose(transforms=[
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(120),
    transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),  # brightness, contrast, saturation, hue
    transforms.ToTensor(),
])

In [28]:
# Evaluation pipeline: tensor conversion only. Random flips, rotations and
# colour jitter belong to training; applying them to validation/test data
# makes the reported loss and accuracy noisy and non-reproducible.
eval_transform = transforms.ToTensor()

dataset = datasets.ImageFolder(root=PATH_TRAIN, transform=transform, is_valid_file=verify_image)
train_dataset, valid_split = random_split_ratio(dataset, random_state=42)

# A shallow copy shares the already-verified sample list with `dataset` (no
# second scan of the image folder) but carries its own transform. The
# validation subset then reuses exactly the indices chosen by the split.
valid_source = copy.copy(dataset)
valid_source.transform = eval_transform
valid_dataset = torch.utils.data.Subset(valid_source, valid_split.indices)

# Only the training data needs shuffling; evaluation order does not affect metrics.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)


tests_dataset = datasets.ImageFolder(root=PATH_TESTS, transform=eval_transform, is_valid_file=verify_image)
tests_loader = DataLoader(tests_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f'Train data length: {len(train_loader.dataset)}\n'
      f'Valid data length: {len(valid_loader.dataset)}\n'
      f'Tests data length: {len(tests_loader.dataset)}')

Train data length: 54154
Valid data length: 13538
Tests data length: 22688


In [29]:
# Multi-class objective applied to the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter, using the step size from the config cell.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [40]:
# Report MPS memory usage in GiB, then release cached blocks.
# Uses the public `torch.mps` API instead of the private `torch._C._mps_*`
# bindings, and is skipped entirely when the MPS backend is not available.
if torch.backends.mps.is_available():
    bytes_per_gib = 1024 ** 3
    print(f'GPU memory allocated: {(torch.mps.driver_allocated_memory() / bytes_per_gib):.3f}')
    print(f'GPU memory occupied : {(torch.mps.current_allocated_memory() / bytes_per_gib):.3f}')
    torch.mps.empty_cache()

GPU memory allocated: 3.602
GPU memory occupied : 1.800


In [38]:
# Train for EPOCHES epochs; after each epoch evaluate on the validation loader
# and print the average losses, the validation accuracy and (on MPS) the
# current GPU memory usage.

# Moving the model is loop-invariant, so do it once (no-op if already there).
model.to(device)

for epoch in range(EPOCHES):

    # Enter training mode explicitly at the start of every epoch, so dropout is
    # active even if an earlier cell left the model in eval mode.
    model.train()

    # Sums of per-sample losses; divided by the dataset sizes when reporting.
    train_running_loss = 0
    valid_running_loss = 0
    bar = tqdm(train_loader, total=len(train_loader), ncols=100)
    bar.set_description("Training:     ")

    for x_batch, y_batch in bar:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        # Clear gradients *before* the forward pass: stale gradients left by an
        # interrupted previous run can then never leak into this step.
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight by batch size so the last,
        # possibly smaller, batch is accounted for correctly.
        train_running_loss += (loss.item() * x_batch.size(0))

    model.eval()
    with torch.no_grad():

        valid_predict = []
        valid_targets = []

        vbar = tqdm(valid_loader, total=len(valid_loader), ncols=100)
        vbar.set_description("Validation:   ")

        for x_batch, y_batch in vbar:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)

            valid_running_loss += (loss.item() * x_batch.size(0))

            valid_predict.extend(torch.argmax(outputs, dim=1).cpu().numpy())
            valid_targets.extend(y_batch.cpu().numpy())

    print(f'Epoch:               {epoch + 1} / {EPOCHES}\n'
          f'Avarage train loss:  {(train_running_loss / len(train_loader.dataset)):.3f}\n'
          f'Avarage valid loss:  {(valid_running_loss / len(valid_loader.dataset)):.3f}\n'
          f'Validation accuracy: {(accuracy_score(valid_targets, valid_predict)):.3f}')

    # Memory figures (GiB) only exist for the MPS backend; skip them on CPU.
    if device == "mps":
        print(f'GPU memory allocated: {(torch.mps.driver_allocated_memory() / 1024 ** 3):.3f}')
        print(f'GPU memory occupied : {(torch.mps.current_allocated_memory() / 1024 ** 3):.3f}')

Training:     :   0%|                                                       | 0/212 [00:01<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 7.79 GB, other allocations: 1.22 GB, max allowed: 9.07 GB). Tried to allocate 78.12 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [None]:
# Evaluate the model on the held-out test loader and print the accuracy.

# Make sure the model is on the target device even when the training cell was
# skipped (e.g. after only loading a checkpoint); no-op if already there.
model.to(device)
model.eval()

test_predict = []
test_targets = []

bar = tqdm(tests_loader, total=len(tests_loader), ncols=100)
bar.set_description("Testing: ")

# Inference only: disabling autograd avoids building a graph for every batch,
# which would otherwise waste a large amount of device memory.
with torch.no_grad():
    for x_batch, y_batch in bar:
        outputs = model(x_batch.to(device))
        test_predict.extend(torch.argmax(outputs, dim=1).cpu().numpy())
        # Targets are only compared on the host, so they never need to leave the CPU.
        test_targets.extend(y_batch.numpy())

accuracy = accuracy_score(test_targets, test_predict)
print(f'Test Accuracy: {100 * accuracy:.2f}%')

In [45]:
# Persist only the learned parameters (state dict), not the whole module object.
torch.save(obj=model.state_dict(), f='fruits_classifier_new.pth')

In [61]:
# Restore parameters from a saved state dict.
# - map_location='cpu' lets the file load on machines that lack the device it
#   was saved from; load_state_dict then copies the values into the model's
#   existing parameters on whatever device they live on.
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which is all a state dict needs and avoids executing arbitrary pickled code.
model.load_state_dict(
    torch.load('fruits_classifier_99-76.pth', map_location='cpu', weights_only=True))

<All keys matched successfully>

In [3]:
# Release cached MPS memory through the public API (the private
# `torch._C._mps_emptyCache` binding is not a stable interface); skipped when
# the MPS backend is unavailable.
if torch.backends.mps.is_available():
    torch.mps.empty_cache()