In [13]:
import torch
import glob
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from torchinfo import summary


In [14]:
# Check whether Nvidia GPU is available
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():  # Multi-Process Service
    device = torch.device('mps')
else:
    device = torch.device('cpu')

Set paths for the training and testing data. The training data is located in the seg_train folder, and the testing data is located in the seg_test folder.

In [15]:
# Paths
train_path = '../input/intel-image-classification/seg_train/seg_train/'
test_path = '../input/intel-image-classification/seg_test/seg_test/'

The transforms.Compose function is a convenient way to chain together multiple transformations. In this case, two transformations are being applied:  
- transforms.Resize((150, 150)): This resizes each image to be 150x150 pixels. This is necessary because neural networks typically require that all inputs have the same size.  
- transforms.ToTensor(): This converts the image data from a PIL Image object into a PyTorch tensor, which is the data type expected by PyTorch's neural network classes. It also scales the image's pixel intensity values from 0-255 to 0-1.

In [16]:
# Transformations
transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
])


The ImageFolder class is a PyTorch dataset class that is used to load data from a directory containing subdirectories of images. Each subdirectory represents a different class, and the images within that subdirectory are examples of that class.

Split the training data into training and validation sets using the random_split function. This function takes the dataset to be split and a list of split sizes as input. The split sizes should add up to the length of the dataset. In this case, the training data is split into a 50-50 train-validation split.

Create DataLoader objects for the training, validation, and testing data using the DataLoader class. This class takes the dataset to be loaded, the batch size, and a flag indicating whether to shuffle the data as input. The DataLoader class is used to load data in batches during training and evaluation.

In [17]:
# Load data
train_data = ImageFolder(train_path, transform=transform)
test_data = ImageFolder(test_path, transform=transform)

# Split data
train_size = int(0.8 * len(train_data))
val_size = len(train_data) - train_size
train_data, val_data = random_split(train_data, [train_size, val_size])

# Data loaders
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)

## Model
The model is defined using the nn.Sequential class, which allows you to create a neural network by chaining together layers. The model consists of the following layers:

- Convolutional layer with 3 input channels, 32 output channels, a kernel size of 3, and padding of 1
- ReLU activation function
- Max pooling layer with a kernel size of 2 and a stride of 2
- Convolutional layer with 32 input channels, 64 output channels, a kernel size of 3, and padding of 1
- ReLU activation function
- Max pooling layer with a kernel size of 2 and a stride of 2
- Convolutional layer with 64 input channels, 128 output channels, a kernel size of 3, and padding of 1
- ReLU activation function
- Max pooling layer with a kernel size of 2 and a stride of 2
- Fully connected layer with 128 * 18 * 18 input features and 512 output features
- ReLU activation function
- Fully connected layer with 512 input features and 6 output features (one for each class)

In [18]:
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 18 * 18, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, input_data):
        output = self.pool(nn.functional.relu(self.conv1(input_data)))
        output = self.pool(nn.functional.relu(self.conv2(output)))
        output = self.pool(nn.functional.relu(self.conv3(output)))
        output = output.view(-1, 128 * 18 * 18)
        output = nn.functional.relu(self.fc1(output))
        output = self.fc2(output)
        return output


model = CNN(num_classes=6).to(device)
summary(model, input_size=(64, 3, 150, 150))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [64, 6]                   --
├─Conv2d: 1-1                            [64, 32, 150, 150]        896
├─MaxPool2d: 1-2                         [64, 32, 75, 75]          --
├─Conv2d: 1-3                            [64, 64, 75, 75]          18,496
├─MaxPool2d: 1-4                         [64, 64, 37, 37]          --
├─Conv2d: 1-5                            [64, 128, 37, 37]         73,856
├─MaxPool2d: 1-6                         [64, 128, 18, 18]         --
├─Linear: 1-7                            [64, 512]                 21,234,176
├─Linear: 1-8                            [64, 6]                   3,078
Total params: 21,330,502
Trainable params: 21,330,502
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 15.78
Input size (MB): 17.28
Forward/backward pass size (MB): 642.94
Params size (MB): 85.32
Estimated Total Size (MB): 745.55

In [19]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Training & Validation

The training loop consists of two main parts: training and validation. In the training part, the model is set to training mode using model.train(), and the training data is passed through the model in batches. The loss is calculated using the cross-entropy loss function, and the gradients are computed and updated using the optimizer. The training accuracy is also calculated by comparing the model's predictions to the ground truth labels.

In [20]:
train_count = len(glob.glob(train_path+'/**/*.jpg'))
test_count = len(glob.glob(test_path+'/**/*.jpg'))
print(train_count, test_count)

# Training
num_of_epochs = 25
for epoch in range(num_of_epochs):
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    running_loss = 0.0

    for i, data in enumerate(train_loader, 0):
        images, labels = data

        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        train_loss += loss.cpu().data * images.size(0)
        _, prediction = torch.max(outputs.data, 1)

        train_accuracy += int(torch.sum(prediction == labels.data))

        if i % 100 == 99:
            print(f'Epoch: {epoch + 1}, Batch: {i + 1}, Loss: {running_loss / 100}')
            running_loss = 0.0

    train_accuracy = train_accuracy / train_count
    train_loss = train_loss / train_count

    print(f'Epoch: {epoch + 1}, Training Accuracy: {train_accuracy}, Training Loss: {train_loss}')

    # Validation
    model.eval()
    val_accuracy = 0.0
    for i, data in enumerate(val_loader):

        images, labels = data
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, prediction = torch.max(outputs.data, 1)
        val_accuracy += int(torch.sum(prediction == labels.data))

    val_accuracy = val_accuracy / val_size
    print(f'Epoch: {epoch + 1}, Validation Accuracy: {val_accuracy}')

    if val_accuracy > 0.9:
        torch.save(model.state_dict(), 'best_model.pth')
        break

    torch.save(model.state_dict(), 'best_model.pth')


14034 3000


RuntimeError: Input type (MPSFloatType) and weight type (torch.FloatTensor) should be the same