In [12]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import torch.nn.functional as F
import torch.optim as optim

In [13]:
device = torch.device("cuda")
device

device(type='cuda')

In [14]:
train_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

dataset = torchvision.datasets.ImageFolder(root ="data/train/", transform=train_transform)

total_size = len(trainset)
train_size = int(0.95 * total_size)
val_size = total_size - train_size

# Split the dataset
train_dataset, val_dataset = torch.utils.data.random_split(trainset, [train_size, val_size])

batch_size = 32

# DataLoader for the training set
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# DataLoader for the validation set
valloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

In [15]:
len(dataset.classes)

50

In [73]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ImageClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        ## Convolutional layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=100, kernel_size=17, stride=1, padding=8)
        self.dropout1 = nn.Dropout2d(0.3)  # Dropout for conv layers, adjust p as needed
        self.conv1_2 = nn.Conv2d(in_channels=100, out_channels=100, kernel_size=5, stride=1, padding=2)
        self.dropout1_2 = nn.Dropout2d(0.3)  # Adjust the dropout rate as necessary
        self.conv1_3 = nn.Conv2d(in_channels=100, out_channels=100, kernel_size=3, stride=1, padding=1)
        self.dropout1_3 = nn.Dropout2d(0.3)
        ## Max pooling layer
        self.pool1 = nn.MaxPool2d(kernel_size=5, stride=2)
        self.conv2 = nn.Conv2d(in_channels=100, out_channels=90, kernel_size=5, stride=1, padding=2)
        self.dropout2 = nn.Dropout2d(0.3)  # Again, adjust as necessary
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=90, out_channels=80, kernel_size=5, stride=1, padding=2)
        self.dropout3 = nn.Dropout2d(0.3)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        ## Fully connected layers
        self.fc1 = nn.Linear(2880, 120)
        self.dropout4 = nn.Dropout(0.5)  # Higher dropout rate for fully connected layers
        self.fc2 = nn.Linear(120, 84)
        self.dropout5 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(84, 50)

    def forward(self, x):
        x = F.gelu(self.conv1(x))
        x = self.dropout1(x)
        x = F.gelu(self.conv1_2(x))
        x = self.dropout1_2(x)
        x = self.pool1(F.gelu(self.conv1_3(x)))
        x = self.dropout1_3(x)
        x = self.pool2(F.gelu(self.conv2(x)))
        x = self.dropout2(x)
        x = self.pool3(F.gelu(self.conv3(x)))
        x = self.dropout3(x)
        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.gelu(self.fc1(x))
        x = self.dropout4(x)
        x = F.gelu(self.fc2(x))
        x = self.dropout5(x)
        x = self.fc3(x)
        return x



net = ImageClasifier().to(device)
net

ImageClasifier(
  (conv1): Conv2d(3, 100, kernel_size=(17, 17), stride=(1, 1), padding=(8, 8))
  (conv1_2): Conv2d(100, 100, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv1_3): Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=5, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(100, 90, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(90, 80, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2880, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=50, bias=True)
)

In [74]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(net.parameters(), lr=0.0001, weight_decay=0.00001)

In [75]:
epochs = 1000

for epoch in range(epochs):  # loop over the dataset multiple times
    net.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if(i > 256):
            break

    correct = 0
    total = 0
    val_loss = 0
    net.eval()
    with torch.no_grad():
        for j, data in enumerate(valloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # calculate outputs by running images through the network
            outputs = net(inputs)
            # the class with the highest energy is what we choose as prediction
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'[{epoch+1}/{epochs}] train loss: {running_loss / i} val loss: {val_loss / j} accuracy: {100 * correct / total}%')
    running_loss = 0.0

print('Finished Training')

[1/1000] train loss: 3.7405829197701777 val loss: 3.5499185071374377 accuracy: 9.429675073846854%
[2/1000] train loss: 3.466511676283662 val loss: 3.4355580441273044 accuracy: 12.610770279481937%
[3/1000] train loss: 3.284396978667738 val loss: 3.2456580078514823 accuracy: 15.700977050670302%
[4/1000] train loss: 3.2010662908220104 val loss: 3.2035493955124905 accuracy: 17.496023630992955%
[5/1000] train loss: 3.0962981964363663 val loss: 3.095564871808908 accuracy: 19.359236537150647%
[6/1000] train loss: 3.0435985804531827 val loss: 3.0310152099080327 accuracy: 21.1542831174733%
[7/1000] train loss: 2.959446026657342 val loss: 2.9660080088316088 accuracy: 22.3585548738923%
[8/1000] train loss: 2.893462430642273 val loss: 2.9292412726548465 accuracy: 22.7902749375142%
[9/1000] train loss: 2.8514799515097056 val loss: 2.8906882261707834 accuracy: 24.062713019768236%
[10/1000] train loss: 2.8125644708885758 val loss: 2.8350760988945507 accuracy: 25.403317427857306%
[11/1000] train loss:

In [76]:
correct = 0
total = 0
with torch.no_grad():
    for j, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # calculate outputs by running images through the network
        outputs = net(inputs)
        # the class with the highest energy is what we choose as prediction
        loss = criterion(outputs, labels)
        val_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {(100 * correct / total)}')

Accuracy of the network on the 10000 test images: 97.52421959095803
