In [82]:
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn 

In [83]:
import torch.utils
import torch.utils.data


# Preprocessing shared by both splits: resize every image to 224x224,
# convert it to a tensor, then normalise the three channels with fixed
# per-channel statistics.
resize_step = transforms.Resize((224, 224))
to_tensor_step = transforms.ToTensor()
normalize_step = transforms.Normalize(
    mean=[0.4552, 0.4562, 0.4567],
    std=[0.1928, 0.1929, 0.1895],
)
transform = transforms.Compose([resize_step, to_tensor_step, normalize_step])

# The 'train' folder feeds optimisation; the 'val' folder is what the rest of
# the notebook refers to as the test split.
train_dataset = datasets.ImageFolder(
    './data/ex7-carTypes/carTypes/train',
    transform=transform,
)
test_dataset = datasets.ImageFolder(
    'data/ex7-carTypes/carTypes/val',
    transform=transform,
)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)

In [84]:

# Peek at a single training batch to confirm tensor shape and label encoding.
# An empty loader prints nothing.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    data, target = first_batch
    print(f'image size: {data.size()}')
    print(f"target: {target}")

image size: torch.Size([32, 3, 224, 224])
target: tensor([3, 0, 0, 3, 1, 4, 0, 5, 5, 4, 5, 4, 0, 1, 0, 4, 3, 1, 1, 5, 2, 5, 0, 5,
        3, 2, 3, 3, 2, 5, 0, 0])


In [85]:
# Per-channel statistics of what `train_loader` yields. The loader's transform
# already applies Normalize, so this measures the normalised images.
# NOTE: the "std" here is the average of per-image standard deviations, not a
# pooled standard deviation over all pixels of the dataset.
channel_mean_sum = 0.0
channel_std_sum = 0.0
n_samples = 0
for images, _ in train_loader:
    batch_size, n_channels = images.shape[0], images.shape[1]
    # Collapse the spatial dimensions so each (image, channel) pair is one row.
    pixels = images.view(batch_size, n_channels, -1)
    channel_mean_sum += pixels.mean(2).sum(0)
    channel_std_sum += pixels.std(2).sum(0)
    n_samples += batch_size

# Average the per-image statistics over every image seen.
mean = channel_mean_sum / n_samples
std = channel_std_sum / n_samples
print(f"Mean: {mean}")
print(f"Std: {std}")

Mean: tensor([-1.5595e-04,  3.1006e-05,  1.9247e-04])
Std: tensor([1.0000, 1.0001, 1.0001])


In [86]:
class Residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    The main path is conv1 -> bn1 -> ReLU -> conv2 -> bn2. Its result is added
    to the shortcut and passed through a final ReLU.

    Args:
        input_channels: Number of channels of the incoming feature map.
        output_channels: Number of channels produced by the block.
        use_conv3: When True the shortcut is a 1x1 convolution that maps the
            input to ``output_channels`` and applies the same ``stride`` as
            the main path; when False the input is added unchanged, so the
            caller must keep channels and spatial size unchanged.
        stride: Stride of the first 3x3 convolution (and of the 1x1 shortcut
            convolution when ``use_conv3`` is True).
    """

    def __init__(self, input_channels, output_channels, use_conv3=False ,stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=3, stride=stride, padding=1)
        self.conv2 = nn.Conv2d(in_channels=output_channels, out_channels=output_channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(output_channels)
        self.bn2 = nn.BatchNorm2d(output_channels)
        if use_conv3:
            # The shortcut must shrink the feature map exactly as conv1 does,
            # otherwise X + Y fails for any stride other than the one that was
            # previously hard-coded here (2).
            self.conv3 = nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None

    def forward(self, X):
        """Return ReLU(shortcut(X) + main_path(X))."""
        Y = self.bn1(self.conv1(X))
        Y = self.relu(Y)
        Y = self.bn2(self.conv2(Y))
        # Explicit None check rather than relying on module truthiness.
        if self.conv3 is not None:
            X = self.conv3(X)
        return F.relu(X + Y)

In [87]:
class ResNet(nn.Module):
    """ResNet-style classifier assembled from a caller-supplied block class.

    Layout: 7x7 stride-2 conv -> batch norm -> ReLU -> 3x3 stride-2 max pool
    -> four stages (64, 128, 256, 512 channels) -> global average pool ->
    linear classifier.

    Args:
        block: Block class, instantiated as
            ``block(in_channels, out_channels, use_conv3=..., stride=...)``.
            When ``use_conv3`` is True the block is expected to project its
            shortcut to ``out_channels`` using the given ``stride``.
        layer: Sequence of four ints, the number of blocks in each stage.
        num_classes: Size of the output logit vector.
    """

    def __init__(self, block, layer, num_classes=6):
        super(ResNet, self).__init__()
        # Channel count entering the next block; advanced by _make_layer.
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)

        # Stage 1 keeps resolution and width; stages 2-4 halve the resolution
        # and change the width in their first block.
        self.layer1 = nn.Sequential(*self._make_layer(block, 64, layer[0], first_block=True))
        self.layer2 = nn.Sequential(*self._make_layer(block, 128, layer[1], stride=2))
        self.layer3 = nn.Sequential(*self._make_layer(block, 256, layer[2], stride=2))
        self.layer4 = nn.Sequential(*self._make_layer(block, 512, layer[3], stride=2))

        self.adpavgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1, first_block=False):
        """Build the list of blocks for one stage.

        Args:
            block: Block class (see the class docstring for its call shape).
            out_channels: Channel count produced by every block of the stage.
            blocks: Number of blocks in the stage.
            stride: Stride of the stage's first block. Earlier revisions
                ignored this argument and always used 2.
            first_block: When True the stage never down-samples, whatever
                ``stride`` says.

        Returns:
            A list of ``blocks`` module instances. ``self.in_channels`` is
            advanced to ``out_channels`` once the first block is created.
        """
        layers = []
        for i in range(blocks):
            if i == 0:
                entry_stride = 1 if first_block else stride
                # A projection shortcut is only needed when the block changes
                # the resolution or the channel count.
                if entry_stride != 1 or self.in_channels != out_channels:
                    layers.append(block(self.in_channels, out_channels, use_conv3=True, stride=entry_stride))
                else:
                    layers.append(block(self.in_channels, out_channels, stride=1))
                # Every later block (and the next stage) sees out_channels.
                self.in_channels = out_channels
            else:
                layers.append(block(self.in_channels, out_channels, stride=1))
        return layers

    def forward(self, x):
        """Map an image batch of shape (N, 3, H, W) to (N, num_classes) logits."""
        x = self.bn1(self.conv1(x))
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.adpavgpool(x)
        x = torch.flatten(x, 1)
        out = self.fc(x)
        return out

In [88]:
# Smoke test: push one random 224x224 RGB image through a small [2, 2, 2, 2]
# configuration and display the resulting logits.
model = ResNet(block=Residual, layer=[2, 2, 2, 2])
X = torch.rand(1, 3, 224, 224)
model(X)

tensor([[-1.0141,  0.1729, -0.3993,  0.7125, -0.4176, -0.6316]],
       grad_fn=<AddmmBackward0>)

In [89]:
def _train_one_epoch(net, loader, loss_fn, opt, target_device):
    """Run one optimisation pass over `loader`; return the sum of batch losses."""
    net.train()
    running_loss = 0.0
    for inputs, labels in loader:
        inputs, labels = inputs.to(target_device), labels.to(target_device)
        opt.zero_grad()
        batch_loss = loss_fn(net(inputs), labels)
        batch_loss.backward()
        opt.step()
        running_loss += batch_loss.item()
    return running_loss


def _evaluate(net, loader, loss_fn, target_device):
    """Score `net` on `loader` in eval mode with gradients disabled.

    Returns a pair: (sum of batch losses, number of correct argmax predictions).
    """
    net.eval()
    with torch.no_grad():
        n_correct = 0.0
        loss_sum = 0.0
        for inputs, labels in loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            logits = net(inputs)
            loss_sum += loss_fn(logits, labels).item()
            n_correct += (logits.argmax(dim=1) == labels).sum().item()
    return loss_sum, n_correct


# Use the GPU when one is available.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = ResNet(Residual, [3, 4, 23, 3]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Early-stopping state: stop after `patience` consecutive epochs in which the
# summed loss on `test_loader` fails to improve on the best value so far.
best_val_loss = float('inf')
patience = 10
counter = 0

for epoch in range(200):
    train_loss = _train_one_epoch(model, train_loader, criterion, optimizer, device)
    test_loss, acc = _evaluate(model, test_loader, criterion, device)

    print(f"Epoch: {epoch + 1}, train_loss: {train_loss / len(train_loader):.4f}, test_loss: {test_loss / len(test_loader):.4f}, Accuracy: {acc / len(test_loader.dataset)}")

    improved = test_loss < best_val_loss
    if improved:
        # New best: reset the patience counter and checkpoint the weights.
        best_val_loss, counter = test_loss, 0
        torch.save(model.state_dict(), 'ResNet101_car.pth')
        continue

    counter += 1
    if counter >= patience:
        print("EarlyStopping")
        break


Epoch: 1, train_loss: 1.1141, test_loss: 1.1695, Accuracy: 0.65
Epoch: 2, train_loss: 0.5152, test_loss: 0.9735, Accuracy: 0.6666666666666666
Epoch: 3, train_loss: 0.3229, test_loss: 0.6224, Accuracy: 0.8
Epoch: 4, train_loss: 0.2586, test_loss: 0.2939, Accuracy: 0.9166666666666666
Epoch: 5, train_loss: 0.1995, test_loss: 0.4335, Accuracy: 0.8666666666666667
Epoch: 6, train_loss: 0.1994, test_loss: 0.2233, Accuracy: 0.9166666666666666
Epoch: 7, train_loss: 0.0988, test_loss: 0.5231, Accuracy: 0.8833333333333333
Epoch: 8, train_loss: 0.1583, test_loss: 0.3243, Accuracy: 0.9166666666666666
Epoch: 9, train_loss: 0.1197, test_loss: 0.1427, Accuracy: 0.9583333333333334
Epoch: 10, train_loss: 0.1825, test_loss: 0.4482, Accuracy: 0.875
Epoch: 11, train_loss: 0.1158, test_loss: 0.1914, Accuracy: 0.9
Epoch: 12, train_loss: 0.1189, test_loss: 0.1641, Accuracy: 0.9583333333333334
Epoch: 13, train_loss: 0.0195, test_loss: 0.1164, Accuracy: 0.9833333333333333
Epoch: 14, train_loss: 0.0116, test_los

In [91]:
# Restore the weights saved at the best epoch. map_location remaps the stored
# tensors onto the current `device`, so a checkpoint written from a CUDA
# session still loads when only the CPU is available.
model.load_state_dict(torch.load('ResNet101_car.pth', map_location=device))
model.eval()
with torch.no_grad():
    acc = 0.0        # running count of correct predictions
    test_loss = 0.0  # running sum of per-batch losses
    for X, y in test_loader:
        X = X.to(device)
        y = y.to(device)
        pred = model(X)
        loss = criterion(pred, y)
        test_loss += loss.item()
        acc += (pred.argmax(dim=1) == y).sum().item()
    # Loss is averaged over batches, accuracy over individual samples.
    print(f"test_loss: {test_loss / len(test_loader):.4f}, Accuracy: {acc / len(test_loader.dataset)}")


test_loss: 0.0174, Accuracy: 1.0
