In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F

In [2]:
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=self.expansion*out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*out_channels)
            )
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
    

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=102):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        # Calculate the input size for the fully connected layer
        fc_input_size = self._get_fc_input_size()

        self.linear = nn.Linear(fc_input_size, num_classes)

    def _get_fc_input_size(self):
        dummy_input = torch.zeros(1, 3, 224, 224)
        output = self.forward_features(dummy_input)
        return output.view(-1).shape[0]

    def forward_features(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        return out

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.forward_features(x)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

    
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

In [3]:
# HYPERPARAMETERS
learning_rate = 0.003 # 0.0001 = 41.5% accuracy
batch_size = 32
num_epochs = 6
momentum = 0.9
weight_decay = 1e-3
show_accuracy_every = 3


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [4]:
import torchvision
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
transform2 = transforms.Compose([
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

])

transform3 = transforms.Compose([
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Load dataset

train_set = torchvision.datasets.Flowers102(root="data", download=True, transform=transform)
train_set2 = torchvision.datasets.Flowers102(root="data", download=True, transform=transform2)
train_set3 = torchvision.datasets.Flowers102(root="data", download=True, transform=transform3)
test_set = torchvision.datasets.Flowers102(root="data", download=True, split="test", transform=test_transform)
val_set = torchvision.datasets.Flowers102(root="data", download=True, split="val", transform=test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
train_loader2 = DataLoader(train_set2, batch_size=batch_size, shuffle=True)
train_loader3 = DataLoader(train_set3, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
print(len(train_set), len(test_set), len(val_set))


1020 6149 1020


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
model = ResNet18().to(device)


In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


In [8]:
import requests
def post_discord(epoch, loss, accuracy, val_loss, val_accuracy):
    data = {
        "username": "Flower Classifier"
    }
    data["embeds"] = [
        {
            "title": "Epoch {} Results".format(epoch),
            "color": 0x00ff00,
            "fields": [
                {
                    "name": "Training Loss",
                    "value": f"{loss}",
                    "inline": False
                },
                {
                    "name": "Train Accuracy",
                    "value": f"{accuracy}",
                    "inline": False
                },
                {
                    "name": "Validation Loss",
                    "value": f"{val_loss}",
                    "inline": False
                },
                {
                    "name": "Validation Accuracy",
                    "value": f"{val_accuracy}",
                    "inline": False
                }
            ]
        }
    ]
    r = requests.post("https://discord.com/api/webhooks/1052587856343867392/SnR2U4HIdeF6ShECr0k1Mit6yrl3HgjtCMk_LykGD8eQ1qsZViY8HLeYsoBUXOHYfbgP", json=data)
    print(r.status_code)
post_discord(0, 0, 0, 0, 0)

204


In [9]:

val_size = 1020
train_size = 1020
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_correct = 0
    total = 0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        train_correct += predicted.eq(labels).sum().item()

    scheduler.step()
    if epoch % show_accuracy_every == 0 or epoch == 0 or epoch == num_epochs - 1:
    # Evaluate the model on the validation
        model.eval()
        val_loss = 0.0
        val_correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()

        train_accuracy = 100 * train_correct / train_size
        val_accuracy = 100 * val_correct / val_size

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/train_size:.4f}, Train Accuracy: {train_accuracy:.2f}%, Val Loss: {val_loss/val_size:.4f}, Val Accuracy: {val_accuracy:.2f}%')
        post_discord(epoch+1, train_loss/train_size, train_accuracy, val_loss/val_size, val_accuracy)
    else:
        print(f'Epoch [{epoch+1}/{num_epochs}]')

Epoch [1/100], Train Loss: 0.5099, Train Accuracy: 1.76%, Val Loss: 4.4585, Val Accuracy: 1.47%
204
Epoch [2/100]
Epoch [3/100]
Epoch [4/100], Train Loss: 0.1401, Train Accuracy: 4.51%, Val Loss: 0.1383, Val Accuracy: 2.84%
204
Epoch [5/100]


KeyboardInterrupt: 