In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, models
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.utils.data as data
import torchvision.utils as utils
import os

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [2]:
#dataset path
data_path_train = "data/training"
data_path_test = "data/testing"

In [3]:
# data transform, you can add different transform methods and resize image to any size
img_size = 224
transform = transforms.Compose([
                       transforms.Resize((img_size,img_size)),
                       transforms.ToTensor()
                       ])


#build dataset
dataset = datasets.ImageFolder(root=data_path_train,transform=transform)

# spilt your data into train and val
TOTAL_SIZE = len(dataset)
ratio = 0.9
train_len = round(TOTAL_SIZE * ratio)
valid_len = round(TOTAL_SIZE * (1-ratio))
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_len, valid_len])

#build dataloader
train_data_loader = data.DataLoader(train_dataset, batch_size=8, shuffle=True,  num_workers=4)
val_data_loader = data.DataLoader(val_dataset, batch_size=8, shuffle=True,  num_workers=4)

#check dataset
print(dataset)
print(dataset.class_to_idx)

Dataset ImageFolder
    Number of datapoints: 1646
    Root location: data/training
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )
{'Baked Potato': 0, 'Crispy Chicken': 1, 'Donut': 2, 'Fries': 3}




In [4]:
#train function
def train(model, criterion, optimizer):
    model.train()
    total_loss = 0.0
    total_correct = 0
    
    # Iterate over data
    for inputs, labels in train_data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        # backward + optimize
        loss.backward()
        optimizer.step()

        # statistics
        total_loss += loss.item()
        total_correct += torch.sum(preds == labels.data)
        
    avg_loss = total_loss / len(train_data_loader)
    accuracy = total_correct.double() / len(train_dataset) * 100

    print('Training Accuracy: {:.4f}% Training Loss: {:.4f}'.format(accuracy, avg_loss))
    return 

#validation function
def valid(model, criterion):
    model.eval()
    total_loss = 0.0
    total_correct = 0
    
    # Iterate over data
    for inputs, labels in val_data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # forward
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        # statistics
        total_loss += loss.item()
        total_correct += torch.sum(preds == labels.data)
        
    avg_loss = total_loss / len(val_data_loader)
    accuracy = total_correct.double() / len(val_dataset) * 100

    print('Validation Accuracy: {:.4f}% Validation Loss: {:.4f}'.format(accuracy, avg_loss))
    return accuracy

In [5]:
# using gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#build your model here
#model = models.resnet18(weights = models.ResNet18_Weights.DEFAULT)
#model = models.resnet18()
#model = models.resnet18(weights = models.ResNet18_Weights.IMAGENET1K_V1)
#model = models.resnet34(weights = models.ResNet34_Weights.DEFAULT)
#model = models.resnet34(weights = models.ResNet34_Weights.IMAGENET1K_V1)
#model = models.resnet50(weights = models.ResNet50_Weights.DEFAULT)
#model = models.resnet50(weights = models.ResNet50_Weights.IMAGENET1K_V2)
#model = models.resnet101(weights = models.ResNet101_Weights.DEFAULT)
#model = models.resnet101(weights = models.ResNet101_Weights.IMAGENET1K_V2)
#model = models.resnet152(weights = models.ResNet152_Weights.DEFAULT)
#model = models.resnet152(weights = models.ResNet152_Weights.IMAGENET1K_V2)

model = models.swin_v2_s(weights=models.Swin_V2_S_Weights.DEFAULT)
#model = models.swin_v2_t(weights = models.Swin_V2_T_Weights.DEFAULT)


In [6]:
model

SwinTransformer(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): Permute()
      (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (1): Sequential(
      (0): SwinTransformerBlockV2(
        (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (attn): ShiftedWindowAttentionV2(
          (qkv): Linear(in_features=96, out_features=288, bias=True)
          (proj): Linear(in_features=96, out_features=96, bias=True)
          (cpb_mlp): Sequential(
            (0): Linear(in_features=2, out_features=512, bias=True)
            (1): ReLU(inplace=True)
            (2): Linear(in_features=512, out_features=3, bias=False)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (0): Linear(in_features=96, out_features=384, bias=True)
          (1): GELU(approximate='

In [7]:
model.features[0][0].weight
#model.conv1.weight

Parameter containing:
tensor([[[[ 2.1654e-02, -2.6789e-02, -2.3035e-02, -2.9833e-02],
          [ 1.3321e-02, -5.0797e-02,  4.4563e-02,  2.0402e-02],
          [-1.8994e-02, -3.0538e-02,  5.6745e-02,  3.3531e-02],
          [ 3.9754e-02, -2.9815e-02,  1.8103e-02,  5.3637e-03]],

         [[-5.6664e-03, -4.7441e-02, -4.2096e-02, -7.7939e-02],
          [-3.1016e-02, -6.0178e-02,  8.4583e-02,  9.1222e-03],
          [-6.3308e-02,  2.4530e-03,  1.2256e-01,  3.7304e-02],
          [ 2.6121e-02, -2.5962e-02,  3.2726e-02, -1.1806e-02]],

         [[ 2.8203e-02,  1.2786e-02,  6.1337e-03, -1.0342e-02],
          [ 1.6456e-03, -1.1992e-02,  4.6317e-02, -9.2886e-03],
          [-3.2019e-02, -9.2724e-05,  4.9178e-02, -7.9906e-04],
          [-1.1062e-03, -3.0253e-02, -1.1510e-04, -2.4908e-02]]],


        [[[-5.7383e-03, -4.3338e-02, -5.0308e-02, -2.7943e-02],
          [ 2.8659e-03, -2.1501e-02, -4.1131e-02, -7.7814e-03],
          [ 1.9901e-02, -3.3662e-02, -3.1174e-02,  7.5468e-03],
          

In [None]:
####################  implement your optimizer ###################################
## you can use any training methods if you want (ex:lr decay, weight decay.....)
learning_rate = 0.00001
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas = (0.99,0.999), weight_decay = 0.00001)
criterion = nn.CrossEntropyLoss()
epochs = 100

# start training
model.to(device=device)
acc_best = 0.0

print('--------------start training--------------')
for epoch in range(1, epochs+1):
    
    print('epoch:', epoch)
    train(model, criterion, optimizer)
    accuracy = valid(model, criterion)
    writer.add_scalar('Learning_rate', learning_rate, epoch)
    writer.add_histogram('Weights', model.features[0][0].weight, epoch)
    writer.add_histogram('Gradients', model.features[0][0].weight.grad, epoch)
    '''for tag, value in model.named_parameters():
        writer.add_histogram('Gradients', value.grad, epoch)'''
    if accuracy >= acc_best:
        acc_best = accuracy
        print("model saved")
        torch.save(model, "model.pth")
    #learning_rate *= 0.98

--------------start training--------------
epoch: 1
Training Accuracy: 47.5354% Training Loss: 1.9278
Validation Accuracy: 85.4545% Validation Loss: 0.4836
model saved
epoch: 2
Training Accuracy: 92.2350% Training Loss: 0.2746
Validation Accuracy: 95.7576% Validation Loss: 0.1392
model saved
epoch: 3
Training Accuracy: 96.0162% Training Loss: 0.1340
Validation Accuracy: 95.7576% Validation Loss: 0.1188
model saved
epoch: 4
Training Accuracy: 97.2991% Training Loss: 0.0941
Validation Accuracy: 95.1515% Validation Loss: 0.0964
epoch: 5
Training Accuracy: 98.1769% Training Loss: 0.0563
Validation Accuracy: 95.7576% Validation Loss: 0.1093
model saved
epoch: 6
Training Accuracy: 98.9196% Training Loss: 0.0351
Validation Accuracy: 96.9697% Validation Loss: 0.1162
model saved
epoch: 7
Training Accuracy: 99.5949% Training Loss: 0.0176
Validation Accuracy: 96.3636% Validation Loss: 0.1309
epoch: 8
Training Accuracy: 99.6624% Training Loss: 0.0137
Validation Accuracy: 96.9697% Validation Loss: 

In [None]:
transform_test = transforms.Compose([transforms.Resize((img_size,img_size)),
                                    transforms.ToTensor()
                                    ])

dataset_test = datasets.ImageFolder(root=data_path_test, transform=transform_test)
dataloader_test  = data.DataLoader(dataset_test, batch_size=8, shuffle=False, num_workers=4) 

In [None]:
# load the model so that you don't need to train the model again
test_model = torch.load("model.pth").to(device)

In [None]:
def test(model):
    with torch.no_grad():
        model.eval()
        bs = dataloader_test.batch_size
        result = []
        for i, (data, target) in enumerate(dataloader_test):
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, preds = torch.max(output, 1, keepdim=True)
            
            arr = preds.data.cpu().numpy()
            for j in range(preds.size()[0]):
                file_name = dataset_test.samples[i*bs+j][0].split('/')[-1]
                result.append((file_name,preds[j].cpu().numpy()[0]))
    return result

In [None]:
result = test(test_model)

In [None]:
with open ('ID_result.csv','w') as f:
    f.write('ID,label\n')
    for data in result:
        f.write(data[0]+','+str(data[1])+'\n')