In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import torch
from torch import from_numpy, cuda
from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch.optim as optim
from torchvision import transforms, models
from tqdm import tqdm
from torch.optim.lr_scheduler import CosineAnnealingLR, SequentialLR, LinearLR
from PIL import Image

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
# List the dataset root to check its layout (image folder, meta folder, docs).
!ls /kaggle/input/food-101/food-101/food-101/

images	license_agreement.txt  meta  README.txt


In [4]:
# Read the class names, one per line, from the dataset's meta directory.
with open("/kaggle/input/food-101/food-101/food-101/meta/classes.txt", mode='r') as classes_file:
    classes = classes_file.read().splitlines()

In [5]:
class Label_encoder:
    """Two-way mapping between label names and integer indices.

    Indices follow the order in which the labels are supplied.

    Attributes:
        labels: dict mapping each label name to its index.
    """

    def __init__(self, labels):
        """Build the mapping from an iterable of label names."""
        # Keep the ordered names once so index -> label lookups are O(1)
        # instead of rebuilding a list of the dict keys on every call.
        self._names = list(labels)
        self.labels = {label: idx for idx, label in enumerate(self._names)}

    def get_label(self, idx):
        """Return the label name stored at position ``idx``.

        Raises:
            IndexError: if ``idx`` is out of range.
        """
        return self._names[idx]

    def get_idx(self, label):
        """Return the index of ``label``, or ``None`` when the label is unknown."""
        return self.labels.get(label)

# Shared encoder built from the class list; Food101.__getitem__ uses it to turn
# label names into integer targets.
encoder = Label_encoder(classes)

In [6]:
class Food101(Dataset):
    """Image dataset backed by a DataFrame with ``path`` and ``label`` columns.

    Args:
        dataframe: one row per image; ``path`` is the image file and ``label``
            the class name.
        transform: optional callable applied to the loaded RGB PIL image.
        label_encoder: optional object exposing ``get_idx(label)``. When
            omitted, the module-level ``encoder`` is used at lookup time.
    """

    def __init__(self, dataframe, transform=None, label_encoder=None):
        # Positional indexing in __getitem__ relies on a clean 0..n-1 index.
        self.dataframe = dataframe.reset_index(drop=True)
        self.transform = transform
        self.label_encoder = label_encoder

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for the row at position ``idx``."""
        img_name = self.dataframe.path.iloc[idx]
        # Image.open is lazy and keeps the file open; convert() forces the
        # pixel data to load and returns an independent RGB image, so the
        # file handle can be closed as soon as the block exits.
        with Image.open(img_name) as source_image:
            image = source_image.convert('RGB')

        label_encoder = self.label_encoder if self.label_encoder is not None else encoder
        label = label_encoder.get_idx(self.dataframe.label.iloc[idx])

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [7]:
def prep_df(path: str,
            img_dir: str = "/kaggle/input/food-101/food-101/food-101/images/",
            random_state=42) -> pd.DataFrame:
    """Build a shuffled table of image paths and labels from a split file.

    Each non-blank line of the split file is expected to look like
    ``<label>/<image_id>``; the label is the part before the first ``/``.

    Args:
        path: text file listing one ``<label>/<image_id>`` entry per line.
        img_dir: directory that contains the per-class image folders.
        random_state: seed for the row shuffle, so the order is reproducible
            across runs. Pass ``None`` for a different order on every call.

    Returns:
        DataFrame with columns ``label`` and ``path`` (full ``.jpg`` path),
        shuffled, with a fresh 0..n-1 index.
    """
    with open(path, 'r') as f:
        # Skip blank lines so a trailing newline or empty row cannot create
        # an entry with an empty label and a bogus ".jpg" path.
        names = [line.strip() for line in f if line.strip()]

    # Accept the directory with or without a trailing slash.
    prefix = img_dir + "/" if img_dir and not img_dir.endswith("/") else img_dir

    df = pd.DataFrame({
        'label': [n.split('/')[0] for n in names],
        'path': [prefix + n + ".jpg" for n in names],
    })
    # frac=1 samples every row without replacement, i.e. a full shuffle.
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)

In [8]:
# Build the shuffled label/path tables for the train and test split lists in meta/.
train_imgs = prep_df('/kaggle/input/food-101/food-101/food-101/meta/train.txt')
test_imgs = prep_df('/kaggle/input/food-101/food-101/food-101/meta/test.txt')

In [9]:
# Evaluation pipeline: deterministic resize and centre crop to 224x224,
# then tensor conversion and per-channel normalisation.
transform_test = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training pipeline: random resized crop and horizontal flip as augmentation,
# followed by the same tensor conversion and normalisation as evaluation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [10]:
# Datasets pair each split table with its transform pipeline.
trainset = Food101(train_imgs, transform=transform_train)
testset = Food101(test_imgs, transform=transform_test)

# Decode and augment images in background worker processes; with the default
# num_workers=0 all of that work runs in the main process between GPU steps.
# Only the training loader reshuffles each epoch.
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, pin_memory=True, num_workers=2)
testloader = DataLoader(testset, batch_size=32, shuffle=False, pin_memory=True, num_workers=2)

In [11]:
# Load torchvision's ViT-B/16 initialised with the IMAGENET1K_V1 pretrained weights.
model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:01<00:00, 209MB/s]


In [12]:
# Freeze every parameter currently in the model (no gradients are tracked for them).
model.requires_grad_(False)

# One output per entry in the class list.
num_classes = len(classes)

In [13]:
# Swap the classification head for a small MLP that ends in `num_classes` outputs.
current_head = model.heads.head
# On a re-run of this cell the head is already the Sequential built below,
# which has no `in_features`; read the width from its first Linear instead.
in_features = (
    current_head[0].in_features
    if isinstance(current_head, nn.Sequential)
    else current_head.in_features
)
model.heads.head = nn.Sequential(
        nn.Linear(in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(512, num_classes)
    )
# `parameters()` returns a one-shot generator. Materialise it so the optimizer
# cell can be re-run without receiving an already-exhausted (empty) iterator.
trainable_params = list(model.heads.parameters())

In [14]:
# Schedule lengths, counted in epochs.
warmup_epochs = 3
total_epochs = 30

# Put the network on the selected device, then set up the loss and optimizer.
model = model.to(device)
criterion = nn.CrossEntropyLoss(label_smoothing=0.0)
optimizer = optim.AdamW(trainable_params, lr=1e-4, weight_decay=1e-4)

# Learning-rate schedule: linear warm-up from 10% of the base rate for
# `warmup_epochs`, then cosine annealing over the remaining epochs.
warmup_scheduler = LinearLR(optimizer, start_factor=0.1, total_iters=warmup_epochs)
cosine_scheduler = CosineAnnealingLR(optimizer, T_max=max(1, total_epochs - warmup_epochs))
scheduler = SequentialLR(
    optimizer,
    schedulers=[warmup_scheduler, cosine_scheduler],
    milestones=[warmup_epochs],
)

In [15]:
def train(epoch):
    """Run one training epoch over ``trainloader`` and print loss and accuracy.

    Uses the module-level ``model``, ``criterion``, ``optimizer``,
    ``trainloader`` and ``device``.

    Args:
        epoch: zero-based epoch index (displayed one-based).

    Returns:
        Tuple ``(avg_loss, train_accuracy)``: the per-sample mean loss and the
        accuracy in percent. Both are 0.0 when the loader yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct_train = 0
    total_train = 0

    loop = tqdm(trainloader, desc=f"Training Epoch {epoch + 1}", leave=False)

    for inputs, targets in loop:
        # non_blocking lets the copy overlap with compute when the source
        # batch sits in pinned memory; otherwise it is a normal copy.
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        # Read the scalar once per step; each .item() forces a device sync.
        batch_loss = loss.item()
        batch_size = targets.size(0)
        # Weight by batch size so a short final batch does not skew the
        # epoch mean (assumes the criterion returns the batch mean).
        loss_sum += batch_loss * batch_size
        _, predicted = torch.max(outputs, 1)
        total_train += batch_size
        correct_train += predicted.eq(targets).sum().item()

        loop.set_postfix(loss=batch_loss)

    # Guard against an empty loader instead of raising ZeroDivisionError.
    denominator = max(total_train, 1)
    train_accuracy = 100. * correct_train / denominator
    avg_loss = loss_sum / denominator
    print(f"Epoch {epoch+1}: Train Loss = {avg_loss:.4f}, Accuracy = {train_accuracy:.2f}%")
    return avg_loss, train_accuracy


In [16]:
def test(epoch):
    """Evaluate the module-level ``model`` on ``testloader``.

    Args:
        epoch: zero-based epoch index (displayed one-based).

    Returns:
        Top-1 accuracy on the test loader, in percent.
    """
    model.eval()
    hits, seen = 0, 0
    progress = tqdm(testloader, desc=f"Testing Epoch {epoch + 1}", leave=False)

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in progress:
            images = images.to(device)
            labels = labels.to(device)

            # Index of the largest logit along the class dimension.
            predictions = torch.max(model(images), 1).indices
            seen += labels.size(0)
            hits += (predictions == labels).sum().item()

    test_accuracy = 100. * hits / seen
    print(f"Test Accuracy after Epoch {epoch + 1}: {test_accuracy:.2f}%")
    return test_accuracy

In [None]:
# Train for `total_epochs`, evaluating after every epoch. Whenever the test
# accuracy improves, save the weights and a checkpoint with the training state.
best_acc = 0.0
print("Starting training...\n")

for epoch in range(total_epochs):
    train(epoch)
    acc = test(epoch)
    # The schedule is defined per epoch, so step once per epoch.
    scheduler.step()

    if acc > best_acc:
        best_acc = acc
        torch.save(model.state_dict(), "food101_vit_best.pt")
        print(f"✅ Best model saved with accuracy: {best_acc:.2f}%")
        checkpoint = {
            'epoch': epoch,
            # Stored so a resumed run can continue best-model tracking.
            'best_acc': best_acc,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'rng_state': torch.get_rng_state()
        }
        torch.save(checkpoint, f'model_vit_epoch_{epoch}.pth')
print("\n🎉 Training completed.")

Starting training...



                                                                                

Epoch 1: Train Loss = 4.2940, Accuracy = 13.31%


                                                                  

Test Accuracy after Epoch 1: 36.04%
✅ Best model saved with accuracy: 36.04%


                                                                                

Epoch 2: Train Loss = 2.8397, Accuracy = 35.71%


                                                                  

Test Accuracy after Epoch 2: 56.27%
✅ Best model saved with accuracy: 56.27%


                                                                                 

Epoch 3: Train Loss = 2.1389, Accuracy = 46.95%




Test Accuracy after Epoch 3: 63.42%
✅ Best model saved with accuracy: 63.42%


                                                                                

Epoch 4: Train Loss = 1.8790, Accuracy = 52.55%


                                                                  

Test Accuracy after Epoch 4: 67.75%
✅ Best model saved with accuracy: 67.75%


                                                                                 

Epoch 5: Train Loss = 1.7163, Accuracy = 56.50%


                                                                  

Test Accuracy after Epoch 5: 70.28%
✅ Best model saved with accuracy: 70.28%


                                                                                 

Epoch 6: Train Loss = 1.6172, Accuracy = 58.69%


                                                                  

Test Accuracy after Epoch 6: 72.14%
✅ Best model saved with accuracy: 72.14%


                                                                                 

Epoch 7: Train Loss = 1.5474, Accuracy = 60.23%


                                                                  

Test Accuracy after Epoch 7: 73.14%
✅ Best model saved with accuracy: 73.14%


                                                                                 

Epoch 8: Train Loss = 1.5050, Accuracy = 61.37%


                                                                  

Test Accuracy after Epoch 8: 74.15%
✅ Best model saved with accuracy: 74.15%


                                                                                 

Epoch 9: Train Loss = 1.4698, Accuracy = 62.12%


                                                                  

Test Accuracy after Epoch 9: 74.53%
✅ Best model saved with accuracy: 74.53%


                                                                                  

Epoch 10: Train Loss = 1.4315, Accuracy = 63.18%


                                                                   

Test Accuracy after Epoch 10: 75.20%
✅ Best model saved with accuracy: 75.20%


Training Epoch 11:  12%|█▏        | 292/2368 [02:04<14:28,  2.39it/s, loss=1.48]