In [None]:
import zipfile
 
with zipfile.ZipFile('/home/jupyter/datasphere/project/archive_food.zip', 'r') as zip_ref:
    zip_ref.extractall()

In [12]:
%pip install torch torchvision torchaudio

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [13]:
import torch
print(torch.cuda.is_available())

True


In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet101
from PIL import Image
import os
import pandas as pd

# Create the dataset class
class Food101(Dataset):
    def __init__(self, dataframe, base_dir, transform=None):
        self.dataframe = dataframe
        self.base_dir = base_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = os.path.join(self.base_dir, self.dataframe.path.iloc[idx])
        image = Image.open(img_path).convert('RGB')
        label = self.dataframe.label.iloc[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

images_dir = 'archive_food/images/'

dataframe = []
for root, _, files in os.walk(images_dir):
    for file in files:
        if file.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'gif')):
            file_path = os.path.join(root, file)
            label = os.path.basename(root)
            formatted_path = os.path.join(label, file)
            dataframe.append({'path': formatted_path, 'label': label})

dataframe = pd.DataFrame(dataframe)

transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
food_dataset = Food101(dataframe, images_dir, transform=transform)

train_size = int(0.7 * len(food_dataset))
val_size = int(0.15 * len(food_dataset))
test_size = len(food_dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(food_dataset, [train_size, val_size, test_size])


train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model
class ResNetClassifier(nn.Module):
    def __init__(self, num_classes):
        super(ResNetClassifier, self).__init__()
        self.resnet101 = resnet101(pretrained=True)
        for param in self.resnet101.parameters():
            param.requires_grad = False
        self.resnet101.fc = nn.Linear(self.resnet101.fc.in_features, num_classes)

    def forward(self, x):
        x = self.resnet101(x)
        return x

model = ResNetClassifier(num_classes=len(food_dataset.dataframe['label'].unique())).cuda()




In [28]:
print(dataframe)

                           path         label
0         foie_gras/1244707.jpg     foie_gras
1         foie_gras/1335868.jpg     foie_gras
2         foie_gras/1176321.jpg     foie_gras
3         foie_gras/1421924.jpg     foie_gras
4         foie_gras/1051563.jpg     foie_gras
...                         ...           ...
20195  caesar_salad/1397383.jpg  caesar_salad
20196   caesar_salad/126205.jpg  caesar_salad
20197  caesar_salad/1517451.jpg  caesar_salad
20198    caesar_salad/15081.jpg  caesar_salad
20199  caesar_salad/1351688.jpg  caesar_salad

[20200 rows x 2 columns]


In [29]:

import torch
import torch.nn.functional as F
import torch.optim as optim

class_names = sorted(set(target for batch in train_loader for target in batch[1]))
class_to_idx = {class_name: idx for idx, class_name in enumerate(class_names)}

def test(model, device, test_loader, class_to_idx):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data = data.to(device)
            target = torch.tensor([class_to_idx[t] for t in target], dtype=torch.long).to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            print(f'Test set: Batch {batch_idx+1} of {len(test_loader)} - Loss: {test_loss/(batch_idx+1):.4f}, '
                  f'Accuracy: {correct}/{len(test_loader.dataset)} '
                  f'({100. * correct / len(test_loader.dataset):.0f}%)')

    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)')

def validate(model, device, val_loader, class_to_idx):
    model.eval()
    total_correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            data = data.to(device)
            target = torch.tensor([class_to_idx[t] for t in target], dtype=torch.long).to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == target).sum().item()

    accuracy = total_correct / len(val_loader.dataset)
    print(f'Validation Accuracy: {accuracy:.4f}')

def train(model, device, train_loader, optimizer, epoch, class_to_idx):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = torch.tensor([class_to_idx[t] for t in target], dtype=torch.long).to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(25):
    train(model, device, train_loader, optimizer, epoch, class_to_idx)
    validate(model, device, val_loader, class_to_idx)
    torch.save(model.state_dict(), 'lastepoch_model.pth')

test(model, device, test_loader, class_to_idx)
torch.save(model.state_dict(), 'tested_model.pth')

Validation Accuracy: 0.4063
Validation Accuracy: 0.4495
Validation Accuracy: 0.4858
Validation Accuracy: 0.4792
Validation Accuracy: 0.4891
Validation Accuracy: 0.5020
Validation Accuracy: 0.4974
Validation Accuracy: 0.4987
Validation Accuracy: 0.4987
Validation Accuracy: 0.5026
Validation Accuracy: 0.5030
Validation Accuracy: 0.4993
Validation Accuracy: 0.5083
Validation Accuracy: 0.4964
Validation Accuracy: 0.4977
Validation Accuracy: 0.4914
Validation Accuracy: 0.4954
Validation Accuracy: 0.5083
Validation Accuracy: 0.5079
Validation Accuracy: 0.4954
Validation Accuracy: 0.5053
Validation Accuracy: 0.4983
Validation Accuracy: 0.4967
Validation Accuracy: 0.4944
Validation Accuracy: 0.4970
Test set: Batch 1 of 48 - Loss: 119.3447, Accuracy: 39/3030 (1%)
Test set: Batch 2 of 48 - Loss: 96.9700, Accuracy: 80/3030 (3%)
Test set: Batch 3 of 48 - Loss: 105.0445, Accuracy: 117/3030 (4%)
Test set: Batch 4 of 48 - Loss: 108.7577, Accuracy: 150/3030 (5%)
Test set: Batch 5 of 48 - Loss: 114.911