In [None]:
import os
import sys
import gc
import pandas as pd
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import cv2
from torchinfo import summary
import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

In [None]:
# Prefer the first CUDA device when one is available; otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

DATA_DIR = 'combinedDataset'  # root folder that is scanned recursively for images
BATCH_SIZE = 16
SEED = 1337

# Seed the global NumPy and PyTorch generators so shuffling and weight
# initialisation repeat across runs. Libraries that draw from other generators
# (e.g. Python's `random`) are not covered by these two calls.
np.random.seed(SEED)
torch.manual_seed(SEED)

In [1]:
def get_file_paths(dir):
    '''
    Collects the paths of all files below a directory, including nested folders

    Args:
        dir (str): Directory to scan

    Returns:
        (list): Sorted list of file paths, each built by joining `dir` with
            the entries found below it
    '''
    collected = []
    pending = [dir]  # directories that still have to be listed

    while pending:
        current = pending.pop()
        for entry_name in os.listdir(current):
            candidate = os.path.join(current, entry_name)
            if os.path.isdir(candidate):
                pending.append(candidate)
            else:
                collected.append(candidate)

    collected.sort()
    return collected

def get_class_name(path):
    '''
    Derives the class name from the second component of an image path

    The path is split on backslashes first, exactly as before. Only when it
    contains no backslash at all (e.g. a POSIX-style path) is it split on
    forward slashes instead, so paths that already worked keep their result.

    Args:
        path (str): File path such as 'root\\apples\\img.jpg' or 'root/apples/img.jpg'

    Returns:
        (str): Upper-cased second path component with one trailing 'S' removed

    Raises:
        IndexError: If the path has fewer than two components
    '''
    parts = path.split('\\')
    if len(parts) < 2:
        # No Windows separators present: fall back to forward slashes.
        parts = path.split('/')

    name = parts[1].upper()
    # endswith() is safe on an empty component, unlike name[-1].
    if name.endswith('S'):
        name = name[:-1]
    return name

# One row per file found under DATA_DIR; the class label is derived from the path.
train_df = pd.DataFrame(get_file_paths(DATA_DIR), columns=['path']).assign(
    **{'class': lambda frame: frame['path'].apply(get_class_name)}
)
train_df.head()

NameError: name 'pd' is not defined

In [None]:
# Build the label vocabulary and both lookup directions.
classes = sorted(train_df['class'].unique())
idx_to_class = dict(enumerate(classes))
class_to_idx = {name: idx for idx, name in idx_to_class.items()}

# Replace class names with their integer index.
# NOTE: this overwrites train_df['class'] (and train_df itself is overwritten by
# the split below), so re-running this cell alone would rebuild `classes` from
# the already-encoded integers. Re-run from the cell that creates train_df.
train_df['class'] = train_df['class'].apply(class_to_idx.__getitem__)

# 80 % train; the remaining 20 % is halved into test and validation.
train_df, holdout_df = train_test_split(train_df, test_size=0.2, random_state=SEED)
test_df, val_df = train_test_split(holdout_df, test_size=0.5, random_state=SEED)

class Dataset_Food(Dataset):
    '''
    Dataset that loads images listed in a DataFrame

    Args:
        df (pd.DataFrame): Frame with a 'path' column (image file path) and a
            'class' column (label)
        transform: Optional callable invoked as transform(image=...) whose
            result is indexed with ['image']
    '''

    def __init__(self, df, transform=None):
        # Positional 0..n-1 index so __getitem__ can use .loc[i]; drop=True
        # avoids carrying the previous index along as an extra column.
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        '''
        Returns:
            (int): Number of rows in the frame
        '''
        return self.df.shape[0]

    def __getitem__(self, i):
        '''
        Loads one sample

        Args:
            i (int): Row position

        Returns:
            (tuple): image (RGB, transformed if a transform was given), label, file path

        Raises:
            FileNotFoundError: If the image cannot be read from disk
        '''
        image_filepath = self.df.loc[i, 'path']

        image = cv2.imread(image_filepath)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_filepath}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        label = self.df.loc[i, 'class']

        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image, label, image_filepath

In [None]:
# Training-time augmentation pipeline, applied in the listed order.
train_augmentations = [
    # NOTE(review): the upper scale bound (1.2) is above 1.0 — confirm this is intended.
    A.RandomResizedCrop(height=224, width=224, scale=(0.8, 1.2), ratio=(0.8, 1.2), p=0.8),
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0, rotate_limit=20, p=0.8, border_mode=cv2.BORDER_CONSTANT),
    A.HorizontalFlip(p=0.5),
    A.MedianBlur(blur_limit=7, p=0.5),
    # Bring every image to 224x224 regardless of whether the random crop fired.
    A.LongestMaxSize(max_size=224),
    A.PadIfNeeded(min_height=224, min_width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
]
transforms = A.Compose(train_augmentations)

In [None]:
# Deterministic preprocessing for validation/test: resize, pad, normalise, to tensor.
eval_preprocessing = [
    A.LongestMaxSize(max_size=224),
    A.PadIfNeeded(min_height=224, min_width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
]
transforms_test = A.Compose(eval_preprocessing)

In [2]:
# Only the training split gets the random augmentations.
train_dataset = Dataset_Food(train_df, transforms)
val_dataset = Dataset_Food(val_df, transforms_test)
test_dataset = Dataset_Food(test_df, transforms_test)

plot_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)

# Values used to undo A.Normalize for display.
norm_std = torch.tensor([0.229, 0.224, 0.225])
norm_mean = torch.tensor([0.485, 0.456, 0.406])

# A fresh iterator is created on every pass and the loader is not shuffled, so
# each pass fetches the first training sample again (re-running its transform).
images = []
while len(images) < 9:
    first_batch = next(iter(plot_loader))
    channels_first = first_batch[0][0]
    channels_last = channels_first.permute(1, 2, 0)
    images.append(channels_last * norm_std + norm_mean)

fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(8, 8))
for panel, picture in zip(ax.flat, images):
    panel.imshow(picture)

NameError: name 'Dataset_Food' is not defined

In [None]:
# Training batches are shuffled; validation and test are read one sample at a
# time in a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

In [None]:
EPOCHS = 100
LEARNING_RATE = 0.0001
# NOTE(review): presumably must equal the number of distinct labels in the data — confirm.
NUM_CLASS = 55
TRY = "Try6"  # name of the sub-folder that receives this run's output files
# Every backslash is escaped: the previous literal contained '\S', an invalid
# escape sequence. The resulting string is unchanged.
FOLDER = 'C:\\Users\\Somya\\Downloads\\food\\food\\' + TRY

In [3]:

from torchvision.models.densenet import densenet121

class Model(nn.Module):
    '''
    Neural network model based on DenseNet

    Loads 'densenet201' through torch.hub and replaces its classifier with a
    new linear layer that has `num_classes` outputs.

    Args:
        in_channels (int): Input channels (stored on the instance; the backbone
            itself is not modified)
        num_classes (int): Output channels

    '''
    def __init__(self, in_channels=3, num_classes=NUM_CLASS):
        super().__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.densenet = torch.hub.load('pytorch/vision:v0.10.0', 'densenet201', weights='IMAGENET1K_V1')
        # Read the feature width from the loaded backbone instead of hard-coding
        # it, so the new head always matches whichever DenseNet is loaded above.
        head_in_features = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Linear(in_features=head_in_features, out_features=num_classes)

    def forward(self, x):
        '''
        Runs the backbone on a batch

        Args:
            x: Input batch passed to the DenseNet

        Returns:
            Output of the replaced classifier layer
        '''
        x = self.densenet(x)
        return x

  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'nn' is not defined

In [None]:
# Build the network and move its parameters to the selected device.
model = Model()
model = model.to(device)

# Layer-by-layer overview for one training-sized batch of 224x224 RGB images.
summary(model, input_size=(BATCH_SIZE, 3, 224, 224))

In [None]:
class RunningAverage():
    '''
    Accumulates values and reports their mean on demand

    Attributes are `total` (sum of all values passed to update) and `count`
    (number of update calls).
    '''
    def __init__(self):
        self.total = 0
        self.count = 0

    def update(self, n):
        '''
        Adds one value to the running average

        Args:
            n : value to add to the running average
        '''
        self.count += 1
        self.total += n

    def __call__(self):
        '''
        Returns current running average

        Returns:
            Sum of the values divided by (count + 1e-15); the small constant
            keeps the division defined before the first update
        '''
        denominator = self.count + 1e-15
        return self.total / denominator

In [None]:
def _write_history(path, values):
    '''
    Writes a list of values to a text file, one value per line

    Args:
        path (str): Destination file
        values (list): Values to write
    '''
    with open(path, 'w') as fp:
        fp.write('\n'.join('%s ' % x for x in values))


def train(model, optimizer, criterion, train_loader, val_loader, epochs, device, lr_scheduler, scaler):
    '''
    Trains and evaluates the model over defined number of epochs

    Per-epoch loss and accuracy for both splits are written to text files in
    the notebook-level FOLDER once all epochs have finished.

    Args:
        model: Defined model
        optimizer: Constructed optimizer
        criterion: Loss function
        train_loader: Training dataset iterable yielding (images, labels, paths)
        val_loader: Validation dataset iterable yielding (images, labels, paths)
        epochs: Number of epochs
        device: Device for tensor storage
        lr_scheduler: Learning rate scheduler, stepped once per epoch
        scaler: Gradient scaler used for the mixed-precision backward pass
    '''
    # Create the output folder up front so a missing directory fails before
    # training starts instead of after the last epoch.
    os.makedirs(FOLDER, exist_ok=True)

    # Autocast follows the scaler's own switch rather than a notebook global.
    use_amp = scaler.is_enabled()

    val_losses = []
    val_accu = []
    train_loss = []
    train_accu = []

    for e in range(epochs):
        acc_avg = RunningAverage()
        loss_avg = RunningAverage()
        val_acc_avg = RunningAverage()
        val_loss_avg = RunningAverage()

        model.train()
        with tqdm(total=len(train_loader), leave=False, file=sys.stdout) as t:
            t.set_description(f'Epoch {e + 1}, LR {lr_scheduler.get_last_lr()[0]:.5f}')
            for train_batch, labels_batch, _ in train_loader:
                train_batch, labels_batch = train_batch.to(device), labels_batch.to(device)

                with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                    output_batch = model(train_batch)
                    loss = criterion(output_batch, labels_batch)

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

                predicted = torch.argmax(output_batch, 1)
                accuracy_batch = torch.sum(predicted == labels_batch) / labels_batch.shape[0]
                # .item() keeps the history as plain floats instead of tensors.
                acc_avg.update(accuracy_batch.item())
                loss_avg.update(loss.item())

                t.set_postfix({'stats': f'train_loss: {loss_avg():.4f}, train_acc: {acc_avg():.4f}'})
                t.update()

        epoch_train_stats = f'Epoch {e + 1}. LR {lr_scheduler.get_last_lr()[0]:.5f}, train_loss: {loss_avg():.4f}, train_acc: {acc_avg():.4f},'
        train_loss.append(loss_avg())
        train_accu.append(acc_avg())

        lr_scheduler.step()

        model.eval()
        with torch.no_grad():
            for val_batch, val_labels_batch, _ in val_loader:
                val_batch, val_labels_batch = val_batch.to(device), val_labels_batch.to(device)

                val_output_batch = model(val_batch)
                val_loss = criterion(val_output_batch, val_labels_batch)

                val_predicted = torch.argmax(val_output_batch, 1)
                val_accuracy_batch = torch.sum(val_predicted == val_labels_batch) / val_labels_batch.shape[0]
                val_acc_avg.update(val_accuracy_batch.item())
                # Record the validation loss (previously the last training loss was recorded here).
                val_loss_avg.update(val_loss.item())

        print(f'{epoch_train_stats} val_loss: {val_loss_avg():.4f}, val_acc: {val_acc_avg():.4f}', sep='')
        val_losses.append(val_loss_avg())
        val_accu.append(val_acc_avg())

    _write_history(os.path.join(FOLDER, 'train_loss.txt'), train_loss)
    _write_history(os.path.join(FOLDER, 'val_loss.txt'), val_losses)
    _write_history(os.path.join(FOLDER, 'train_accu.txt'), train_accu)
    _write_history(os.path.join(FOLDER, 'val_accu.txt'), val_accu)

In [None]:
# Mixed-precision switch; it is handed to the gradient scaler below.
use_amp = True

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=LEARNING_RATE)

# NOTE(review): with its default arguments ConstantLR scales the learning rate
# by a factor for the first few steps before restoring it — confirm that is
# wanted. Alternative tried earlier:
# torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=0.00001, last_epoch=-1)
lr_scheduler = torch.optim.lr_scheduler.ConstantLR(optimizer=optimizer, last_epoch=-1)

scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

In [None]:
def plot_scheduler(epochs, lr, lr_scheduler, **kwargs):
    '''
    Plots the learning rate a scheduler produces over a number of epochs

    A throw-away Adam optimizer over a single dummy tensor drives the
    scheduler, so the optimizer used for training is not touched.

    Args:
        epochs (int): Number of scheduler steps to simulate
        lr (float): Initial learning rate
        lr_scheduler: Scheduler class, called as lr_scheduler(optimizer, **kwargs)
        **kwargs: Extra arguments forwarded to the scheduler constructor
    '''
    dummy_optimizer = torch.optim.Adam([torch.tensor(1)], lr=lr)
    schedule = lr_scheduler(dummy_optimizer, **kwargs)

    history = []
    for _ in range(epochs):
        dummy_optimizer.step()
        history.append(schedule.get_last_lr())
        schedule.step()

    epoch_numbers = list(range(1, epochs + 1))

    fig, ax = plt.subplots(figsize=(12,5))
    ax.plot(epoch_numbers, history)
    ax.set_title('Learning rate schedule')
    ax.set_ylabel('Learning rate')
    ax.set_xlabel('Epochs')
    ax.xaxis.get_major_locator().set_params(integer=True)


# Preview the schedule used for training. Alternative tried earlier:
# plot_scheduler(EPOCHS, LEARNING_RATE, torch.optim.lr_scheduler.CosineAnnealingLR, T_max=EPOCHS, eta_min=0.00001)
plot_scheduler(epochs=EPOCHS, lr=LEARNING_RATE, lr_scheduler=torch.optim.lr_scheduler.ConstantLR);

In [None]:
# Release unreferenced Python objects first, then return cached GPU memory.
gc.collect()
torch.cuda.empty_cache()

train(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=EPOCHS,
    device=device,
    lr_scheduler=lr_scheduler,
    scaler=scaler,
)

In [None]:
import csv

# Evaluate on the test split and write one line per image:
#   <path>,<class derived from path>,<top prediction 1>,...,<top prediction k>
test_acc = RunningAverage()
os.makedirs(FOLDER, exist_ok=True)  # make sure the output folder exists before opening the file

model.eval()
with torch.inference_mode():
    with open(os.path.join(FOLDER, 'predicted.txt'), 'w') as file:
        for test_batch, test_labels_batch, file_paths in test_loader:
            test_batch, test_labels_batch = test_batch.to(device), test_labels_batch.to(device)
            test_output_batch = model(test_batch)

            test_predicted = torch.argmax(test_output_batch, 1)
            hits = (test_predicted == test_labels_batch).tolist()

            # Up to five highest-scoring classes per sample (fewer if the model has fewer outputs).
            top_k = min(5, test_output_batch.shape[1])
            _, prediction = test_output_batch.topk(top_k, dim=1)

            # Handle every sample in the batch, not only the first one.
            for sample_path, sample_hit, sample_top in zip(file_paths, hits, prediction.tolist()):
                test_acc.update(float(sample_hit))

                fields = [sample_path, get_class_name(sample_path)]
                fields.extend(idx_to_class[class_idx] for class_idx in sample_top)
                file.write(",".join(fields) + "\n")

print(f'Test dataset accuracy: {test_acc():.4f}')