In [2]:
# Load the important files
from prepare_data import *
from data import *

In [3]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F

In [4]:
training_address="food-101/train.csv"
test_address="food-101/test.csv"

In [5]:
train,val,test=get_dataloaders(training_address, test_address, args=None)

In [6]:
class baseline(nn.Module):
    def __init__(self):
        super(baseline, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3)
        self.BN1=nn.BatchNorm2d(64)
        self.conv2= nn.Conv2d(64, 128, kernel_size=3)
        self.BN2=nn.BatchNorm2d(128)
        self.conv3= nn.Conv2d(128, 128, kernel_size=3)
        self.BN3=nn.BatchNorm2d(128)
        self.conv4= nn.Conv2d(128, 128, kernel_size=3,stride=2)
        self.BN4=nn.BatchNorm2d(128)
        self.avg_pool=nn.AdaptiveAvgPool2d(1)
        # Compute the input units using the formula given in discussion session 128*63*63
        self.fc1=nn.Linear(128, 128)
        self.drop_out=nn.Dropout()
        self.fc2=nn.Linear(128,20)
    def forward(self, x):
        # layer1
        x=self.conv1(x)
        x=self.BN1(x)
        x=F.relu(x)
        # Layer2
        x=self.conv2(x)
        x=self.BN2(x)
        x=F.relu(x)
        # Layer3
        x=self.conv3(x)
        x=self.BN3(x)
        x=F.relu(x)
        x=F.max_pool2d(x,3)
        # Layer4
        x=self.conv4(x)
        x=self.BN4(x)
        x=F.relu(x)
        x=self.avg_pool(x)
        # fully connected layer 1
        x=x.view(-1,128) # faltten the input
        x=self.fc1(x)
        x=F.relu(self.drop_out(x))
        # fully connected layer 2
        x=self.fc2(x)
        return x

In [7]:
import torch.optim as optim
import torch.nn.functional as F
def prepare_model(device, model):
    # load model, criterion, optimizer, and learning rate scheduler
    
    # Create an empty model and move it to the designated computing device
    model=model.to(device)


    return model

def train_single_model(model, criterion, optimizer, scheduler, device, dataloaders,epoch):
    
    # prepare the model
    model=prepare_model(device,model)
    
    # store the results of our training
    training_losses=[]
    training_accs=[]
    val_losses=[]
    val_accs=[]

    for i in range(epoch):
        # train the model in each epoch
        model.train()
        for batch_idx, (data, target) in enumerate(dataloaders[0]):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad() # clear the old gradients 
            output = model(data) # compute outputs of the fc layer 
            loss = criterion(output, target) 
            loss.backward() # compute gradient for every variables with requires_grad=True
            optimizer.step()
        if scheduler is not None:
            scheduler.step()
        
        print("finish a epoch of training")
            
        # test the model with the training data:
        model.eval() # sets model in evaluation (inference) mode. Q3. Why? 
        training_loss = 0 
        correct = 0
        with torch.no_grad(): # stop storing gradients for the variables
            for data, target in dataloaders[0]:
                data, target = data.to(device), target.to(device)
                output = model(data)
                training_loss+=criterion(output, target)
                pred = output.argmax(dim=1, keepdim=True) # get the index of maximum fc output. Q4. Why?
                correct += pred.eq(target.view_as(pred)).sum().item()
        training_loss /= len(dataloaders[0].dataset)
        training_losses.append(training_loss)
        training_acc=correct / len(dataloaders[0].dataset)
        training_accs.append(training_acc)
        
        
        # test the model with validation data in each epoch
        model.eval() # sets model in evaluation (inference) mode. Q3. Why? 
        val_loss = 0 
        correct = 0
        with torch.no_grad(): # stop storing gradients for the variables
            for data, target in dataloaders[1]:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss+=criterion(output, target)
                pred = output.argmax(dim=1, keepdim=True) # get the index of maximum fc output. Q4. Why?
                correct += pred.eq(target.view_as(pred)).sum().item()
        val_loss /= len(dataloaders[1].dataset)
        val_losses.append(val_loss)
        val_acc=correct / len(dataloaders[1].dataset)
        val_accs.append(val_acc)
        print("epoch_num: "+str(i))
        print("train_loss: "+str(training_loss))
        print("val_loss: "+str(val_loss))

    torch.save(model.state_dict(), "food-101/state_dict_model.pt")
    return model,training_losses,training_accs,val_losses,val_accs

In [8]:
model=baseline()
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(), lr=0.001)
def Xavier_Initialization(m):
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight.data)
        nn.init.normal_(m.bias.data, mean=0.0, std=1.0)
model.apply(Xavier_Initialization)

baseline(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (BN1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (BN2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (BN3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2))
  (BN4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (avg_pool): AdaptiveAvgPool2d(output_size=1)
  (fc1): Linear(in_features=128, out_features=128, bias=True)
  (drop_out): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=128, out_features=20, bias=True)
)

In [9]:
a=[train,val,test]

In [10]:
train_single_model(model, criterion, optimizer, scheduler=None, device="cuda:0", dataloaders=a,epoch=25)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


finish a epoch of training
epoch_num: 0
train_loss: tensor(0.0857, device='cuda:0')
val_loss: tensor(0.0514, device='cuda:0')
finish a epoch of training
epoch_num: 1
train_loss: tensor(0.0837, device='cuda:0')
val_loss: tensor(0.0503, device='cuda:0')
finish a epoch of training
epoch_num: 2
train_loss: tensor(0.0804, device='cuda:0')
val_loss: tensor(0.0482, device='cuda:0')
finish a epoch of training
epoch_num: 3
train_loss: tensor(0.0793, device='cuda:0')
val_loss: tensor(0.0478, device='cuda:0')
finish a epoch of training
epoch_num: 4
train_loss: tensor(0.0776, device='cuda:0')
val_loss: tensor(0.0470, device='cuda:0')
finish a epoch of training
epoch_num: 5
train_loss: tensor(0.0774, device='cuda:0')
val_loss: tensor(0.0467, device='cuda:0')
finish a epoch of training
epoch_num: 6
train_loss: tensor(0.0749, device='cuda:0')
val_loss: tensor(0.0452, device='cuda:0')
finish a epoch of training
epoch_num: 7
train_loss: tensor(0.0741, device='cuda:0')
val_loss: tensor(0.0450, device='c

(baseline(
   (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
   (BN1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
   (BN2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
   (BN3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2))
   (BN4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   (avg_pool): AdaptiveAvgPool2d(output_size=1)
   (fc1): Linear(in_features=128, out_features=128, bias=True)
   (drop_out): Dropout(p=0.5, inplace=False)
   (fc2): Linear(in_features=128, out_features=20, bias=True)
 ),
 [tensor(0.0857, device='cuda:0'),
  tensor(0.0837, device='cuda:0'),
  tensor(0.0804, device='cuda:0'),
  tensor(0.0793, device='cuda:0'),
  tensor(0.0