In [None]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

# Seed every random number generator used in this notebook (PyTorch, NumPy,
# Python's `random`) with the same value so that reruns are repeatable.
SEED = 190
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import pandas as pd
# Load the tabular data from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer='predict_df.csv')

In [None]:
from torch.utils.data.dataset import Dataset

class SBCDataset(Dataset):
    """
        Map-style dataset that serves (features, label) pairs from a DataFrame.

        NOTE(review): the original cell was an empty skeleton, so the column
        layout below is an assumption -- the label is taken from `label_col`
        (the LAST column by default) and every other column is treated as a
        numeric feature. TODO confirm against the real schema of the frame.
    """
    def __init__(self, df, label_col=None):
        """
            df: pandas DataFrame, one sample per row
            label_col: name of the column holding the integer class label;
                       defaults to the last column of `df`
        """
        if label_col is None:
            label_col = df.columns[-1]

        # convert once up front so that __getitem__ is a cheap tensor index
        self.features = torch.tensor(
            df.drop(columns=[label_col]).to_numpy(dtype='float32')
        )
        # int64 class indices are what nn.CrossEntropyLoss / nn.NLLLoss expect
        self.labels = torch.tensor(df[label_col].to_numpy(dtype='int64'))

    def __len__(self):
        """
            Number of samples (rows) in the dataset
        """
        return self.labels.size(0)

    def __getitem__(self, idx):
        """
            idx: int, position of the sample
            returns: (features, label) -- a float32 tensor of shape
                     (num_features,) and a 0-dim int64 tensor
        """
        return self.features[idx], self.labels[idx]

In [None]:
# Mini-batch sizes: small batches for optimisation, large ones for evaluation
# (no gradients are needed there, so more samples fit at once).
BATCH_SIZE = 64
TEST_BATCH_SIZE = 1024

# Wrap each split in a dataset object.
# NOTE(review): `train_df` and `valid_df` are not defined in any cell visible
# above (only `df` is loaded) -- confirm where the split is created, otherwise
# this cell fails on a fresh kernel.
train_ds, valid_ds = SBCDataset(train_df), SBCDataset(valid_df)

# Only the training loader reshuffles the samples every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True
)
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_ds, batch_size=TEST_BATCH_SIZE, shuffle=False
)

In [None]:
class Trainer(object):
    """
        A learning pipeline to train and validate the model.

        The model, the criterion and every batch are moved to the
        module-level `device`.
    """
    def __init__(self, model, criterion, optimizer, max_epoch):
        """
            model: nn model
            criterion: loss function
            optimizer: optimizer
            max_epoch: maximum training epoch
        """
        self.model = model.to(device)
        self.criterion = criterion.to(device)
        self.optimizer = optimizer
        self.max_epoch = max_epoch

    def run(self, train_loader, valid_loader):
        """
            Main entry
                train_loader: training loader, each batch is (inputs, labels)
                valid_loader: validation loader, each batch is (inputs, labels)
        """
        # calculate the initial loss and accuracy on the validation set
        # (epoch=-1 is reported as "epoch 0", i.e. before any training)
        valid_best_loss = self.validate(-1, valid_loader, best_loss=None)
        for epoch in range(self.max_epoch):
            self.train(epoch, train_loader)
            # save the checkpoint with the lowest validation loss
            valid_best_loss = self.validate(epoch, valid_loader, valid_best_loss)

    def train(self, epoch, loader):
        """
            Single training loop
                epoch: int, current epoch index
                loader: training loader
        """
        # switch to the train mode (enables dropout / batch-norm updates)
        self.model.train()
        running_loss, total, correct = 0.0, 0, 0
        # wrap the loader itself (not enumerate(loader)) so tqdm knows the
        # number of batches and can show a real progress bar
        with tqdm(loader, mininterval=10) as tepoch:
            for inputs, labels in tepoch:
                # each batch is a pair [inputs, labels]
                inputs, labels = inputs.to(device), labels.to(device)

                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

                # calculate the metric on a graph-free view of the outputs
                match, number = self.cal_metric(outputs.detach(), labels)

                # gather statistics
                total += number
                correct += match
                running_loss += loss.item()
                tepoch.set_postfix(loss=loss.item(), accuracy=100. * correct / total)

        # mean of the per-batch losses
        running_loss /= len(loader)

        print('Training | Epoch: {}| Loss: {:.3f} | Accuracy on train images: {:.1f}'.format \
              (epoch+1, running_loss, 100 * correct / total))

    def validate(self, epoch, loader, best_loss=None):
        """
            Single evaluation loop
                epoch: int, current epoch index
                loader: validation loader
                best_loss: float, current best loss (None if there is none yet)
            returns: float, the best (lowest) validation loss seen so far
        """
        # switch to the evaluation mode (affects dropout / batch-norm only)
        self.model.eval()
        running_loss, total, correct = 0.0, 0, 0
        # eval() alone does not stop autograd from recording the graph;
        # no_grad() is what actually avoids the gradient bookkeeping
        with torch.no_grad():
            for inputs, labels in tqdm(loader):
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

                match, number = self.cal_metric(outputs, labels)

                total += number
                correct += match
                running_loss += loss.item()

        # mean of the per-batch losses
        running_loss /= len(loader)

        if best_loss is None or running_loss < best_loss:
            # if a better loss appears, save the checkpoint
            # (torch.save on the module pickles the whole model object, so
            # loading it back requires the model class to be importable)
            save_file = 'best_epoch{}_loss{:.2f}_accu{:.2f}.pt'.format(epoch+1, running_loss, 100 * correct / total)
            print('Save to file: ', save_file)
            torch.save(self.model, save_file)

            # overwrite the best_checkpoint.pt file
            torch.save(self.model, 'best_checkpoint.pt')

            best_loss = running_loss

        print('Validation | Epoch: {}| Loss: {:.3f} | Accuracy on val images: {:.1f}'.format \
              (epoch+1, running_loss,100 * correct / total))

        return best_loss

    def cal_metric(self, outputs, labels):
        """
            Calculate the accuracy
                outputs: tensor (batch_size, number_class), the output of the model
                labels: tensor (batch_size,) or (batch_size, 1), the ground truth
            returns: (correct, number) -- count of matching predictions and
                     the batch size
        """
        # the predicted class is the index of the largest score in each row
        _, predicted = torch.max(outputs, 1)
        number = labels.size(0)
        # flatten first: comparing (batch,) against (batch, 1) would broadcast
        # to a (batch, batch) matrix and over-count the matches
        correct = (predicted == labels.reshape(-1)).sum().item()
        return correct, number

In [None]:
class MLP(nn.Module):
    """
        Minimal perceptron classifier: one linear layer that maps a
        1024-dimensional (flattened) input to 2 class scores.
    """
    def __init__(self):
        super().__init__()
        classifier = nn.Linear(in_features=1024, out_features=2)
        self.layers = nn.Sequential(classifier)

    def forward(self, x):
        """
            x: tensor whose non-batch dimensions multiply to 1024
            returns: tensor (num_samples, num_classes) of log-probabilities
        """
        # collapse everything except the batch dimension
        flat = x.flatten(start_dim=1)
        logits = self.layers(flat)
        # normalise the scores over the class dimension
        log_probs = F.log_softmax(logits, dim=1)
        return log_probs

In [None]:
import torch.optim as optim

# Training hyper-parameters
NUM_EPOCH = 20
LEARNING_RATE = 0.001

# Network to optimise
model = MLP()

# NOTE(review): MLP.forward already returns log-probabilities and
# CrossEntropyLoss applies log_softmax again; nn.NLLLoss would be the
# conventional pairing -- confirm which one is intended.
criterion = nn.CrossEntropyLoss()

# Plain SGD with momentum over all model parameters
optimizer = optim.SGD(params=model.parameters(), lr=LEARNING_RATE, momentum=0.9)

# Bundle everything into the training pipeline
trainer = Trainer(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    max_epoch=NUM_EPOCH,
)