## Create Dataloader

In [1]:
import torch 
from torch.utils.data import Dataset, DataLoader
import cv2 
import numpy as np 

class FFTDataset(Dataset):
    """Image dataset that also yields a Fourier-spectrum target for each sample.

    Every item is a tuple ``(img, label, ft_sample)``:

    * ``img`` - the image resized to 512x512, passed through ``transforms``
      when one was given (otherwise the raw array returned by OpenCV).
    * ``label`` - index of the image's parent folder name in ``folder_names``.
    * ``ft_sample`` - float tensor of shape ``(1, 80, 80)`` holding the
      normalised log-magnitude spectrum of the image.

    Args:
        data: Indexable collection of path objects exposing ``.parent.name``
            (e.g. ``pathlib.Path``).
        transforms: Optional callable applied to the resized image.
    """

    def __init__(self, data, transforms=None):
        # Order matters: the position in this list is the integer class label.
        self.folder_names = ['carrying', 'threat', 'normal']
        self.data = data
        self.transforms = transforms

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, label, ft_sample)``.

        Raises:
            ValueError: If the parent folder name is not in ``folder_names``.
            OSError: If OpenCV cannot read or decode the image file.
        """
        path = self.data[idx]
        label = self.folder_names.index(path.parent.name)

        img = cv2.imread(str(path))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than deep inside cvtColor.
            raise OSError(f"cv2.imread could not read or decode image: {path}")

        # The spectrum is computed on the full-resolution image, then shrunk.
        ft_sample = self.generate_FT(img)
        ft_sample = cv2.resize(ft_sample, (80, 80))
        ft_sample = torch.from_numpy(ft_sample).float().unsqueeze(0)

        # NOTE(review): the image stays in OpenCV's BGR channel order here. If
        # the transforms assume RGB input, a BGR->RGB conversion is needed -
        # confirm against the normalisation constants / pretrained backbone.
        img = cv2.resize(img, (512, 512))
        if self.transforms:
            img = self.transforms(img)
        return img, label, ft_sample

    def generate_FT(self, image):
        """Return the normalised, centred log-magnitude spectrum of ``image``.

        Args:
            image: BGR image array, or an already single-channel 2-D array
                (used as-is, without colour conversion).

        Returns:
            2-D float array computed as
            ``(s - min(s) + 1) / (max(s) - min(s) + 1)`` with
            ``s = log(|fftshift(fft2(gray))| + 1)``; its maximum is exactly 1.
        """
        if np.ndim(image) != 2:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        spectrum = np.fft.fftshift(np.fft.fft2(image))
        fimg = np.log(np.abs(spectrum) + 1)
        # Global extrema in one vectorised pass (replaces a per-row Python loop).
        lowest, highest = fimg.min(), fimg.max()
        return (fimg - lowest + 1) / (highest - lowest + 1)

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

In [None]:
from torch.utils.data import Dataset
import cv2 
import numpy as np 

class FFTDataset(Dataset):
    """Image dataset that also yields a Fourier-spectrum target for each sample.

    Every item is a tuple ``(img, label, ft_sample)``:

    * ``img`` - the image resized to 512x512, passed through ``transforms``
      when one was given (otherwise the raw array returned by OpenCV).
    * ``label`` - index of the image's parent folder name in ``folder_names``.
    * ``ft_sample`` - float tensor of shape ``(1, 80, 80)`` holding the
      normalised log-magnitude spectrum of the image.

    Args:
        data: Indexable collection of path objects exposing ``.parent.name``
            (e.g. ``pathlib.Path``).
        transforms: Optional callable applied to the resized image.
    """

    def __init__(self, data, transforms=None):
        # Order matters: the position in this list is the integer class label.
        self.folder_names = ['carrying', 'threat', 'normal']
        self.data = data
        self.transforms = transforms

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, label, ft_sample)``.

        Raises:
            ValueError: If the parent folder name is not in ``folder_names``.
            OSError: If OpenCV cannot read or decode the image file.
        """
        path = self.data[idx]
        label = self.folder_names.index(path.parent.name)

        img = cv2.imread(str(path))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than deep inside cvtColor.
            raise OSError(f"cv2.imread could not read or decode image: {path}")

        # The spectrum is computed on the full-resolution image, then shrunk.
        ft_sample = self.generate_FT(img)
        ft_sample = cv2.resize(ft_sample, (80, 80))
        ft_sample = torch.from_numpy(ft_sample).float().unsqueeze(0)

        # NOTE(review): the image stays in OpenCV's BGR channel order here. If
        # the transforms assume RGB input, a BGR->RGB conversion is needed -
        # confirm against the normalisation constants / pretrained backbone.
        img = cv2.resize(img, (512, 512))
        if self.transforms:
            img = self.transforms(img)
        return img, label, ft_sample

    def generate_FT(self, image):
        """Return the normalised, centred log-magnitude spectrum of ``image``.

        Args:
            image: BGR image array, or an already single-channel 2-D array
                (used as-is, without colour conversion).

        Returns:
            2-D float array computed as
            ``(s - min(s) + 1) / (max(s) - min(s) + 1)`` with
            ``s = log(|fftshift(fft2(gray))| + 1)``; its maximum is exactly 1.
        """
        if np.ndim(image) != 2:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        spectrum = np.fft.fftshift(np.fft.fft2(image))
        fimg = np.log(np.abs(spectrum) + 1)
        # Global extrema in one vectorised pass (replaces a per-row Python loop).
        lowest, highest = fimg.min(), fimg.max()
        return (fimg - lowest + 1) / (highest - lowest + 1)

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

In [None]:
import torchvision.transforms as T

# Training pipeline: random horizontal flip and rotation of up to 10 degrees,
# then tensor conversion and per-channel normalisation. There is no resize
# step because FFTDataset resizes every image to 512x512 before calling this.
train_transforms = T.Compose(
    [
        T.ToPILImage(),
        T.RandomHorizontalFlip(),
        T.RandomRotation(10),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Validation pipeline: deterministic, no augmentation.
val_transforms = T.Compose(
    [
        T.ToPILImage(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Test pipeline: same steps as validation, kept as its own object.
test_transforms = T.Compose(
    [
        T.ToPILImage(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Lookup of pipeline by split name.
transforms = dict(
    train=train_transforms,
    val=val_transforms,
    test=test_transforms,
)

import random 
from pathlib import Path 

def split_data(data_dir, train_size=0.8, val_size=0.1, seed=1234):
    """Split the files under ``data_dir`` into train / val / test lists.

    Files are collected from the immediate sub-folders of ``data_dir``
    (pattern ``*/*``); directories and ``.zip`` files are skipped.

    Args:
        data_dir: Root folder whose sub-folders contain the samples.
        train_size: Fraction of the files assigned to the training split.
        val_size: Fraction of the files assigned to the validation split.
        seed: Seed for the shuffle. Note that this reseeds Python's global
            ``random`` generator, as the function always has.

    Returns:
        Tuple ``(train_data, val_data, test_data)`` of lists of ``Path``
        objects; the test split receives whatever remains after the first two.
    """
    # Directory listing order depends on the filesystem, so sort first:
    # otherwise the fixed seed does not give the same split on every machine.
    files = sorted(
        p for p in Path(data_dir).glob('*/*')
        if p.is_file() and p.suffix != '.zip'
    )
    random.seed(seed)
    random.shuffle(files)

    n_train = int(len(files) * train_size)
    n_val = int(len(files) * val_size)
    train_data = files[:n_train]
    val_data = files[n_train:n_train + n_val]
    test_data = files[n_train + n_val:]

    return train_data, val_data, test_data



def get_loaders(transforms=transforms, batch_size=32, num_workers=4, pin_memory=True,
                data_dir='data'):
    """Build the train / val / test ``DataLoader`` objects.

    Args:
        transforms: Mapping with keys ``'train'``, ``'val'`` and ``'test'``
            giving the image transform for each split.
        batch_size: Batch size used by all three loaders.
        num_workers: Number of worker processes per loader.
        pin_memory: Forwarded to every ``DataLoader``.
        data_dir: Root folder passed to ``split_data`` (defaults to the
            previously hard-coded ``'data'``).

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    train_data, val_data, test_data = split_data(data_dir)
    splits = {'train': train_data, 'val': val_data, 'test': test_data}

    loaders = {}
    for split_name, files in splits.items():
        dataset = FFTDataset(files, transforms=transforms[split_name])
        loaders[split_name] = DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=pin_memory,
            # Only training data is shuffled; evaluation batches stay in a
            # fixed order so per-batch metrics are repeatable between runs.
            shuffle=(split_name == 'train'),
        )

    return loaders['train'], loaders['val'], loaders['test']

## Model Definition

In [None]:
import torch
from torch import nn
from torchvision import models

from efficientdet.model import BiFPN
from efficientnet_pytorch.efficientnet import EfficientNet_Head
from efficientdet.utils import Anchors

In [1]:
class FTGen_1(nn.Module):
    """Small convolutional head mapping a feature map to a Fourier-map estimate.

    Three 3x3 convolutions (padding 1, so height and width are preserved), each
    followed by batch normalisation and an in-place ReLU. The channel widths
    go ``in_channels -> 128 -> 64 -> out_channels``.
    """

    def __init__(self, in_channels=64, out_channels=1):
        super().__init__()
        widths = (in_channels, 128, 64, out_channels)
        stages = []
        # Layers are created in the same order as before, so state-dict keys
        # (ft.0 ... ft.8) are unchanged.
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(c_in, c_out, kernel_size=(3, 3), padding=1))
            stages.append(nn.BatchNorm2d(c_out))
            stages.append(nn.ReLU(inplace=True))
        self.ft = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the convolutional stack."""
        return self.ft(x)

In [2]:

class FourierBiFPN(nn.Module):
    """EfficientNet backbone + BiFPN neck with auxiliary Fourier-map heads.

    ``forward`` returns ``(out, ft_3, ft_4, ft_5)``: ``out`` has one column per
    pooled pyramid level (three in total) and the ``ft_*`` tensors are the
    ``FTGen_1`` predictions for those same three levels, upsampled to 80x80.

    Args:
        num_classes: Accepted for interface compatibility; not used - the
            output always has three columns, one per pooled level.
        compound_coef: Index selecting the backbone / BiFPN configuration from
            the tables below.
        load_weights: Accepted for interface compatibility; not used here.
        **kwargs: Optional ``ratios`` and ``scales`` overrides.
    """

    def __init__(self, num_classes=3, compound_coef=0, load_weights=False, **kwargs):
        super().__init__()

        self.compound_coef = compound_coef
        # Per-compound-coefficient configuration tables (indexed by compound_coef).
        self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6]
        self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
        self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
        self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]
        self.aspect_ratios = kwargs.get('ratios', [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)])
        self.num_scales = len(kwargs.get('scales', [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]))

        conv_channel_coef = {
            # the channels of P3/P4/P5.
            0: [40, 112, 320],
            1: [40, 112, 320],
            2: [48, 120, 352],
            3: [48, 136, 384],
            4: [56, 160, 448],
            5: [64, 176, 512],
            6: [72, 200, 576],
            7: [72, 200, 576],
        }

        # One FTGen_1 head (default 64 input channels) is shared by all levels.
        self.ftg1 = FTGen_1()
        self.upsample1 = nn.Upsample(size=(80, 80), mode="nearest")
        # Stack of BiFPN cells; only the first cell receives the "first time" flag.
        self.bifpn = nn.Sequential(
            *[BiFPN(self.fpn_num_filters[self.compound_coef],
                    conv_channel_coef[compound_coef],
                    cell_idx == 0,
                    attention=compound_coef < 6)
              for cell_idx in range(self.fpn_cell_repeats[compound_coef])])
        self.backbone_net = EfficientNet_Head(compound_coef=self.backbone_compound_coef[compound_coef])

    def forward(self, inputs):
        """Return ``(out, ft_3, ft_4, ft_5)`` for a batch of images.

        ``out`` is the row-wise softmax over three scores, each score being the
        mean sigmoid activation of one pyramid level.
        """
        _, c3, c4, c5 = self.backbone_net(inputs)
        # The neck yields five feature maps; only the last three are used.
        # NOTE(review): which pyramid levels these correspond to depends on the
        # BiFPN implementation - confirm before relying on the p3/p4/p5 names.
        _, _, p3, p4, p5 = self.bifpn((c3, c4, c5))
        levels = (p3, p4, p5)

        # Fourier heads: ReLU -> upsample to 80x80 -> shared FTGen_1.
        upsampled = [self.upsample1(torch.relu(level)) for level in levels]
        ft_3, ft_4, ft_5 = [self.ftg1(feat) for feat in upsampled]

        # Classification scores: one scalar per level and sample.
        scores = [
            torch.sigmoid(level.reshape(level.shape[0], -1)).mean(dim=1)
            for level in levels
        ]
        out = torch.stack(scores, dim=1)
        # NOTE(review): compute_loss in this file feeds `out` to
        # nn.CrossEntropyLoss, which applies log-softmax itself; softmax here
        # therefore gets applied twice during training - confirm intent.
        out = nn.functional.softmax(out, dim=1)

        return out, ft_3, ft_4, ft_5

## Trainer

In [None]:
import copy
import os
import time
import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from tqdm.notebook import tqdm 
from pathlib import Path

class Trainer(object):
    """Runs the train / validation loop for a network and checkpoints it.

    Args:
        network: Model to train; moved to ``device`` on construction.
        optimizer: Optimizer already bound to the network's parameters.
        compute_loss: Callable
            ``(network, img, labels, ft_feat, writer, phase, device)``
            returning ``(loss, acc, preds)``.
        learning_rate: Stored for reference only.
        batch_size: Stored for reference only.
        device: Device string the network is moved to.
        save_interval: Epoch interval used for periodic checkpoints.
        save_path: Where the whole best model is pickled at the end of
            training; nothing is written when empty.
    """

    def __init__(self, network, optimizer, compute_loss, learning_rate=0.001, batch_size=32,
                 device='cpu', save_interval=2, save_path=''):

        self.network = network
        self.batch_size = batch_size
        self.optimizer = optimizer
        self.compute_loss = compute_loss
        self.device = device
        self.learning_rate = learning_rate
        self.save_interval = save_interval
        self.save_path = save_path
        self.network.to(self.device)
        self.writer = SummaryWriter('runs/bifpn-14')

    def _unwrapped_network(self):
        """Return the underlying model, looking through a (Distributed)DataParallel wrapper."""
        wrappers = (nn.DataParallel, nn.parallel.DistributedDataParallel)
        if isinstance(self.network, wrappers):
            return self.network.module
        return self.network

    def load_model(self, model_filename):
        """Load weights from a checkpoint written by ``save_model``.

        Returns:
            Tuple ``(start_epoch, start_iter, losses)`` read from the checkpoint.
        """
        # Load onto the CPU first; load_state_dict copies the values into the
        # network's existing parameters wherever they live.
        cp = torch.load(model_filename, map_location='cpu')
        self.network.load_state_dict(cp['state_dict'])
        start_epoch = cp['epoch']
        start_iter = cp['iteration']
        losses = cp['loss']
        return start_epoch, start_iter, losses

    def save_model(self, output_dir, epoch=0, iteration=0, losses=None, accuracy=None):
        """Write ``model_{epoch}_{iteration}.pth`` into ``output_dir``.

        The checkpoint holds the epoch, iteration, loss history, the optional
        ``accuracy`` value and a CPU copy of the network's state dict.
        """
        saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        saved_path = output_dir / saved_filename
        try:
            cp = {'epoch': epoch,
                  'iteration': iteration,
                  'loss': losses,
                  'accuracy': accuracy,
                  'state_dict': self.network.cpu().state_dict()
                  }
            torch.save(cp, saved_path)
        finally:
            # Always move the model back, even if writing the file failed.
            self.network.to(self.device)

    def train(self, dataloader, n_epochs=25, output_dir='out', model=None, freeze=False):
        """Train for ``n_epochs`` and keep the weights with the best validation accuracy.

        Args:
            dataloader: Mapping with ``'train'`` and ``'val'`` loaders, each
                yielding ``(img, labels, ft_feat)`` batches.
            n_epochs: Number of epochs to run.
            output_dir: Directory receiving the periodic and final checkpoints.
            model: Unused; kept for interface compatibility.
            freeze: Unused; kept for interface compatibility.
        """
        start_epoch = 0
        start_iter = 0
        losses = []
        print('TRAINING')

        self.network.train(True)
        best_model_wts = copy.deepcopy(self.network.state_dict())
        best_acc = 0.0
        # STARTING TRAINING LOOP
        for epoch in range(start_epoch, n_epochs):
            if (epoch + 1) == 2:
                # Unfreeze the backbone from the second epoch on. Works with or
                # without a DataParallel wrapper. Parameters that were excluded
                # when the optimizer was built are not added to it here.
                backbone = getattr(self._unwrapped_network(), 'backbone_net', None)
                if backbone is not None:
                    for param in backbone.parameters():
                        param.requires_grad = True
            train_loss_history = []
            val_loss_history = []
            train_acc_history = []
            val_acc_history = []

            for phase in ['train', 'val']:
                is_train = phase == 'train'
                # train(False) is the same as eval().
                self.network.train(is_train)
                tq = tqdm(dataloader[phase])
                for i, data in enumerate(tq):
                    if i < start_iter:
                        continue
                    img, labels, ft_feat = data
                    self.optimizer.zero_grad()
                    with torch.set_grad_enabled(is_train):
                        loss, acc, ft_preds = self.compute_loss(
                            self.network, img, labels, ft_feat, self.writer, phase, self.device)
                        if is_train:
                            loss.backward()
                            self.optimizer.step()
                    loss_value = loss.item()
                    if is_train:
                        train_loss_history.append(loss_value)
                        train_acc_history.append(acc)
                    else:
                        val_loss_history.append(loss_value)
                        val_acc_history.append(acc)

                    tq.set_postfix(LOSS=loss_value, ACC=acc, MODEL_PRED=ft_preds)
                    # Combined history of train and validation batch losses.
                    losses.append(loss_value)

            epoch_train_loss = np.mean(train_loss_history)
            epoch_train_acc = np.mean(train_acc_history)
            print(f"TRAIN LOSS: {epoch_train_loss} TRAIN ACC: {epoch_train_acc}  EPOCH: {epoch}")

            epoch_val_loss = np.mean(val_loss_history)
            epoch_val_acc = np.mean(val_acc_history)

            print(f"VAL LOSS: {epoch_val_loss}, EPOCH: {epoch}, ACC: {epoch_val_acc}")

            if epoch_val_acc > best_acc:
                print(
                    f"VAL ACC IMPROVED FROM: {best_acc} TO: {epoch_val_acc}, COPYING OVER NEW WEIGHTS")
                best_acc = epoch_val_acc
                best_model_wts = copy.deepcopy(self.network.state_dict())

            print(f"EPOCH {epoch + 1} DONE")

            # save the last model, and the ones in the specified interval
            if (epoch + 1) == n_epochs or epoch % self.save_interval == 0:
                self.save_model(output_dir, epoch=(epoch + 1),
                                iteration=0, losses=losses, accuracy=int(epoch_val_acc*100))

        self.network.load_state_dict(best_model_wts)
        if self.save_path:
            # With the default empty save_path there is nowhere to write the
            # pickled model; skip it instead of failing after training.
            torch.save(self.network, self.save_path)
        self.save_model(output_dir, epoch=0, iteration=0, losses=losses)
        self.writer.close()

In [None]:
from torch.autograd import Variable

def compute_loss(network, img, labels, ft_feat, logger, phase, device):
    """Compute the combined classification + Fourier-map loss for one batch.

    Args:
        network: Callable returning ``(out, ft_3, ft_4, ft_5)`` for an image batch.
        img: Image batch; moved to ``device`` before the forward pass.
        labels: Integer class labels (int64), one per sample.
        ft_feat: Target Fourier maps, compared against every ``ft_*`` output.
        logger: Unused; kept for interface compatibility.
        phase: Unused; kept for interface compatibility.
        device: Device on which the computation is performed.

    Returns:
        Tuple ``(loss, acc, predicted)``: ``loss`` is
        ``0.5 * cross_entropy + 0.5 * mean(MSE over the three Fourier maps)``,
        ``acc`` is the batch accuracy as a float and ``predicted`` the argmax
        class indices on the CPU.
    """
    # loss definitions
    criterion_ce = nn.CrossEntropyLoss()
    criterion_mse = nn.MSELoss()

    ft_feat = ft_feat.to(device)
    # Class indices give the same loss value as float one-hot targets and are
    # accepted by every PyTorch version.
    targets = labels.to(device)

    out, ft_3, ft_4, ft_5 = network(img.to(device))
    _, predicted = torch.max(out.detach(), 1)
    predicted = predicted.cpu()

    acc = float((predicted == labels.cpu()).sum()) / float(out.shape[0])

    loss_cls = criterion_ce(out, targets)
    # Average the reconstruction error over the three Fourier heads.
    loss_ft = (criterion_mse(ft_3, ft_feat)
               + criterion_mse(ft_4, ft_feat)
               + criterion_mse(ft_5, ft_feat)) / 3.0

    loss = 0.5 * loss_cls + 0.5 * loss_ft
    return loss, acc, predicted

In [3]:
# GPU ids handed to DataParallel; the first one also hosts the model.
gpus = [0]

network = FourierBiFPN()

# Wrap the model in DataParallel only when more than one GPU is visible.
if torch.cuda.device_count() > 1:
    network = nn.DataParallel(network, device_ids=gpus).cuda(gpus[0])

# Train every parameter from the start (the backbone is not frozen).
for param in network.parameters():
    param.requires_grad = True



Loaded pretrained weights for efficientnet-b0


In [5]:
import torch.optim as optim

# Adam hyper-parameters.
learning_rate = 1e-3
weight_decay = 1e-5

# Only parameters that currently require gradients are handed to the optimizer.
optimizer = optim.Adam(
    (p for p in network.parameters() if p.requires_grad),
    lr=learning_rate,
    weight_decay=weight_decay,
)


In [7]:
# Build the three loaders and index them by phase name for the trainer.
train_loader, val_loader, test_loader = get_loaders(batch_size=32)
dataloaders = dict(train=train_loader, val=val_loader, test=test_loader)

# Train on the first configured GPU when CUDA is available, else on the CPU.
trainer = Trainer(
    network,
    optimizer,
    compute_loss,
    learning_rate=learning_rate,
    batch_size=32,
    device=f'cuda:{gpus[0]}' if torch.cuda.is_available() else 'cpu',
)
trainer.train(dataloaders, n_epochs=25, output_dir='models', model=None, freeze=False)

### Fourier Conv Improved

In [None]:

class FourierBiFPN(nn.Module):
    """EfficientNet + BiFPN classifier with a fully connected fusion head.

    ``forward`` returns ``(out, ft_3, ft_4, ft_5)``: ``out`` holds three raw
    class scores per sample (no softmax applied) and the ``ft_*`` tensors are
    the ``FTGen_1`` predictions for three pyramid levels, upsampled to 80x80.

    Args:
        num_classes: Accepted for interface compatibility; the final layer is
            fixed at 3 outputs.
        compound_coef: Index selecting the backbone / BiFPN configuration from
            the tables below.
        load_weights: Accepted for interface compatibility; not used here.
        **kwargs: Optional ``ratios`` and ``scales`` overrides.
    """

    def __init__(self, num_classes=3, compound_coef=0, load_weights=False, **kwargs):
        super().__init__()

        self.compound_coef = compound_coef
        # Per-compound-coefficient configuration tables (indexed by compound_coef).
        self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6]
        self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
        self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
        self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]
        self.aspect_ratios = kwargs.get('ratios', [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)])
        self.num_scales = len(kwargs.get('scales', [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]))

        conv_channel_coef = {
            # the channels of P3/P4/P5.
            0: [40, 112, 320],
            1: [40, 112, 320],
            2: [48, 120, 352],
            3: [48, 136, 384],
            4: [56, 160, 448],
            5: [64, 176, 512],
            6: [72, 200, 576],
            7: [72, 200, 576],
        }

        # One FTGen_1 head (default 64 input channels) is shared by all levels.
        self.ftg1 = FTGen_1()
        self.upsample1 = nn.Upsample(size=(80, 80), mode="nearest")
        # Stack of BiFPN cells; only the first cell receives the "first time" flag.
        self.bifpn = nn.Sequential(
            *[BiFPN(self.fpn_num_filters[self.compound_coef],
                    conv_channel_coef[compound_coef],
                    cell_idx == 0,
                    attention=compound_coef < 6)
              for cell_idx in range(self.fpn_cell_repeats[compound_coef])])
        self.backbone_net = EfficientNet_Head(compound_coef=self.backbone_compound_coef[compound_coef])
        # Fusion head. The in_features fix the flattened widths the three
        # levels must have: 16384 (p3), 4096 (p4) and 1024 (p5).
        self.p3_fc = nn.Linear(16384, 4096)
        self.p4_fc = nn.Linear(4096, 1024)
        self.final_fc = nn.Sequential(nn.Linear(1024, 256), nn.Linear(256, 3))

    def forward(self, inputs):
        """Return ``(out, ft_3, ft_4, ft_5)`` for a batch of images."""
        _, c3, c4, c5 = self.backbone_net(inputs)
        # The neck yields five feature maps; only the last three are used.
        # NOTE(review): which pyramid levels these correspond to depends on the
        # BiFPN implementation - confirm before relying on the p3/p4/p5 names.
        _, _, p3, p4, p5 = self.bifpn((c3, c4, c5))

        # Fourier heads: ReLU -> upsample to 80x80 -> shared FTGen_1.
        upsampled = [self.upsample1(torch.relu(level)) for level in (p3, p4, p5)]
        ft_3, ft_4, ft_5 = [self.ftg1(feat) for feat in upsampled]

        p3 = p3.reshape(p3.shape[0], -1)
        p4 = p4.reshape(p4.shape[0], -1)
        p5 = p5.reshape(p5.shape[0], -1)

        # Coarse-to-fine cascade. The previous `p3_fc(p3) + p4_fc(p4) + p5`
        # added a 4096-wide tensor to a 1024-wide one, which cannot broadcast.
        # Each projection is now fused with the next level, whose flattened
        # width equals the projection's output size.
        # NOTE(review): inferred from the Linear sizes declared above; assumes
        # p4 flattens to 4096 and p5 to 1024 features - TODO confirm.
        fused = self.p3_fc(p3) + p4
        fused = self.p4_fc(fused) + p5

        out = self.final_fc(fused)

        return out, ft_3, ft_4, ft_5