In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import albumentations as A
from PIL import Image
import pickle
import numpy as np
import pandas as pd
import os
import math

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device: {device}")

# Architecture Definition

In [None]:
# Layer specification consumed by the network builder, one entry per stage.
#
#   (filters, size, stride)      a single convolutional layer
#   ["residual", repetitions]    `repetitions` residual units; with prev_channels
#                                the channel count of the preceding layer, each
#                                unit is conv(prev_channels // 2) ->
#                                conv(prev_channels) followed by a residual
#                                connection
#   ["residualYolo", repetitions] same as "residual", but a feature map is kept
#                                so it can be used with the YOLO network
#   ["avgpool"]                  average pooling with a 1x1 output per channel
#   ["prediction"]               last convolutional layer; produces as many
#                                channels as there are classes
#   ["softmax"]                  softmax over the predictions (the darknet53
#                                class in this notebook adds no layer for this
#                                entry and returns the raw scores)
darknet_architecture = [
    (32, 3, 1),
    (64, 3, 2),             # stride 2
    ["residual", 1],
    (128, 3, 2),            # stride 2
    ["residual", 2],
    (256, 3, 2),            # stride 2
    ["residualYolo", 8],
    (512, 3, 2),            # stride 2
    ["residualYolo", 8],
    (1024, 3, 2),           # stride 2
    ["residual", 4],
    ["avgpool"],
    ["prediction"],
    ["softmax"],
]

# Blocks Definition

In [None]:
class ConvLayer(nn.Module):
    """2-D convolution, optionally followed by batch norm and LeakyReLU(0.1).

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: convolution kernel size; a value of 3 gets padding 1,
            any other value gets padding 0.
        bn_act: when True the convolution has no bias and is followed by
            BatchNorm2d and LeakyReLU; when False the bare convolution (with
            bias) is used.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (e.g. ``stride``).
    """

    def __init__(self, in_channels, out_channels, kernel_size, bn_act=True, **kwargs):
        super().__init__()
        self.use_bn_act = bn_act

        if kernel_size == 3:
            pad = 1
        else:
            pad = 0

        # The bias is dropped whenever batch norm follows the convolution.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            padding=pad,
            bias=not bn_act,
            **kwargs,
        )

        # Batch norm and the leaky activation always come as a pair.
        if bn_act:
            self.bn = nn.BatchNorm2d(out_channels)
            self.leaky = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Apply the convolution, then batch norm + LeakyReLU when enabled."""
        out = self.conv(x)
        if not self.use_bn_act:
            return out
        return self.leaky(self.bn(out))

In [None]:
class ConvBlock(nn.Module):
    """Stack of ``num_repeats`` units, each a 1x1 then a 3x3 ConvLayer.

    Every unit maps ``channels -> channels // 2 -> channels``. With
    ``use_residual`` the unit's input is added to its output.

    Args:
        channels: number of input (and output) channels of every unit.
        use_residual: whether to add the skip connection around each unit.
        num_repeats: number of units stacked in the block.
    """

    def __init__(self, channels, use_residual=True, num_repeats=1):
        super().__init__()
        self.layers = nn.ModuleList(
            [
                nn.Sequential(
                    ConvLayer(channels, channels // 2, kernel_size=1),
                    ConvLayer(channels // 2, channels, kernel_size=3),
                )
                for _ in range(num_repeats)
            ]
        )

        self.use_residual = use_residual
        self.num_repeats = num_repeats

    def forward(self, x):
        """Run every unit in order, adding the skip connection when enabled."""
        for layer in self.layers:
            out = layer(x)
            # Branch explicitly instead of computing `use_residual * x`: with
            # the skip disabled that product is wasted work and turns
            # non-finite inputs into NaN (0 * inf).
            x = out + x if self.use_residual else out

        return x

In [None]:
# Block of convolutional layers with feature map to be saved for detections
class ConvBlockYolo(nn.Module):
    """Residual block that also records one intermediate feature map.

    The block stacks ``num_repeats`` residual units (1x1 ConvLayer to
    ``channels // 2`` followed by a 3x3 ConvLayer back to ``channels``).
    The third unit is stored as two separate ConvLayers instead of one
    ``nn.Sequential`` so that the output of its 1x1 convolution can be kept
    in ``self.feat_map`` during ``forward``. With fewer than three units
    ``self.feat_map`` stays ``None``.

    Args:
        channels: number of input (and output) channels of every unit.
        num_repeats: number of residual units in the block.
    """

    def __init__(self, channels, num_repeats=1):
        super().__init__()
        self.layers = nn.ModuleList()
        # Set on every forward pass; None until a forward pass reaches the tap.
        self.feat_map = None
        for i in range(num_repeats):
            squeeze = ConvLayer(channels, channels // 2, kernel_size=1)
            expand = ConvLayer(channels // 2, channels, kernel_size=3)
            if i == 2:
                # Third unit: keep the two convolutions separate so forward()
                # can capture the tensor between them.
                self.layers += [squeeze, expand]
            else:
                self.layers += [nn.Sequential(squeeze, expand)]

    def forward(self, x):
        """Run the block and refresh ``self.feat_map``; returns the block output."""
        self.feat_map = None
        for layer in self.layers:
            if isinstance(layer, nn.Sequential):
                # Regular residual unit.
                x = layer(x) + x
            elif self.feat_map is None:
                # First half (1x1 conv) of the split unit: remember its output.
                self.feat_map = layer(x)
            else:
                # Second half (3x3 conv) of the split unit closes the residual.
                x = layer(self.feat_map) + x

        return x

# Network Definition

In [None]:
class darknet53(nn.Module):
    """Classifier assembled from an architecture description list.

    Args:
        architecture: list of layer descriptions. A tuple
            ``(filters, size, stride)`` (optionally ending in ``"linear"`` to
            disable batch norm + activation) adds a ConvLayer; a list whose
            first item is ``"residual"``, ``"residualYolo"``, ``"avgpool"``,
            ``"prediction"`` or ``"softmax"`` adds the matching stage.
        num_classes: number of output channels of the "prediction" layer.
        in_channels: number of channels of the input images.

    Raises:
        ValueError: if a list entry names an unknown stage.
        TypeError: if an entry is neither a tuple nor a list.
    """

    def __init__(self, architecture, num_classes, in_channels=3):
        super().__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.layers = self._create_layers(architecture)

    def forward(self, x):
        """Run all layers and return the scores reshaped to (batch, channels)."""
        for layer in self.layers:
            x = layer(x)
        # Drop the trailing spatial dimensions left by the pooling stage.
        x = x.reshape(x.shape[0], x.shape[1])
        # No softmax is applied here; the raw scores are returned.
        return x

    def _create_layers(self, architecture):
        """Translate the architecture description into an ``nn.ModuleList``."""
        layers = nn.ModuleList()
        in_channels = self.in_channels
        for module in architecture:
            # conv layer
            if isinstance(module, tuple):
                bn_act = module[-1] != "linear"
                layers.append(ConvLayer(in_channels,
                                        module[0],
                                        kernel_size=module[1],
                                        bn_act=bn_act,
                                        stride=module[2])
                             )
                in_channels = module[0]

            elif isinstance(module, list):
                kind = module[0]
                # residual block
                if kind == "residual":
                    layers.append(ConvBlock(in_channels,
                                            num_repeats=module[1])
                                 )

                # residual block with feature map to be saved for detection
                elif kind == "residualYolo":
                    layers.append(ConvBlockYolo(in_channels,
                                                num_repeats=module[1])
                                 )

                # average pool
                elif kind == "avgpool":
                    layers.append(nn.AdaptiveAvgPool2d((1, 1)))

                # last convolutional layer
                elif kind == "prediction":
                    layers.append(ConvLayer(in_channels,
                                            self.num_classes,
                                            kernel_size=1,
                                            bn_act=False,
                                            stride=1)
                                 )

                # accepted for compatibility with existing descriptions, but
                # deliberately adds no layer: forward() returns raw scores
                elif kind == "softmax":
                    pass

                else:
                    # A misspelled stage used to be dropped silently, which
                    # built a different network than the one described.
                    raise ValueError(f"unknown architecture entry: {module!r}")

            else:
                raise TypeError(
                    f"architecture entries must be tuples or lists, got {module!r}"
                )

        return layers

# Dataset

In [None]:
class imgnet_trainset(Dataset):
    """Training images listed in a DataFrame with "filename" and "label" columns.

    Each image is read from ``<path>/<prefix>/<filename>`` where ``prefix`` is
    the part of the filename before the first underscore.

    Args:
        df: DataFrame with one row per image ("filename", "label").
        path: root directory of the image folders.
    """

    def __init__(self, df, path="./ImageNet_darknet_dataset/data/images"):
        self.transform = transforms.PILToTensor()
        self.df = df
        self.path = path

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)``; the image is a float tensor scaled to [0, 1]."""
        row = self.df.iloc[idx]
        filename = row["filename"]
        img_path = os.path.join(self.path, filename.split("_")[0], filename)
        # Close the file promptly and force three channels so that grayscale
        # or CMYK files do not produce tensors with a different channel count.
        with Image.open(img_path) as img:
            rgb = img.convert("RGB")
        return (self.transform(rgb).float() / 255, row["label"])
    
class imgnet_testset(Dataset):
    """Test images listed in a DataFrame with "filename" and "label" columns.

    Each image is read from ``<path>/<prefix>/<filename>`` where ``prefix`` is
    the part of the filename before the first underscore.

    Args:
        df: DataFrame with one row per image ("filename", "label").
        path: root directory of the image folders.
    """

    def __init__(self, df, path="./ImageNet_darknet_dataset/data/images"):
        self.transform = transforms.PILToTensor()
        self.df = df
        self.path = path

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)``; the image is a float tensor scaled to [0, 1]."""
        row = self.df.iloc[idx]
        filename = row["filename"]
        img_path = os.path.join(self.path, filename.split("_")[0], filename)
        # Close the file promptly and force three channels so that grayscale
        # or CMYK files do not produce tensors with a different channel count.
        with Image.open(img_path) as img:
            rgb = img.convert("RGB")
        return (self.transform(rgb).float() / 255, row["label"])
        

In [None]:
def create_dataset(path="./ImageNet_darknet_dataset", test_per_class=100,
                   train_per_class=500, seed=None):
    """Build the train and test datasets by sampling images class by class.

    ``<path>/main.csv`` lists the classes; for every class the file
    ``<path>/data/info_classes/class_<label>.csv`` lists its images. From each
    class ``test_per_class`` rows are drawn for the test set, then
    ``train_per_class`` of the remaining rows for the train set, so the two
    sets never share an image.

    Args:
        path: root directory of the dataset.
        test_per_class: number of test images sampled per class.
        train_per_class: number of train images sampled per class.
        seed: optional seed that makes the split reproducible; with ``None``
            the sampling uses NumPy's global random state, as before.

    Returns:
        ``(imgnet_trainset, imgnet_testset)``.

    Raises:
        ValueError: raised by ``DataFrame.sample`` when a class has fewer rows
            than requested.
    """
    # Author's note: per-class image counts range from a max of 1200 to a
    # minimum of 632 (the recorded "mean: 499" looks inconsistent - TODO confirm).
    df_main = pd.read_csv(os.path.join(path, "main.csv"), sep=";", skipinitialspace=True)
    rng = None if seed is None else np.random.RandomState(seed)
    n_classes = len(df_main)

    # Collect the per-class samples and concatenate once at the end; growing a
    # DataFrame with concat inside the loop copies all previous rows each time.
    test_frames = []
    train_frames = []
    for idx in range(n_classes):
        print(f"CLASS {idx+1}/{n_classes}")
        row = df_main.iloc[idx]
        path_class_csv = os.path.join(path, "data", "info_classes", f"class_{row['label']}.csv")
        df_cls = pd.read_csv(path_class_csv, sep=";", skipinitialspace=True)
        # test set: sample first, then remove those rows before sampling train
        df_testset_sample = df_cls.sample(test_per_class, random_state=rng)
        df_remaining = df_cls.drop(list(df_testset_sample.index.values))
        df_trainset_sample = df_remaining.sample(train_per_class, random_state=rng)
        test_frames.append(df_testset_sample)
        train_frames.append(df_trainset_sample)

    columns = ["filename", "label"]
    # pd.concat rejects an empty list, so fall back to empty frames.
    testset_df = pd.concat(test_frames, axis=0) if test_frames else pd.DataFrame(columns=columns)
    trainset_df = pd.concat(train_frames, axis=0) if train_frames else pd.DataFrame(columns=columns)
    return imgnet_trainset(trainset_df), imgnet_testset(testset_df)

# Training

In [None]:
# Augmentation pipeline: every transform below is applied independently with
# its own probability `p`.
# NOTE(review): hue=0.6 - the albumentations ColorJitter docs describe hue as
# expected in [0, 0.5]; confirm this value is accepted by the installed version.
augmentation = A.Compose([
    A.ColorJitter(hue=0.6, saturation=0.6, contrast=0.6, brightness=0.6, p=0.4),
    A.Blur(p=0.1),
    A.CLAHE(p=0.1),
    A.Posterize(p=0.1),
    A.ToGray(p=0.1),
    A.ChannelShuffle(p=0.05),
])

In [None]:
# perform augmentation when loading batch
def collate_fn_padd(batch):
    """Collate a batch and append one augmented copy of every image.

    Args:
        batch: iterable of ``(img, y)`` pairs where ``img`` is a float tensor
            indexed (channel, height, width) with values in [0, 1] and ``y``
            is the label, either a scalar or a one-element tensor.

    Returns:
        ``(X, Y)``: ``X`` stacks ``img_0, aug_0, img_1, aug_1, ...`` and ``Y``
        holds the matching labels (each label appears twice). An empty batch
        yields ``(None, None)``.
    """
    if not batch:
        return (None, None)

    images = []
    labels = []
    for img, y in batch:
        # The augmentation works on uint8 HWC arrays. Round before casting:
        # plain truncation can lower values that were stored as k / 255.
        img_uint8 = (img.permute(1, 2, 0) * 255).round().clamp(0, 255).to(torch.uint8)
        img_numpy = np.ascontiguousarray(img_uint8.numpy())
        img_numpy = augmentation(image=img_numpy)["image"]
        tensor_augm = torch.from_numpy(np.ascontiguousarray(img_numpy)).permute(2, 0, 1) / 255
        # Accept scalar labels as well as one-element tensors.
        label = torch.as_tensor(y).reshape(-1)
        images += [img, tensor_augm]
        labels += [label, label]

    # Stack once at the end instead of growing the tensors inside the loop.
    return (torch.stack(images), torch.cat(labels))

In [None]:
# Hyper-parameters used by the training cells.
classes = 1000  # number of classes
lr = 0.001      # learning rate

In [None]:
# Build the train and test datasets from the default dataset location
# (create_dataset prints one progress line per class).
trainset, testset = create_dataset()

CLASS 559/1000
CLASS 560/1000
CLASS 561/1000
CLASS 562/1000
CLASS 563/1000
CLASS 564/1000
CLASS 565/1000
CLASS 566/1000
CLASS 567/1000
CLASS 568/1000
CLASS 569/1000
CLASS 570/1000
CLASS 571/1000
CLASS 572/1000
CLASS 573/1000
CLASS 574/1000
CLASS 575/1000
CLASS 576/1000
CLASS 577/1000
CLASS 578/1000
CLASS 579/1000
CLASS 580/1000
CLASS 581/1000
CLASS 582/1000
CLASS 583/1000
CLASS 584/1000
CLASS 585/1000
CLASS 586/1000
CLASS 587/1000
CLASS 588/1000
CLASS 589/1000
CLASS 590/1000
CLASS 591/1000
CLASS 592/1000
CLASS 593/1000
CLASS 594/1000
CLASS 595/1000
CLASS 596/1000
CLASS 597/1000
CLASS 598/1000
CLASS 599/1000
CLASS 600/1000
CLASS 601/1000
CLASS 602/1000
CLASS 603/1000
CLASS 604/1000
CLASS 605/1000
CLASS 606/1000
CLASS 607/1000
CLASS 608/1000
CLASS 609/1000
CLASS 610/1000
CLASS 611/1000
CLASS 612/1000
CLASS 613/1000
CLASS 614/1000
CLASS 615/1000
CLASS 616/1000
CLASS 617/1000
CLASS 618/1000
CLASS 619/1000
CLASS 620/1000
CLASS 621/1000
CLASS 622/1000
CLASS 623/1000
CLASS 624/1000
CLASS 625/

In [None]:
# Author's notes from earlier runs:
#   check loss at near 220 epochs
#   8 epochs, loss = 1.3579337318145632

# Model and optimizer are created here so the cell works on a fresh kernel.
# Re-running this cell therefore restarts training from scratch.
darknet = darknet53(darknet_architecture, classes).to(device)
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=darknet.parameters(), lr=lr)
i = 1  # epoch counter

# Make sure the checkpoint directory exists before a whole epoch is spent.
os.makedirs("../backup_models", exist_ok=True)

best_parameters = None
best_loss = math.inf
best_path = None  # file that currently holds the best weights

# shuffle=True reshuffles on every new pass over the loader.
train_loader = DataLoader(trainset, batch_size=8, shuffle=True)
total_batches = len(train_loader)

# Trains until the cell is interrupted manually.
while True:
    darknet.train()
    losses = []
    for n_batch, (x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)
        pred = darknet(x)
        loss = ce_loss(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if n_batch % 1000 == 0:
            print(f"BATCH: {n_batch+1}/{total_batches}")
            print(f"loss: {loss.item()}\n")
        losses.append(loss.item())

    mean_losses = sum(losses) / len(losses)
    if mean_losses < best_loss:
        new_path = f"../backup_models/darknet_weights_loss_{mean_losses}"
        state = darknet.state_dict()
        # Write the new checkpoint first and only then delete the previous
        # one, so a failed save never leaves the run without a checkpoint.
        torch.save(state, new_path)
        if best_path is not None and best_path != new_path and os.path.exists(best_path):
            os.remove(best_path)
        # state_dict() returns references to the live weights; clone them so
        # the in-memory snapshot is not changed by later optimizer steps.
        best_parameters = {name: tensor.detach().clone() for name, tensor in state.items()}
        best_loss = mean_losses
        best_path = new_path
    print(f"EPOCH: {i} MEAN LOSSES EPOCH: {mean_losses}")
    print(f"Loss: {loss.item()}")
    i += 1

BATCH: 48001/62500
loss: 1.957792043685913

BATCH: 49001/62500
loss: 3.367802381515503

BATCH: 50001/62500
loss: 1.060798168182373

BATCH: 51001/62500
loss: 2.994719982147217

BATCH: 52001/62500
loss: 1.3740214109420776

BATCH: 53001/62500
loss: 1.192408800125122

BATCH: 54001/62500
loss: 2.017510414123535

BATCH: 55001/62500
loss: 2.352169990539551

BATCH: 56001/62500
loss: 1.7251794338226318

BATCH: 57001/62500
loss: 1.7635705471038818

BATCH: 58001/62500
loss: 1.9614330530166626

BATCH: 59001/62500
loss: 1.4423578977584839

BATCH: 60001/62500
loss: 1.0767019987106323

BATCH: 61001/62500
loss: 2.0136005878448486

BATCH: 62001/62500
loss: 1.711467981338501

EPOCH: 6 MEAN LOSSES EPOCH: 1.7238861848899125
Loss: 1.4690191745758057
BATCH: 1/62500
loss: 0.9097316265106201

BATCH: 1001/62500
loss: 1.5583593845367432

BATCH: 2001/62500
loss: 0.639952540397644

BATCH: 3001/62500
loss: 0.434953898191452

BATCH: 4001/62500
loss: 1.943143367767334

BATCH: 5001/62500
loss: 0.45002636313438416

BA

KeyboardInterrupt: 

In [None]:
# Top-1 / top-5 accuracy of `darknet` on the test set.
N = len(testset)
print(f"N :{N}")
top1 = 0
top5 = 0
batch_size = 8
test_loader = DataLoader(testset, batch_size=batch_size)
N_test_loader = len(test_loader)

# Evaluate in eval mode: torch.no_grad() alone leaves BatchNorm normalising
# with the statistics of each 8-image batch and updating its running stats.
# The previous mode is restored afterwards so training can continue.
was_training = darknet.training
darknet.eval()
try:
    with torch.no_grad():
        for i, (x, y) in enumerate(test_loader, start=1):
            x = x.to(device)
            y = y.to(device)

            pred = darknet(x)

            # topk cannot ask for more entries than there are classes
            k = min(5, pred.shape[1])
            top1 += (pred.argmax(dim=1) == y).sum().item()
            top5_pred = pred.topk(k, dim=1).indices
            top5 += (top5_pred == y.unsqueeze(1)).any(dim=1).sum().item()

            if i % 1000 == 0:
                print(f"{i}/{N_test_loader}")
finally:
    darknet.train(was_training)

print(f"ACCURACY TOP1: {top1/N}")
print(f"ACCURACY TOP5: {top5/N}")