In [43]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import argparse
import math
import torch
import sys
import time
import datetime
from sklearn.metrics import average_precision_score,roc_auc_score, classification_report
from __future__ import print_function
import torch.optim as optim
import os
from sklearn.metrics import roc_auc_score, f1_score
import torch.backends.cudnn as cudnn
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision
import torch.utils.data as data
from PIL import Image
import sys
from sklearn.metrics import precision_score,recall_score
from PIL import Image
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix

# *   **Arguments**

In [44]:
### from config.config_linear import parse_option ###


def parse_option(args):
    parser = argparse.ArgumentParser('argument for training')

    parser.add_argument('--print_freq', type=int, default=10,
                        help='print frequency')
    parser.add_argument('--save_freq', type=int, default=50,
                        help='save frequency')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='batch_size')
    parser.add_argument('--num_workers', type=int, default=2,
                        help='num of workers to use')
    parser.add_argument('--epochs', type=int, default=25,
                        help='number of training epochs')
    parser.add_argument('--n_cls', type=int, default=2,
                        help='number of training epochs')
    parser.add_argument('--super', type=int, default=0,
                        help='number of training epochs')
    parser.add_argument('--type', type=int, default=0,
                        help='number of training epochs')
    parser.add_argument('--biomarker', type=str, default='fluid_irf')
    # optimization
    parser.add_argument('--learning_rate', type=float, default=0.05,
                        help='learning rate')
    parser.add_argument('--patient_lambda', type=float, default=1,
                        help='learning rate')
    parser.add_argument('--lr_decay_epochs', type=str, default='100',
                        help='where to decay lr, can be a list')
    parser.add_argument('--lr_decay_rate', type=float, default=0.1,
                        help='decay rate for learning rate')
    parser.add_argument('--weight_decay', type=float, default=0,
                        help='weight decay')
    parser.add_argument('--momentum', type=float, default=0.9,
                        help='momentum')
    parser.add_argument('--device', type=str, default='cuda:0')
    parser.add_argument('--parallel', type=int, default=1, help='data parallel')
    # model dataset
    parser.add_argument('--model', type=str, default='resnet50')
    parser.add_argument('--train_csv_path', type=str, default='train data csv')
    parser.add_argument('--test_csv_path', type=str, default='test data csv')
    parser.add_argument('--train_image_path', type=str, default='/data/Datasets')
    parser.add_argument('--test_image_path', type=str, default='/data/Datasets')
    parser.add_argument('--val_csv_path', type=str, default='val data csv')
    parser.add_argument('--val_image_path', type=str, default='val data image')
    parser.add_argument('--results_dir_contrastive', type=str, default='/home/kiran/Desktop/Dev/SupCon_OCT_Clinical/results.txt')
    parser.add_argument('--img_dir', type=str, default='image directory')
    parser.add_argument('--model_type', type=str, default='bcva')
    parser.add_argument('--multi', type=int, default=0)
    parser.add_argument('--noise_analysis', type=int, default=0)
    parser.add_argument('--severity_analysis', type=int, default=0)
    parser.add_argument('--dataset', type=str, default='Prime',
                        choices=['OCT','Biomarker','Prime'], help='dataset')

    # other setting
    parser.add_argument('--cosine', action='store_true',
                        help='using cosine annealing')
    parser.add_argument('--warm', action='store_true',
                        help='warm-up for large batch training')
    parser.add_argument('--ford_region',type = int,default = 0,
                        help='Training on 6 region classes or not')
    parser.add_argument('--percentage', type=int, default=100,
                        help='Percentage of Biomarker Training Data Utilized')

    parser.add_argument('--ckpt', type=str, default='',
                        help='path to pre-trained model')
    parser.add_argument('--backbone_training', type=str, default='BCVA',
                        help='manner in which backbone was trained')
    parser.add_argument('--patient_split', type=int, default=1,
                        help='choose method')
    opt = parser.parse_args(args)

    # set the path according to the environment
    opt.data_folder = './datasets/'

    iterations = opt.lr_decay_epochs.split(',')
    opt.lr_decay_epochs = list([])
    for it in iterations:
        opt.lr_decay_epochs.append(int(it))

    opt.model_name = '{}_{}_lr_{}_decay_{}_bsz_{}'.\
        format(opt.dataset, opt.model, opt.learning_rate, opt.weight_decay,
               opt.batch_size)

    if opt.cosine:
        opt.model_name = '{}_cosine'.format(opt.model_name)

    # warm-up for large-batch training,
    if opt.warm:
        opt.model_name = '{}_warm'.format(opt.model_name)
        opt.warmup_from = 0.01
        opt.warm_epochs = 10
        if opt.cosine:
            eta_min = opt.learning_rate * (opt.lr_decay_rate ** 3)
            opt.warmup_to = eta_min + (opt.learning_rate - eta_min) * (
                    1 + math.cos(math.pi * opt.warm_epochs / opt.epochs)) / 2
        else:
            opt.warmup_to = opt.learning_rate

    if opt.dataset == 'cifar10':
        opt.n_cls = 10
    elif opt.dataset == 'cifar100':
        opt.n_cls = 100
    elif opt.dataset == 'Ford':
        opt.n_cls = 3
    elif opt.dataset == 'Ford_Region':
        opt.n_cls = 3
    elif opt.dataset == 'covid_kaggle':
        opt.n_cls = 4
    elif opt.dataset == 'qu_dataset':
        opt.n_cls = 3
    elif opt.dataset == 'covid_x':
        opt.n_cls = 2
    elif opt.dataset == 'covid_x_A':
        opt.n_cls = 3
    elif opt.dataset == 'OCT':
        opt.n_cls = 4
    elif opt.dataset == 'Prime':
        opt.n_cls = 2
    else:
        raise ValueError('dataset not supported: {}'.format(opt.dataset))

    return opt

* # Training Main

In [45]:
def main():
    best_acc = 0
    opt = parse_option(args)

    # build data loader
    device = opt.device
    train_loader,  test_loader = set_loader_new(opt)

    acc_list = []
    prec_list = []
    rec_list = []
    spec_list = []
    # training routine
    for i in range(0,1):
        r_list = []
        model, classifier, criterion = set_model(opt)
        optimizer = set_optimizer(opt, classifier)
        
        for epoch in range(1, opt.epochs + 1):
            adjust_learning_rate(opt, optimizer, epoch)

            # train for one epoch
            time1 = time.time()
            loss, acc = train_OCT(train_loader, model, classifier, criterion,
                              optimizer, epoch, opt)
            time2 = time.time()
#             print('Train epoch {}, total time {:.2f}, accuracy:{:.2f}'.format(
#                 epoch, time2 - time1, acc))

            loss, test_acc,prec,rec,spec, r = validate(test_loader, model, classifier, criterion, opt)
            print('Train epoch {}, total time {:.2f}, accuracy:{:.2f}, f1_score:{:.2f}'.format(epoch, time2 - time1, acc, r))
            
            r_list.append(r)
        
        print(r_list)
        
        acc_list.append(acc)
        prec_list.append(prec)
        rec_list.append(rec)
        spec_list.append(spec)
    df = pd.DataFrame({'Accuracy':acc_list,'Precision':prec_list,'Recall':rec_list,'Specificity':spec_list})
    excel_name = opt.backbone_training + '_' + opt.biomarker + '_' + str(opt.patient_split) + '.csv'
    df.to_csv(excel_name, index=False)

def train_OCT(train_loader, model, classifier, criterion, optimizer, epoch, opt):
    """one epoch training"""
    model.eval()
    classifier.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    device = opt.device
    end = time.time()
    for idx, (image, vit_deb,ir_hrf, full_vit,partial_vit,fluid_irf,drt,eye_id,bcva,cst,patient) in enumerate(train_loader):
        data_time.update(time.time() - end)

        images = image.to(device)

        if (opt.biomarker == 'vit_deb'):
            labels = vit_deb
        elif (opt.biomarker == 'ir_hrf'):
            labels = ir_hrf
        elif (opt.biomarker == 'full_vit'):
            labels = full_vit
        elif (opt.biomarker == 'partial_vit'):
            labels = partial_vit
        elif (opt.biomarker == 'drt'):
            labels = drt
        else:
            labels = fluid_irf
            
#         labels = labels.long()
#         labels = torch.tensor([labels])
        labels = labels.float()
        bsz = labels.shape[0]
        labels=labels.to(device)
        
        # warm-up learning rate
        warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer)

        # compute loss
        with torch.no_grad():
            features = model.encoder(images)

        output = classifier(features.detach())
   #     probabilities = torch.sigmoid(output)
        
#         output = output.squeeze()
   #     print("output",output.shape,"labels",labels.shape)
    #    print("Outputs =",probabilities)
     #   print("Labels =",labels)

        loss = criterion(output, labels)

        # update metric
        losses.update(loss.item(), bsz)

        acc1= accuracy_single(output, labels, topk=(1,))


        top1.update(acc1[0].item(), bsz)

        # SGD
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                   epoch, idx + 1, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1))
            sys.stdout.flush()

    return losses.avg, top1.avg


def validate(val_loader, model, classifier, criterion, opt):
    """validation"""
    model.eval()
    classifier.eval()
    device = opt.device
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    label_list = []
    out_list = []
    with torch.no_grad():
        end = time.time()
        for idx, (image, vit_deb,ir_hrf, full_vit,partial_vit,fluid_irf,drt) in enumerate(val_loader):
            images = image.float().to(device)

            if (opt.biomarker == 'vit_deb'):
                labels = vit_deb
            elif (opt.biomarker == 'ir_hrf'):
                labels = ir_hrf
            elif (opt.biomarker == 'full_vit'):
                labels = full_vit
            elif (opt.biomarker == 'partial_vit'):
                labels = partial_vit
            elif (opt.biomarker == 'drt'):
                labels = drt
            else:
                labels = fluid_irf
                
#             labels = torch.tensor([labels])
#             labels = labels.long()
            labels = labels.float()
            label_list.append(labels.detach().cpu().numpy())
            labels = labels.to(device)
            bsz = labels.shape[0]

            # forward
            output = classifier(model.encoder(images))
#             output = output.squeeze()

            loss = criterion(output, labels)
            _, pred = output.topk(1, 1, True, True)
            output = torch.round(torch.sigmoid(output))

            out_list.append(output.detach().cpu().numpy())
            # update metri  c
            losses.update(loss.item(), bsz)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

#             if idx % opt.print_freq == 0:
#                 print('Test: [{0}/{1}]\t'
#                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
#                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
#                       'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
#                        idx, len(val_loader), batch_time=batch_time,
#                        loss=losses, top1=top1))

    label_list = np.squeeze(label_list, axis=(2,))
    label_array = np.array(label_list)

    out_array = np.array(out_list)
    out_array = np.concatenate(out_list, axis=0)
    print("output",out_array[0:10])
    print("labels",label_array[0:10])
    
    r = f1_score(label_array.flatten(), out_array.flatten(), average='macro')
    #     r = f1_score(label_array, out_array, average='macro')
    
    prec = precision_score(label_array.flatten(), out_array.flatten())
    rec = recall_score(label_array.flatten(), out_array.flatten())
    spec = recall_score(label_array.flatten(), out_array.flatten(), pos_label=0)

    return losses.avg, top1.avg,prec, rec, spec, r

# *   **Training Multi**

In [46]:
### from training_linear.training_one_epoch_ckpt_multi import main_multilabel ###
### main_multilabel ###


def train_OCT_multilabel(train_loader, model, classifier, criterion, optimizer, epoch, opt):
    """one epoch training"""
    model.eval()
    classifier.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    device = opt.device
    end = time.time()
    
#     for idx, (image, bio_tensor,eye_id,bcva,cst,patient) in enumerate(train_loader):
    for idx, (image, bio_tensor) in enumerate(train_loader):
        data_time.update(time.time() - end)

        images = image.to(device)

        labels = bio_tensor
        labels = labels.float()
        bsz = labels.shape[0]
        labels=labels.to(device)
        
        # warm-up learning rate
        warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer)

        # compute loss
        with torch.no_grad():
            features = model.encoder(images)

        output = classifier(features.detach())
#         print("Outputs =",output)
#         print("Labels =",labels)
        
        loss = criterion(output, labels)

        # update metric
        losses.update(loss.item(), bsz)
        acc1= accuracy(output, labels, topk=(1,))
        top1.update(acc1[0].item(), bsz)

        # SGD
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            print('Train: [{0}][{1}/{2}]\t'.format(
                epoch, idx + 1, len(train_loader)))
            sys.stdout.flush()

    return losses.avg, top1.avg

def validate_multilabel(val_loader, model, classifier, criterion, opt):
    """validation"""
    model.eval()
    classifier.eval()
    
    device = opt.device
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    label_list = []
    out_list = []
    
    with torch.no_grad():
        end = time.time()
#         for idx, (image, bio_tensor,eye_id,bcva,cst,patient) in enumerate(val_loader):
        for idx, (image, bio_tensor) in enumerate(val_loader):
            images = image.float().to(device)

            labels = bio_tensor
            labels = labels.float()
            label_list.append(labels.squeeze().detach().cpu().numpy())
            labels = labels.to(device)
            bsz = labels.shape[0]

            # forward
            output = classifier(model.encoder(images))

            loss = criterion(output, labels)
            output = torch.round(torch.sigmoid(output))
            out_list.append(output.detach().cpu().numpy())
            
            # update metric
            losses.update(loss.item(), bsz)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()


    label_array = np.array(label_list)
    out_array = np.array(out_list)
    out_array = np.concatenate(out_list, axis=0)
    print("output",out_array,"labels",label_array)
    r = f1_score(label_array, out_array, average='macro')

    return losses.avg, r

def main_multilabel():
    best_acc = 0
    opt = parse_option(args)

    # build data loader
    device = opt.device
    train_loader,  test_loader = set_loader_new(opt)
    val_loader = val_loader_fun(opt)

    
    # training routine
    for i in range(0,1):
        r_list = []
        model, classifier, criterion = set_model(opt)
        optimizer = set_optimizer(opt, classifier)
        
        for epoch in range(1, opt.epochs + 1):
            adjust_learning_rate(opt, optimizer, epoch)

            # train for one epoch
            time1 = time.time()
            loss, acc = train_OCT_multilabel(train_loader, model, classifier, criterion,
                                             optimizer, epoch, opt)
            time2 = time.time()
            
                

    # eval for one epoch
            loss, r = validate_multilabel(test_loader, model, classifier, criterion, opt)
            print('Train epoch {}, total time {:.2f}, accuracy:{:.2f}, f1_score:{:.2f}'.format(epoch, time2 - time1, acc, r))

            r_list.append(r)
        
        print(r_list)
    
    df = pd.DataFrame({'AUROC': r_list})
    excel_name = opt.backbone_training + '_' + opt.biomarker + opt.model + str(opt.percentage) + 'multiAUROC' + str(opt.patient_split) + '.csv'
    df.to_csv(excel_name, index=False)
    
    submission_generate(val_loader, model,classifier, opt)

# *   **Model**

In [47]:
### from models.resnet import  SupConResNet,LinearClassifier,LinearClassifier_MultiLabel, SupConResNet_Original, SupConResNet_Original_Headless ###
### Resnet Model ###

# ---------------------------------------------------------------------------------------------------#
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, is_last=False):
        super(BasicBlock, self).__init__()
        self.is_last = is_last
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        preact = out
        out = F.relu(out)
        if self.is_last:
            return out, preact
        else:
            return out


# ---------------------------------------------------------------------------------------------------#
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1, is_last=False):
        super(Bottleneck, self).__init__()
        self.is_last = is_last
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        preact = out
        out = F.relu(out)
        if self.is_last:
            return out, preact
        else:
            return out


# ---------------------------------------------------------------------------------------------------#
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, in_channel=1, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.in_planes = 64

        #self.conv1 = nn.Conv2d(in_channel, 64, kernel_size=3, stride=1, padding=1,bias=False)
        self.conv1 = nn.Conv2d(in_channel, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves
        # like an identity. This improves the model by 0.2~0.3% according to:
        # https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for i in range(num_blocks):
            stride = strides[i]
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, layer=100):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)

        out = self.layer2(out)

        out = self.layer3(out)

        out = self.layer4(out)

        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return out


# ---------------------------------------------------------------------------------------------------#
def resnet18(**kwargs):
    return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)


def resnet34(**kwargs):
    return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)


def resnet50(**kwargs):
    return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)


def resnet101(**kwargs):
    return ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)


model_dict = {
    'resnet18': [resnet18, 512],
    'resnet34': [resnet34, 512],
    'resnet50': [resnet50, 2048],
    'resnet101': [resnet101, 2048],
}


# ---------------------------------------------------------------------------------------------------#
class SupConResNet_Original(nn.Module):
    def __init__(self, name='resnet50',head='mlp',feat_dim=128,use_head=True):
        super(SupConResNet_Original,self).__init__()
        self.use_head = use_head
        if(name == 'resnet50'):
            self.encoder = torchvision.models.resnet50(zero_init_residual=True)
            self.encoder.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
            self.encoder.fc = nn.Identity()

            if head == 'linear':
                self.head = nn.Linear(2048, feat_dim)
            elif head == 'mlp':
                self.head = nn.Sequential(
                    nn.Linear(2048, 2048),
                    nn.ReLU(inplace=True),
                    nn.Linear(2048, feat_dim)
                )
            else:
                raise NotImplementedError(
                    'head not supported: {}'.format(head))

        else:
            self.encoder = torchvision.models.resnet18(zero_init_residual=True)
            self.encoder.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
            self.encoder.fc = nn.Identity()

            if head == 'linear':
                self.head = nn.Linear(512, feat_dim)
            elif head == 'mlp':
                self.head = nn.Sequential(
                    nn.Linear(512, 512),
                    nn.ReLU(inplace=True),
                    nn.Linear(512, feat_dim)
                )
            else:
                raise NotImplementedError(
                    'head not supported: {}'.format(head))

    def forward(self, x):
        feat = self.encoder(x)

        feat = F.normalize(self.head(feat), dim=1)
        return feat
    
# # ---------------------------------------------------------------------------------------------------#
class LinearClassifier(nn.Module):
    """Linear classifier"""
    def __init__(self, name='resnet50', num_classes=2):
        super(LinearClassifier, self).__init__()
        _, feat_dim = model_dict[name]
        self.fc = nn.Linear(feat_dim, num_classes)

    def forward(self, features):
        return self.fc(features)

# ---------------------------------------------------------------------------------------------------#
# class LinearClassifier(nn.Module):
#     """Linear classifier"""
#     def __init__(self, name='resnet50', num_classes=2):
#         super(LinearClassifier, self).__init__()
#         _, feat_dim = model_dict[name]
        
#         hidden_dim = feat_dim
        
#         self.fc = nn.Sequential(
#             nn.Linear(feat_dim, hidden_dim),   # Additional layer 1
#             nn.ReLU(),
#             nn.Linear(hidden_dim, num_classes) # Output layer
#         )

#     def forward(self, features):
#         return self.fc(features)


# # ---------------------------------------------------------------------------------------------------#
# class LinearClassifier_MultiLabel(nn.Module):
#     """Linear classifier"""
#     def __init__(self, name='resnet50', num_classes=2):
#         super(LinearClassifier_MultiLabel, self).__init__()
#         _, feat_dim = model_dict[name]
# #         self.fc = nn.Linear(feat_dim, feat_dim)
# #         self.sigm = nn.Sigmoid()
#         self.fc = nn.Linear(feat_dim, num_classes)
#         self.sigm = nn.Sigmoid()

#     def forward(self, features):
#         return self.sigm(self.fc(features))

* #   **Data loaders**

In [48]:
### from utils.utils import AverageMeter,warmup_learning_rate ###
### setting the model and the loaders ###

# from datasets.biomarker_multi_fusion import BiomarkerDatasetAttributes_MultiLabel_MultiClass

# ---------------------------------------------------------------------------------------------------#
def set_model(opt):
        
    if(opt.multi == 1 and opt.super!=3):
        model = SupConResNet_Original(name=opt.model)
        criterion = torch.nn.BCEWithLogitsLoss()
        classifier = LinearClassifier(name=opt.model, num_classes=6)
        
    elif(opt.multi == 0):
        model = SupConResNet_Original(name=opt.model)
        criterion = torch.nn.BCEWithLogitsLoss()
#         criterion = torch.nn.CrossEntropyLoss()
        classifier = LinearClassifier(name=opt.model, num_classes=1)
        
    ckpt = torch.load(opt.ckpt, map_location='cpu')
    state_dict = ckpt['model']
    device = opt.device
    if torch.cuda.is_available():
        if opt.parallel == 0:
            model.encoder = torch.nn.DataParallel(model.encoder)
        else:
            new_state_dict = {}
            for k, v in state_dict.items():
                k = k.replace("module.", "")
                new_state_dict[k] = v
            state_dict = new_state_dict
        model = model.to(device)
        classifier = classifier.to(device)
        criterion = criterion.to(device)
        cudnn.benchmark = True

        model.load_state_dict(state_dict)

    return model, classifier, criterion
# ---------------------------------------------------------------------------------------------------#
def set_loader_new(opt):
    
    # construct data loader
    if opt.dataset == 'Prime':
        mean = (.1706)
        std = (.2112)
    else:
        raise ValueError('dataset not supported: {}'.format(opt.dataset))

# ---------------------------------------------------------------------------------------------------#
    normalize = transforms.Normalize(mean=mean, std=std)

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(size=224, scale=(0.2, 1.)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
        normalize,
    ])

    val_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        normalize,
    ])
    
# ---------------------------------------------------------------------------------------------------#        
    if opt.dataset =='Prime':
        data_path_train = opt.train_image_path
        csv_path_train = opt.train_csv_path
        csv_path_test = opt.test_csv_path
        data_path_test = opt.test_image_path
          
        if(opt.multi == 1 and opt.super !=3):
            train_dataset = BiomarkerDatasetAttributes_MultiLabel(csv_path_train, data_path_train, transforms=train_transform)
            test_dataset = BiomarkerDatasetAttributes_MultiLabel(csv_path_test, data_path_test, transforms=val_transform)
        else:
            train_dataset = BiomarkerDatasetAttributes(csv_path_train,data_path_train,transforms = train_transform)
            test_dataset = BiomarkerDatasetAttributes_Validate(csv_path_test,data_path_test,transforms = val_transform)
    else:
        raise ValueError(opt.dataset)
# ---------------------------------------------------------------------------------------------------#        
    train_loader = torch.utils.data.DataLoader(
                    train_dataset, batch_size=opt.batch_size, shuffle=True,
                    num_workers=opt.num_workers, pin_memory=True)
# ---------------------------------------------------------------------------------------------------# 
    if(opt.biomarker == 'drt' and opt.patient_split == 1):
        dl = True
    elif(opt.multi == 1):
        dl = True
    else:
        dl=False
# ---------------------------------------------------------------------------------------------------#        
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, shuffle=True,
        num_workers=0, pin_memory=True,drop_last=dl)

    return train_loader, test_loader

# ---------------------------------------------------------------------------------------------------#        
class TwoCropTransform:
    """Create two crops of the same image"""
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, x):
        return [self.transform(x), self.transform(x)]


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        _, gt = target.topk(maxk, 1, True, True)
        gt = gt.t()
        correct = pred.eq(gt.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
    
def accuracy_single(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


def adjust_learning_rate(args, optimizer, epoch):
    lr = args.learning_rate
    
    if args.cosine:
        eta_min = lr * (args.lr_decay_rate ** 3)
        lr = eta_min + (lr - eta_min) * (
                1 + math.cos(math.pi * epoch / args.epochs)) / 2
    else:
        steps = np.sum(epoch > np.asarray(args.lr_decay_epochs))
        if steps > 0:
            lr = lr * (args.lr_decay_rate ** steps)
            
    print(lr)
    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def warmup_learning_rate(args, epoch, batch_id, total_batches, optimizer):
    
    if args.warm and epoch <= args.warm_epochs:
        p = (batch_id + (epoch - 1) * total_batches) / \
            (args.warm_epochs * total_batches)
        lr = args.warmup_from + p * (args.warmup_to - args.warmup_from)

        for param_group in optimizer.param_groups:
            param_group['lr'] = lr


def set_optimizer(opt, model):

    optimizer = optim.SGD(model.parameters(),
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)


    return optimizer


def save_model(model, optimizer, opt, epoch, save_file):
    
    print('==> Saving...')
    state = {
        'opt': opt,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    torch.save(state, save_file)
    del state

def accuracy_multilabel(output,target):
    output = output.detach().cpu().numpy()
    target = target.detach().cpu().numpy()
    r = roc_auc_score(target,output,multi_class='ovr')
    print(r)

# *   **Data Readers**

In [49]:
### from datasets.biomarker_multi import BiomarkerDatasetAttributes_MultiLabel ###

# elif(opt.multi == 1 and opt.super !=3): #


class BiomarkerDatasetAttributes_MultiLabel(data.Dataset):
    def __init__(self,df, img_dir, transforms):
        self.img_dir = img_dir
        self.transforms = transforms
        self.df = pd.read_csv(df)
#         self.clinical_dir = pd.read_csv(clinical_dir)
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        temp_path = self.df.iloc[idx,0][1:9]
        if temp_path == "TREX DME":
            path = self.img_dir + "/TREX_DME" + self.df.iloc[idx,0]
        else:
            path = self.img_dir  + self.df.iloc[idx,0]
        image = Image.open(path).convert("L")
        image = np.array(image)
        image = Image.fromarray(image)
        image = self.transforms(image)
        b1 = self.df.iloc[idx,1]
        b2 = self.df.iloc[idx,2]
        b3 = self.df.iloc[idx,3]
        b4 = self.df.iloc[idx, 4]
        b5 = self.df.iloc[idx, 5]
        b6 = self.df.iloc[idx, 6]
        bio_tensor = torch.tensor([b1, b2, b3, b4, b5, b6])
        #assert self.df.iloc[idx,0] == self.clinical_dir.iloc[idx,0]
        
#         c1 = (self.df.iloc[idx,7])
#         c2 = (self.df.iloc[idx,8])
        
#         eye_id = (self.df.iloc[idx,9])
#         patient_id = (self.df.iloc[idx,10])
        
#         return image, bio_tensor, eye_id, c1, c2, patient_id
        return image, bio_tensor

In [50]:
class BiomarkerDatasetAttributes(data.Dataset):
    def __init__(self,df, img_dir, transforms):
        self.img_dir = img_dir
        self.transforms = transforms
        self.df = pd.read_csv(df)
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        temp_path = self.df.iloc[idx,0][1:9]
        if temp_path == "TREX DME":
            path = self.img_dir + "/TREX_DME" + self.df.iloc[idx,0]
        else:
            path = self.img_dir  + self.df.iloc[idx,0]
        image = Image.open(path).convert("L")
        image = np.array(image)
        image = Image.fromarray(image)
        image = self.transforms(image)
        
        ir_hrf = torch.tensor([self.df.iloc[idx,1]])
        partial_vit = torch.tensor([self.df.iloc[idx,2]])
        full_vit = torch.tensor([self.df.iloc[idx,3]])
        vit_deb = torch.tensor([self.df.iloc[idx,4]])
        drt = torch.tensor([self.df.iloc[idx,5]])
        fluid_irf = torch.tensor([self.df.iloc[idx,6]])
        bcva = torch.tensor([self.df.iloc[idx,7]])
        cst = torch.tensor([self.df.iloc[idx,8]])
        eye_id = torch.tensor([self.df.iloc[idx,9]])
        patient = torch.tensor([self.df.iloc[idx,10]])
        
#         ir_hrf = self.df.iloc[idx,1]
#         partial_vit = self.df.iloc[idx,2]
#         full_vit = self.df.iloc[idx,3]
#         vit_deb = self.df.iloc[idx,4]
#         drt = self.df.iloc[idx,5]
#         fluid_irf = self.df.iloc[idx,6]
#         bcva = self.df.iloc[idx,7]
#         cst = self.df.iloc[idx,8]
#         eye_id = self.df.iloc[idx,9]
#         patient = self.df.iloc[idx,10]

        
        
        return image, vit_deb,ir_hrf, full_vit,partial_vit,fluid_irf,drt,eye_id,bcva,cst,patient

In [51]:
class BiomarkerDatasetAttributes_Validate(data.Dataset):
    def __init__(self,df, img_dir, transforms):
        self.img_dir = img_dir
        self.transforms = transforms
        self.df = pd.read_csv(df)
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        temp_path = self.df.iloc[idx,0][1:9]
        if temp_path == "TREX DME":
            path = self.img_dir + "/TREX_DME" + self.df.iloc[idx,0]
        else:
            path = self.img_dir  + self.df.iloc[idx,0]
        image = Image.open(path).convert("L")
        image = np.array(image)
        image = Image.fromarray(image)
        image = self.transforms(image)
        
        
        ir_hrf = torch.tensor([self.df.iloc[idx,1]])
        partial_vit = torch.tensor([self.df.iloc[idx,2]])
        full_vit = torch.tensor([self.df.iloc[idx,3]])
        vit_deb = torch.tensor([self.df.iloc[idx,4]])
        drt = torch.tensor([self.df.iloc[idx,5]])
        fluid_irf = torch.tensor([self.df.iloc[idx,6]])
        
#         ir_hrf = self.df.iloc[idx,1]
#         partial_vit = self.df.iloc[idx,2]
#         full_vit = self.df.iloc[idx,3]
#         vit_deb = self.df.iloc[idx,4]
#         drt = self.df.iloc[idx,5]
#         fluid_irf = self.df.iloc[idx,6]

        return image, vit_deb,ir_hrf, full_vit,partial_vit,fluid_irf,drt

# *   **Arguments Specifying**

In [52]:
args = ('--batch_size', '128', '--patient_split', '1', '--model', 'resnet50', 
        '--biomarker' ,'full_vit', '--backbone_training', 'BCVA', '--dataset' ,'Prime', '--epochs', '10',
        '--device', 'cuda:0' ,'--super', '0', '--multi', '0',
        '--train_csv_path', '/kaggle/input/combined-bio-and-clinical/Modified training biomarkers and clinical.csv' ,
        '--test_csv_path', '/kaggle/input/leaked-ans/Leaked answers.csv' ,
        '--ckpt', '/kaggle/input/last-path/last.pth',
        '--train_image_path' ,'/kaggle/input/olives-vip-cup-2023/olives/2023 IEEE SPS Video and Image Processing (VIP) Cup - Ophthalmic Biomarker Detection/TRAIN/OLIVES',
        '--test_image_path', '/kaggle/input/olives-vip-cup-2023/olives/2023 IEEE SPS Video and Image Processing (VIP) Cup - Ophthalmic Biomarker Detection/TEST/',
        '--val_csv_path','/kaggle/input/olives-vip-cup-2023/olives/2023 IEEE SPS Video and Image Processing (VIP) Cup - Ophthalmic Biomarker Detection/TEST/test_set_submission_template.csv',
        '--val_image_path','/kaggle/input/olives-vip-cup-2023/olives/2023 IEEE SPS Video and Image Processing (VIP) Cup - Ophthalmic Biomarker Detection/TEST/')

# *   **Submission Data reading**

In [53]:
class RECOVERY_TEST(data.Dataset):
    def __init__(self,df, img_dir, transforms):
        self.img_dir = img_dir
        self.transforms = transforms
        self.df = pd.read_csv(df)
       
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        temp_path = self.df.iloc[idx,0][0:9]
        if temp_path == "/TREX DME":
            path = self.img_dir + "/TREX_DME" + self.df.iloc[idx,0]
        else:
            path = self.img_dir  + self.df.iloc[idx,0]
        image = Image.open(path).convert("L")
        image = np.array(image)
        image = Image.fromarray(image)
        image = self.transforms(image)
       
        return image , self.df.iloc[idx,0]

# *   **Submission data loading**

In [54]:
def val_loader_fun(opt):
    mean = (.1706)
    std = (.2112)
    normalize = transforms.Normalize(mean=mean, std=std)
    val_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        normalize,
    ])
    csv_path_val = opt.val_csv_path
    data_path_val = opt.val_image_path
    val_dataset = RECOVERY_TEST(csv_path_val,data_path_val ,transforms = val_transform)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False,
        num_workers=0, pin_memory=True,drop_last=False)

    return val_loader

# *   **Submission generating**

In [55]:
def submission_generate(val_loader, model, classifier, opt):
    """validation"""
    model.eval()
    classifier.eval()
    device = opt.device
    out_list = []
    with torch.no_grad():
        for idx, (image, temp_path) in enumerate(val_loader):
            images = image.float().to(device)
            output = classifier(model.encoder(images))
            output = torch.round(torch.sigmoid(output))
            out_list.append((output.squeeze().detach().cpu().numpy() , temp_path))
    
    sub_dir_csv = '/kaggle/input/olives-vip-cup-2023/olives/2023 IEEE SPS Video and Image Processing (VIP) Cup - Ophthalmic Biomarker Detection/TEST/test_set_submission_template.csv'
    sub_dir = pd.read_csv(sub_dir_csv)
    print(len(sub_dir))
    for i in range(0,len(sub_dir)):
        #print(sub_dir.iloc[i,0] , out_list[i][1][0])
        assert sub_dir.iloc[i,0] == out_list[i][1][0]
        
        sub_dir.iloc[i,1] = out_list[i][0][0]
        sub_dir.iloc[i, 2] = out_list[i][0][1]
        sub_dir.iloc[i, 3] = out_list[i][0][2]
        sub_dir.iloc[i, 4] = out_list[i][0][3]
        sub_dir.iloc[i, 5] = out_list[i][0][4]
        sub_dir.iloc[i, 6] = out_list[i][0][5]
        
    print(sub_dir.head())
    sub_dir.to_csv( f'/kaggle/working/baseline_result_{datetime.datetime.now()}.csv',index=False)

# *   **The first function called**

In [56]:
### main linear function ###


# from training_linear.training_one_epoch_ckpt_multi import main_multilabel


try:
    import apex
    from apex import amp, optimizers
except ImportError:
    pass


if __name__ == '__main__':
    opt = parse_option(args)
    # opt.super --> Supervised (1) or Not (0) or (2) Fusion Supervised or (3) BCE Loss for AUROC
    # opt.multi --> MultiLabel (1) or Not(0)
    # 0 --> Ckpt Training
    # multi 1 and super 3 --> BCE Individual Biomarkers

    if(opt.multi == 1 and (opt.super == 0 or opt.super ==8)):
        main_multilabel()

    else:
        main()

0.05
Train: [1][10/73]	BT 0.330 (0.609)	DT 0.015 (0.288)	loss 0.662 (0.682)	Acc@1 39.062 (42.656)
Train: [1][20/73]	BT 0.336 (0.572)	DT 0.023 (0.253)	loss 0.636 (0.668)	Acc@1 46.094 (44.453)
Train: [1][30/73]	BT 0.331 (0.556)	DT 0.018 (0.239)	loss 0.614 (0.649)	Acc@1 43.750 (43.672)
Train: [1][40/73]	BT 0.332 (0.551)	DT 0.017 (0.234)	loss 0.583 (0.630)	Acc@1 48.438 (43.184)
Train: [1][50/73]	BT 0.340 (0.547)	DT 0.021 (0.230)	loss 0.552 (0.615)	Acc@1 43.750 (43.078)
Train: [1][60/73]	BT 0.343 (0.556)	DT 0.026 (0.239)	loss 0.520 (0.604)	Acc@1 47.656 (43.424)
Train: [1][70/73]	BT 0.357 (0.552)	DT 0.042 (0.235)	loss 0.533 (0.595)	Acc@1 46.875 (43.806)
output [[0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]]
labels [[0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]]
Train epoch 1, total time 40.02, accuracy:43.91, f1_score:0.66
0.05
Train: [2][10/73]	BT 0.335 (0.591)	DT 0.015 (0.268)	loss 0.526 (0.522)	Acc@1 42.969 (44.531)
Train: [2][20/73]	BT 0.351 (0.563)	DT 0.030 (0.