In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
import torch.nn as nn
import torch.nn.functional as F
from pprint import pprint

2023-01-24 11:20:56.099158: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-24 11:20:56.192278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/jrozycki/miniconda3/envs/DensenetUG/lib/python3.10/site-packages/cv2/../../lib64:
2023-01-24 11:20:56.192290: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-01-24 11:20:56.636353: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'l

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from collections import OrderedDict

In [3]:
class DenseLayer(nn.Sequential):
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(DenseLayer, self).__init__()
        self.add_module('norm1', nn.BatchNorm2d(num_input_features)),
        self.add_module('relu1', nn.ReLU(inplace=True)),
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
                        growth_rate, kernel_size=1, stride=1, bias=False)),
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
        self.add_module('relu2', nn.ReLU(inplace=True)),
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                        kernel_size=3, stride=1, padding=1, bias=False)),
        self.drop_rate = drop_rate

    def forward(self, x):
        new_features = super(DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        return torch.cat([x, new_features], 1)

In [4]:
class Transision(nn.Sequential):
    def __init__(self, num_input_features, num_output_features):
        super(Transision, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_input_features)),
        self.add_module('relu', nn.ReLU(inplace=True)),
        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
                        kernel_size=1, stride=1, bias=False)),
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))

In [5]:
class DenseBlock(nn.Sequential):
    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate)
            self.add_module('denselayer%d' % (i + 1), layer)

In [6]:
class DenseNet(nn.Module):
    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, bn_size=4, drop_rate=0, num_classes=1):
        super(DenseNet, self).__init__()
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]))
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = DenseBlock(num_layers=num_layers, num_input_features=num_features, bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = Transision(num_input_features=num_features, num_output_features=num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2

        self.features.add_module('norm5', nn.BatchNorm2d(num_features))
        self.classifier = nn.Linear(num_features, num_classes)

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        features = self.features(x)
        out = F.relu(features, inplace=True)
        out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1)
        out = torch.sigmoid(self.classifier(out))
        return out

In [7]:
def densenet121(pretrained=False, **kwargs):
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['densenet121']))
    return model

In [8]:
import time
import copy
import pandas as pd
import torch
from torch.autograd import Variable
from train import train_model, get_metrics
from pipeline import get_study_level_data, get_dataloaders
from DenseNetData import GetDatasetSize
import deeplake




In [9]:
train_path = "./Data/train"
val_path = "./Data/valid"
test_path = "./Data/test"
    
train_set = GetDatasetSize(train_path)
val_set = GetDatasetSize(val_path)
test_set = GetDatasetSize(test_path)
print(train_set,"\n\n",val_set,"\n\n",test_set)

{'Cancer': 465, 'normal': 202} 

 {'Cancer': 59, 'normal': 13} 

 {'Cancer': 261, 'normal': 54}


In [10]:
labels = ['Cancer', 'Normal']
train_list = list(train_set.values())
val_list = list(val_set.values())
test_list = list(test_set.values())

In [11]:
tai = {'train': train_list[0], 'valid': val_list[0], 'test': test_list[0]}
tni = {'train': train_list[1], 'valid': val_list[1], 'test': test_list[1]}
data_cat = ['train', 'valid', 'test']

In [12]:
def n_p(x):
    '''convert numpy float to Variable tensor float'''    
    return Variable(torch.FloatTensor([x]), requires_grad=False)

In [13]:
Wt1 = {x: n_p(tni[x] / (tni[x] + tai[x])) for x in data_cat}
Wt0 = {x: n_p(tai[x] / (tni[x] + tai[x])) for x in data_cat}

In [14]:
print('tai:', tai)
print('tni:', tni, '\n')
print('Wt0 train:', Wt0['train'])
print('Wt0 valid:', Wt0['valid'])
print('Wt1 train:', Wt1['train'])
print('Wt1 valid:', Wt1['valid'])

tai: {'train': 465, 'valid': 59, 'test': 261}
tni: {'train': 202, 'valid': 13, 'test': 54} 

Wt0 train: tensor([0.6972])
Wt0 valid: tensor([0.8194])
Wt1 train: tensor([0.3028])
Wt1 valid: tensor([0.1806])


In [15]:
study_data = get_study_level_data("normal")

100%|██████████| 4/4 [00:00<00:00,  9.41it/s]
100%|██████████| 4/4 [00:00<00:00, 91.48it/s]
100%|██████████| 4/4 [00:00<00:00, 20.76it/s]


In [16]:
# print(study_data)

dataloaders = get_dataloaders(study_data)
dataset_sizes = {x: len(study_data[x]) for x in data_cat}
dataset_sizes

{'train': 667, 'valid': 72, 'test': 315}

In [17]:
class Loss(torch.nn.modules.Module):
    def __init__(self, Wt1, Wt0):
        super(Loss, self).__init__()
        self.Wt1 = Wt1
        self.Wt0 = Wt0
        
    def forward(self, output, target, data_cat):
        loss = self.Wt1[data_cat] * (target * torch.log(output)) + self.Wt0[data_cat] * ((1 - target) * torch.log(1 - output))
        return -torch.mean(loss)

In [18]:
model = densenet121(pretrained=False)
criteria = Loss(Wt1, Wt0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

  nn.init.kaiming_normal(m.weight.data)


In [19]:
model = train_model(model, criteria, optimizer, dataloaders, exp_lr_scheduler, dataset_sizes, num_epochs=3)

Train batches: 334
Valid batches: 36 

Epoch 1/3
----------
train Loss: 0.1407 Acc: 0.5892
Confusion Meter:
 [[0.6089109  0.3910891 ]
 [0.41935483 0.58064514]]
valid Loss: 0.2782 Acc: 0.8194
Confusion Meter:
 [[0. 1.]
 [0. 1.]]
Time elapsed: 1m 54s

Epoch 2/3
----------
train Loss: 0.1170 Acc: 0.7241
Confusion Meter:
 [[0.8366337  0.16336633]
 [0.32473117 0.6752688 ]]
valid Loss: 0.1522 Acc: 0.1111
Confusion Meter:
 [[0.15384616 0.84615386]
 [0.89830506 0.10169491]]
Time elapsed: 3m 49s

Epoch 3/3
----------
train Loss: 0.0998 Acc: 0.7436
Confusion Meter:
 [[0.9108911  0.08910891]
 [0.32903227 0.67096776]]
valid Loss: nan Acc: 0.8194
Confusion Meter:
 [[0. 1.]
 [0. 1.]]
Time elapsed: 5m 48s

Training complete in 5m 48s
Best valid Acc: 0.819444
