In [8]:
# TODO: clamp log_sigma and the absolute FVC difference inside the loss
# TODO: normalize the input data
# TODO: ...

In [1]:
%config Completer.use_jedi = False

import os
import platform
from collections import namedtuple
import time

# import tqdm
import pandas as pd
import numpy as np
import sparse

from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.nn.modules.loss import _Loss
from torch.optim.lr_scheduler import _LRScheduler
# from torchvision import transforms
# from torchsummary import summary
# from efficientnet_pytorch_3d import EfficientNet3D
from my_efficientnet_pytorch_3d import EfficientNet3D

# from utils import *


########################

# Detect Kaggle explicitly instead of treating every Linux machine as Kaggle:
# a local Linux workstation must use the local `data/` layout.
# NOTE(review): relies on the Kaggle runtime exporting KAGGLE_KERNEL_RUN_TYPE
# and/or mounting /kaggle/input - confirm inside a Kaggle session.
RUNNING_IN_KAGGLE = bool(os.environ.get('KAGGLE_KERNEL_RUN_TYPE')) or os.path.isdir('/kaggle/input')
IMAGE_PATH = "../input/osic-pulmonary-fibrosis-progression/" if RUNNING_IN_KAGGLE else 'data/'
PROCESSED_PATH = 'FIX IT!' if RUNNING_IN_KAGGLE else 'data/processed-data/'  # TODO: fix this line

# Tensor dtype and compute device shared by every later cell.
dtype = torch.float32
USE_GPU = True
device = torch.device('cuda:0' if USE_GPU and torch.cuda.is_available() else 'cpu')

In [2]:
class CTDataset(Dataset):
    """Per-patient dataset combining pre-processed CT data with tabular FVC data.

    Every entry of ``root`` is treated as one patient directory that holds
    ``meta.npy``, ``masks.npz`` and ``images.npy``. The tabular part (weeks,
    FVC measurements, age, sex, smoking status) is read from ``csv_path``.
    """

    _ReturnValue = namedtuple('ReturnValue', ['weeks', 'fvcs', 'features', 'masks', 'images'])

    def __init__(
            self, root, csv_path, train=True, test_size=0.25, random_state=42):
        """
        :param root: directory containing one sub-directory per patient.
        :param csv_path: CSV file with the columns Patient, Weeks, FVC, Age,
            Sex and SmokingStatus.
        :param train: if truthy the dataset serves the train split, otherwise
            the test split.
        :param test_size: forwarded to ``train_test_split``; ``0`` puts every
            patient into the train split.
        :param random_state: forwarded to ``train_test_split``.
        :raises ValueError: if ``root`` does not exist or a patient found in
            ``root`` has no rows in the CSV file.
        """
        assert test_size is not None

        self.root = root
        self.train = train
        self.csv_path = csv_path
        self.test_size = test_size
        self.random_state = random_state

        if not os.path.exists(self.root):
            raise ValueError('Data is missing')

        # Sorted so that the split below is reproducible across file systems.
        self._patients = sorted(os.listdir(self.root))

        if self.test_size == 0:
            self._train_patients, self._test_patients = self._patients, []
        else:
            self._train_patients, self._test_patients = train_test_split(
                self._patients, test_size=self.test_size, random_state=random_state
            )

        self._table_features = dict()
        table_data = pd.read_csv(self.csv_path)
        for patient in self._patients:
            patient_data = table_data[table_data.Patient == patient]
            if patient_data.empty:
                # Previously surfaced as an opaque "not enough values to unpack".
                raise ValueError(
                    f'No rows for patient {patient!r} found in {self.csv_path}'
                )

            # Visits ordered by week; FVC values follow the same order.
            all_weeks, all_fvcs = zip(*sorted(
                zip(patient_data.Weeks.tolist(), patient_data.FVC.tolist()),
                key=lambda pair: pair[0]
            ))

            age = self._most_frequent(patient_data.Age)
            sex = self._most_frequent(patient_data.Sex)
            smoking_status = self._most_frequent(patient_data.SmokingStatus)

            # One-hot encodings; anything other than 'Female' is encoded as [1, 0].
            sex = [0, 1] if sex == 'Female' else [1, 0]
            smoking_status = (
                [1, 0, 0] if smoking_status == 'Ex-smoker' else
                [0, 1, 0] if smoking_status == 'Never smoked' else
                [0, 0, 1] if smoking_status == 'Currently smokes' else
                [0, 0, 0]
            )
            self._table_features[patient] = (
                all_weeks, all_fvcs, [age] + sex + smoking_status
            )

    @staticmethod
    def _most_frequent(values, axis=None):
        """Return the most frequent entry of ``values`` (along ``axis`` if given).

        Ties are resolved in favour of the entry that ``np.unique`` lists last.
        """
        uniques, counts = np.unique(values, return_counts=True, axis=axis)
        # sorted() is stable, so among equal counts the last unique entry wins.
        return sorted(zip(uniques, counts), key=lambda pair: pair[1])[-1][0]

    def __getitem__(self, index):
        """
        Args:
            index (int): position inside the active (train or test) split.
        Returns:
            ReturnValue: namedtuple ``(weeks, fvcs, features, masks, images)``.
            ``weeks`` and ``fvcs`` are tuples ordered by week, ``features`` is a
            list of scan-metadata values followed by
            ``[age, sex one-hot (2), smoking one-hot (3)]``, ``masks`` is the
            object returned by ``sparse.load_npz`` and ``images`` the array
            stored in ``images.npy``.
        """
        patient = self._train_patients[index] if self.train else self._test_patients[index]
        base_path = os.path.join(self.root, patient)

        # SECURITY: allow_pickle=True unpickles arbitrary objects - only load
        # meta.npy files that this project produced itself.
        meta = np.load(os.path.join(base_path, 'meta.npy'), allow_pickle=True).tolist()
        masks = sparse.load_npz(os.path.join(base_path, 'masks.npz'))
        images = np.load(os.path.join(base_path, 'images.npy'))

        # Collapse every metadata field to its most frequent value
        # (presumably one value per slice - TODO confirm against the pre-processing).
        meta_processed = dict()
        for key, values in meta.items():
            if key in {'SliceLocation', 'InstanceNumber'}:
                continue
            most_frequent = np.array(self._most_frequent(values, axis=0)).reshape(-1)
            if key in {
                'SliceThickness', 'TableHeight', 'WindowCenter', 'WindowWidth'
            }:
                meta_processed[key] = most_frequent[0]
            elif key == 'PixelSpacing':
                # A single value means the same spacing in both directions.
                meta_processed['PixelSpacingX'] = most_frequent[0]
                meta_processed['PixelSpacingY'] = (
                    most_frequent[0] if len(most_frequent) == 1 else most_frequent[1]
                )
            # 'PatientPosition', 'PositionReferenceIndicator' and any other key
            # are intentionally not turned into features.

        all_weeks, all_fvcs, features = self._table_features[patient]
        # Metadata values come first, in the insertion order of meta_processed.
        features = list(meta_processed.values()) + features

        return CTDataset._ReturnValue(weeks=all_weeks, fvcs=all_fvcs, features=features, masks=masks, images=images)

    def __len__(self):
        """Number of patients in the active split."""
        return len(self._train_patients if self.train else self._test_patients)

    def __repr__(self):
        fmt_str = 'OSIC Pulmonary Fibrosis Progression Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        # Truthiness, to agree with __getitem__ / __len__ (train=1 is the train split).
        tmp = 'train' if self.train else 'test'
        fmt_str += '    Split: {}\n'.format(tmp)
        fmt_str += '    Root Location: {}\n'.format(self.root)
        return fmt_str


class LaplaceLoss(_Loss):
    """Mean negative log-likelihood of a Laplace distribution with std ``exp(log_sigma)``.

    Per element: ``sqrt(2) * |y_true - preds| / exp(log_sigma) + log_sigma + log(2) / 2``.
    """

    # Plain Python floats so the arithmetic below never mixes numpy scalars with tensors.
    _SQRT_2 = float(np.sqrt(2.0))
    _HALF_LOG_2 = float(np.log(2.0)) / 2

    def forward(self, y_true, preds, log_sigma):
        """
        :param y_true: tensor of target values.
        :param preds: tensor of predictions, broadcastable against ``y_true``.
        :param log_sigma: tensor with the predicted log standard deviation;
            it is clamped to ``[-5, 5]`` for the computation but NOT modified.
        :return: scalar tensor with the mean loss.
        """
        abs_diff = (y_true - preds).abs()
        # Out-of-place clamp: the in-place variant silently rewrote the caller's
        # tensor and raises on leaf tensors that require grad.
        log_sigma = log_sigma.clamp(-5, 5)  # -np.log(70), np.log(70)
        losses = self._SQRT_2 * abs_diff / log_sigma.exp() + log_sigma + self._HALF_LOG_2
        return losses.mean()


class SqueezeLayer(nn.Module):
    """Module wrapper that removes every size-1 dimension from its input."""

    def forward(self, x):
        squeezed = torch.squeeze(x)
        return squeezed


class FeatureExtractor(nn.Module):
    """Adapter that feeds a single volume to ``net.extract_features``."""

    def __init__(self, net):
        super().__init__()
        self.net = net

    def forward(self, x):
        # Prepend two singleton axes before handing the tensor to the backbone.
        batched = x[None, None]
        return self.net.extract_features(batched)


class OSICNet(nn.Module):
    """EfficientNet3D feature extractor followed by an MLP head with 5 outputs.

    ``forward`` returns one 5-element prediction per visit of the patient; the
    training cell interprets them as 4 cubic-polynomial coefficients plus
    ``log_sigma``.
    """

    # Normalisation constants applied to the masked volume.
    # NOTE(review): presumably produced by the dataset-statistics cell of this
    # notebook - re-check them whenever the pre-processing changes.
    _MEAN_DATASET = -971.4692260919278
    _STD_DATASET = 117.84143467421829

    def __init__(self, dtype, device, efficient_net_model_number, hidden_size, dropout_rate):  # , output_size
        """
        :param dtype: torch dtype used for the input volume.
        :param device: torch device the sub-modules are moved to.
        :param efficient_net_model_number: N in ``efficientnet-bN``.
        :param hidden_size: width of the two hidden layers of the head.
        :param dropout_rate: dropout probability inside the head.
        """
        super().__init__()

        self.dtype = dtype
        self.device = device

        self.CT_features_extractor = nn.Sequential(
            FeatureExtractor(
                EfficientNet3D.from_name(
                    f'efficientnet-b{efficient_net_model_number}', override_params={'num_classes': 1}, in_channels=1
                )
            ),
            nn.AdaptiveAvgPool3d(1),
            SqueezeLayer()
        )

        # NOTE(review): the input width 1280 is hard-coded; confirm it matches
        # the feature width of the chosen EfficientNet variant.
        self.predictor = nn.Sequential(
            nn.Linear(1280, hidden_size),  # 1294
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, 5)  # output_size
        )

        self._initialize_weights()

        self.CT_features_extractor.to(self.device)
        self.predictor.to(self.device)

    def _initialize_weights(self):
        """Re-initialise every Conv3d, BatchNorm3d and Linear layer of the model."""
        for m in self.modules():
            if isinstance(m, torch.nn.Conv3d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.kernel_size[2] * m.out_channels
                m.weight.data.normal_(0, np.sqrt(2. / n))
            elif isinstance(m, torch.nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, data):
        """
        :param data: object with ``masks``, ``images``, ``weeks`` and ``fvcs``
            attributes (e.g. ``CTDataset._ReturnValue``).
        :return: list with one CPU tensor of 5 values per (week, FVC) pair.
        """
        # Voxels outside the mask are set to -1000, inside the mask the image is kept.
        lungs = -1000 * (1.0 - data.masks) + data.masks * data.images
        lungs = (lungs - self._MEAN_DATASET) / self._STD_DATASET
        lungs = torch.tensor(lungs, dtype=self.dtype, device=self.device)
        lungs_features = self.CT_features_extractor(lungs)

        # Tabular features are not fed to the head yet, so no per-visit tensor is
        # built here. The head still runs once per visit: in training mode
        # dropout makes each call differ.
        return [
            self.predictor(lungs_features).cpu()
            for _ in zip(data.weeks, data.fvcs)
        ]


class LinearDecayLR(_LRScheduler):
    """Piecewise-linear learning-rate schedule.

    The rate is ``start_lr`` before ``start_epoch``, ``stop_lr`` from
    ``stop_epoch`` on, and linearly interpolated in between. Every parameter
    group receives the same rate.
    """

    def __init__(self, optimizer, start_epoch, stop_epoch, start_lr, stop_lr, last_epoch=-1):
        """
        :param optimizer: wrapped optimizer.
        :param start_epoch: last epoch that still uses ``start_lr``.
        :param stop_epoch: first epoch that uses ``stop_lr``.
        :param start_lr: learning rate before the decay.
        :param stop_lr: learning rate after the decay.
        :param last_epoch: forwarded to the base scheduler.
        """
        # The schedule parameters must exist before the base constructor runs,
        # because it immediately calls get_lr(); `optimizer` and `last_epoch`
        # are stored by the base class itself.
        self.start_epoch = start_epoch
        self.stop_epoch = stop_epoch

        self.start_lr = start_lr
        self.stop_lr = stop_lr

        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> list:
        """Return the current learning rate, once per parameter group."""
        if self.last_epoch < self.start_epoch:
            new_lr = self.start_lr
        elif self.last_epoch >= self.stop_epoch:
            # `>=` gives the same value as the interpolation at stop_epoch and
            # avoids a division by zero when start_epoch == stop_epoch.
            new_lr = self.stop_lr
        else:
            new_lr = self.start_lr + (
                (self.stop_lr - self.start_lr) *
                (self.last_epoch - self.start_epoch) /
                (self.stop_epoch - self.start_epoch)
            )
        return [new_lr for _ in self.optimizer.param_groups]

In [16]:
# np.sqrt(2) * 1000 / np.exp(5) + 5 + np.log(2) / 2

In [17]:
# x = torch.tensor([1., 2., 3.]).requires_grad_(True)
# l1 = (x * 2).sum()
# l2 = (x * 3).sum()
# l3 = (x * 5).sum()
# loss = l1 + l2 + l3
# loss.backward()
# x.grad

In [None]:
# Both splits read the same files; only the `train` flag differs, and the shared
# test_size / random_state keep the two splits complementary.
# os.path.join avoids the doubled '/' that the f-strings produced, because the
# base paths already end with a slash.
train_root = os.path.join(PROCESSED_PATH, 'train')
train_csv = os.path.join(IMAGE_PATH, 'train.csv')

train_dataset = CTDataset(
    train_root,
    train_csv,
    train=True, test_size=0.25, random_state=42
)

test_dataset = CTDataset(
    train_root,
    train_csv,
    train=False, test_size=0.25, random_state=42
)

In [8]:
# `model` and `optimizer` are used by every later cell, so they must be created
# here for the notebook to survive Restart & Run All.
# NOTE(review): the learning rate / momentum below are the non-TMP settings that
# were commented out - confirm they are the intended ones.
model = OSICNet(dtype=dtype, device=device, efficient_net_model_number=0, hidden_size=512, dropout_rate=0.5)
optimizer = optim.SGD(model.parameters(), lr=1e-5, momentum=0.9)  # , weight_decay=5e-4)

### TMP ###
# optimizer = optim.SGD(model.parameters(), lr=1e-9, momentum=0, weight_decay=0)  # 0.9, 5e-4)
# lr_scheduler = LinearDecayLR(optimizer, )

In [None]:
MAX_EPOCHS = 20
# Debug cap on the number of patients visited per epoch (the loop used to
# `break` at a hard-coded iteration 1). Set to None to iterate over the whole split.
MAX_ITERS_PER_EPOCH = 1

iters_per_epoch = (
    len(train_dataset) if MAX_ITERS_PER_EPOCH is None
    else min(MAX_ITERS_PER_EPOCH, len(train_dataset))
)

start_time = time.time()
for epoch in range(MAX_EPOCHS):
    # Index explicitly so that no extra sample is loaded from disk just to break.
    for cur_iter in range(iters_per_epoch):
        data = train_dataset[cur_iter]
        cur_start_time = time.time()

        optimizer.zero_grad()

        # Design matrix of the cubic polynomial in the week number: [w^3, w^2, w, 1].
        data_weeks = torch.tensor(data.weeks, dtype=dtype)
        weeks = torch.stack(
            [data_weeks ** 3, data_weeks ** 2, data_weeks, torch.ones_like(data_weeks)],
            dim=1
        )
        # Loop-invariant target vector, built once per patient.
        FVC_true = torch.tensor(data.fvcs, dtype=dtype)

        all_preds = model(data)

        agg_loss = 0
        for preds in all_preds:
            coefs = preds[:4]
            log_sigma = preds[4]

            # Evaluate the predicted polynomial at every visit of the patient.
            FVC_preds = (weeks * coefs).sum(dim=1)

            agg_loss += LaplaceLoss()(FVC_true, FVC_preds, log_sigma)
        loss = agg_loss / len(data.weeks)

        loss.backward()
        optimizer.step()

        cur_end_time = time.time()

        print(
            f'Epoch {epoch + 1:3d}, '
            f'iter {cur_iter + 1:4d}, '
            f'loss {loss.item():12.6f}, '
            f'cur iter time {cur_end_time - cur_start_time:6.1f} sec, '
            f'elapsed time {cur_end_time - start_time:6.1f} sec, '
        )

Epoch   1, iter    1, loss    46.706024, cur iter time    8.5 sec, elapsed time    8.6 sec, 
Epoch   2, iter    1, loss    35.064163, cur iter time    8.4 sec, elapsed time   17.1 sec, 
Epoch   3, iter    1, loss    42.277420, cur iter time    8.6 sec, elapsed time   25.8 sec, 
Epoch   4, iter    1, loss    56.624676, cur iter time    8.4 sec, elapsed time   34.5 sec, 
Epoch   5, iter    1, loss    52.560135, cur iter time    8.4 sec, elapsed time   43.0 sec, 
Epoch   6, iter    1, loss    35.330425, cur iter time    8.4 sec, elapsed time   51.6 sec, 
Epoch   7, iter    1, loss    23.436638, cur iter time    8.5 sec, elapsed time   60.3 sec, 
Epoch   8, iter    1, loss    54.687748, cur iter time    8.7 sec, elapsed time   69.2 sec, 
Epoch   9, iter    1, loss    42.766113, cur iter time    8.7 sec, elapsed time   78.0 sec, 
Epoch  10, iter    1, loss    30.697641, cur iter time    8.7 sec, elapsed time   86.9 sec, 
Epoch  11, iter    1, loss    29.377777, cur iter time    8.6 sec, ela

In [24]:
from torchsummary import summary

In [26]:
# summary(model.CT_features_extractor[0].net)

In [None]:
# Print the value range of every parameter. The full parameter name is shown:
# the former `name[20:]` slice cut names mid-word and left every `predictor.*`
# parameter (names shorter than 20 characters) unlabeled.
for name, p in model.named_parameters():
    print(f'{name:70} : {p.data.min().item():15.3e}, {p.data.max().item():15.3e}')

In [13]:
# Forward pass on training sample #2; `model` returns a list with one
# 5-element prediction tensor per visit of that patient.
all_preds = model(train_dataset[2])

In [14]:
# Display the raw per-visit predictions computed in the previous cell.
all_preds

[tensor([-0.0239,  0.0427, -0.0162,  0.0765, -0.0407], grad_fn=<CopyBackwards>),
 tensor([-0.0596,  0.0999,  0.0310,  0.0311, -0.0328], grad_fn=<CopyBackwards>),
 tensor([-0.0534,  0.0610,  0.0263,  0.1096, -0.0509], grad_fn=<CopyBackwards>),
 tensor([-0.0468, -0.0004,  0.0322,  0.0902, -0.0749], grad_fn=<CopyBackwards>),
 tensor([-0.0137,  0.0090,  0.0003,  0.0589, -0.0920], grad_fn=<CopyBackwards>),
 tensor([-0.0302,  0.0220,  0.0029,  0.1146, -0.0387], grad_fn=<CopyBackwards>),
 tensor([-0.0218,  0.0516, -0.0411,  0.0951,  0.0158], grad_fn=<CopyBackwards>),
 tensor([-0.0205,  0.0779, -0.0219,  0.1012, -0.0544], grad_fn=<CopyBackwards>),
 tensor([ 0.0394,  0.0843, -0.0223,  0.0686, -0.0343], grad_fn=<CopyBackwards>)]

In [16]:
# Recompute the loss for training sample #2 outside the training loop.
# `all_preds` must come from the forward pass on the same sample (cell above).
data = train_dataset[2]

# Design matrix of the cubic polynomial in the week number: [w^3, w^2, w, 1].
data_weeks = torch.tensor(data.weeks, dtype=dtype)
weeks = torch.stack(
    [data_weeks ** 3, data_weeks ** 2, data_weeks, torch.ones_like(data_weeks)],
    dim=1
)
# Loop-invariant target vector, built once.
FVC_true = torch.tensor(data.fvcs, dtype=dtype)

# all_preds = model(data)

agg_loss = 0
for preds in all_preds:
    coefs = preds[:4]
    log_sigma = preds[4]

    # Evaluate the predicted polynomial at every visit of the patient.
    FVC_preds = (weeks * coefs).sum(dim=1)

    agg_loss += LaplaceLoss()(FVC_true, FVC_preds, log_sigma)
loss = agg_loss / len(data.weeks)

In [17]:
# Display the loss recomputed in the previous cell.
loss

tensor(4912.0527, grad_fn=<DivBackward0>)

In [18]:
# Back-propagate the recomputed loss so the parameter gradients can be inspected below.
loss.backward()

In [25]:
# Print the gradient range of every parameter. The full parameter name is
# shown: the former `name[20:]` slice cut names mid-word and left every
# `predictor.*` parameter (names shorter than 20 characters) unlabeled.
for name, p in model.named_parameters():
    if p.requires_grad:
        if p.grad is None:
            print(f'Has grad but it is None: {name:70}')
        else:
            print(f'{name:70} : {p.grad.data.cpu().min().item():15.3e}, {p.grad.data.cpu().max().item():15.3e}')
    else:
        print(f'No grad: {name:70}')

RuntimeError: CUDA error: unspecified launch failure

In [12]:
# List the volume shape of every training sample.
for i, sample in enumerate(train_dataset):
    print(i, sample.images.shape)

In [4]:
# Mean / std of the masked volumes over ALL patients (test_size=0.0 puts every
# patient into the train split).
dataset_all = CTDataset(
    f'{PROCESSED_PATH}/train',
    f'{IMAGE_PATH}/train.csv',
    train=True, test_size=0.0, random_state=42
)

# Stream over the dataset: each sample is loaded from disk exactly once and
# only one volume is kept in memory at a time.
sum_image = 0
sum_sq_image = 0
N = 0  # total number of voxels, counted from the data (Python int, cannot overflow)
for i in range(len(dataset_all)):
    sample = dataset_all[i]
    # Same masking as in the model: -1000 outside the mask, image values inside.
    image = -1000 * (1.0 - sample.masks) + sample.masks * sample.images
    sum_image += image.sum()
    sum_sq_image += (image ** 2).sum()
    N += image.size

mean = sum_image / N

# Var[x] = E[x^2] - E[x]^2
var = sum_sq_image / N - mean ** 2

std = var ** 0.5

mean, std