In [105]:
from torch.utils.data import Dataset
import pandas as pd
from dataset import FeaturesDataset
import numpy as np
import os

class FeaturesDataset(Dataset):
    '''
        In-memory dataset of tabular features and their labels.

        Reads `dfu_features_dataset_selected.csv` (features) and `dfu_labels_dataset.csv` (labels)
        from `dataset_dir`, using the first column of each file as the index.

        Args:
            dataset_dir: directory that contains the two CSV files.
            normalize: when True, standardise every feature column (subtract its mean, divide by
                its standard deviation). Statistics are computed over all rows of the file.

        Attributes:
            features: list of feature (column) names, in column order.
            X: float32 array of shape (n_samples, n_features).
            y: flattened array of the label file's values.
    '''
    def __init__(self, dataset_dir: str, normalize: bool = True):
        super(FeaturesDataset, self).__init__()
        X_df = pd.read_csv(os.path.join(dataset_dir, 'dfu_features_dataset_selected.csv'), index_col=0)
        y_df = pd.read_csv(os.path.join(dataset_dir, 'dfu_labels_dataset.csv'), index_col=0)

        self.features = X_df.columns.to_list()
        self.X = X_df.to_numpy().astype(np.float32)
        self.y = y_df.to_numpy().ravel()

        if normalize:
            mean = self.X.mean(axis=0)
            std = self.X.std(axis=0)
            # A constant column has zero spread; dividing by it would fill the column with NaN.
            # Dividing by 1 instead leaves such a column at exactly 0 after centring.
            std[std == 0] = 1.
            self.X = (self.X - mean) / std

    def __len__(self):
        '''Number of samples (rows of `X`).'''
        return len(self.X)

    def __getitem__(self, idx):
        '''Return the `(features, label)` pair stored at position `idx`.'''
        return self.X[idx], self.y[idx]

# Load the feature/label CSVs from data/dataset with column-wise standardisation enabled.
dataset = FeaturesDataset('data/dataset', normalize=True)

# Concrete Dropout

In [198]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np

class LinearCD(nn.Linear):
    r'''
        Linear layer with Concrete Dropout regularization applied to its inputs.

        Code strongly inspired by:
            https://github.com/danielkelshaw/ConcreteDropout/blob/master/condrop/concrete_dropout.py

        Every input feature owns a learnable dropout logit (`logit_p`). In training mode the input
        is multiplied by a relaxed ("concrete") Bernoulli keep-mask sampled from those logits. In
        evaluation mode the relaxation is replaced by a hard mask on the weights that keeps only
        the input features whose dropout logit is below `logit_threshold`.

        Note the relationship between the weight regularizer (w_reg) and dropout regularization (drop_reg):

            w_reg/drop_reg = (l^2)/2

        with prior lengthscale l (number of in_features).

        Note also that the factor of two should be ignored for cross-entropy loss, and used only for the
        Euclidean loss.

        Args:
            in_features: size of each input sample.
            out_features: size of each output sample.
            bias: whether the layer learns an additive bias.
            threshold: dropout probability below which a feature is kept in evaluation mode.
            init_min: lower bound of the initial dropout probability.
            init_max: upper bound of the initial dropout probability.
            temperature: temperature of the concrete relaxation, also used by `reg`.
    '''
    def __init__(self, in_features, out_features, bias=True, threshold=.1, init_min=.5, init_max=.5,
                 temperature=.1):
        super(LinearCD, self).__init__(in_features, out_features, bias)
        logit_init_min = np.log(init_min) - np.log(1. - init_min)
        logit_init_max = np.log(init_max) - np.log(1. - init_max)

        # Logit of the probability of deactivating each input feature, drawn uniformly
        # between the two initial logits.
        self.logit_p = nn.Parameter(torch.rand(in_features) * (logit_init_max - logit_init_min) + logit_init_min)
        self.logit_threshold = np.log(threshold) - np.log(1. - threshold)
        self.temperature = temperature

    def forward(self, x):
        '''Apply the layer: relaxed dropout mask when training, hard feature mask otherwise.'''
        if self.training:
            return F.linear(self.concrete_bernoulli(x), self.weight, self.bias)

        # Keep only the columns of the weight matrix whose dropout logit is under the threshold.
        # The mask follows the weight dtype so the product does not change precision.
        keep_mask = (self.logit_p < self.logit_threshold).to(self.weight.dtype)
        return F.linear(x, self.weight * keep_mask, self.bias)

    def concrete_bernoulli(self, x):
        '''Multiply `x` by a relaxed Bernoulli keep-mask sampled from the dropout logits.'''
        eps = 1e-8
        # rand_like allocates the noise on x's device with x's dtype, replacing the legacy
        # torch.FloatTensor / torch.cuda.FloatTensor constructors.
        unif_noise = torch.rand_like(x)

        p = torch.sigmoid(self.logit_p)

        drop_logit = (torch.log(p + eps) - torch.log((1. - p) + eps)
                      + torch.log(unif_noise + eps) - torch.log((1. - unif_noise) + eps))
        drop_prob = torch.sigmoid(drop_logit / self.temperature)

        # The usual 1 / (1 - p) dropout rescaling is intentionally not applied here.
        return torch.mul(x, 1. - drop_prob)

    def reg(self):
        '''
            Regularization term: sum over input features of 1 - sigmoid(logit(p) / temperature).

            The value shrinks as the dropout probabilities grow, so minimising it pushes the layer
            towards dropping features.
        '''
        eps = 1e-6
        p = torch.sigmoid(self.logit_p)
        bernoulli = torch.log(p + eps) - torch.log((1. - p) + eps)
        return torch.sum(1. - torch.sigmoid(bernoulli / self.temperature))

In [238]:
import torch
from torch import nn

class Model(nn.Module):
    '''
        Classifier made of a concrete-dropout input layer followed by a small MLP.

        The first layer is a bias-free `LinearCD` mapping `in_features` to `nb_features`; it is
        followed by two Linear -> Dropout(0.2) -> ReLU stages of width `nb_features // 2` and
        `nb_features // 4`, and a final Linear layer producing 2 outputs.

        Args:
            in_features: number of input features.
            nb_features: output width of the first layer.
            threshold: forwarded to `LinearCD`; must lie in [0, 1].

        Raises:
            ValueError: if `threshold` is below 0 or above 1.
    '''
    def __init__(self, in_features: int, nb_features: int, threshold: float = .1):
        super().__init__()

        if threshold > 1. or threshold < 0.:
            raise ValueError('threshold must be between 0 and 1')

        half_width = nb_features // 2
        quarter_width = nb_features // 4

        # Layers are created in network order, so index 0 of `self.model` is the LinearCD layer.
        layers = [
            LinearCD(in_features, nb_features, bias=False, threshold=threshold),
            nn.Linear(nb_features, half_width),
            nn.Dropout(p=0.2),
            nn.ReLU(),
            nn.Linear(half_width, quarter_width),
            nn.Dropout(p=0.2),
            nn.ReLU(),
            nn.Linear(quarter_width, 2),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        '''Run `x` through the sequential stack and return its 2-wide output.'''
        logits = self.model(x)
        return logits

In [262]:
from tqdm import tqdm
from torch.utils.data import DataLoader

def train(model, dataset, batch_size = 128, n_epochs=10):
    '''
        Fit `model` with Adam on cross-entropy plus the concrete-dropout penalty.

        Batches are drawn with a `WeightedRandomSampler` whose per-sample weight is the inverse of
        the sample's class count, so that classes 0 and 1 are drawn equally often on average.
        The penalty coefficient starts at 1e-6 and grows by 1e-5 at the start of every epoch,
        capped at 1e-2; its final value is printed when training ends.

        Args:
            model: network to optimise; it is moved to CUDA when available, otherwise to the CPU.
            dataset: a `torch.utils.data.Subset` whose wrapped dataset exposes the labels as `y`
                (the sampler reads `dataset.dataset.y[dataset.indices]`). Only labels 0 and 1
                receive a non-zero sampling weight.
            batch_size: number of samples per optimisation step.
            n_epochs: number of passes over the sampler (`len(dataset)` draws per pass).

        Returns:
            The trained model, on the training device and in training mode.
    '''
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    # Dropout layers (nn.Dropout and the relaxed mask of LinearCD) are only active in training
    # mode; enforce it in case the caller hands over a model previously switched to eval().
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    criterion = nn.CrossEntropyLoss()

    # Weighted sampler: each sample is weighted by the inverse frequency of its class.
    samples = dataset.dataset.y[dataset.indices]
    class_counts = [(samples == label).sum() for label in (0, 1)]
    # max(..., 1) avoids a division by zero when a class is absent from the subset; the
    # resulting weight is then never assigned to any sample.
    class_weight = [1. / max(count, 1) for count in class_counts]
    samples_weight = np.zeros(len(dataset))
    samples_weight[samples == 0] = class_weight[0]
    samples_weight[samples == 1] = class_weight[1]

    sampler = torch.utils.data.WeightedRandomSampler(samples_weight, len(dataset))
    loader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)

    # Schedule of the dropout-penalty coefficient.
    reg = 1e-6
    reg_step = 1e-5
    reg_max = 1e-2

    epoch_iterator = tqdm(
            range(n_epochs),
            leave=True,
            unit="epoch",
            postfix={"tls": "%.4f" % 1},
        )

    # Every concrete-dropout layer contributes its own penalty term.
    cd_layers = [module for module in model.modules() if isinstance(module, LinearCD)]

    for _ in epoch_iterator:
        reg = min(reg + reg_step, reg_max)
        for idx, (inputs, targets) in enumerate(loader):
            optimizer.zero_grad()

            inputs = inputs.to(device)
            targets = targets.to(device)
            pred = model(inputs)

            reg_value = sum(layer.reg() for layer in cd_layers)

            loss = criterion(pred, targets) + reg*reg_value
            loss.backward()
            optimizer.step()

            # Refresh the progress-bar loss every 10 batches.
            if idx % 10 == 0:
                epoch_iterator.set_postfix(tls="%.4f" % loss.item())
    print(reg)
    return model

# Training

In [263]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

n_features = len(dataset.features)

k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True)

# Per-fold results: learned dropout probabilities of the input layer and test accuracy.
features_importance = []
model_accuracy = []

for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    train_set = torch.utils.data.Subset(dataset, train_ids)
    test_set = torch.utils.data.Subset(dataset, test_ids)

    # A fresh model is trained from scratch for every fold.
    model = Model(n_features, 256, threshold=.2)
    model = train(model, train_set, batch_size=32, n_epochs=500)

    # train() chooses the device itself; follow the model instead of assuming CUDA so the
    # evaluation also runs on CPU-only machines.
    device = next(model.parameters()).device

    model.eval()
    y_pred = []
    y_true = []

    test_loader = DataLoader(test_set, batch_size=64, shuffle=False)
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, targets in test_loader:
            pred = model(inputs.to(device)).argmax(dim=1)
            y_pred.append(pred.cpu())
            y_true.append(targets)

    y_pred = torch.cat(y_pred)
    y_true = torch.cat(y_true)
    model_accuracy.append(accuracy_score(y_true, y_pred))
    # model.model[0] is the LinearCD layer; sigmoid(logit_p) is its per-feature dropout probability.
    features_importance.append(torch.sigmoid(model.model[0].logit_p).cpu().detach().numpy())
    print(model_accuracy[-1])


100%|██████████| 500/500 [00:15<00:00, 32.36epoch/s, tls=0.4971] 


0.005000999999999965
0.8157894736842105


100%|██████████| 500/500 [00:16<00:00, 30.04epoch/s, tls=0.3955] 


0.005000999999999965
0.868421052631579


100%|██████████| 500/500 [00:14<00:00, 34.70epoch/s, tls=0.5845] 


0.005000999999999965
0.868421052631579


100%|██████████| 500/500 [00:16<00:00, 29.68epoch/s, tls=0.4195] 


0.005000999999999965
0.8421052631578947


100%|██████████| 500/500 [00:15<00:00, 32.02epoch/s, tls=0.4839]

0.005000999999999965
0.8918918918918919





In [264]:
# Average test accuracy over the cross-validation folds
mean_accuracy = np.mean(model_accuracy)
print('Mean accuracy: {}'.format(mean_accuracy))

Mean accuracy: 0.857325746799431


In [265]:
# Average the per-fold dropout probabilities of the input layer (one row per fold).
# np.stack builds one contiguous array first: calling torch.tensor() on a list of ndarrays is
# slow and emits a UserWarning.
features_importance_ = torch.from_numpy(np.stack(features_importance)).mean(dim=0)

# Ascending sort: the features with the lowest mean dropout probability come first.
features_score, index = features_importance_.sort()
features_names = np.array(dataset.features)[index.numpy()]

# `features_score` already holds the values in sorted order; hand NumPy data to pandas.
features_importance_df = pd.DataFrame(features_score.numpy(), index=features_names, columns=['Importance'])
features_importance_df.index.name = 'Features'

# to_csv does not create missing directories.
os.makedirs('data/features_importance', exist_ok=True)
features_importance_df.to_csv('data/features_importance/concrete_dropout.csv')

features_importance_df

Unnamed: 0_level_0,Importance
Features,Unnamed: 1_level_1
R_LPA_min,0.170767
L_MPA_min,0.186285
R_LCA_kurtosis,0.195982
Foot_ETD,0.202991
LCA_ETD,0.210169
...,...
R_LCA_NRT_C1,0.730680
R_LPA_NRT_C1,0.734289
R_MCA_NRT_C5,0.735871
L_LCA_NRT_C4,0.738042


In [226]:
# Display the sorted (ascending) per-feature scores returned by the most recent `.sort()` call.
features_score

tensor([0.0847, 0.1017, 0.1022, 0.1051, 0.1123, 0.1146, 0.1263, 0.1267, 0.1846,
        0.1909, 0.2108, 0.2165, 0.2478, 0.2855, 0.2923, 0.2928, 0.2941, 0.3037,
        0.3215, 0.3799, 0.4331, 0.4369, 0.4385, 0.4409, 0.4453, 0.4453, 0.4460,
        0.4611, 0.4654, 0.4659, 0.4789, 0.4797, 0.4970, 0.5022, 0.5225, 0.5239,
        0.5382, 0.5589, 0.5632, 0.5657, 0.5786, 0.5879, 0.5879, 0.5930, 0.5967,
        0.6033, 0.6083, 0.6136, 0.6141, 0.6180, 0.6206, 0.6209, 0.6213, 0.6264,
        0.6300, 0.6308, 0.6333, 0.6337, 0.6349, 0.6353, 0.6376, 0.6388, 0.6423,
        0.6437, 0.6503, 0.6540, 0.6541, 0.6548, 0.6601, 0.6609, 0.6668, 0.6722,
        0.6736, 0.6766, 0.6773, 0.6806, 0.6831, 0.6836, 0.6846, 0.6890, 0.6890,
        0.6897, 0.6913, 0.6919, 0.6948, 0.6953, 0.6994, 0.7031, 0.7032, 0.7057,
        0.7068, 0.7071, 0.7076, 0.7111, 0.7137, 0.7147, 0.7185, 0.7189, 0.7221,
        0.7226, 0.7253, 0.7287, 0.7300, 0.7322, 0.7332, 0.7348, 0.7379, 0.7406,
        0.7406, 0.7470, 0.7491, 0.7540, 

In [206]:
# Mean dropout probability of every feature across the folds.
features_importance_ = np.mean(np.array(features_importance), axis=0)

features_names = dataset.features

# Rank the features by ascending mean dropout probability.
features_score, index = torch.sort(torch.tensor(features_importance_))

ranked_names = np.array(features_names)[index]
keep_score = 1-features_score

print('Features:{}'.format(ranked_names))
print('Features Score:{}'.format(keep_score))

Features:['L_LPA_std' 'R_LPA_min' 'Foot_ETD' 'L_MPA_min' 'L_MPA_std' 'LPA_ETD'
 'L_kurtosis' 'R_MPA_NRT_C2' 'L_NRT_C5' 'MPA_ETD' 'L_MCA_std'
 'L_LPA_NRT_C5' 'R_LPA_NRT_C7' 'R_LPA_std' 'L_MPA_HSE' 'L_LCA_kurtosis'
 'R_kurtosis' 'R_MCA_NRT_C4' 'R_MPA_skew' 'L_NRT_C4' 'L_LPA_NRT_C4'
 'L_NRT_C7' 'R_HSE' 'L_NRT_C3' 'R_MCA_HSE' 'R_LCA_NRT_C4' 'R_NRT_C6'
 'R_LCA_kurtosis' 'R_MCA_skew' 'L_MCA_skew' 'R_MPA_NRT_C7' 'R_MCA_std'
 'R_LPA_NRT_C1' 'R_std' 'L_LCA_HSE' 'R_MPA_NRT_C3' 'R_MPA_HSE' 'MCA_ETD'
 'L_MPA_kurtosis' 'R_LPA_HSE' 'LCA_ETD' 'R_LPA_kurtosis' 'R_MCA_kurtosis'
 'L_HSE' 'L_LPA_kurtosis' 'L_LCA_NRT_C2' 'L_LPA_NRT_C6' 'L_MCA_NRT_C1'
 'R_MPA_kurtosis' 'R_NRT_C4' 'R_LCA_HSE' 'R_TCI' 'R_MCA_NRT_C5'
 'L_MPA_NRT_C4' 'L_LPA_NRT_C1' 'L_MPA_NRT_C5' 'R_LCA_NRT_C3'
 'R_MCA_NRT_C6' 'L_LCA_NRT_C3' 'L_MPA_NRT_C2' 'L_LPA_NRT_C7'
 'R_LCA_NRT_C5' 'R_NRT_C2' 'R_MPA_NRT_C4' 'R_NRT_C5' 'L_MCA_NRT_C4'
 'R_LPA_skew' 'L_MCA_NRT_C3' 'L_LPA_skew' 'R_skew' 'R_MPA_max' 'L_MPA_max'
 'L_MCA_NRT_C0' 'R_NRT_C7' 'R_MP

In [205]:
loader = DataLoader(dataset, batch_size=32, shuffle=True)
x, y = next(iter(loader))

# Re-threshold the concrete-dropout input layer: in eval mode only the features whose dropout
# probability is below `threshold` are kept.
threshold = .1
model.model[0].logit_threshold = torch.tensor(np.log(threshold) - np.log(1. - threshold))
model.eval()

# Run the batch once, without gradients, on whichever device the model lives on.
# argmax over the raw outputs selects the same class as argmax over their softmax.
device = next(model.parameters()).device
with torch.no_grad():
    y_hat = model(x.to(device)).argmax(dim=1).cpu()

# Element-wise correctness of the predictions for this batch.
print(y == y_hat)

tensor([False, False, False, False, False, False,  True, False,  True, False,
        False, False,  True,  True, False,  True, False,  True,  True, False,
         True,  True, False, False,  True,  True, False,  True, False, False,
        False, False])


In [209]:
# Names of the features whose learned dropout probability is below 0.1.
drop_probability = torch.sigmoid(model.model[0].logit_p)
is_selected = (drop_probability < 0.1).cpu()
np.array(dataset.features)[is_selected]

array([], dtype='<U14')

In [212]:
# Per-feature dropout probabilities of the first layer (sigmoid of its `logit_p` parameter).
# NOTE(review): the saved output of this cell is a scalar with grad_fn=<MinBackward1>, which
# looks like it came from an earlier `.min()` variant of this expression — re-run to confirm.
torch.sigmoid(model.model[0].logit_p)

tensor(0.1672, device='cuda:0', grad_fn=<MinBackward1>)