In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
from collections import OrderedDict
import math

In [3]:
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [4]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import StratifiedShuffleSplit

In [5]:
import numpy as np
import pandas as pd
import scipy.sparse as sparse

In [5]:
def get_default_device():
    """Return the first CUDA device when CUDA is usable, otherwise the CPU device."""
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [6]:
def to_device(data, device):
    """Transfer a tensor/module, or a (nested) list/tuple of them, to ``device``.

    Lists and tuples are walked recursively and always come back as lists;
    anything else is moved with ``.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for element in data:
        moved.append(to_device(element, device))
    return moved

In [7]:
class DeviceDataLoader:
    """Thin wrapper that hands out the batches of ``dl`` already moved to ``device``."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Batch count of the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Lazily iterate the wrapped loader, moving each batch to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [8]:
class VisdomConfig:
    # Plain holder of class-level settings, all defaulting to None.
    # NOTE(review): presumably connection details for a Visdom server; nothing
    # in the visible notebook reads these attributes — confirm before relying on it.
    url = None
    port = None
    username = None
    password = None
    env = None

In [44]:
class DNN(nn.Module):
    """Fully connected feed-forward network assembled from a list of layer sizes.

    Every entry of ``layer_config`` is an ``(in_features, out_features)`` pair
    that becomes one ``nn.Linear``. A ``ReLU`` follows each linear layer except
    the last one, so the network emits raw (unnormalised) scores.

    Args:
        input_dim: Number of input features. Stored on the instance; the layer
            sizes actually built come from ``layer_config``.
        output_dim: Number of outputs. Stored on the instance.
        layer_config: Sequence of ``(in_features, out_features)`` pairs, one
            per linear layer, in order.
    """

    def __init__(self, input_dim, output_dim, layer_config):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Use the constructor argument; the model must not depend on a
        # same-named variable happening to exist in the notebook namespace.
        self.layer_config = layer_config
        self.model = self.compose_model()

    def forward(self, batch):
        """Run ``batch`` through the sequential stack and return its output."""
        return self.model(batch)

    def calculate_bound(self, weight):
        """Return ``1 / sqrt(fan_in)`` for ``weight``, used as the bias init range."""
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(weight)
        bound = 1 / math.sqrt(fan_in)
        return bound

    def compose_model(self, layer_config=None):
        """Build the ``nn.Sequential`` described by ``layer_config``.

        Args:
            layer_config: Optional override; defaults to ``self.layer_config``.

        Returns:
            ``nn.Sequential`` whose entries are named ``linear-<i>`` and
            ``nonlinear-<i>``.
        """
        if layer_config is None:
            layer_config = self.layer_config

        last_idx = len(layer_config) - 1
        layers = OrderedDict()
        for idx, (in_features, out_features) in enumerate(layer_config):
            layer = nn.Linear(in_features, out_features)
            # Kaiming-uniform weights for ReLU, biases uniform in +/- 1/sqrt(fan_in).
            nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
            bound = self.calculate_bound(layer.weight)
            nn.init.uniform_(layer.bias, -bound, bound)
            layers['linear-%d' % idx] = layer
            # No activation after the final layer.
            if idx != last_idx:
                layers['nonlinear-%d' % idx] = nn.ReLU()

        return nn.Sequential(layers)

In [15]:
class AiBasicMedDataset(Dataset):
    """Dataset over a sparse feature matrix and optional integer labels.

    The sparse matrix is densified once at construction time. Items are
    ``(features, label)`` pairs when labels were given, otherwise just the
    feature row.
    """

    def __init__(self, data, labels=None, transform=None):
        self.data = self.data_to_tensor(data)
        self.labels = self.labels_to_tensor(labels)
        # Stored only; __getitem__ does not apply it.
        self.transform = transform

    def data_to_tensor(self, data):
        """Densify ``data`` (anything exposing ``todense()``) into a float tensor."""
        dense = data.todense()
        return torch.FloatTensor(dense)

    def labels_to_tensor(self, labels=None):
        """Convert ``labels`` to a long tensor, passing ``None`` through."""
        return None if labels is None else torch.LongTensor(labels)

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        # Tensor indices are converted to plain Python ints/lists first.
        position = idx.tolist() if torch.is_tensor(idx) else idx

        features = self.data[position, :]
        if self.labels is None:
            return features
        return features, self.labels[position]

In [15]:
class DLSparseMaker(BaseEstimator):
    """
    Turn the parsed patient frame into a sparse feature matrix.

    Column layout of the result, left to right:
      * the dense columns kept from the frame: ``LABEL`` (only when present),
        standardised ``AGE``, ``GENDER``;
      * ``NUM_RACE_SLOTS`` columns holding the race as a one-hot value;
      * ``num_symptoms`` columns, one per symptom index, set for every symptom
        listed in the comma-separated ``SYMPTOMS`` string.

    Args:
        num_symptoms: Number of distinct symptom indices.
        age_mean: Mean used to standardise ``AGE``; set by ``fit`` when None.
        age_std: Standard deviation used to standardise ``AGE``; set by ``fit``.
    """

    # Width of the one-hot race block; symptom indices are shifted past it.
    NUM_RACE_SLOTS = 5

    def __init__(self, num_symptoms, age_mean=None, age_std=None):
        self.num_symptoms = num_symptoms
        self.age_std = age_std
        self.age_mean = age_mean

    def fit(self, df, y=None):
        """Estimate the ``AGE`` mean and standard deviation from ``df``.

        Returns:
            ``self``, so calls can be chained as scikit-learn estimators expect.
        """
        self.age_mean = df['AGE'].mean()
        self.age_std = df['AGE'].std()
        return self

    def transform(self, df, y=None):
        """Encode ``df`` as a CSC sparse matrix using the fitted age statistics.

        Raises:
            ValueError: If the age mean or standard deviation is not set.
        """
        if self.age_mean is None or self.age_std is None:
            raise ValueError("mean and std for age have not been evaluated. Have you run fit ?")
        symptoms = df.SYMPTOMS
        race = df.RACE

        dense_columns = ['AGE', 'GENDER']
        if 'LABEL' in df.columns:
            dense_columns = ['LABEL'] + dense_columns

        # ``assign`` builds a new frame, so the caller's frame is left untouched
        # and no assignment is made on a column slice.
        dense_df = df[dense_columns].assign(
            AGE=lambda frame: (frame['AGE'] - self.age_mean) / self.age_std
        )
        dense_matrix = sparse.coo_matrix(dense_df.values)

        offset = self.NUM_RACE_SLOTS
        symptom_columns = symptoms.apply(
            lambda v: [int(token) + offset for token in v.split(",")]
        )

        columns = []
        rows = []
        for row_idx, (race_val, cols) in enumerate(zip(race, symptom_columns)):
            # One entry for the race slot plus one per listed symptom.
            rows.extend([row_idx] * (len(cols) + 1))
            columns.append(int(race_val))
            columns.extend(cols)

        data = np.ones(len(rows))
        symptoms_race_coo = sparse.coo_matrix(
            (data, (rows, columns)),
            shape=(dense_df.shape[0], self.num_symptoms + offset)
        )
        return sparse.hstack([dense_matrix, symptoms_race_coo]).tocsc()

    def fit_transform(self, df, y=None):
        """Fit the age statistics on ``df`` and return its encoded matrix."""
        return self.fit(df).transform(df)

In [6]:
# CSV with the parsed training records, loaded further down via pd.read_csv.
# NOTE(review): absolute, machine-specific path — the notebook only runs
# where this location exists.
train_file = "/home/oagba/bulk/data/output_basic_15k/symptoms/csv/parsed/train.csv_sparse.csv"

In [7]:
num_symptoms = 376
num_conditions = 801
# Width of the encoded feature matrix: 2 dense columns (AGE, GENDER),
# 5 one-hot race columns and one column per symptom. Derived instead of
# hard-coded so it cannot drift from num_symptoms.
input_dim = 2 + 5 + num_symptoms
train_batch_size = 128

In [8]:
# Initial SGD learning rate; later cells lower it by hand through
# optimizer.param_groups.
learning_rate = 3e-3

In [23]:
def compute_accuracy(out, labels):
    """Fraction of rows in ``out`` whose highest-scoring column equals the label.

    Args:
        out: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of target class indices.

    Returns:
        0-dim tensor built from the Python float ``correct / batch_size``.
    """
    # The arg-max is taken on the raw scores; a softmax would not change it,
    # so none is computed.
    _, preds = torch.max(out, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

In [24]:
def train(model, train_loader, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: ``nn.Module`` mapping a batch to class scores.
        train_loader: Iterable of ``(batch, labels)`` pairs.
        optimizer: Optimizer over ``model``'s parameters.

    Returns:
        ``(loss, accuracy)`` as Python floats: the unweighted means of the
        per-batch cross-entropy and per-batch accuracy.
    """
    # Make sure mode-dependent layers (dropout, batch norm) behave as in training.
    model.train()

    losses = []
    accs = []
    for batch, labels in train_loader:
        optimizer.zero_grad()
        out = model(batch)
        loss = F.cross_entropy(out, labels)
        loss.backward()
        optimizer.step()
        # Keep only the value: storing the attached loss would hold on to
        # autograd history for every batch of the epoch.
        losses.append(loss.detach())
        with torch.no_grad():
            accs.append(compute_accuracy(out, labels))

    batch_loss = torch.stack(losses).mean().item()
    batch_acc = torch.stack(accs).mean().item()
    return batch_loss, batch_acc

In [53]:
def test(model, val_loader):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    The model is switched to evaluation mode for the pass and put back into
    whatever mode it was in afterwards.

    Args:
        model: ``nn.Module`` mapping a batch to class scores.
        val_loader: Iterable of ``(batch, labels)`` pairs.

    Returns:
        ``(loss, accuracy)`` as Python floats: the unweighted means of the
        per-batch cross-entropy and per-batch accuracy.
    """
    was_training = model.training
    model.eval()

    losses = []
    accs = []
    try:
        with torch.no_grad():
            for batch, labels in val_loader:
                out = model(batch)
                losses.append(F.cross_entropy(out, labels))
                accs.append(compute_accuracy(out, labels))
            batch_loss = torch.stack(losses).mean().item()
            batch_acc = torch.stack(accs).mean().item()
    finally:
        # Restore the caller's mode even if evaluation fails midway.
        model.train(was_training)
    return batch_loss, batch_acc

In [12]:
def split_data(data, labels, train_size, random_state=None):
    """Make one stratified shuffle split of ``data``/``labels``.

    Args:
        data: DataFrame of features (indexed positionally with ``.iloc``).
        labels: Array-like of class labels aligned with ``data``'s rows.
        train_size: Passed to ``StratifiedShuffleSplit`` as the training share.
        random_state: Optional seed for a reproducible split; ``None`` keeps
            the split random.

    Returns:
        ``(train_data, train_labels, val_data, val_labels)``; the label parts
        are NumPy arrays.
    """
    # The splitter yields positional indices, so index a plain array rather
    # than something with its own index labels.
    labels = np.asarray(labels)

    split_selector = StratifiedShuffleSplit(
        n_splits=1,
        train_size=train_size,
        random_state=random_state
    )
    # n_splits=1: exactly one (train, validation) index pair is produced.
    train_index, val_index = next(split_selector.split(data, labels))

    train_data = data.iloc[train_index]
    val_data = data.iloc[val_index]
    train_labels = labels[train_index]
    val_labels = labels[val_index]

    return train_data, train_labels, val_data, val_labels

In [9]:
# Load the training records; the "Index" column becomes the frame's index.
df = pd.read_csv(train_file, index_col="Index")

  mask |= (ar1 == a)


In [10]:
# Pull the target column out as an array, then remove it from the feature frame.
# Not idempotent: a second run fails because LABEL is already gone from df.
labels = df.LABEL.values
df = df.drop(columns=['LABEL'])

In [13]:
# Stratified split: 80% of the rows for training, the rest for validation.
train_data, train_labels, val_data, val_labels = split_data(df, labels, 0.8)

In [16]:
# Estimate the AGE mean/std on the training split only.
sparsifier = DLSparseMaker(num_symptoms)
sparsifier.fit(train_data)

In [17]:
# Show the fitted age statistics.
print("mean: ", sparsifier.age_mean)
print("std: ", sparsifier.age_std)

mean:  38.741316816862515
std:  23.380120690086834


In [34]:
# Encode both splits with the statistics fitted on the training split.
# The names are re-bound from DataFrames to sparse matrices, so this cell
# cannot be re-run without re-creating the splits first.
train_data = sparsifier.transform(train_data)
val_data = sparsifier.transform(val_data)

In [35]:
# cuda:0 when CUDA is available, otherwise cpu.
device = get_default_device()

In [37]:
# Wrap the sparse matrices and label arrays as torch datasets (the dataset
# constructor densifies the matrix). The names are re-bound once more.
train_data = AiBasicMedDataset(train_data, train_labels)
val_data = AiBasicMedDataset(val_data, val_labels)

In [40]:
# Shuffled training mini-batches, loaded by 4 worker processes into pinned
# host memory (to_device copies with non_blocking=True).
train_loader = DataLoader(
    train_data,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

In [51]:
# Validation batches: unshuffled and twice the training batch size.
val_loader = DataLoader(
    val_data,
    batch_size=train_batch_size*2,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

In [41]:
# Every training batch is moved to `device` as it is yielded.
train_loader = DeviceDataLoader(train_loader, device)

In [52]:
# Every validation batch is moved to `device` as it is yielded.
val_loader = DeviceDataLoader(val_loader, device)

In [42]:
# (in_features, out_features) per linear layer. The outer sizes reuse the
# notebook constants so the network always matches the encoded feature width
# and the number of target conditions.
layer_config = [
    [input_dim, 1024],
    [1024, 1024],
    [1024, num_conditions]
]

In [45]:
# Instantiate the classifier network.
model = DNN(input_dim, num_conditions, layer_config)

In [46]:
# A module is neither list nor tuple, so to_device calls model.to(device, ...).
model = to_device(model, device)

Tesla K40c with CUDA capability sm_35 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70 sm_75.
If you want to use the Tesla K40c GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [47]:
# SGD with momentum 0.9, starting at the initial learning rate.
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [49]:
# train for 20 epochs and see ...
for idx in range(20):
    loss, acc = train(model, train_loader, optimizer)
    # "{:.4f}" rounds to 4 decimals; a bare "{}" followed by ".4f" would print
    # the full float and then the literal text ".4f".
    print("Epoch {}: loss: {:.4f} acc {:.4f}".format(idx+1, loss, acc))

Epoch 1: loss: 2.2509398460388184.4f acc 0.46928438544273376.4f
Epoch 2: loss: 1.4014298915863037.4f acc 0.5674718022346497.4f
Epoch 3: loss: 1.3499873876571655.4f acc 0.5735849738121033.4f
Epoch 4: loss: 1.3294744491577148.4f acc 0.5760802626609802.4f
Epoch 5: loss: 1.3184858560562134.4f acc 0.5774691700935364.4f
Epoch 6: loss: 1.3110846281051636.4f acc 0.578559935092926.4f
Epoch 7: loss: 1.3059180974960327.4f acc 0.5791831612586975.4f
Epoch 8: loss: 1.3019062280654907.4f acc 0.5801682472229004.4f
Epoch 9: loss: 1.2986336946487427.4f acc 0.5808623433113098.4f
Epoch 10: loss: 1.2959980964660645.4f acc 0.581196129322052.4f
Epoch 11: loss: 1.2936228513717651.4f acc 0.5817708969116211.4f
Epoch 12: loss: 1.2916280031204224.4f acc 0.5820255279541016.4f
Epoch 13: loss: 1.2900021076202393.4f acc 0.5825397372245789.4f
Epoch 14: loss: 1.2883979082107544.4f acc 0.5830676555633545.4f
Epoch 15: loss: 1.2869853973388672.4f acc 0.5832527279853821.4f
Epoch 16: loss: 1.2857179641723633.4f acc 0.583613

In [54]:
# Validation loss/accuracy after the first training run.
val_loss, val_acc = test(model, val_loader)

In [59]:
# Report the validation metrics rounded to 4 decimals.
print("Val Loss: %.4f\t Val. Acc: %.4f" % (val_loss, val_acc))

Val Loss: 1.2909	 Val. Acc: 0.5800


In [56]:
# adjust the learning rate: drop to a tenth of the initial value and write it
# into every parameter group of the existing optimizer
new_learning_rate = learning_rate * 0.1
for g in optimizer.param_groups:
    g['lr'] = new_learning_rate

In [None]:
# another 10 epochs!?
epoch_count = 20  # epochs already completed; offsets the printed epoch number
for idx in range(10):
    loss, acc = train(model, train_loader, optimizer)
    # "{:.4f}" rounds to 4 decimals; "{}.4f" would append a literal ".4f".
    print("Epoch {}: loss: {:.4f} acc {:.4f}".format(idx+1+epoch_count, loss, acc))

epoch_count += 10

In [58]:
# Show the module structure.
print(model)

DNN(
  (model): Sequential(
    (linear-0): Linear(in_features=383, out_features=1024, bias=True)
    (nonlinear-0): ReLU()
    (linear-1): Linear(in_features=1024, out_features=1024, bias=True)
    (nonlinear-1): ReLU()
    (linear-2): Linear(in_features=1024, out_features=801, bias=True)
  )
)


In [60]:
# Validation loss/accuracy after the first learning-rate reduction.
val_loss, val_acc = test(model, val_loader)

In [61]:
# Report the validation metrics rounded to 4 decimals.
print("Val Loss: %.4f\t Val. Acc: %.4f" % (val_loss, val_acc))

Val Loss: 1.2704	 Val. Acc: 0.5870


In [63]:
# adjust the learning rate: divide the current manual rate by 3 and write it
# into every parameter group.
# Not idempotent: each run of this cell divides new_learning_rate again.
new_learning_rate /= 3
for g in optimizer.param_groups:
    g['lr'] = new_learning_rate

In [None]:
# another 10 epochs!?
epoch_count = 30  # epochs already completed; offsets the printed epoch number
for idx in range(10):
    loss, acc = train(model, train_loader, optimizer)
    # "{:.4f}" rounds to 4 decimals; "{}.4f" would append a literal ".4f".
    print("Epoch {}: loss: {:.4f} acc {:.4f}".format(idx+1+epoch_count, loss, acc))

epoch_count += 10

In [65]:
# Prints a fixed string holding log lines for epochs 31-34.
# NOTE(review): presumably pasted in by hand from the preceding training cell,
# which has no stored output — the numbers are not recomputed here.
print("""
Epoch 31: loss: 1.253902792930603.4f acc 0.5941389799118042.4f
Epoch 32: loss: 1.2536697387695312.4f acc 0.5939987301826477.4f
Epoch 33: loss: 1.253538966178894.4f acc 0.5941035151481628.4f
Epoch 34: loss: 1.2534538507461548.4f acc 0.594102144241333.4f
""")


Epoch 31: loss: 1.253902792930603.4f acc 0.5941389799118042.4f
Epoch 32: loss: 1.2536697387695312.4f acc 0.5939987301826477.4f
Epoch 33: loss: 1.253538966178894.4f acc 0.5941035151481628.4f
Epoch 34: loss: 1.2534538507461548.4f acc 0.594102144241333.4f



In [66]:
# Validation loss/accuracy after the second learning-rate reduction.
val_loss, val_acc = test(model, val_loader)

In [67]:
# Report the validation metrics rounded to 4 decimals.
print("Val Loss: %.4f\t Val. Acc: %.4f" % (val_loss, val_acc))

Val Loss: 1.2685	 Val. Acc: 0.5874


In [68]:
def compute_accuracy_(out, labels):
    """Accuracy variant that takes the arg-max over log-softmax values.

    Returns a 0-dim tensor holding ``correct / batch_size``.
    """
    log_probs = F.log_softmax(out, dim=1)
    predicted = torch.max(log_probs, dim=1)[1]
    n_correct = torch.sum(predicted == labels).item()
    return torch.tensor(n_correct / len(predicted))

In [69]:
def test_(model, val_loader):
    """Evaluate ``model`` on ``val_loader`` using ``compute_accuracy_``.

    The model is switched to evaluation mode for the pass and put back into
    whatever mode it was in afterwards; no gradients are tracked.

    Args:
        model: ``nn.Module`` mapping a batch to class scores.
        val_loader: Iterable of ``(batch, labels)`` pairs.

    Returns:
        ``(loss, accuracy)`` as Python floats: the unweighted means of the
        per-batch cross-entropy and per-batch accuracy.
    """
    was_training = model.training
    model.eval()

    losses = []
    accs = []
    try:
        with torch.no_grad():
            for batch, labels in val_loader:
                out = model(batch)
                losses.append(F.cross_entropy(out, labels))
                accs.append(compute_accuracy_(out, labels))
            batch_loss = torch.stack(losses).mean().item()
            batch_acc = torch.stack(accs).mean().item()
    finally:
        # Restore the caller's mode even if evaluation fails midway.
        model.train(was_training)
    return batch_loss, batch_acc

In [70]:
# Re-evaluate with the log-softmax accuracy variant for comparison.
val_loss, val_acc = test_(model, val_loader)

In [71]:
# Report the validation metrics rounded to 4 decimals.
print("Val Loss: %.4f\t Val. Acc: %.4f" % (val_loss, val_acc))

Val Loss: 1.2685	 Val. Acc: 0.5874


In [77]:
# Grab only the first validation batch: d holds the features, l the labels.
for item in val_loader:
    d, l = item
    break

In [82]:
from sklearn.metrics import precision_score

In [91]:
def compute_precision(out, labels):
    """Weighted precision of the arg-max predictions in ``out`` against ``labels``.

    Tensors are copied to host memory first when the predictions live on a
    CUDA device. The score is computed with ``zero_division=1``.
    """
    predicted = torch.max(out, dim=1)[1]
    truth = labels
    if predicted.is_cuda:
        predicted, truth = predicted.cpu(), truth.cpu()
    return precision_score(truth.numpy(), predicted.numpy(), average='weighted', zero_division=1)

In [112]:
# Collect the weighted precision of every validation batch in `prec`,
# without tracking gradients.
with torch.no_grad():
    prec = []
    for item in val_loader:
        d, l = item
        out = model(d)
        prec.append(compute_precision(out, l))

RuntimeError: Caught RuntimeError in pin memory thread for device 0.
Original Traceback (most recent call last):
  File "/shares/bulk/oagba/work/medvice-parser/lib64/python3.6/site-packages/torch/utils/data/_utils/pin_memory.py", line 31, in _pin_memory_loop
    data = pin_memory(data)
  File "/shares/bulk/oagba/work/medvice-parser/lib64/python3.6/site-packages/torch/utils/data/_utils/pin_memory.py", line 55, in pin_memory
    return [pin_memory(sample) for sample in data]
  File "/shares/bulk/oagba/work/medvice-parser/lib64/python3.6/site-packages/torch/utils/data/_utils/pin_memory.py", line 55, in <listcomp>
    return [pin_memory(sample) for sample in data]
  File "/shares/bulk/oagba/work/medvice-parser/lib64/python3.6/site-packages/torch/utils/data/_utils/pin_memory.py", line 47, in pin_memory
    return data.pin_memory()
RuntimeError: cuda runtime error (710) : device-side assert triggered at /pytorch/aten/src/THC/THCCachingHostAllocator.cpp:278


In [114]:
# Display the layer sizes the model was built from.
layer_config

[[383, 1024], [1024, 1024], [1024, 801]]

In [None]:
Epoch 34: loss: 1.2534538507461548.4f acc 0.594102144241333.4f

In [116]:
# Checkpoint payload: architecture description, metrics, model weights and the
# age scaling statistics needed to encode new inputs the same way.
# NOTE(review): the metric values are typed in by hand rather than read from
# variables, and "epoch" is 30 while the train metrics equal the epoch-34 log
# line — confirm which epoch is meant.
data = {
    "input_dim": input_dim,
    "output_dim": num_conditions,
    "layer_config": layer_config,
    "train_acc": 0.594102144241333,
    "val_acc": 0.5874,
    "train_loss": 1.2534538507461548,
    "val_loss": 1.2685,
    "epoch": 30,
    "model_dict": model.state_dict(),
    "age_std": sparsifier.age_std,
    "age_mean": sparsifier.age_mean
}

In [117]:
# Destination file for the checkpoint.
# NOTE(review): absolute, machine-specific output directory; nothing here
# creates it, so it must already exist.
import os
opdir = "/home/oagba/bulk/dl_runs/working/jupyter_basic_15k"
filename = os.path.join(opdir, "run1.torch")

In [118]:
# Serialise the checkpoint dict to `filename`.
torch.save(data, filename)

RuntimeError: cuda runtime error (710) : device-side assert triggered at /pytorch/torch/csrc/generic/serialization.cpp:31