In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import torch
import time
import os
import re

In [2]:
# Record once whether PyTorch can see a CUDA device and echo the flag;
# the loader and device setup further down branch on `cuda`.
cuda = torch.cuda.is_available()
print(cuda)

True


In [3]:
from torch.utils.data import DataLoader, Dataset, TensorDataset

import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
import torch.optim as optim

In [4]:
class MyDataset(Dataset):
    """Map-style dataset over a sequence of ``(features, target)`` pairs.

    The stored sequence is kept exactly as given; conversion to float NumPy
    arrays happens each time an item is fetched.
    """

    def __init__(self, values):
        # Indexable collection whose elements unpack into exactly two parts.
        self.values = values

    def __len__(self):
        """Return the number of stored pairs."""
        return len(self.values)

    def __getitem__(self, index):
        """Return the pair at ``index`` as two arrays cast to ``float``."""
        features, target = self.values[index]
        feature_array = np.array(features).astype("float")
        target_array = np.array(target).astype("float")
        return feature_array, target_array

In [5]:
def readFile(path):
    """Parse one tab-separated export file into filtered feature rows.

    Every non-blank line is split on runs of tabs and its first field is
    discarded. The last remaining field is split on ';' and each piece is
    replaced by the list of numbers found in it (as floats); those lists are
    appended after the other fields. From each parsed row the entries at
    positions 2, 3 and 4 plus the maximum of the entry at position 10 are
    kept (the notebook treats them as gauge, IV location, protocol and
    pressure).

    A kept row is dropped when any of its four values equals 'Unknown', or
    when its first or second value is one of the excluded categories.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of ``[x[2], x[3], x[4], max(x[10])]`` lists, in file order.

    Raises:
        IndexError: If a non-blank line yields fewer than 11 entries.
        ValueError: If the entry at position 10 is empty.
    """
    # Raw string: '\d' and '\.' are invalid escapes in a normal string literal.
    number_pattern = re.compile(r'\d*\.?\d+')

    lines = []
    with open(path) as file:
        for row in file:
            # Blank lines (e.g. a trailing newline) carry no record; without
            # this guard they fail on the pop() below.
            if not row.strip():
                continue
            row = re.split(r'\t+', row.rstrip('\t'))[1:]
            values = row.pop(-1).split(';')
            for r in values:
                row.append([float(token) for token in number_pattern.findall(r)])
            lines.append(row)

    features = [[x[2], x[3], x[4], max(x[10])] for x in lines]

    # The same category names are excluded for both the gauge and the IV location.
    notGauges = ['PowerPICC', 'PICC', 'PowerPort', 'CentralLine', 'Other']
    notivLoc = ['Other', 'PowerPort', 'CentralLine', 'PICC', 'PowerPICC']
    return [
        f for f in features
        if 'Unknown' not in f and f[0] not in notGauges and f[1] not in notivLoc
    ]

In [6]:
# Read both export files and chain their filtered rows in file order.
original = [
    row
    for source_path in ("SN-SRMC-CT-1.txt", "SN-SRMC-CT-2.txt")
    for row in readFile(source_path)
]

In [7]:
# Distinct values seen in each column of the filtered rows.
gaugeTypes = {row[0] for row in original}
ivLocation = {row[1] for row in original}
pressure = {row[3] for row in original}

# Every pressure observed for a given protocol string, in row order.
protocol = dict()
for row in original:
    protocol.setdefault(row[2], []).append(row[3])

"Gauge Types:", len(gaugeTypes), "IV Locations:", len(ivLocation), "Protocols:", len(protocol), "Pressures:", len(pressure)

('Gauge Types:',
 6,
 'IV Locations:',
 10,
 'Protocols:',
 3360,
 'Pressures:',
 25891)

In [8]:
def parseProtocol(x):
    """Turn a space-separated protocol string into a fixed-length feature list.

    Tokens are recognised by their first character:

    * ``@<number>``: flow rate, parsed as float.
    * ``C<int>``: contrast flag set to 1 and its amount.
    * ``S<int>``: saline flag set to 1 and its amount.
    * ``R<int>%<char><int>``: mixed flag set to 1; the integer after ``R``
      is the mixed amount, and the text after ``%`` with its first character
      dropped is parsed as the percentage.

    Tokens starting with any other character are ignored, and when a kind
    appears more than once the last occurrence wins.

    Args:
        x: Protocol description, tokens separated by single spaces.

    Returns:
        ``[Contrast, Saline, Mixed, AmountC, AmountS, AmountM, PercentM,
        FlowRate]``; any value without a matching token stays 0.

    Raises:
        ValueError: If a recognised token carries a non-numeric payload.
        IndexError: If an ``R`` token contains no ``%``.
    """
    Contrast = 0
    Saline = 0
    Mixed = 0
    AmountC = 0
    AmountS = 0
    AmountM = 0
    PercentM = 0
    # Must be spelled exactly like the name that is returned; otherwise a
    # protocol without an "@" token fails with UnboundLocalError.
    FlowRate = 0

    for token in x.split(" "):
        if not token:
            # Leading, trailing or doubled spaces produce empty tokens.
            continue
        kind = token[0]
        if kind == "@":
            FlowRate = float(token[1:])
        elif kind == "C":
            Contrast = 1
            AmountC = int(token[1:])
        elif kind == "S":
            Saline = 1
            AmountS = int(token[1:])
        elif kind == "R":
            rprot = token.split("%")
            Mixed = 1
            AmountM = int(rprot[0][1:])
            PercentM = int(rprot[1][1:])
    return [Contrast, Saline, Mixed, AmountC, AmountS, AmountM, PercentM, FlowRate]

In [9]:
def roundup(x):
    """Round ``x`` up to the nearest multiple of 100 and return it as an int."""
    hundreds = math.ceil(x / 100.0)
    return 100 * int(hundreds)

# Integer codes for the categorical columns, assigned in sorted order.
mapGauge = {gauge: code for code, gauge in enumerate(sorted(gaugeTypes))}
mapIV = {location: code for code, location in enumerate(sorted(ivLocation))}

# Pressure buckets: lower bounds spaced `bucketSize` apart, from the smallest
# observed pressure up to the largest one rounded up to a multiple of 100.
bucketSize = 20
buckets = list(range(int(min(pressure)), int(roundup(max(pressure))), bucketSize))
mapPressure = {lower_bound: code for code, lower_bound in enumerate(buckets)}
count = len(buckets)

# One (feature vector, pressure) pair per filtered row.
result = [
    ([mapGauge[v[0]], mapIV[v[1]]] + parseProtocol(v[2]), float(v[3]))
    for v in original
]

In [10]:
import random 

# Shuffle the samples in place before splitting them in half.
# random.shuffle is an unbiased in-place shuffle. A hand-written Fisher-Yates
# loop has to draw j from random.randint(0, i), because randint includes its
# upper bound: drawing from (0, i + 1) can index one past the end of the list.
random.shuffle(result)

half = len(result) // 2

training_set = result[:half]
validation_set = result[half:]

# Summary statistics of the validation targets, printed for reference.
validation_p = np.array([p[1] for p in validation_set])

mean = np.mean(validation_p)
std = np.std(validation_p)

print(mean, std)

29.524817525434855 34.73501216666802


In [11]:
# Worker processes are only used when a GPU is available.
num_workers = 8 if cuda else 0

if cuda:
    # GPU run: parallel loading with pinned memory, 64 samples per batch.
    train_loader_args = dict(shuffle=True, batch_size=64, num_workers=num_workers, pin_memory=True)
    val_loader_args = dict(shuffle=False, batch_size=64, num_workers=num_workers, pin_memory=True)
else:
    # CPU run: default loading; validation goes one sample at a time.
    train_loader_args = dict(shuffle=True, batch_size=64)
    val_loader_args = dict(shuffle=False, batch_size=1)

train_dataset = MyDataset(training_set)
train_loader = DataLoader(train_dataset, **train_loader_args)

val_dataset = MyDataset(validation_set)
val_loader = DataLoader(val_dataset, **val_loader_args)

In [12]:
# Layer widths derived from the length of the first training feature vector.
input_size = len(train_dataset[0][0])
output_size = 1
hidden_size = (input_size ** 2) // 2

input_size, output_size, hidden_size

(10, 1, 50)

In [20]:
class MyNetwork(nn.Module):
    """Fully connected regressor with three hidden stages.

    Each hidden stage applies Linear, then Sigmoid, then BatchNorm1d; a final
    Linear layer maps the last hidden representation to ``output_size``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Stage 1: input -> hidden
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.act1 = nn.Sigmoid()
        self.b1 = nn.BatchNorm1d(hidden_size)
        # Stage 2: hidden -> hidden
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.act2 = nn.Sigmoid()
        self.b2 = nn.BatchNorm1d(hidden_size)
        # Stage 3: hidden -> hidden
        self.layer3 = nn.Linear(hidden_size, hidden_size)
        self.act3 = nn.Sigmoid()
        self.b3 = nn.BatchNorm1d(hidden_size)
        # Output head
        self.layer4 = nn.Linear(hidden_size, output_size)

    def forward(self, input_val):
        """Run ``input_val`` through the three hidden stages and the output head."""
        hidden_stages = (
            (self.layer1, self.act1, self.b1),
            (self.layer2, self.act2, self.b2),
            (self.layer3, self.act3, self.b3),
        )
        h = input_val
        for linear, activation, norm in hidden_stages:
            h = norm(activation(linear(h)))
        return self.layer4(h)

In [21]:
# Target device follows the CUDA flag computed at the top of the notebook.
device = torch.device("cuda" if cuda else "cpu")

# Regressor with 32 hidden units, float32 parameters.
model = MyNetwork(input_size, 32, output_size).float()

# Mean absolute error, Adam, and a plateau scheduler that waits one epoch.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1)

model.to(device)
print(model)

MyNetwork(
  (layer1): Linear(in_features=10, out_features=32, bias=True)
  (act1): Sigmoid()
  (b1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer2): Linear(in_features=32, out_features=32, bias=True)
  (act2): Sigmoid()
  (b2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer3): Linear(in_features=32, out_features=32, bias=True)
  (act3): Sigmoid()
  (b3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer4): Linear(in_features=32, out_features=1, bias=True)
)


In [22]:
def combine(x, y):
    """Count the positions, indexed over ``len(x)``, where both ``x`` and ``y`` are truthy."""
    return sum(1 for pos in range(len(x)) if x[pos] and y[pos])

In [23]:
def train_epoch(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader`` and collect error statistics.

    For each batch the model output is cast to double, squeezed on dimension
    1 and compared with the target. The absolute error is computed once per
    batch and reused for the accuracy and for all bucket counts.

    Args:
        model: Module to train; batches are moved to the device of its
            first parameter.
        train_loader: Iterable with ``len()`` that yields ``(data, target)``
            tensor pairs.
        criterion: Loss callable taking ``(outputs, target)``.
        optimizer: Optimizer over ``model``'s parameters.

    Returns:
        A tuple ``(loss, acc, buckets)`` where ``loss`` is the mean of the
        per-batch losses, ``acc`` is the percentage of samples with absolute
        error below 2, and ``buckets`` lists the sample counts with absolute
        error in [0, 0.5], (0.5, 1], (1, 2], (2, 5], (5, 10], (10, 20] and
        (20, inf).
    """
    model.train()
    # Follow the model's own device rather than a notebook-level global.
    model_device = next(model.parameters()).device

    upper_edges = (0.5, 1, 2, 5, 10, 20)
    bucket_counts = [0.0] * (len(upper_edges) + 1)
    running_loss = 0.0
    total_predictions = 0.0
    correct_predictions = 0.0

    for data, target in train_loader:
        optimizer.zero_grad()   # .backward() accumulates gradients
        data = data.to(model_device)
        target = target.to(model_device)  # all data & model on same device

        outputs = model(data.float()).double().squeeze(1)

        # Detached: the statistics must not take part in autograd.
        abs_err = torch.abs(outputs - target).detach()

        total_predictions += target.size(0)
        correct_predictions += torch.sum(abs_err < 2).item()

        # Vectorised bucket counts: one reduction per bucket instead of a
        # Python loop over every element of the batch.
        bucket_counts[0] += torch.sum(abs_err <= upper_edges[0]).item()
        for k in range(1, len(upper_edges)):
            in_bucket = (abs_err > upper_edges[k - 1]) & (abs_err <= upper_edges[k])
            bucket_counts[k] += torch.sum(in_bucket).item()
        bucket_counts[-1] += torch.sum(abs_err > upper_edges[-1]).item()

        loss = criterion(outputs, target)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

    running_loss /= len(train_loader)
    acc = (correct_predictions/total_predictions)*100.0
    print('Training Accuracy: ', acc, "    ", 'Training Loss: ', running_loss)
    return running_loss, acc, bucket_counts

In [24]:
def val_model(model, dev_loader, criterion):
    """Evaluate ``model`` on ``dev_loader`` without gradients and collect error statistics.

    The model is switched to eval mode. For each batch the output is cast to
    double (the same cast the training pass applies), squeezed on dimension
    1 and compared with the target. The absolute error is computed once per
    batch and reused for the accuracy and for all bucket counts.

    Args:
        model: Module to evaluate; batches are moved to the device of its
            first parameter.
        dev_loader: Iterable with ``len()`` that yields ``(data, target)``
            tensor pairs.
        criterion: Loss callable taking ``(outputs, target)``.

    Returns:
        A tuple ``(loss, acc, buckets)`` where ``loss`` is the mean of the
        per-batch losses, ``acc`` is the percentage of samples with absolute
        error below 2, and ``buckets`` lists the sample counts with absolute
        error in [0, 0.5], (0.5, 1], (1, 2], (2, 5], (5, 10], (10, 20] and
        (20, inf).
    """
    with torch.no_grad():
        model.eval()
        # Follow the model's own device rather than a notebook-level global.
        model_device = next(model.parameters()).device

        upper_edges = (0.5, 1, 2, 5, 10, 20)
        bucket_counts = [0.0] * (len(upper_edges) + 1)
        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for data, target in dev_loader:
            data = data.to(model_device)
            target = target.to(model_device)

            # Explicit double cast so outputs and target share a dtype.
            outputs = model(data.float()).double().squeeze(1)

            abs_err = torch.abs(outputs - target)

            total_predictions += target.size(0)
            correct_predictions += torch.sum(abs_err < 2).item()

            # Vectorised bucket counts: one reduction per bucket instead of
            # a Python loop over every element of the batch.
            bucket_counts[0] += torch.sum(abs_err <= upper_edges[0]).item()
            for k in range(1, len(upper_edges)):
                in_bucket = (abs_err > upper_edges[k - 1]) & (abs_err <= upper_edges[k])
                bucket_counts[k] += torch.sum(in_bucket).item()
            bucket_counts[-1] += torch.sum(abs_err > upper_edges[-1]).item()

            loss = criterion(outputs, target)
            running_loss += loss.item()

        running_loss /= len(dev_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Validation Accuracy: ', acc, "    ", 'Validation Loss: ', running_loss)
        return running_loss, acc, bucket_counts

In [None]:
from tqdm import tqdm

n_epochs = 30
# Per-epoch history: losses, accuracies and per-bucket error counts.
Train_loss = []
Train_acc = []
Val_loss = []
Val_acc = []
val_a_acc = []
train_a_acc = []

# from torch.utils.tensorboard import SummaryWriter
# writer = SummaryWriter("./runs/training")

for epoch in tqdm(range(n_epochs)):
    print("Epoch " + str(epoch)) 
            
    train_l, train_a, t_all_a = train_epoch(model, train_loader, criterion, optimizer)
    val_l, val_a, v_all_a = val_model(model, val_loader, criterion)
    
    # NOTE(review): the plateau scheduler is driven by the training loss;
    # confirm this is intended rather than the validation loss.
    scheduler.step(train_l)
    
#     for name, param in model.named_parameters():
#         writer.add_histogram('grad_' + name, param.grad.data, epoch)
    
    Train_loss.append(train_l)
    Train_acc.append(train_a)
    Val_loss.append(val_l)
    Val_acc.append(val_a)
    val_a_acc.append(v_all_a)
    # Training history takes the training bucket counts, not the validation ones.
    train_a_acc.append(t_all_a)
    
#     writer.add_scalars('loss', {'train': train_l, 'val': val_l}, epoch)
#     writer.add_scalars('acc', {'train': train_a, 'val': val_a}, epoch)
    
    # The same two files are overwritten on every epoch, so only the latest
    # model and optimizer objects are kept on disk.
    print("Saving the Model Version " + str(epoch))
    torch.save(model, './model_final_r.pt')
    torch.save(optimizer, './adam_model_final_r.pt')
    print('='*115)


  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 0
Training Accuracy:  0.020885547201336674      Training Loss:  29.571586322757703


  "type " + obj.__name__ + ". It won't be checked "
  3%|▎         | 1/30 [00:13<06:37, 13.69s/it]

Validation Accuracy:  0.008950681743592804      Validation Loss:  29.366443915637692
Saving the Model Version 0
Epoch 1
Training Accuracy:  0.17603532641126626      Training Loss:  29.00397296006766


  7%|▋         | 2/30 [00:27<06:22, 13.65s/it]

Validation Accuracy:  0.40576423904287373      Validation Loss:  28.48428324783408
Saving the Model Version 1
Epoch 2
Training Accuracy:  0.7399451008473565      Training Loss:  28.051723595917085


 10%|█         | 3/30 [00:40<06:07, 13.60s/it]

Validation Accuracy:  0.6504162067010771      Validation Loss:  27.45105122512059
Saving the Model Version 2
Epoch 3


In [None]:
# Labels of the absolute-error buckets, in the order the counts are stored.
objects = ('[0, 0.5]', '(0.5, 1]', '(1, 2]', '(2, 5]', '(5, 10]', '(10, 20]', '(20, Inf.)')
width = 0.2  # the width of the bars


def _plot_error_buckets(val_counts, train_counts, title, out_path):
    """Draw grouped bars of per-bucket sample counts, save the figure and show it.

    Args:
        val_counts: Validation sample counts, one per entry of ``objects``.
        train_counts: Training sample counts, one per entry of ``objects``.
        title: Figure title.
        out_path: File the figure is saved to (300 dpi).
    """
    def _percent(part, total):
        # Truncated integer percentage; 0 when there are no samples at all.
        return int((part/total)*100) if total else 0

    positions = np.arange(len(objects))

    fig, ax = plt.subplots()
    ax.bar(positions - width/2, val_counts, width, label="Validation")
    ax.bar(positions + width/2, train_counts, width, label="Training")

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('Training Points')
    ax.set_title(title)
    ax.set_xticks(positions)

    total_val = sum(val_counts)
    total_train = sum(train_counts)
    # Each tick shows the bucket plus its validation and training share.
    labels = [
        name + "\n" + str(_percent(v, total_val)) + "%," + str(_percent(t, total_train)) + "%"
        for name, v, t in zip(objects, val_counts, train_counts)
    ]
    ax.set_xticklabels(labels)
    ax.legend()
    plt.savefig(out_path, dpi=300)

    plt.show()


_plot_error_buckets(val_a_acc[-1], train_a_acc[-1], 'Accuracy for Last Epoch', 'Acc_Last_Epoch.png')
_plot_error_buckets(val_a_acc[0], train_a_acc[0], 'Accuracy for First Epoch', 'Acc_Zero_Epoch.png')

# Index each curve by the epochs actually recorded, so an interrupted
# training run (fewer than n_epochs entries) still plots without a
# shape mismatch.
fig, ax = plt.subplots()
ax.plot(np.arange(len(Train_loss)), Train_loss, label="Training")
ax.plot(np.arange(len(Val_loss)), Val_loss, label="Validation")
ax.set_xlabel("Number of Epochs")
ax.set_ylabel("Loss")
ax.legend()
ax.set_title("Epochs vs Loss")
plt.savefig('Loss.png', dpi=300)
plt.show()