In [20]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import cv2
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pdb
import math

torch.cuda.empty_cache()

def searchForFocus(filename, substring):
    with open(filename, 'r') as file:
        data = file.read()
        location = data.find(substring)
        croppedStr = data[location+len(substring):]
        # Split at spaces and find first number
        for word in croppedStr.split(): # Split at spaces
            # Delete any commas    
            word = word.replace(',', "")
            try:
                focusPosition = int(word)
                return focusPosition
            except ValueError:
                continue
    file.close()

class Dataset(torch.utils.data.Dataset):
    # ids indicates what subfolders (samples) to access
    def __init__(self, foldername, subfolderPrefix, ids):
        self.foldername = foldername
        self.subfolderPrefix = subfolderPrefix
        self.ids = ids
        
    def __len__(self):
        return len(self.ids)

    def __getitem__(self, index):
        sampleFoldername = self.foldername + '/' + self.subfolderPrefix + str(index)
        
# Moved to separate createAF file        
#         # Load in image as [0,1] array
#         image = cv2.imread(sampleFoldername + '/before' + str(index) + '.tif', 0) * 1 / 255.0
        
#         # Shift it so is from [-1,1]
#         image *= 2
#         image -= 1
#         X = torch.from_numpy(image)
#         X = X.unsqueeze(0) # Add fake first dimension to specify 1-channel

#         # Get the label
#         beforeFocus = searchForFocus(sampleFoldername + '/focusInfo.txt', 'before focus: ')
#         afterFocus = searchForFocus(sampleFoldername + '/focusInfo.txt', 'after focus: ')
        
#         y = afterFocus - beforeFocus
        
        X = torch.load(sampleFoldername + '/X')
        y = torch.load(sampleFoldername + '/y')
        
        return X, y
    

In [21]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

params = {'batch_size': 2,
          'shuffle': True,
          'num_workers': 2}

# Randomly partition the full list into a training set and validation set
numSamples = 100 # total number of samples collected
frac = 1/5 # fraction to be validation

# Fix the seed for comparison
np.random.seed(0)
permutedIds = np.random.permutation(range(numSamples))
splitPoint = int((1-frac) * len(permutedIds))
trainingIds = permutedIds[:splitPoint]
valIds = permutedIds[splitPoint:]
testIds = range(numSamples, 200)

training_set = Dataset('/home/aofeldman/Desktop/AFdataCollection', 'sample', trainingIds)
training_generator = torch.utils.data.DataLoader(training_set, **params)

validation_set = Dataset('/home/aofeldman/Desktop/AFdataCollection', 'sample', valIds)
#validation_generator = torch.utils.data.DataLoader(validation_set, **params)

test_set = Dataset('/home/aofeldman/Desktop/AFdataCollection', 'sample', testIds)

In [22]:
with torch.no_grad():
    X, y = training_set.__getitem__(1)

    def imshow(img,wait):
        img = img / 2 + 0.5     # unnormalize
        npimg = np.squeeze(img.numpy())
        width = int(0.15 * npimg.shape[1])
        height = int(0.15 * npimg.shape[0])
        cv2.imshow("Hi",cv2.resize(npimg, (width, height)))
        cv2.waitKey(wait)
        cv2.destroyAllWindows()
    imshow(X, 1000)
    print(X.shape)
    print(y)

torch.Size([1, 6004, 7920])
-87


In [23]:
# Consider placing dropout layers after conv2d layers (conv2d -> batchnorm2d -> leakyReLU -> dropout(p=0.1))
# And also place after fully connected layers (linear -> leakyReLU -> dropout(p=0.3))
# TODO: Should actually figure out appropriate amount of padding for layers 

net = nn.Sequential(
    # First, let's try to "downsample" by using a large stride with a one-channel convolution
    nn.Conv2d(1, 1, kernel_size=7, stride=3),
    nn.Conv2d(1, 4, kernel_size=5, stride=1),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Dropout2d(0.1),
    nn.MaxPool2d(kernel_size=4, stride=4),
    nn.Conv2d(4, 4, kernel_size=5, stride=1, padding=1),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Dropout2d(0.1),
    nn.MaxPool2d(kernel_size=4, stride=4),
    nn.Conv2d(4, 4, kernel_size=5, stride=1, padding=1),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Dropout2d(0.1),
    nn.MaxPool2d(kernel_size=4, stride=4),
    nn.Conv2d(4, 4, kernel_size=5, stride=1, padding=1),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Dropout2d(0.1),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Dropout2d(0.1),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Flatten(),
    nn.Linear(252, 100),
    #nn.BatchNorm1d(100),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    #nn.Dropout(0.3),
    nn.Linear(100, 10),
    #nn.BatchNorm1d(10),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    #nn.Dropout(0.3),
    nn.Linear(10, 1)
)

net = net.to(device)

In [24]:
for p in net.parameters():
    print(p.data.shape)

torch.Size([1, 1, 7, 7])
torch.Size([1])
torch.Size([4, 1, 5, 5])
torch.Size([4])
torch.Size([4])
torch.Size([4])
torch.Size([4, 4, 5, 5])
torch.Size([4])
torch.Size([4])
torch.Size([4])
torch.Size([4, 4, 5, 5])
torch.Size([4])
torch.Size([4])
torch.Size([4])
torch.Size([4, 4, 5, 5])
torch.Size([4])
torch.Size([4])
torch.Size([4])
torch.Size([4, 4, 3, 3])
torch.Size([4])
torch.Size([4])
torch.Size([4])
torch.Size([100, 252])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])
torch.Size([1, 10])
torch.Size([1])


In [25]:
count = 0
for p in net.parameters():
    n_params = np.prod(list(p.data.shape)).item()
    count += n_params
    print(p.data)
print(f'total params: {count}')

tensor([[[[ 0.0649, -0.1221,  0.0271,  0.0571, -0.1025, -0.1066, -0.0066],
          [-0.1292,  0.0231, -0.0684, -0.1060,  0.1351, -0.0822, -0.0761],
          [-0.0549, -0.0369,  0.0223,  0.1170, -0.0348, -0.0443, -0.0077],
          [ 0.1146, -0.1058, -0.1262, -0.0412,  0.0644,  0.0782, -0.0650],
          [-0.0231,  0.0069,  0.0009, -0.0057,  0.0257,  0.1331, -0.1190],
          [-0.1303,  0.1106,  0.0874,  0.0949, -0.0014,  0.0278, -0.0407],
          [ 0.0161,  0.1418, -0.0711, -0.1017, -0.0423, -0.0199,  0.0061]]]],
       device='cuda:0')
tensor([0.1390], device='cuda:0')
tensor([[[[-0.1124,  0.0825,  0.0668,  0.1620, -0.1714],
          [-0.0717,  0.0933,  0.0316, -0.0484,  0.1392],
          [ 0.1029, -0.0790, -0.1066, -0.1478, -0.0995],
          [ 0.0063, -0.1082,  0.0782, -0.0519,  0.1547],
          [ 0.1026,  0.0992, -0.1950, -0.0404,  0.1742]]],


        [[[ 0.1650, -0.0886, -0.1122,  0.0757, -0.1773],
          [ 0.1053, -0.0867,  0.0573,  0.1418,  0.0663],
          [

tensor([[-0.0121,  0.0080, -0.0352,  ...,  0.0090,  0.0211,  0.0286],
        [-0.0565, -0.0347, -0.0533,  ...,  0.0116,  0.0361,  0.0229],
        [-0.0348,  0.0044,  0.0079,  ..., -0.0548,  0.0215, -0.0009],
        ...,
        [-0.0098, -0.0404, -0.0581,  ...,  0.0269, -0.0301, -0.0559],
        [-0.0103, -0.0175,  0.0424,  ...,  0.0582, -0.0534, -0.0267],
        [-0.0547,  0.0513, -0.0314,  ..., -0.0085,  0.0217,  0.0392]],
       device='cuda:0')
tensor([ 4.1970e-02,  9.0202e-03,  5.3032e-02,  4.1826e-02, -4.6916e-02,
        -1.9413e-02,  1.0657e-02, -4.7123e-02, -2.1643e-02, -3.1226e-02,
        -7.3495e-03,  4.8432e-02,  5.8608e-02, -3.2333e-03,  5.9364e-03,
        -4.9755e-02,  1.5146e-02,  5.4507e-02,  5.4559e-02, -3.7071e-02,
         1.0801e-02, -1.3441e-02,  5.2097e-02,  4.7543e-02,  1.8491e-02,
        -5.9882e-02,  5.3412e-02,  2.3588e-02,  2.4856e-02,  4.5730e-02,
         2.7711e-02, -1.3152e-02,  3.2699e-02, -2.5682e-04,  2.8087e-02,
        -1.7377e-02,  5.2896e-0

In [26]:
import torch.optim as optim

criterion = nn.MSELoss()
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.RMSprop(net.parameters())

In [27]:
# For early stopping, use blocks of epochs (say size 3), and a window (say size 10) of how far to look in the past.
# Then, compare the average loss on the current block to the block in the past. Another idea would be compare the variances.
# If the fraction is not sufficiently small, halt.

batch_multiplier = 20

max_epochs = 300
# Should be stated as a fraction of the previous error
max_frac = 0.999
window = 10 # How many epochs in the past to compare to
block = 5 # What size block of epochs to use
learnFreq = 10 # How often to compute evaluation loss (on training and validation sets)

epoch_training_loss = []
epoch_val_loss = []

for epoch in range(max_epochs):
    print("\nOn epoch: " + str(epoch))
    
    #running_loss = 0
    
    count = 0
    
    net.train()
    for inputs, labels in training_generator:
        
        inputs, labels = inputs.to(device).float(), labels.to(device).float()
        
        if count == 0:
            optimizer.step()
            # zero the parameter gradients
            optimizer.zero_grad()
            count = batch_multiplier

        # forward + backward + optimize
        outputs = net(inputs).to(device)
        loss = criterion(torch.squeeze(outputs), labels) / batch_multiplier
        loss.backward()
        #optimizer.step()
        
        count -=1
        
        loss = loss.detach()
        inputs = inputs.detach()
        outputs = outputs.detach()
        
        #print('Outputs', torch.squeeze(outputs))
        #print('Batch size: ' + str(len(inputs)))
        # Multiply by the batch size and batch_multiplier (because earlier divided)
        #running_loss += loss.item() * len(inputs) * batch_multiplier
        #print('Batch average loss ' + str(loss.item()))
    #training_loss = running_loss / training_set.__len__()
    #print("Epoch training loss: " + str(training_loss))
    #epoch_training_loss.append(training_loss)
    if epoch % learnFreq == 0 or epoch == (max_epochs - 1):
        with torch.no_grad():
            net.eval()
            for i, dataset in enumerate([training_set, validation_set]):
                if i == 0:
                    print('\nEpoch training results ')
                else:
                    print('\nEpoch validation results ')
                MSE = 0
                avgAbsDev = 0
                for sample in dataset.ids:
                    X, y = dataset.__getitem__(sample)
                    X = X.unsqueeze(0) # Add fake batch dimension
                    X = X.to(device).float()
        
                    yHat = net(X).to(device).to('cpu')
                    MSE += (yHat - y)**2
                    avgAbsDev += np.abs(yHat - y)
        
                #print('Sample: ' + str(sample))
                #print('y: ' + str(y))
                #print('yHat: ' + str(yHat))
                MSE /= len(dataset.ids)
                avgAbsDev /= len(dataset.ids)
                print('RMSE on dataset: ' + str(np.sqrt(MSE)))
                print('Avg Abs Dev on dataset: ' + str(avgAbsDev))
                if i == 0:
                    epoch_training_loss.append(MSE)
                else:
                    epoch_val_loss.append(MSE)
        
#         val_loss = 0
#         net.eval()
#         for ind in validation_set.ids:
#             X, y = validation_set.__getitem__(ind)
#             X = X.unsqueeze(0).to(device) # Add fake batch dimension
#             yHat = net(X.float()).to(device).to('cpu')
#             val_loss += (y - yHat)**2
#         val_loss /= validation_set.__len__()
#         epoch_val_loss.append(val_loss)
#     print("Epoch validation loss: " + str(val_loss))
        
#     if len(epoch_val_loss) >= window + block + 1:
#         latestBlock = np.mean(epoch_val_loss[-1:-1-block])
#         earlierBlock = np.mean(epoch_val_loss[-1-window:-1-window-block])
        
#         # latestBlock must be sufficiently smaller than earlierBlock
#         if latestBlock / earlierBlock > max_frac:
#             print('Converged')
#             pdb.set_trace()
#             break
             
print('Finished Training')


On epoch: 0

Epoch training results 
RMSE on dataset: tensor([[292.6297]])
Avg Abs Dev on dataset: tensor([[254.5506]])

Epoch validation results 
RMSE on dataset: tensor([[286.0395]])
Avg Abs Dev on dataset: tensor([[250.7449]])

On epoch: 1

On epoch: 2

On epoch: 3

On epoch: 4

On epoch: 5

On epoch: 6

On epoch: 7

On epoch: 8

On epoch: 9

On epoch: 10

Epoch training results 
RMSE on dataset: tensor([[243.8782]])
Avg Abs Dev on dataset: tensor([[199.4862]])

Epoch validation results 
RMSE on dataset: tensor([[221.2398]])
Avg Abs Dev on dataset: tensor([[185.7750]])

On epoch: 11

On epoch: 12

On epoch: 13

On epoch: 14

On epoch: 15

On epoch: 16

On epoch: 17

On epoch: 18

On epoch: 19

On epoch: 20

Epoch training results 
RMSE on dataset: tensor([[126.3075]])
Avg Abs Dev on dataset: tensor([[104.9573]])

Epoch validation results 
RMSE on dataset: tensor([[117.7973]])
Avg Abs Dev on dataset: tensor([[101.0599]])

On epoch: 21

On epoch: 22

On epoch: 23

On epoch: 24

On ep


On epoch: 225

On epoch: 226

On epoch: 227

On epoch: 228

On epoch: 229

On epoch: 230

Epoch training results 
RMSE on dataset: tensor([[35.9609]])
Avg Abs Dev on dataset: tensor([[32.2699]])

Epoch validation results 
RMSE on dataset: tensor([[33.7302]])
Avg Abs Dev on dataset: tensor([[29.3638]])

On epoch: 231

On epoch: 232

On epoch: 233

On epoch: 234

On epoch: 235

On epoch: 236

On epoch: 237

On epoch: 238

On epoch: 239

On epoch: 240

Epoch training results 
RMSE on dataset: tensor([[39.6926]])
Avg Abs Dev on dataset: tensor([[29.4960]])

Epoch validation results 
RMSE on dataset: tensor([[39.5574]])
Avg Abs Dev on dataset: tensor([[30.2091]])

On epoch: 241

On epoch: 242

On epoch: 243

On epoch: 244

On epoch: 245

On epoch: 246

On epoch: 247

On epoch: 248

On epoch: 249

On epoch: 250

Epoch training results 
RMSE on dataset: tensor([[31.7675]])
Avg Abs Dev on dataset: tensor([[27.7269]])

Epoch validation results 
RMSE on dataset: tensor([[34.6401]])
Avg Abs Dev 

In [28]:
with torch.no_grad():
    net.eval()
    for i, dataset in enumerate([training_set, validation_set]):
        if i == 0:
            print('\nTraining results ')
        else:
            print('\nValidation results ')
        MSE = 0
        avgAbsDev = 0
        for sample in dataset.ids:
            X, y = dataset.__getitem__(sample)
            X = X.unsqueeze(0) # Add fake batch dimension
            X = X.to(device).float()

            yHat = net(X).to(device).to('cpu')
            MSE += (yHat - y)**2
            avgAbsDev += np.abs(yHat - y)

        #print('Sample: ' + str(sample))
        #print('y: ' + str(y))
        #print('yHat: ' + str(yHat))
        MSE /= len(dataset.ids)
        avgAbsDev /= len(dataset.ids)
        print('RMSE on dataset: ' + str(np.sqrt(MSE)))
        print('Avg Abs Dev on dataset: ' + str(avgAbsDev))
        
# with torch.no_grad():
#     net.eval()
#     for i, dataset in enumerate([training_set, validation_set, test_set]):
#         if i == 0:
#             print('\nTraining Set Results')
#         elif i == 1:
#             print('\nValidation Set Results')
#         else:
#             print('\nTest set Results')
#         MSE = 0
#         avgAbsDev = 0
#         for sample in dataset.ids:
#             X, y = dataset.__getitem__(sample)
#             X = X.unsqueeze(0) # Add fake batch dimension
#             X = X.to(device).float()
        
#             yHat = net(X).to(device).to('cpu')
#             MSE += (yHat - y)**2
#             avgAbsDev += np.abs(yHat - y)
        
#             #print('Sample: ' + str(sample))
#             #print('y: ' + str(y))
#             #print('yHat: ' + str(yHat))
#         MSE /= len(dataset.ids)
#         avgAbsDev /= len(dataset.ids)
#         print('RMSE on dataset: ' + str(np.sqrt(MSE)))
#         print('Avg Abs Dev on dataset: ' + str(avgAbsDev))


Training results 
RMSE on dataset: tensor([[34.9959]])
Avg Abs Dev on dataset: tensor([[30.1083]])

Validation results 
RMSE on dataset: tensor([[37.3590]])
Avg Abs Dev on dataset: tensor([[31.7942]])


In [34]:
%matplotlib qt5
ax = plt.figure()
plt.plot(range(0, max_epochs + learnFreq, learnFreq), epoch_training_loss, range(0, max_epochs + learnFreq, learnFreq), epoch_val_loss)
plt.legend(['Training', 'Validation'])
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.xticks(range(0,max_epochs+10,10))

([<matplotlib.axis.XTick at 0x7f4dc41331c0>,
  <matplotlib.axis.XTick at 0x7f4dc4133190>,
  <matplotlib.axis.XTick at 0x7f4dc4157f40>,
  <matplotlib.axis.XTick at 0x7f4dc3fbc340>,
  <matplotlib.axis.XTick at 0x7f4dc3fbc850>,
  <matplotlib.axis.XTick at 0x7f4dc3fb4850>,
  <matplotlib.axis.XTick at 0x7f4dc3fbccd0>,
  <matplotlib.axis.XTick at 0x7f4dc3fc1220>,
  <matplotlib.axis.XTick at 0x7f4dc3fc1730>,
  <matplotlib.axis.XTick at 0x7f4dc3fc1c70>,
  <matplotlib.axis.XTick at 0x7f4dc3fc61c0>,
  <matplotlib.axis.XTick at 0x7f4dc3fc66d0>,
  <matplotlib.axis.XTick at 0x7f4dc3fc6be0>,
  <matplotlib.axis.XTick at 0x7f4dc3fcd130>,
  <matplotlib.axis.XTick at 0x7f4dc3fc6940>,
  <matplotlib.axis.XTick at 0x7f4dc3fc19a0>,
  <matplotlib.axis.XTick at 0x7f4dc3fbcb80>,
  <matplotlib.axis.XTick at 0x7f4dc3fcd820>,
  <matplotlib.axis.XTick at 0x7f4dc3fcdd30>,
  <matplotlib.axis.XTick at 0x7f4dc3f54280>,
  <matplotlib.axis.XTick at 0x7f4dc3f54790>,
  <matplotlib.axis.XTick at 0x7f4dc3f54ca0>,
  <matplot

In [30]:
torch.cuda.memory_allocated()

570999808

In [31]:
torch.cuda.max_memory_allocated()

2490945536

In [32]:
torch.cuda.memory_reserved()

1505755136

In [33]:
torch.cuda.max_memory_reserved()

3214934016