In [1]:
# Torch imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.cuda as cuda
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from collections import OrderedDict

dtype = torch.FloatTensor
# Cuda active?
if cuda.is_available():
    dtype = torch.cuda.FloatTensor
    print("GPU: {}".format(cuda.get_device_name(0)))
else:
    print("Cuda unavailable.")

GPU: GeForce GTX 1080 Ti


In [2]:
# Load data
import csv, numpy as np
import os
import OpenEXR as exr, Imath
from PIL import Image
from skimage.transform import resize
####################
# Helper functions #
####################

def readEXRPatch(filepath, channelrange, i, j, shape):
    """Helper function for reading .exr files from the KAIST dataset.
    """
    pt = Imath.PixelType(Imath.PixelType.HALF)
    file = exr.InputFile(filepath)
#     print(file.header())
    channels = ["w{}nm".format(wavelength) for wavelength in channelrange]
    imgstrs = file.channels(channels, pt, i, i + shape[0] - 1)
    out = np.zeros(shape + (len(channelrange),), dtype=np.float32)
    for i, imgstr in enumerate(imgstrs):
        red = np.frombuffer(imgstr, dtype=np.float16)
        red = np.reshape(red, (shape[0], -1))
        out[:,:,i] = red[:,j:j+shape[1]]
    return out

def readPNGFiles(filedir, filename, channelrange, minwavelength, increment, i, j, shape):
    """Helper function for reading patches from the set of png files from the CAVE dataset.
    """
    indexes = [int((wavelength-minwavelength)/increment + 1) for wavelength in channelrange]
    patch = np.zeros(shape + (len(channelrange),))
    for i, index in enumerate(indexes):
        img = Image.open(os.path.join(filedir, "{}_{:02}.png".format(filename, index)))
        # Divide by max val of np.uint16 to normalize image
        x = np.array(img, dtype=np.float32)
        x = x[i:i+shape[0], j:j+shape[1]]
        patch[:,:,i] = x/np.iinfo(np.uint16).max
    return patch

###########
# Dataset #
###########
# For reference, the csv field names: 
# fieldnames = ["type", "dir", "name", "row", "col", "side", "scale", "flip"]

class HyperspectralDataset(Dataset):
    def __init__(self, csvfile, minwavelength, maxwavelength, transform=None):
        """Open and load the lines of the csvfile."""
        self.minwavelength = minwavelength
        self.maxwavelength = maxwavelength
        self.datahandles = []
        with open(csvfile, "r") as f:
            reader = csv.DictReader(f)
            for row in reader:
                self.datahandles.append(row)
    
    def __len__(self):
        return len(self.datahandles)

    def __getitem__(self, idx):
        # Unpack data entry:
        entry = self.datahandles[idx]
        filetype = entry["type"]
        entry["side"] = int(entry["side"])
        entry["scale"] = float(entry["scale"])
        entry["row"] = int(entry["row"])
        entry["col"] = int(entry["col"])
        if filetype == "exr":
#             print(entry)
            # Read reflectance file
            filepath = os.path.join(entry["dir"], entry["name"])
            patch = readEXRPatch(filepath, range(self.minwavelength, self.maxwavelength+1, 10),
                                 entry["row"], entry["col"], 
                                 (int(entry["side"]/entry["scale"]),
                                  int(entry["side"]/entry["scale"])
                                 )
                                )
            
        elif filetype == "png":
            # Read all the png files
            print("duh")
            patch = readPNGFiles(entry["dir"], entry["name"], range(self.minwavelength, self.maxwavelength+1, 10),
                                 400, 10, 
                                 entry["row"], entry["col"],
                                 (int(entry["side"]/entry["scale"]),
                                  int(entry["side"]/entry["scale"])
                                 )                                
                                )
        else:
            raise ValueError("Invalid data entry at row {} - Cannot load data of type '{}'.".format(idx, dtype))
            
        # Resizing
        anti_aliasing = True if entry["scale"] < 1 else False
        patch = resize(patch, (entry["side"], entry["side"], patch.shape[2]), mode="constant")
            
        # Flip if necessary:
        if entry["flip"]:
            patch = np.flip(patch, axis = 1) # Horizontal flip
        # Numpy images are (H, W, C)
        # But torch needs (C, H, W)
        patch = patch.transpose(2, 0, 1)
        patch = torch.from_numpy(patch.copy()).type(dtype)

        return patch





In [None]:
# Build model
# Hyperparameters
height = 96
width = 96
inchannels = 29
outchannels = 64
filtersize = 3
nlayers = 11
samepad = int((filtersize-1)/2)

###########
# Encoder #
###########
encoderLayers = [nn.Conv2d(inchannels, outchannels, filtersize, padding=samepad)]
for i in range(nlayers):
    encoderLayers += [nn.ReLU(), nn.Conv2d(outchannels, outchannels, filtersize, padding=samepad)]
Encoder = nn.Sequential(*encoderLayers)

###########
# Decoder #
########### 
decoderLayers = [] 
for i in range(nlayers-1):
    decoderLayers += [nn.ReLU(), nn.Conv2d(outchannels, outchannels, filtersize, padding=samepad)]
decoderLayers += [nn.ReLU(), nn.Conv2d(outchannels, inchannels, filtersize, padding=samepad), nn.ReLU()]
Decoder = nn.Sequential(*decoderLayers)

###############
# Autoencoder #
###############
Autoencoder = nn.Sequential(OrderedDict([
    ("Encoder", Encoder),
    ("Decoder", Decoder)])
)

# Activate Cuda
if cuda.is_available():
    Autoencoder.cuda()
# Extract layer weights for regularization
weights = []
for layer in Autoencoder[0]:
    try:
        weights.append(layer.weight)
    except:
        pass
for layer in Autoencoder[1]:
    try:
        weights.append(layer.weight)
    except:
        pass


In [None]:
# Train Model
lam = 1e-8 # Weight decay parameter for L2 regularization
learning_rate = 1e-4
num_epochs = 60
batch_size = 64

########
# Data #
########
train_data = HyperspectralDataset("data/kaist_set/kaist_train.csv", 420, 700)
# dataset[286]
print(len(train_data))
train_data = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0)

# Optimizer
optimizer = optim.Adam(Autoencoder.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 40], gamma=0.1)

# Loss
loss = nn.MSELoss()

# Checkpointing
best_loss = torch.FloatTensor([float('inf')])

for epoch in range(num_epochs):
    print("epoch: {}".format(epoch))
    scheduler.step()
    for it, data in enumerate(train_data):
        
#         print(data)
        data = Variable(data, requires_grad=False)
        # New batch
        scheduler.optimizer.zero_grad()
        outimage = Autoencoder(data)
        out = 0.5*loss(outimage, data)
        for weight in weights:
            out += lam*weight.norm()
        out.backward()
        scheduler.optimizer.step()
        if (it % 10):
            print("\titeration: {}\tloss: {}".format(it, out))
        # Checkpointing
            # Get bool not ByteTensor
    is_best = bool(acc.numpy() > best_accuracy.numpy())
    # Get greater Tensor to keep track best acc
    best_accuracy = torch.FloatTensor(max(acc.numpy(), best_accuracy.numpy()))
    # Save checkpoint if is a new best
    save_checkpoint({
        'epoch': start_epoch + epoch + 1,
        'state_dict': model.state_dict(),
        'best_accuracy': best_accuracy
    }, is_best)

10080
epoch: 0
	iteration: 0
	iteration: 10
	iteration: 20
	iteration: 30
	iteration: 40
	iteration: 50
	iteration: 60
	iteration: 70
	iteration: 80
	iteration: 90
	iteration: 100
	iteration: 110
	iteration: 120
	iteration: 130
	iteration: 140
	iteration: 150
epoch: 1
	iteration: 0
	iteration: 10
	iteration: 20
	iteration: 30
	iteration: 40
	iteration: 50
	iteration: 60
	iteration: 70


In [None]:
def save_checkpoint(state, is_best, filename='/output/checkpoint.pth.tar'):
    """Save checkpoint if a new best is achieved"""
    if is_best:
        print ("=> Saving a new best")
        torch.save(state, filename)  # save checkpoint
    else:
        print ("=> Validation Accuracy did not improve")
        


In [None]:
a = Variable(cuda.FloatTensor([[1,2],[3,4]]))
b = Variable(cuda.FloatTensor([[4, 3], [2, 1]]))
loss = 0.5*LossFn(a, b)
print(type(loss))
weights = []
Autoencoder.cuda()
for layer in Autoencoder[0]:
    try:
        weights.append(layer.weight)
    except:
        pass
for layer in Autoencoder[1]:
    try:
        weights.append(layer.weight)
    except:
        pass
for weight in weights:
    loss += weight.norm()
print(loss)
loss.backward()
print(id(weights[0]))
print(id(Autoencoder[0][0].weight))


In [None]:
layer = nn.Conv2d(3, 2, 3, 1)
layer2 = nn.ReLU()
# print(list(layer.parameters())[0])
# print(list(layer2.parameters()))
for i in Autoencoder[0]:
    if 

In [None]:
# Organize output

In [None]:
cuda

In [None]:
epoch

In [None]:
red = readEXRPatch("data/KAIST/scene01_reflectance.exr", range(420, 721, 10), 1000, 1000, (96, 96))
red[:,:,0]

In [None]:
from PIL import Image
import os

filedir = "data/CAVE/"
filename="balloons_ms"
i = 3
img = Image.open(os.path.join(filedir, filename, filename, "{}_{:02}.png".format(filename, i)))
x = readPNGFiles(filedir, filename, range(400, 701, 10), 400, 10, 30, 30, (96, 96))

In [None]:
x.shape

In [None]:
x.shape

In [None]:
y = x/np.iinfo(np.uint16).max

In [None]:
2167/65535


In [None]:
z = np.array([[1, 2], [3, 4]])

In [None]:
np.flip(z, axis=1)

In [None]:
int(3)/int(1)


In [None]:
import torchvision

In [None]:
torchvision.transforms.Resize(x)

In [None]:
x.shape

In [None]:
help(Image.fromarray)