In [16]:
import torch
import torch.nn as nn
import time
import torch.nn.functional as F
from PIL import Image
import torchvision, os
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
import torchvision.datasets as dset
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau
import copy

In [22]:
## Data-batching
data_dir = "data/"

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [19]:
class VGG16(nn.Module):
    def __init__(self, out_dim, *args, **kwargs):
        super(VGG16, self).__init__(*args, **kwargs)
        vgg16 = torchvision.models.vgg16(pretrained=True)
        self._up_kwargs = {'mode': 'bilinear', 'align_corners': True}
        self.features = vgg16.features
        self.conv1x1 = nn.Conv2d(512, out_dim, 1,1,0)
        self.deconv = nn.ConvTranspose2d(512, out_dim, kernel_size=32, stride=32, padding=0, output_padding=0)
    def forward(self, x):
        imsize = x.size()[2:]
        x =self.features(x)
        x = self.conv1x1(x)
        x = F.upsample(x, imsize, **self._up_kwargs)
        return x



In [20]:
vgg = VGG16(21)

In [29]:
im = Image.open('data/indoorCVPR_09annotations/Annotations/airport_inside/airport_inside_0001.xml')
im_matrix = np.array(im)
print(im_matrix[0][0])

UnidentifiedImageError: cannot identify image file 'data/indoorCVPR_09annotations/Annotations/airport_inside/airport_inside_0001.xml'

In [13]:
# import shutil, os 
# with open('data/TrainImages.txt') as ft:
#     traincontent = ft.readlines()

# trainlist = [x.strip() for x in traincontent]
# with open('data/TestImages.txt') as f:
#     testcontent = f.readlines()
# testlist = [x.strip() for x in testcontent]
# source_directory = 'data/indoorCVPR_09/Images/'
# dest_directory_test = 'data/test/'
# dest_directory_train = 'data/train/'


# listdir = os. listdir(source_directory)
# for subdir in listdir:
#     if not os.path.exists(dest_directory_train+subdir):
#         os.makedirs(dest_directory_train+subdir)
# for subdir in listdir:
#     if not os.path.exists(dest_directory_test+subdir):
#         os.makedirs(dest_directory_test+subdir)
# for filename in trainlist:
#     shutil.move(source_directory+filename,dest_directory_train+filename)
    
# for filename in testlist:
#     shutil.move(source_directory+filename,dest_directory_test+filename)

In [26]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    print(outputs.shape)
                    print(labels.shape)
                    loss = criterion(outputs[0], labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model



In [28]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg = vgg.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_conv = optim.SGD(vgg.conv1x1.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = StepLR(optimizer_conv, step_size=7, gamma=0.1)
vgg = train_model(vgg, criterion, optimizer_conv,exp_lr_scheduler, num_epochs=5)

plt.ioff()
plt.show()

Epoch 0/4
----------
torch.Size([4, 21, 224, 224])
torch.Size([4])


RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1

In [10]:

palette = torch.tensor([2 ** 10 - 1, 2 ** 15 - 1,2 ** 21 - 1])


for filename in os.listdir(directory):
    input_image = Image.open(os.path.join(directory, filename))
    input_image = input_image.convert("RGB")
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

    # move the input and model to GPU for speed if available
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        model.to('cuda')

    
    with torch.no_grad():
        output = vgg(input_batch)

    output_predictions = output.argmax(1)
    colors = torch.as_tensor([i for i in range(21)])[:, None] * palette
    colors = (colors % 255).numpy().astype("uint8")


    # plot the semantic segmentation predictions of 21 classes in each color
    r = Image.fromarray(output_predictions[-1,:].byte().cpu().numpy()).resize(input_image.size)
    r.putpalette(colors)

    
    plt.figure()
    plt.imshow(input_image)
    plt.figure()
    plt.imshow(r)

IsADirectoryError: [Errno 21] Is a directory: 'data/indoorCVPR_09/Images/laundromat'