In [29]:
import os # To walk through the directories.
import numpy as np # To manipulate arrays.
import matplotlib.pyplot as plt

In [32]:
import cv2 # OpenCV to read images.
from patchify import patchify # To divide the images into smaller patches.
from PIL import Image # To perform cropping or resizing operations in an image.

# Load every satellite image, crop it to a multiple of the patch size and cut it into
# non-overlapping square RGB patches collected in `images_list`.
images_path = "Dataset/MassachusettsBuildings/Images"

images_list = []
patch_size = 256 # Side length, in pixels, of the square patches.
# os.listdir returns entries in arbitrary order; sorting makes the patch order
# reproducible across runs and platforms (patches are later paired by position).
image_files = sorted(os.listdir(images_path))
for image_file in image_files:
    image_file_path = os.path.join(images_path, image_file)
    image = cv2.imread(image_file_path, cv2.IMREAD_COLOR)  # BGR array, or None if the file cannot be decoded.
    if image is None:
        # Fail with a readable message instead of a cryptic cv2 error in cvtColor.
        raise ValueError(f"Could not read '{image_file_path}' as an image.")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # BGR -> RGB.

    # Crop from the top-left corner to the largest size divisible by the patch size.
    cropped_height = (image.shape[0] // patch_size) * patch_size
    cropped_width = (image.shape[1] // patch_size) * patch_size
    image = np.ascontiguousarray(image[:cropped_height, :cropped_width])

    # step == patch_size, so the patches do not overlap.
    image_patches = patchify(image, (patch_size, patch_size, 3), step=patch_size)
    # patchify returns (rows, cols, 1, patch_size, patch_size, 3); flatten the grid
    # row by row and drop the singleton axis.
    images_list.extend(image_patches.reshape(-1, patch_size, patch_size, 3))

images_list = np.array(images_list) # Stack the patches into one (N, patch_size, patch_size, 3) array.
print(f"Dimension of image data: {images_list.shape}.")

(3750, 256, 256, 3)


In [33]:
# Load every target mask, crop it to a multiple of the patch size and cut it into
# non-overlapping square 3-channel patches collected in `targets_list`.
targets_path = "Dataset/MassachusettsBuildings/Targets"

targets_list = []
patch_size = 256 # Side length, in pixels, of the square patches.
# os.listdir returns entries in arbitrary order; sorting makes the patch order
# reproducible across runs and platforms (patches are later paired by position).
target_files = sorted(os.listdir(targets_path))
for target_file in target_files:
    target_file_path = os.path.join(targets_path, target_file)
    target = cv2.imread(target_file_path, cv2.IMREAD_COLOR)  # BGR array, or None if the file cannot be decoded.
    if target is None:
        # Fail with a readable message instead of a cryptic cv2 error in cvtColor.
        raise ValueError(f"Could not read '{target_file_path}' as an image.")
    target = cv2.cvtColor(target, cv2.COLOR_BGR2RGB) # BGR -> RGB.

    # Crop from the top-left corner to the largest size divisible by the patch size.
    cropped_height = (target.shape[0] // patch_size) * patch_size
    cropped_width = (target.shape[1] // patch_size) * patch_size
    target = np.ascontiguousarray(target[:cropped_height, :cropped_width])

    # step == patch_size, so the patches do not overlap.
    target_patches = patchify(target, (patch_size, patch_size, 3), step=patch_size)
    # patchify returns (rows, cols, 1, patch_size, patch_size, 3); flatten the grid
    # row by row and drop the singleton axis.
    targets_list.extend(target_patches.reshape(-1, patch_size, patch_size, 3))

targets_list = np.array(targets_list) # Stack the patches into one (N, patch_size, patch_size, 3) array.
print(f"Dimension of target data: {targets_list.shape}.")

(3750, 256, 256, 3)


In [34]:
# Reduce the 3-channel masks to a single binary channel: pixel value 255 -> 1, anything lower -> 0.
# Only the first channel is used; the trailing axis is kept so the result is (N, H, W, 1).
# The shape guard makes this cell safe to re-run: on an already converted array the
# division by 255 would silently turn every label into 0.
if targets_list.ndim == 4 and targets_list.shape[-1] == 3:
    # Integer division gives the same 0/1 result as dividing and truncating,
    # without allocating a float64 copy of the whole dataset.
    targets_list = (targets_list[..., :1] // 255).astype("int8")
print(f"Dimension of target labels: {targets_list.shape}.")

(3750, 256, 256, 1)


In [35]:
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# Hold out 10% of the patches for evaluation; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(images_list, targets_list, test_size=0.1, random_state=375)

# Convert to tensors: float32 images and int8 masks. The arrays keep the layout produced
# by the loading cells (channel-last), so consumers must permute before feeding a conv net.
x_train = torch.from_numpy(x_train.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.int8))
x_test = torch.from_numpy(x_test.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.int8))

trainset = TensorDataset(x_train, y_train) # (image, mask) pairs for training.
testset = TensorDataset(x_test, y_test) # (image, mask) pairs for evaluation.

print(f"Size of trainset after spliting the dateset: {len(trainset)}.")
print(f"Size of testset after spliting the dateset: {len(testset)}.")

batch_size = 32 # Hyperparameter.
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4) # shuffle=True to reshuffle at every epoch.
# Evaluation does not benefit from shuffling; a fixed order keeps test runs deterministic.
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

print(f"Size of train loader (with batch size {batch_size}): {len(train_loader)}.") # ceil(len(trainset)/batch_size).
print(f"Size of test loader (with batch size {batch_size}): {len(test_loader)}.") # ceil(len(testset)/batch_size).

Size of trainset after spliting the dateset: 3375.
Size of testset after spliting the dateset: 375.
Size of train loader (with batch size 32): 106.
Size of test loader (with batch size 32): 12.


In [36]:
import torch.nn as nn
import torch.optim as optim
from UNET_Model.unet import Unet

# Hyperparameters.
epochs = 10
lr = 0.001

# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print (f"Device being used: {device}")

# U-Net taking 3-channel inputs and producing a single output channel, placed on the chosen device.
model = Unet(in_channels=3, n_classes=1, is_batchnorm=True).to(device=device)

# Binary cross-entropy loss, also placed on the chosen device.
# NOTE(review): BCELoss expects probabilities in [0, 1] - confirm the model ends with a sigmoid.
criterion = nn.BCELoss().to(device=device)

# Adam over all model parameters with the learning rate chosen above.
optimizer = optim.Adam(model.parameters(), lr=lr)

cuda


In [None]:
import copy  # Required for the weight snapshots below; it is not imported anywhere else in the notebook.

# Train the model for `epochs` passes over `train_loader`, tracking the mean loss and the
# pixel accuracy per epoch and keeping a snapshot of the weights of the best epoch.
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

for epoch in range(epochs):
    model.train()  # Training mode for layers such as batch norm.
    running_loss = 0.0
    running_corrects = 0
    running_pixels = 0
    for i, (images, targets) in enumerate(train_loader):
        # Batches arrive channel-last (B, H, W, C); PyTorch conv layers expect (B, C, H, W).
        images = images.permute(0, 3, 1, 2).to(device)
        # BCELoss needs floating-point targets with the same shape as the predictions.
        targets = targets.permute(0, 3, 1, 2).float().to(device)

        # Forward pass.
        # Assumes the model returns (B, 1, H, W) probabilities - TODO confirm against the Unet definition.
        predicts = model(images)
        loss = criterion(predicts, targets)

        # Backward and optimize.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Statistics: threshold the probabilities at 0.5 to get a binary mask.
        # (An argmax over a single output channel would always return 0.)
        predicted = (predicts.detach() > 0.5).float()
        running_loss += loss.item() * images.size(0)
        running_corrects += (predicted == targets).sum().item()
        running_pixels += targets.numel()

        if (i+1) % 10 == 0:
            print (f'Epoch: {epoch+1}/{epochs}, Step: {i+1}/{len(train_loader)}, Loss: {loss.item():.4f}')

    # Per-epoch summary: sample-weighted mean loss and fraction of correctly classified pixels.
    epoch_loss = running_loss / max(len(train_loader.dataset), 1)
    epoch_acc = running_corrects / max(running_pixels, 1)
    print(f'Epoch: {epoch+1}/{epochs}, Mean loss: {epoch_loss:.4f}, Pixel accuracy: {epoch_acc:.4f}')

    # Keep the weights of the best epoch so far (judged on training pixel accuracy,
    # since no validation pass is run inside this loop).
    if epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

In [None]:
# Persist the current model weights (state dict only) next to the notebook.
print('Finished Training')
PATH = './unet.pth'
torch.save(model.state_dict(), PATH)  # `model` is the trained network; `Model` was never defined.


In [None]:
# Evaluate the model on `test_loader`, counting correctly classified pixels overall and
# per class (index 0 = mask value 0, index 1 = mask value 1).
model.eval()  # Inference mode for layers such as batch norm.
with torch.no_grad():
    n_correct = 0
    n_samples = 0  # Counts pixels, since every pixel is one binary prediction.
    n_class_correct = [0, 0]
    n_class_samples = [0, 0]
    for images, labels in test_loader:
        # Batches arrive channel-last (B, H, W, C); PyTorch conv layers expect (B, C, H, W).
        images = images.permute(0, 3, 1, 2).to(device)
        labels = labels.permute(0, 3, 1, 2).to(device)
        # Assumes the model returns (B, 1, H, W) probabilities - TODO confirm against the Unet definition.
        outputs = model(images)
        # Threshold at 0.5 to get a binary mask; an argmax over a single channel would always return 0.
        predicted = (outputs > 0.5).to(labels.dtype)
        n_samples += labels.numel()
        n_correct += (predicted == labels).sum().item()

        # Vectorised per-class bookkeeping; also handles a last batch smaller than batch_size.
        for class_index in range(len(n_class_samples)):
            class_mask = labels == class_index
            n_class_samples[class_index] += class_mask.sum().item()
            n_class_correct[class_index] += (predicted[class_mask] == class_index).sum().item()

In [None]:
# Report the overall accuracy followed by the accuracy of each class.
if n_samples:
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')
else:
    print('Accuracy of the network: n/a (no samples)')

# Class names indexed by label value.
# NOTE(review): presumably label 1 marks building pixels (white in the masks) - confirm.
classes = ('background', 'building')
for i, class_name in enumerate(classes):
    if n_class_samples[i] == 0:
        # Avoid a ZeroDivisionError for a class that never occurs in the evaluated data.
        print(f'Accuracy of {class_name}: n/a (no samples)')
        continue
    acc = 100.0 * n_class_correct[i] / n_class_samples[i]
    print(f'Accuracy of {class_name}: {acc} %')