In [13]:
import os

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from natsort import natsorted
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset
from torchsummary import summary
from torchvision import models

In [43]:
class VGG(nn.Module):
    """VGG-16 style convolutional backbone with 1x1 side branches.

    The network defines five stacks of 3x3 convolutions separated by 2x2
    max-pooling, plus 1x1 "side" convolutions attached to every convolution
    of stage 3 and stage 5. ``forward`` currently returns only the fused
    stage-3 side branch after ``conv3_1_3``.

    The fully connected layers (``fc6``-``fc8``) and the layers
    ``conv3_1_4`` / ``conv3_1_5`` are registered as parameters but are not
    used by ``forward``.
    """

    def __init__(self):
        super(VGG, self).__init__()

        # Stage 1
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # Stage 2
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        # Stage 3
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        # Stage 4
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        # Stage 5
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        # Side branches on stage 3.
        # NOTE(review): kernel_size=1 combined with padding=1 enlarges the
        # feature map by 2 pixels per layer (e.g. 112 -> 114 -> 116). This is
        # probably unintended (padding=0 would keep the size), but it is left
        # unchanged here because fixing it alters the output shape.
        self.conv3_1_1 = nn.Conv2d(256, 64, kernel_size=1, padding=1)
        self.conv3_2_1 = nn.Conv2d(256, 64, kernel_size=1, padding=1)
        self.conv3_3_1 = nn.Conv2d(256, 64, kernel_size=1, padding=1)
        self.conv3_1_3 = nn.Conv2d(64, 12, kernel_size=1, padding=1)
        self.conv3_1_4 = nn.Conv2d(12, 1, kernel_size=1, padding=1)
        self.conv3_1_5 = nn.Conv2d(1, 1, kernel_size=1, padding=1)

        # Side branches on stage 5 (same kernel/padding caveat as above).
        self.conv5_1_1 = nn.Conv2d(512, 64, kernel_size=1, padding=1)
        self.conv5_2_1 = nn.Conv2d(512, 64, kernel_size=1, padding=1)
        self.conv5_3_1 = nn.Conv2d(512, 64, kernel_size=1, padding=1)

        self.pool = nn.MaxPool2d(2, 2)

        # Classifier head of the original VGG; not used by forward().
        self.fc6 = nn.Linear(7*7*512, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, 1000)

    def forward(self, x, training=True):
        """Run the backbone and return the fused stage-3 side output.

        Args:
            x: input image tensor with 3 channels.
            training: accepted for backward compatibility; it is not used by
                the current forward pass (it only fed the disabled dropout
                of the classifier head).

        Returns:
            ``relu(conv3_1_3(s1 + s2 + s3))`` where ``s1..s3`` are the ReLU
            activated 1x1 side convolutions of the three stage-3 feature
            maps. The result has 12 channels.
        """
        x_1_1 = F.relu(self.conv1_1(x))
        x_1_2 = F.relu(self.conv1_2(x_1_1))
        x_1_pool = self.pool(x_1_2)

        x_2_1 = F.relu(self.conv2_1(x_1_pool))
        x_2_2 = F.relu(self.conv2_2(x_2_1))
        x_2_pool = self.pool(x_2_2)

        x_3_1 = F.relu(self.conv3_1(x_2_pool))
        x_3_2 = F.relu(self.conv3_2(x_3_1))
        x_3_3 = F.relu(self.conv3_3(x_3_2))
        x_3_pool = self.pool(x_3_3)

        x_4_1 = F.relu(self.conv4_1(x_3_pool))
        x_4_2 = F.relu(self.conv4_2(x_4_1))
        x_4_3 = F.relu(self.conv4_3(x_4_2))
        x_4_pool = self.pool(x_4_3)

        x_5_1 = F.relu(self.conv5_1(x_4_pool))
        x_5_2 = F.relu(self.conv5_2(x_5_1))
        x_5_3 = F.relu(self.conv5_3(x_5_2))
        # NOTE(review): x_5_pool only fed the disabled classifier head; it is
        # still computed so layer hooks (e.g. torchsummary) see the same graph.
        x_5_pool = self.pool(x_5_3)

        # Stage-3 side branch: 1x1 convs on every stage-3 activation, summed.
        x_3_1_1 = F.relu(self.conv3_1_1(x_3_1))
        x_3_2_1 = F.relu(self.conv3_2_1(x_3_2))
        x_3_3_1 = F.relu(self.conv3_3_1(x_3_3))
        x_3_1_2 = x_3_1_1 + x_3_2_1 + x_3_3_1

        # Stage-5 side branch.
        # NOTE(review): x_5_1_2 is computed but not consumed by the returned
        # value yet; presumably it is meant to be fused later - confirm.
        x_5_1_1 = F.relu(self.conv5_1_1(x_5_1))
        x_5_2_1 = F.relu(self.conv5_2_1(x_5_2))
        x_5_3_1 = F.relu(self.conv5_3_1(x_5_3))
        x_5_1_2 = x_5_1_1 + x_5_2_1 + x_5_3_1

        x_3_1_3 = F.relu(self.conv3_1_3(x_3_1_2))

        # Disabled classifier head of the original VGG:
        #x = x.view(-1, 7 * 7 * 512)
        #x = F.relu(self.fc6(x))
        #x = F.dropout(x, 0.5, training=training)
        #x = F.relu(self.fc7(x))
        #x = F.dropout(x, 0.5, training=training)
        #x = self.fc8(x)
        return x_3_1_3

    def predict(self, x):
        """Return the channel-wise softmax of ``forward(x, training=False)``.

        The softmax is taken over dimension -3, i.e. the channel axis for
        both batched (N, C, H, W) and unbatched (C, H, W) outputs. This is
        the same axis the deprecated implicit ``dim`` selected.
        """
        return F.softmax(self.forward(x, training=False), dim=-3)

In [44]:
# Target size (in pixels) every image and mask is resized to.
img_dims_width = 512
img_dims_height = 512

class Dataset(BaseDataset):
    """Paired image / mask dataset read from two directories.

    File names in ``images_dir`` and ``masks_dir`` are naturally sorted and
    paired by position, so the i-th image is matched with the i-th mask.

    Args:
        images_dir: directory containing the input images.
        masks_dir: directory containing the ground-truth masks.
        classes: class names (case-insensitive, taken from ``CLASSES``) to
            extract from the mask; one output channel per class.
        transform: optional callable applied to the image and to the mask
            (separately) after resizing.
        n_classes: accepted for backward compatibility; currently unused.

    Raises:
        FileNotFoundError: if one of the directories does not exist.
        ValueError: if a name in ``classes`` is not in ``CLASSES``.
    """

    CLASSES = ['non-crack', 'crack']

    # NOTE: the list default for `classes` is only read, never mutated.
    def __init__(self, images_dir, masks_dir, classes=['crack'], transform=None, n_classes=2):
        self.train_ids = self._list_files(images_dir)
        self.mask_ids = self._list_files(masks_dir)
        # NOTE(review): the number of images and masks is not checked; if
        # there are fewer masks than images, __getitem__ raises IndexError.

        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.train_ids]
        self.masks_fps = [os.path.join(masks_dir, mask_id) for mask_id in self.mask_ids]

        self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]
        self.transform = transform

        # rows == height, cols == width (the original had these swapped,
        # which was harmless only because both constants are equal).
        self.img_rows = img_dims_height
        self.img_cols = img_dims_width
        self.img_width = img_dims_width
        self.img_height = img_dims_height

    @staticmethod
    def _list_files(directory):
        """Return the naturally sorted file names directly inside `directory`."""
        if not os.path.isdir(directory):
            # os.walk silently yields nothing for a missing directory, which
            # previously surfaced as an opaque StopIteration.
            raise FileNotFoundError("Directory not found: {}".format(directory))
        return natsorted(next(os.walk(directory))[2])

    def __len__(self):
        """Number of images found in ``images_dir``."""
        return len(self.train_ids)

    def __getitem__(self, i):
        """Load, resize and convert the i-th (image, mask) pair to tensors.

        Returns:
            A tuple ``(image, mask)`` of float tensors in channel-first
            layout; ``mask`` has one channel per requested class.

        Raises:
            FileNotFoundError: if the image or mask cannot be read.
        """
        image = cv2.imread(self.images_fps[i])
        if image is None:  # cv2.imread signals failure by returning None
            raise FileNotFoundError("Could not read image: {}".format(self.images_fps[i]))
        mask = cv2.imread(self.masks_fps[i], cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError("Could not read mask: {}".format(self.masks_fps[i]))

        target_size = (self.img_width, self.img_height)
        image = cv2.resize(image, dsize=target_size)
        # Nearest-neighbour keeps the mask's label values intact; the default
        # bilinear interpolation would blend them into non-label values.
        mask = cv2.resize(mask, dsize=target_size, interpolation=cv2.INTER_NEAREST)

        if self.transform:
            # The transformed image was previously bound to an unused name
            # (`img`), so the image transform was silently dropped.
            image, mask = self.transform(image), self.transform(mask)

        # NOTE(review): pixels are compared with class indices (1 for
        # 'crack'); masks stored as 0/255 would never match - confirm the
        # ground-truth encoding.
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')

        # HWC -> CHW, as expected by torch convolution layers.
        image = torch.from_numpy(image).permute(2, 0, 1).float()
        mask = torch.from_numpy(mask).permute(2, 0, 1).float()
        return image, mask

In [45]:
if __name__ == "__main__":
    # Build the dataset/dataloader and print a layer summary of the model.

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The dataset root can be overridden with the CRACK_DATA_DIR environment
    # variable so the notebook is not tied to one machine; the original
    # location remains the default.
    DATA_DIR = os.environ.get(
        "CRACK_DATA_DIR",
        "/media/preethamam/Utilities-SSD/Xtreme_Programming/Z_Data/DLCrack/Liu+Xincong+DS3+CrackSegNet",
    )
    images_dir = os.path.join(DATA_DIR, "TrainingCracks")
    masks_dir = os.path.join(DATA_DIR, "TrainingCracksGroundtruth")

    train_data = Dataset(images_dir=images_dir, masks_dir=masks_dir)
    training_dataloader = DataLoader(train_data, batch_size=8, shuffle=True)

    model = VGG().to(device)
    summary(model, input_size=(3, 448, 448))

    # TODO: training loop is not implemented yet. The sketch below is
    # incomplete: `optimizer` and `criterion` are not defined anywhere, and
    # the final `if` has no condition body.
    #for i, data in enumerate(training_dataloader,0):
        #inputs, labels = data
        #optimizer.zero_grad()
        #outputs = model(inputs)
        #loss = criterion(outputs, labels)
        #loss.backward()
        #optimizer.step()

        #running_loss = loss.item()
        #if i % 2000
    #print("Finished Training")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 448, 448]           1,792
            Conv2d-2         [-1, 64, 448, 448]          36,928
         MaxPool2d-3         [-1, 64, 224, 224]               0
            Conv2d-4        [-1, 128, 224, 224]          73,856
            Conv2d-5        [-1, 128, 224, 224]         147,584
         MaxPool2d-6        [-1, 128, 112, 112]               0
            Conv2d-7        [-1, 256, 112, 112]         295,168
            Conv2d-8        [-1, 256, 112, 112]         590,080
            Conv2d-9        [-1, 256, 112, 112]         590,080
        MaxPool2d-10          [-1, 256, 56, 56]               0
           Conv2d-11          [-1, 512, 56, 56]       1,180,160
           Conv2d-12          [-1, 512, 56, 56]       2,359,808
           Conv2d-13          [-1, 512, 56, 56]       2,359,808
        MaxPool2d-14          [-1, 512,