In [None]:
## Number of samples per mini-batch
batch_size = 6

## Optimizer step sizes. The first one is handed to Adam in the training cell;
## the second is not referenced in the cells shown here (presumably reserved
## for a later, lower-rate run -- confirm).
learning_rate_1, learning_rate_2 = 1e-4, 1e-5

## Weight of the Jaccard term inside the segmentation loss (passed to LossMulti)
jaccard_weight = 0.3

## Epoch budgets matching the two learning rates
## (consider setting these high and implementing early stopping)
num_epochs_1, num_epochs_2 = 10, 20

In [None]:
## Standard Library
import glob
import json
import os

## External Libraries
import albumentations as A
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as functional
import torchvision
from albumentations.pytorch import ToTensorV2
from skimage import io
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [None]:
## Root folder that holds the training data and the class-mapping JSON files.
## NOTE(review): this is an absolute, machine-specific path -- point it at your
## own copy of the data before running the notebook.
data_dir = "D:/Documents/Masters/Spring 2023/Deep Learning/DL Project/Training data/"

## Class-mapping files, one per segmentation task. Built with os.path.join so
## the trailing separator of data_dir is not duplicated.
parts_json = os.path.join(data_dir, "parts.json")
instrument_json = os.path.join(data_dir, "instrument.json")

In [None]:
## Image Transforms
## Both pipelines are albumentations Compose objects: they must be called with
## keyword arguments (image=..., mask=...) and return a dict with the same keys,
## so that the spatial operations hit the image and its mask identically.

## Training: pad/crop to 1024x1280, random flips, normalise, convert to tensor.
train_transform = A.Compose([
    A.PadIfNeeded(min_height=1024, min_width=1280, p=1),
    A.RandomCrop(height=1024, width=1280, p=1),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.Normalize(p=1),
    ToTensorV2(),
])

## Validation: deterministic pad + centre crop only, no random augmentation.
val_transform = A.Compose([
    A.PadIfNeeded(min_height=1024, min_width=1280, p=1),
    A.CenterCrop(height=1024, width=1280, p=1),
    A.Normalize(p=1),
    ToTensorV2(),
])

## Image Dataloader
class ImageDataset(Dataset):
    """
    Paired image / segmentation-mask dataset read from PNG files on disk.

    Files are looked up below the resolved data directory as
    ``raw/<idx>.png`` (input image) and ``masks/<idx>_class.png`` (label
    image). Each label image is converted to a one-hot mask with one channel
    per entry of the mapping JSON, ordered by ascending label value.
    """

    ## Sub-folder of ``input_dir`` used for each supported task
    TASK_DIRS = {'parts': 'parts/', 'instrument': 'instruments/'}
    ## Supported dataset splits
    SPLITS = ('train', 'val')
    ## Fixed crop window applied to every image and mask before the transforms
    CROP_HEIGHT, CROP_WIDTH = 1024, 1280
    CROP_TOP, CROP_LEFT = 28, 320

    def __init__(self,
                 input_dir,
                 op,
                 op2,
                 mask_json_path,
                 transforms=None):
        """
        Args:
            input_dir (str): Root directory that contains the task folders.
            op (str): One of "parts" or "instrument", selecting the task folder.
            op2 (str): One of "train" or "val", selecting the dataset split.
            mask_json_path (str): Path to the JSON file mapping class names to
                the values used in the label images.
            transforms (callable or None): Albumentations-style pipeline, called
                as ``transforms(image=..., mask=...)`` and returning a dict with
                the keys ``'image'`` and ``'mask'``.

        Raises:
            ValueError: If ``op`` or ``op2`` is not one of the supported values.
        """
        if op not in self.TASK_DIRS:
            raise ValueError(
                f"not a valid training set: {op!r} (expected one of {sorted(self.TASK_DIRS)})")
        if op2 not in self.SPLITS:
            raise ValueError(
                f"not a valid split: {op2!r} (expected one of {list(self.SPLITS)})")

        self.transform = transforms
        self.op = op
        self.op2 = op2
        with open(mask_json_path, 'r') as f:
            self.mask = json.load(f)
        self.mask_num = len(self.mask)
        # Ascending label values; position in this list == one-hot channel index.
        self.mask_value = sorted(self.mask.values())

        task_dir = os.path.join(input_dir, self.TASK_DIRS[op])
        split_dir = os.path.join(task_dir, op2)
        # NOTE(review): assumes the split lives in <input_dir>/<task>/<split>/.
        # When that folder does not exist the task folder itself is used, which
        # is what this class read from before the split was honoured -- confirm
        # the layout so train and val do not silently share the same files.
        self.data_dir = split_dir if os.path.isdir(split_dir) else task_dir

    def __len__(self):
        """
        Return the number of ``*.png`` files in the ``raw/`` folder.
        """
        raw_dir = os.path.join(self.data_dir, 'raw/')
        # Escape the directory part so characters such as '[' in the path are
        # matched literally and only the file-name part acts as a pattern.
        return len(glob.glob(os.path.join(glob.escape(raw_dir), '*.png')))

    def __getitem__(self,
                    idx):
        """
        Load sample ``idx`` and return ``(image, one_hot_mask)``.

        The image is read from ``raw/<idx>.png`` and the labels from
        ``masks/<idx>_class.png``; both are cropped to the fixed window and the
        labels are expanded to shape ``(H, W, mask_num)``. When a transform was
        given, both are passed through it together.

        NOTE(review): indices are turned into file names directly, so the files
        are presumably numbered 0 .. len-1 without gaps -- confirm.
        """
        ## Load Image and Mask
        img_name = str(idx) + '.png'
        mask_name = str(idx) + '_class.png'
        img = io.imread(os.path.join(self.data_dir, 'raw/', img_name))
        mask = io.imread(os.path.join(self.data_dir, 'masks/', mask_name))

        # Label images may be stored single- or multi-channel; the class value
        # is read from the first channel in the multi-channel case.
        if mask.ndim == 3:
            mask = mask[:, :, 0]

        ## Crop images
        rows = slice(self.CROP_TOP, self.CROP_TOP + self.CROP_HEIGHT)
        cols = slice(self.CROP_LEFT, self.CROP_LEFT + self.CROP_WIDTH)
        img = img[rows, cols]
        mask = mask[rows, cols]

        ## Convert grey-scale label to one-hot encoding
        new_mask = np.zeros(mask.shape[:2] + (self.mask_num,))
        for channel, value in enumerate(self.mask_value):
            new_mask[:, :, channel] = mask == value

        ## Transform image and mask
        if self.transform:
            return self.img_transform(img, new_mask)
        return img, new_mask

    def img_transform(self,
                      img,
                      mask):
        """
        Apply the transform pipeline to an image and its mask in one call.

        A single call is required so that random spatial operations (crops,
        flips) use the same parameters for the image and for the mask.
        """
        augmented = self.transform(image=img, mask=mask)
        return augmented['image'], augmented['mask']

In [None]:
class LossMulti:
    """
    Multi-class segmentation loss: NLL blended with a soft-Jaccard log term.

        loss = (1 - jaccard_weight) * NLL(outputs, targets)
               - jaccard_weight * sum_c log(J_c)

    where ``J_c`` is the soft intersection-over-union of class ``c`` computed
    from ``exp(outputs[:, c])`` and the binary target ``targets == c``.
    """

    def __init__(self, jaccard_weight=0, class_weights=None, num_classes=1):
        """
        Args:
            jaccard_weight (float): Weight of the Jaccard term; 0 disables it.
            class_weights (numpy.ndarray or None): Optional per-class weights
                for the NLL term.
            num_classes (int): Number of classes summed over in the Jaccard term.
        """
        if class_weights is not None:
            nll_weight = torch.from_numpy(class_weights.astype(np.float32))
        else:
            nll_weight = None
        # nn.NLLLoss accepts (N, C, H, W) inputs directly and is the documented
        # replacement for the deprecated nn.NLLLoss2d.
        self.nll_loss = nn.NLLLoss(weight=nll_weight)
        self.jaccard_weight = jaccard_weight
        self.num_classes = num_classes

    def __call__(self, outputs, targets):
        """
        Args:
            outputs (torch.Tensor): Log-probabilities with the class dimension
                at index 1 (``exp()`` is applied to obtain probabilities).
            targets (torch.Tensor): Integer class index per element; must be a
                long tensor for the NLL term.

        Returns:
            torch.Tensor: Scalar loss.
        """
        # Keep the optional class weights on the same device as the predictions.
        self.nll_loss.to(outputs.device)
        loss = (1 - self.jaccard_weight) * self.nll_loss(outputs, targets)

        if self.jaccard_weight:
            eps = 1e-15  # avoids log(0) and division by zero for absent classes
            for cls in range(self.num_classes):
                jaccard_target = (targets == cls).float()
                jaccard_output = outputs[:, cls].exp()
                intersection = (jaccard_output * jaccard_target).sum()

                union = jaccard_output.sum() + jaccard_target.sum()
                jaccard = (intersection + eps) / (union - intersection + eps)
                loss = loss - torch.log(jaccard) * self.jaccard_weight
        return loss

In [None]:
class UNet16(nn.Module):
    """
    U-Net style segmentation network whose encoder is built from the
    convolution layers of torchvision's VGG16 ``features`` stack.

    NOTE(review): ``DecoderBlock`` and ``ConvRelu`` are not defined in the cells
    shown here; they must be defined or imported before this class is
    instantiated.
    """

    def __init__(self, num_classes=1, num_filters=32, pretrained=False):
        """
        :param num_classes: number of output channels; with more than one class
            the forward pass returns log-softmax scores
        :param num_filters: base width of the decoder blocks
        :param pretrained:
            False - no pre-trained network used
            True - encoder pre-trained with VGG16
        """
        super().__init__()
        self.num_classes = num_classes

        self.pool = nn.MaxPool2d(2, 2)

        # NOTE(review): recent torchvision releases prefer the ``weights=``
        # argument over ``pretrained=`` -- confirm against the installed version.
        self.encoder = torchvision.models.vgg16(pretrained=pretrained).features

        self.relu = nn.ReLU(inplace=True)

        # Encoder stages: the convolution layers are picked by index from the
        # VGG16 feature stack and re-interleaved with the shared ReLU; pooling
        # between stages is applied explicitly in forward().
        self.conv1 = nn.Sequential(self.encoder[0],
                                   self.relu,
                                   self.encoder[2],
                                   self.relu)

        self.conv2 = nn.Sequential(self.encoder[5],
                                   self.relu,
                                   self.encoder[7],
                                   self.relu)

        self.conv3 = nn.Sequential(self.encoder[10],
                                   self.relu,
                                   self.encoder[12],
                                   self.relu,
                                   self.encoder[14],
                                   self.relu)

        self.conv4 = nn.Sequential(self.encoder[17],
                                   self.relu,
                                   self.encoder[19],
                                   self.relu,
                                   self.encoder[21],
                                   self.relu)

        self.conv5 = nn.Sequential(self.encoder[24],
                                   self.relu,
                                   self.encoder[26],
                                   self.relu,
                                   self.encoder[28],
                                   self.relu)

        self.center = DecoderBlock(512, num_filters * 8 * 2, num_filters * 8)

        # Each decoder stage takes the previous decoder output concatenated
        # with the encoder feature map of the matching stage (skip connection),
        # hence the "<encoder channels> + <decoder channels>" input widths.
        self.dec5 = DecoderBlock(512 + num_filters * 8, num_filters * 8 * 2, num_filters * 8)
        self.dec4 = DecoderBlock(512 + num_filters * 8, num_filters * 8 * 2, num_filters * 8)
        self.dec3 = DecoderBlock(256 + num_filters * 8, num_filters * 4 * 2, num_filters * 2)
        self.dec2 = DecoderBlock(128 + num_filters * 2, num_filters * 2 * 2, num_filters)
        self.dec1 = ConvRelu(64 + num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        """
        Run the encoder, the centre block and the decoder with skip connections.

        Returns log-softmax scores over dim 1 when ``num_classes > 1``,
        otherwise the raw output of the final 1x1 convolution.
        """
        conv1 = self.conv1(x)
        conv2 = self.conv2(self.pool(conv1))
        conv3 = self.conv3(self.pool(conv2))
        conv4 = self.conv4(self.pool(conv3))
        conv5 = self.conv5(self.pool(conv4))

        center = self.center(self.pool(conv5))

        dec5 = self.dec5(torch.cat([center, conv5], 1))

        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(torch.cat([dec2, conv1], 1))

        if self.num_classes > 1:
            # torch.nn.functional is imported as ``functional`` in this notebook.
            x_out = functional.log_softmax(self.final(dec1), dim=1)
        else:
            x_out = self.final(dec1)

        return x_out

In [None]:
## Initialize your unet
## NOTE(review): n_classes is assumed to match the number of entries in the
## parts mapping JSON (the dataset builds one mask channel per entry).
n_classes = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = UNet16(n_classes)
model.to(device)

## Initialize Dataloaders
train_dataset = ImageDataset(input_dir=data_dir, op="parts", op2="train",
                             mask_json_path=parts_json, transforms=train_transform)

validation_dataset = ImageDataset(input_dir=data_dir, op="parts", op2="val",
                                  mask_json_path=parts_json, transforms=val_transform)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

## Initialize Optimizer and Loss
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate_1)
criterion = LossMulti(num_classes=n_classes, jaccard_weight=jaccard_weight)

## This cell runs the first training configuration (learning_rate_1 / num_epochs_1)
num_epochs = num_epochs_1


def to_class_indices(labels, num_classes):
    """
    Collapse a one-hot label batch to integer class indices for the loss.

    The class axis is taken to be dim 1 when its size equals ``num_classes``
    and the last dim otherwise; labels that are not 4-D are only cast to long.
    Positions whose one-hot vector is all zeros end up as class 0.
    """
    if labels.dim() == 4:
        channel_dim = 1 if labels.shape[1] == num_classes else -1
        labels = labels.argmax(dim=channel_dim)
    return labels.long()


print("Start Training...")

## Per-epoch mean losses
train_loss = []
val_loss = []

best_loss = float('inf')
last_loss = None

## TODO: add early stopping (e.g. stop once the validation loss has changed by
## less than a tolerance for several consecutive epochs).
for epoch in range(num_epochs):
    ########################### Training #####################################
    print("\nEPOCH " +str(epoch+1)+" of "+str(num_epochs)+"\n")
    model.train()
    running_loss = 0.0
    for images, labels in train_dataloader:
        images = images.to(device=device, dtype=torch.float32)
        labels = to_class_indices(labels, n_classes).to(device)

        # Gradients accumulate across backward() calls unless they are cleared.
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Report the mean over all batches instead of the last batch only.
    avg_loss = running_loss / max(len(train_dataloader), 1)
    print("Training", epoch+1, "finished, loss:", avg_loss)
    train_loss.append(avg_loss)

    ########################### Validation #####################################
    model.eval()
    running_vloss = 0.0
    with torch.no_grad():
        for images, labels in validation_dataloader:
            images = images.to(device=device, dtype=torch.float32)
            labels = to_class_indices(labels, n_classes).to(device)

            val_out = model(images)
            running_vloss += criterion(val_out, labels).item()

    avg_vloss = running_vloss / max(len(validation_dataloader), 1)

    # Checkpoint only when the validation loss improves on the best so far.
    if avg_vloss < best_loss:
        best_loss = avg_vloss
        torch.save(model.state_dict(), "best-parameters.pt")

    val_loss.append(avg_vloss)

    print("Validation", epoch+1, "finished, val_loss, last_loss:", avg_vloss, last_loss)

    last_loss = avg_vloss
    

    
