In [0]:
from __future__ import print_function, division
import os
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image
import torchvision

# Device used for the model and for every batch moved by dict_to_device.
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing on the first .to(device) call.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [0]:
# Mount Google Drive at /content/drive; a later cell copies data.zip from
# '/content/drive/My Drive'. Depends on the `google.colab` package.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Folder on the mounted Google Drive. It must be absolute: Drive is mounted at
# '/content/drive', and a relative 'content/drive/...' would be resolved
# against the current working directory instead.
data_path = '/content/drive/My Drive'
# Relative folder name; resolved against the notebook's working directory.
load_path = 'sample_data'

In [0]:
# Copy the dataset archive from the mounted Drive into /content, then extract it.
# NOTE(review): `unzip data.zip` is resolved against the notebook's working
# directory, so this assumes that directory is /content - confirm.
# NOTE(review): a later cell reads './data', so the archive presumably unpacks
# into a top-level data/ folder - verify.
!cp '/content/drive/My Drive/data.zip' /content
!unzip data.zip

In [0]:
def dict_to_device(dictionary, device):
    """Move every value of ``dictionary`` to ``device``, in place.

    Each value must provide a ``.to(device)`` method; the value is replaced
    by whatever that call returns.

    Args:
        dictionary: mapping whose values are to be moved.
        device: target passed unchanged to each value's ``.to``.

    Returns:
        The same ``dictionary`` object that was passed in.
    """
    # Only existing keys are reassigned, so the dict never changes size
    # while it is being iterated.
    for key in dictionary:
        dictionary[key] = dictionary[key].to(device)
    return dictionary

In [0]:
class Killer_Whale_Dataset(Dataset):
    """Paired image / mask dataset for the killer-whale data.

    ``data_folder`` must contain two sub-directories, ``img/`` and ``mask/``.
    Images and masks are paired by their position in the sorted directory
    listings, so both folders must hold the same number of files.

    Args:
        data_folder: directory containing ``img/`` and ``mask/``.
        transform: optional callable applied to the image and then to the
            mask (both opened as RGB PIL images). When given, its result is
            additionally resized to ``output_size`` with adaptive average
            pooling, so it has to return a tensor (e.g. ``ToTensor()``).
            When omitted, the raw PIL images are returned without resizing.
        output_size: ``(height, width)`` produced by the pooling step.

    Raises:
        ValueError: if ``img/`` and ``mask/`` contain a different number of
            files, since positional pairing would then be misaligned.
    """

    def __init__(self, data_folder, transform=None, output_size=(224, 224)):
        super().__init__()
        self.folder_list = os.listdir(data_folder)
        # The trailing separator is kept so these attributes hold the same
        # strings as before for any code that reads them.
        self.img_path = os.path.join(data_folder, 'img/')
        self.mask_path = os.path.join(data_folder, 'mask/')
        self.img_list = sorted(os.listdir(self.img_path))
        self.mask_list = sorted(os.listdir(self.mask_path))
        if len(self.img_list) != len(self.mask_list):
            raise ValueError(
                "img/ holds {} files but mask/ holds {}; images and masks "
                "are paired by sorted position and must match".format(
                    len(self.img_list), len(self.mask_list)))
        self.transform = transform
        self.output_size = output_size

    def _load(self, folder, filename):
        """Open one file as RGB and apply the transform + resize, if any."""
        with Image.open(os.path.join(folder, filename)) as raw:
            picture = raw.convert('RGB')
        if self.transform:
            picture = self.transform(picture)
            picture = nn.functional.adaptive_avg_pool2d(picture, self.output_size)
        return picture

    def __getitem__(self, idx):
        """Return ``{'img': ..., 'mask': ...}`` for the pair at ``idx``."""
        # Samples are built from locals rather than stored on ``self``, so
        # fetching one item never overwrites state left by another.
        return {
            'img': self._load(self.img_path, self.img_list[idx]),
            'mask': self._load(self.mask_path, self.mask_list[idx]),
        }

    def __len__(self):
        """Number of image / mask pairs."""
        return len(self.img_list)

In [0]:
def Tensor2Image(t):
    """Convert the first sample of a batch to a PIL image.

    Args:
        t: 4-D batch indexed as ``t[0, :, :, :]``; presumably laid out as
            (batch, channels, height, width) - confirm against callers.

    Returns:
        The PIL image produced by ``transforms.ToPILImage`` for sample 0.
    """
    first = t[0, :, :, :]
    if isinstance(first, torch.Tensor) and first.is_floating_point():
        # ToPILImage scales float tensors by 255 and casts them to uint8.
        # Values outside [0, 1] (e.g. raw, unbounded network outputs) would
        # wrap around in that cast, so clamp them first. detach() keeps the
        # conversion out of the autograd graph.
        first = first.detach().clamp(0, 1)
    return transforms.ToPILImage()(first)



class AE(nn.Module):
    """Convolutional encoder / decoder mapping an image to a mask image.

    The encoder is six stride-2 3x3 convolutions, a 2x2 max-pool and two
    linear layers ending in a 40-value latent vector. The decoder mirrors
    it with two linear layers followed by seven stride-2 transposed
    convolutions ending in 3 output channels.

    ``fc1a`` takes 128 input features, so the pooled feature map has to
    flatten to 128 values per sample; that is the case for 224x224 inputs.
    """

    def __init__(self):
        super().__init__()

        def down(channels_in):
            # Encoder stage: 3x3 convolution, stride 2, 128 output channels.
            return nn.Conv2d(in_channels=channels_in, out_channels=128,
                             kernel_size=3, stride=2)

        def up(kernel, channels_out=128):
            # Decoder stage: stride-2 transposed convolution from 128 channels.
            return nn.ConvTranspose2d(in_channels=128, out_channels=channels_out,
                                      kernel_size=kernel, stride=2)

        # Layers are created in the same order as before so that parameter
        # initialisation under a fixed seed is unchanged.
        self.conv2a = down(3)
        self.conv2b = down(128)
        self.conv2c = down(128)
        self.conv2d = down(128)
        self.conv2e = down(128)
        self.conv2f = down(128)
        self.pool = nn.MaxPool2d(2, 2)

        # Encoder bottleneck: 128 -> 80 -> 40.
        self.fc1a = nn.Linear(128, 80)
        self.fc1b = nn.Linear(80, 40)

        # Decoder bottleneck: 40 -> 80 -> 128.
        self.fc2a = nn.Linear(40, 80)
        self.fc2b = nn.Linear(80, 128)

        self.convtrans2a = up(2)
        self.convtrans2b = up(4)
        self.convtrans2c = up(3)
        self.convtrans2d = up(3)
        self.convtrans2e = up(3)
        self.convtrans2f = up(3)
        self.convtrans2g = up(4, channels_out=3)

    def encode(self, dictionary):
        """Encode ``dictionary['img']`` into a (batch, 40) latent tensor."""
        relu = nn.functional.relu
        features = dictionary['img']
        n_samples = features.shape[0]

        encoder_stages = (self.conv2a, self.conv2b, self.conv2c,
                          self.conv2d, self.conv2e, self.conv2f)
        for conv in encoder_stages:
            features = relu(conv(features))

        flat = self.pool(features).view(n_samples, -1)
        hidden = relu(self.fc1a(flat))
        # No activation on the latent vector itself.
        return self.fc1b(hidden)

    def decode(self, latent_info):
        """Decode a latent tensor into ``{'img': tensor}``."""
        relu = nn.functional.relu
        n_samples = latent_info.shape[0]

        hidden = relu(self.fc2a(latent_info))
        # Reshape to a 1x1 spatial map for the transposed convolutions.
        grid = self.fc2b(hidden).view(n_samples, -1, 1, 1)

        decoder_stages = (self.convtrans2a, self.convtrans2b, self.convtrans2c,
                          self.convtrans2d, self.convtrans2e, self.convtrans2f)
        for deconv in decoder_stages:
            grid = relu(deconv(grid))

        # The last layer is left linear (no ReLU).
        return {'img': self.convtrans2g(grid)}

    def forward(self, dictionary):
        """Run ``encode`` followed by ``decode`` on a batch dictionary."""
        return self.decode(self.encode(dictionary))

In [0]:
# --- Data -------------------------------------------------------------------
# Convert PIL images to tensors; the dataset then resizes them by pooling.
# NOTE: this rebinds the name `transform` imported from skimage at the top of
# the notebook.
transform = transforms.Compose([transforms.ToTensor()])

whale_path = './data'

whale_data = Killer_Whale_Dataset(whale_path, transform=transform)
print(whale_data[0]['img'].size())

# Hold out the first sample for validation; train on everything else.
ind_val = list(range(0, 1))
ind_train = list(range(1, len(whale_data)))
print(len(whale_data))

val_set = torch.utils.data.Subset(whale_data, ind_val)
train_set = torch.utils.data.Subset(whale_data, ind_train)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=5, shuffle=True, drop_last=False)
# Validation does not need shuffling; keep its order deterministic.
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=False, drop_last=False)
print(len(val_set))
print(len(train_loader))

# --- Model, loss, optimizer ---------------------------------------------------
# Place the model on the notebook-wide `device`, the same one the batches are
# moved to, instead of hard-coding CUDA.
net_test = AE().to(device)

# Pixel-wise mean squared error between predicted and ground-truth mask.
loss_fn = torch.nn.MSELoss()
optimizer = optim.Adam(net_test.parameters(), lr=0.001)

In [0]:
from IPython import display

# Training loss of every optimisation step, across all epochs.
losses = []

fig = plt.figure(figsize=(15, 5), dpi=60, facecolor='w', edgecolor='k')
axes = fig.subplots(1, 3)

for epoch in range(500):
    for batch in train_loader:
        dict_to_device(batch, device)
        preds = net_test(batch)
        loss = loss_fn(preds['img'], batch['mask'])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    # Clear all three panels first, then redraw and display exactly once per
    # epoch. (Previously the drawing code sat inside the `for ax in axes`
    # loop, so everything was drawn and displayed three times per epoch.)
    for ax in axes:
        ax.cla()

    # Panels show the first sample of the last batch of this epoch.
    axes[0].imshow(Tensor2Image(preds['img'].detach().cpu()))
    axes[0].set_title('good to see?')
    axes[1].imshow(Tensor2Image(batch['mask'].cpu()))
    axes[1].set_title('ground truth')
    axes[2].plot(losses)
    axes[2].set_yscale('log')
    # One point per optimisation step, so the x axis counts iterations.
    axes[2].set_xlabel("iteration")
    axes[2].set_title('Training loss')

    display.clear_output(wait=True)
    display.display(fig)
    print("Plot after epoch {} (iteration {})".format(epoch, len(losses)))

display.display(fig)

In [0]:
# Validation loss of every batch in val_loader.
losses1 = []
fig1 = plt.figure(figsize=(15, 5), dpi=60, facecolor='w', edgecolor='k')
axes1 = fig1.subplots(1, 3)

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for batch1 in val_loader:
        dict_to_device(batch1, device)
        preds1 = net_test(batch1)
        loss = loss_fn(preds1['img'], batch1['mask'])
        print(loss.item())
        losses1.append(loss.item())

        # Clear all panels, then draw each one once. (Previously the imshow
        # calls sat inside the `for ax in axes1` loop and ran three times.)
        for ax in axes1:
            ax.cla()
        axes1[0].imshow(Tensor2Image(preds1['img'].cpu()))
        axes1[0].set_title('prediction')
        axes1[1].imshow(Tensor2Image(batch1['img'].cpu()))
        axes1[1].set_title('input image')
        axes1[2].imshow(Tensor2Image(batch1['mask'].cpu()))
        axes1[2].set_title('ground truth')

# `display` is the IPython module imported in the training cell.
display.display(fig1)