# Task 3: Helper notebook for loading the data and saving the predictions

In [2]:
import pickle
import gzip
import numpy as np
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

from skimage import io
from skimage.transform import resize
from skimage import img_as_bool

from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torchvision

from torch.utils.data import Dataset
import torchvision.transforms as T
import torchio as tio 
from PIL import Image
import random
seed =0
random.seed(seed)
torch.manual_seed(seed)


from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import time
import os
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Training configuration shared by the cells below.

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Optimiser learning rate, number of passes over the training set, and
# number of samples per batch.
INIT_LR = 1e-3
NUM_EPOCHS = 20
BATCH_SIZE = 16

# Everything the notebook writes goes below this directory.
BASE_OUTPUT = "output"

# Serialized model, loss plot and test-path listing inside BASE_OUTPUT.
MODEL_PATH = os.path.join(BASE_OUTPUT, "unet_tgs_salt.pth")
PLOT_PATH = os.path.join(BASE_OUTPUT, "plot.png")
TEST_PATHS = os.path.join(BASE_OUTPUT, "test_paths.txt")

# Width and height (in pixels) every frame and mask is resized to.
image_size_w = 128
image_size_h = 128

### Helper functions

In [4]:
def load_zipped_pickle(filename):
    """Read a gzip-compressed pickle file and return the object stored in it.

    Args:
        filename: Path of the gzip-compressed pickle file to read.

    Returns:
        The unpickled Python object.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at files from a trusted source.
    """
    with gzip.open(filename, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

In [5]:
def save_zipped_pickle(obj, filename):
    """Pickle ``obj`` with protocol 2 and write it gzip-compressed to ``filename``.

    Args:
        obj: Any picklable Python object.
        filename: Destination path; an existing file is overwritten.
    """
    with gzip.open(filename, mode='wb') as handle:
        pickle.Pickler(handle, protocol=2).dump(obj)

### Load data, make predictions and save prediction in correct format

In [6]:
# load data
# The three files are read from the notebook's working directory.
# NOTE(review): they are unpickled as-is; pickle can execute arbitrary code,
# so only load files from a trusted source.
train_data = load_zipped_pickle("train.pkl")  # iterated below as records with "video", "label", "frames" and "box" keys
test_data = load_zipped_pickle("test.pkl")  # iterated in the prediction cell as records with "name" and "video" keys
samples = load_zipped_pickle("sample.pkl")  # not referenced again in the visible cells

In [7]:
# Pull each field of the training records into its own list; position k in
# every list belongs to train_data[k].
v = [record["video"] for record in train_data]   # v: videos
l = [record["label"] for record in train_data]   # l: labels
f = [record["frames"] for record in train_data]  # f: frame indices used below to select frames
b = [record["box"] for record in train_data]     # b: boxes

In [8]:

# tensor_x = torch.Tensor(v2) # transform to torch tensor
# tensor_y = torch.Tensor(ls)

# my_dataset = TensorDataset(tensor_x,tensor_y) # create your datset
# my_dataloader = DataLoader(my_dataset)
# Data augmentation ideas:
# rotation, scaling, translation, grid deformation, maybe shearing

#plt.imshow(b[0])
# Resize every box mask and every annotated label frame to the network input
# size. skimage's `resize` takes the output shape as (rows, cols), i.e.
# (height, width).
out_shape = (image_size_h, image_size_w)
bs = []
ls = []
for i in range(len(v)):
    bs.append(resize(b[i], out_shape))
    # A label volume is indexed as label[:, :, frame] (see the visualisation
    # cell), so the annotated masks are the slices at the frame ids in f[i] --
    # the same frames that are cut out of the video for the network input.
    # Indexing l[i][j] with j in 0..2 would instead select image *rows* 0..2.
    for frame in f[i]:
        ls.append(resize(l[i][:, :, frame], out_shape))
ls = np.array(ls)
bs = np.array(bs)
#print(res)
#plt.imshow(res)


In [9]:
# Sanity check: number of resized label masks and their spatial size.
ls.shape

(195, 128, 128)

In [10]:
import cv2 as cv

image_size_w = image_size_h = 128

# v2: resized frames, divided by 255 and shifted to zero mean per frame.
# v_eualised: the same, but histogram-equalised before scaling.
v2 = []
v_eualised = []

for index, video in enumerate(v):
    # Only the frame indices listed for this video are extracted.
    for frame_id in f[index]:
        resized = cv.resize(
            video[:, :, frame_id],
            dsize=(image_size_w, image_size_h),
            interpolation=cv.INTER_CUBIC,
        )
        equalised = cv.equalizeHist(resized)

        scaled = resized / 255
        scaled_eq = equalised / 255

        v2.append(scaled - scaled.mean())
        v_eualised.append(scaled_eq - scaled_eq.mean())


In [11]:
class ImageDataset(Dataset):
    """Dataset of (input, target) pairs with optional per-sample transforms.

    Args:
        xs: Indexable collection of inputs; ``xs.shape[0]`` is the dataset length.
        ys: Indexable collection of targets, aligned with ``xs``.
        transform: Callable applied to each input, or ``None``.
        target_trans: Callable applied to each target, or ``None``.

    When ``transform`` and ``target_trans`` are the *same* object and both
    samples are tensors with matching spatial shape and dtype, the transform
    is applied once to the channel-wise concatenation of input and target.
    A random geometric transform then draws its parameters once, so the image
    and its mask stay aligned. Applying it to each separately would rotate,
    crop or shift them differently.
    """

    def __init__(self, xs, ys,transform,target_trans):
        self.xs = xs
        self.ys = ys
        self.trans = transform
        self.target_trans = target_trans

    def __len__(self):
        return self.xs.shape[0]

    def _can_transform_jointly(self, x, y):
        """Return True when x and y can share a single call of the transform."""
        return (
            self.trans is not None
            and self.trans is self.target_trans
            and torch.is_tensor(x)
            and torch.is_tensor(y)
            and x.dim() >= 3
            and x.dim() == y.dim()
            and x.shape[1:] == y.shape[1:]
            and x.dtype == y.dtype
        )

    def __getitem__(self, idx):
        x = self.xs[idx]
        y = self.ys[idx]

        if self._can_transform_jointly(x, y):
            # Stack along the leading (channel) axis, transform once, split back.
            n_x = x.shape[0]
            both = self.trans(torch.cat([x, y], dim=0))
            return both[:n_x], both[n_x:]

        # Independent transforms (or none): same order as before, target first.
        if self.target_trans is not None:
            y = self.target_trans(y)
        if self.trans is not None:
            x = self.trans(x)
        return x, y

In [30]:
# NOTE(review): scratch check -- `trans` and `tensor_x` are only defined in a
# later cell, so this cell fails under Restart Kernel -> Run All.
trans(tensor_x[0].unsqueeze(0)).shape

torch.Size([1, 1, 128, 128])

In [49]:
# NOTE(review): scratch check -- `tensor_y` is only defined in a later cell,
# so this cell fails under Restart Kernel -> Run All.
tensor_y.shape

torch.Size([195, 1, 128, 128])

In [12]:


# Stack the per-frame arrays into one ndarray before handing them to torch;
# building a tensor directly from a Python list of ndarrays is the slow path
# torch warns about.
tensor_x = torch.as_tensor(np.asarray(v2), dtype=torch.float32)
tensor_y = torch.as_tensor(np.asarray(ls), dtype=torch.float32)

# Insert the channel axis after the sample axis.
tensor_x = tensor_x.unsqueeze(1)
tensor_y = tensor_y.unsqueeze(1)

# Random augmentation used for training samples only.
trans = T.Compose([
   T.RandomRotation(10),
   T.RandomResizedCrop((image_size_h, image_size_w), scale=(0.5, 1.0)),
   T.RandomAffine(degrees=0, translate=(0.2, 0.2)),
   #tio.RandomElasticDeformation(),
])
# could also calculate mean across all images??? just a single value...
target_trans = trans
my_dataset = ImageDataset(tensor_x, tensor_y, trans, target_trans)  # augmented view of the data

# 90/10 split. The indices are shuffled once and shared by two views of the
# same tensors: the training subset reads through the augmenting dataset,
# the validation subset through an un-augmented one, so the validation loss
# is measured on the unmodified frames rather than on random crops/rotations.
# NOTE(review): the split is per frame, so frames of one video can land on
# both sides of the split.
train_size = int(0.9 * len(my_dataset))
val_size = len(my_dataset) - train_size
shuffled_indices = torch.randperm(len(my_dataset)).tolist()
train_dataset = torch.utils.data.Subset(my_dataset, shuffled_indices[:train_size])
val_dataset = torch.utils.data.Subset(
    ImageDataset(tensor_x, tensor_y, None, None),
    shuffled_indices[train_size:],
)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE, num_workers=2)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE, num_workers=2)


# Data augmentation ideas:
# rotation, scaling, translation, grid deformation, maybe shearing.
# Define the transform with torchvision transforms, then define a custom dataset that takes the transform in __init__ and applies it to each sample in __getitem__.


  tensor_x = torch.Tensor(v2) # transform to torch tensor


In [13]:
image_size_w,image_size_h  = 128,128 
class Block(nn.Module):
    """Two 3x3 convolutions (no padding argument given) with a ReLU in between.

    Args:
        in_ch: Number of input channels of the first convolution.
        out_ch: Number of output channels of both convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3)
        self.relu  = nn.ReLU()
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 (no activation after conv2)."""
        hidden = self.conv1(x)
        hidden = self.relu(hidden)
        return self.conv2(hidden)


class Encoder(nn.Module):
    """Stack of ``Block`` stages, each followed by 2x2 max pooling.

    Args:
        chs: Channel counts; stage k maps ``chs[k]`` to ``chs[k+1]`` channels.
    """

    def __init__(self, chs=(1,32,64,128)):
        super().__init__()
        self.enc_blocks = nn.ModuleList(
            [Block(c_in, c_out) for c_in, c_out in zip(chs[:-1], chs[1:])]
        )
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        """Return the list of pre-pooling outputs of every stage, first stage first."""
        ftrs = []
        current = x
        for stage in self.enc_blocks:
            features = stage(current)
            ftrs.append(features)
            # The pooled tensor feeds the next stage; after the last stage it is unused.
            current = self.pool(features)
        return ftrs


class Decoder(nn.Module):
    """Upsampling path: transposed conv, concatenate a cropped skip feature, then a ``Block``.

    Args:
        chs: Channel counts; level k upsamples from ``chs[k]`` to ``chs[k+1]``
            channels and, after concatenation with the skip feature, runs a
            ``Block(chs[k], chs[k+1])``.
    """

    def __init__(self, chs=(128, 64, 32)):
        super().__init__()
        self.chs = chs
        levels = list(zip(chs[:-1], chs[1:]))
        self.upconvs = nn.ModuleList(
            [nn.ConvTranspose2d(c_in, c_out, kernel_size=2, stride=2) for c_in, c_out in levels]
        )
        self.dec_blocks = nn.ModuleList(
            [Block(c_in, c_out) for c_in, c_out in levels]
        )

    def forward(self, x, encoder_features):
        """Decode ``x`` using ``encoder_features[k]`` as the skip input of level k."""
        for level, (upconv, block) in enumerate(zip(self.upconvs, self.dec_blocks)):
            upsampled = upconv(x)
            skip = self.crop(encoder_features[level], upsampled)
            # Concatenate along the channel axis before the convolution block.
            x = block(torch.cat([upsampled, skip], dim=1))
        return x

    def crop(self, enc_ftrs, x):
        """Center-crop ``enc_ftrs`` to the height and width of the 4-D tensor ``x``."""
        _, _, height, width = x.shape
        cropper = torchvision.transforms.CenterCrop([height, width])
        return cropper(enc_ftrs)


class UNet(nn.Module):
    """Encoder/decoder segmentation network with a 1x1 convolution head.

    Args:
        enc_chs: Channel counts passed to ``Encoder``.
        dec_chs: Channel counts passed to ``Decoder``.
        num_class: Number of output channels of the head.
        retain_dim: When true, the head output is resized to ``out_sz``.
        out_sz: Target (height, width) used when ``retain_dim`` is true.
    """

    def __init__(self, enc_chs=(1,32,64,128), dec_chs=( 128, 64,32), num_class=1, retain_dim=True, out_sz=(image_size_h,image_size_w)):
        super().__init__()
        self.encoder = Encoder(enc_chs)
        self.decoder = Decoder(dec_chs)
        self.head = nn.Conv2d(dec_chs[-1], num_class, kernel_size=1)
        self.retain_dim = retain_dim
        self.out_sz = out_sz

    def forward(self, x):
        """Return the head output for ``x``, resized to ``out_sz`` if ``retain_dim`` is set."""
        # Deepest encoder feature seeds the decoder; the rest are skip inputs,
        # ordered deepest-first.
        deepest, *skips = self.encoder(x)[::-1]
        logits = self.head(self.decoder(deepest, skips))
        if not self.retain_dim:
            return logits
        # Resize with F.interpolate's default mode.
        return F.interpolate(logits, self.out_sz)

In [42]:
# Smoke test: push one random single-channel 128x128 input through a freshly
# initialised UNet and display the shape of the output.
model = UNet()
x    = torch.randn(1, 1, 128, 128)
model(x).shape

torch.Size([1, 1, 128, 128])

In [34]:
# Check whether torch can see a CUDA device (this is what DEVICE is derived from).
torch.cuda.is_available()

False

In [None]:
#training loops

# initialize our UNet model on the configured device
unet = UNet().to(DEVICE)
# binary cross-entropy on raw logits, optimised with Adam
lossFunc = BCEWithLogitsLoss()
opt = Adam(unet.parameters(), lr=INIT_LR)
# Number of batches per epoch. Taken from the loaders themselves: they also
# yield the final partial batch, which `len(dataset) // BATCH_SIZE` does not
# count (and which makes that quotient 0 for a validation set smaller than
# one batch).
trainSteps = len(train_dataloader)
testSteps = len(val_dataloader)
# per-epoch history of the average losses (plain Python floats)
H = {"train_loss": [], "test_loss": []}

print("[INFO] training the network...")
startTime = time.time()
for e in tqdm(range(NUM_EPOCHS)):
    # ---- training pass ----
    unet.train()
    totalTrainLoss = 0.0
    totalTestLoss = 0.0
    for (x, y) in train_dataloader:
        (x, y) = (x.to(DEVICE), y.to(DEVICE))
        pred = unet(x)
        loss = lossFunc(pred, y)
        # clear old gradients, backpropagate, update the weights
        opt.zero_grad()
        loss.backward()
        opt.step()
        # .item() keeps only the number; summing the tensor itself would keep
        # every batch's autograd history referenced for the whole epoch
        totalTrainLoss += loss.item()

    # ---- validation pass (no gradients, eval mode) ----
    with torch.no_grad():
        unet.eval()
        for (x, y) in val_dataloader:
            (x, y) = (x.to(DEVICE), y.to(DEVICE))
            pred = unet(x)
            totalTestLoss += lossFunc(pred, y).item()

    # average loss per batch; guard against an empty loader
    avgTrainLoss = totalTrainLoss / max(trainSteps, 1)
    avgTestLoss = totalTestLoss / max(testSteps, 1)
    H["train_loss"].append(avgTrainLoss)
    H["test_loss"].append(avgTestLoss)

    print("[INFO] EPOCH: {}/{}".format(e + 1, NUM_EPOCHS))
    print("Train loss: {:.6f}, Test loss: {:.4f}".format(
        avgTrainLoss, avgTestLoss))
# display the total time needed to perform the training
endTime = time.time()
print("[INFO] total time taken to train the model: {:.2f}s".format(
    endTime - startTime))




[INFO] training the network...



  0%|                                                    | 0/20 [00:00<?, ?it/s]

In [None]:
# plot the per-epoch training and validation loss
plt.style.use("ggplot")
plt.figure()
plt.plot(H["train_loss"], label="train_loss")
plt.plot(H["test_loss"], label="test_loss")
plt.title("Training Loss on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="lower left")

# Nothing else in the notebook creates the output directory, and neither
# savefig nor torch.save creates missing parent directories.
for out_path in (PLOT_PATH, MODEL_PATH):
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)

plt.savefig(PLOT_PATH)
# serialize the model to disk
# NOTE(review): this saves the whole module object rather than its
# state_dict; confirm that is what the loading side expects.
torch.save(unet, MODEL_PATH)


In [None]:
#visualization
# For videos 45..59, draw one figure per listed frame with three panels:
#   1) the label slice alone,
#   2) the label slice with the video frame overlaid,
#   3) the label slice with the box overlaid.
for index in range(45,60):
    print(index)
    for index2 in f[index]:
        label_frame = l[index][:,:,index2]

        plt.figure()

        plt.subplot(1,3,1)
        plt.imshow(label_frame, cmap='gray')

        plt.subplot(1,3,2)
        plt.imshow(label_frame, cmap='gray')
        plt.imshow(v[index][:,:,index2], cmap='jet', alpha=0.5)

        plt.subplot(1,3,3)
        plt.imshow(label_frame, cmap='gray')
        plt.imshow(b[index], cmap='jet', alpha=0.5)

        plt.show()
        plt.close()

In [38]:
#data standardization normalization and histogram correction??? est:1h
# NOTE(review): this rebinds `l` to itself divided by 255, so every re-run of
# the cell divides again, and any cell that indexes `l` afterwards sees the
# rescaled array instead of the original list of labels.
# TODO confirm the labels are really stored on a 0..255 scale before scaling.
l =np.array(l) /255
l.shape

  l =np.array(l) /255


: 

: 

In [None]:
#data augmentation est: 1h


In [None]:
#model building unet: est: 1h

In [None]:
#training loop, validation, loss and metric and visualization 1h

In [None]:
# testing loop metric and visualization and store result 1h

In [None]:
# model structure , loss , and hyper parameter selection 2h

In [None]:
# buffer time 2h 

In [None]:
#optional model: optical flow 
# mask r cnn? for roi prediction?
# unsupervised low rank disentangle ? 4h

#finalise and submission 2h

In [None]:
# result visualization

In [None]:
# Show every slice along the last axis of label volume 50, one figure per
# slice; each figure is closed after display so figures do not accumulate.
for i in range(l[50].shape[2]):
    plt.figure()
    plt.imshow(l[50][:,:,i], cmap='gray')
    plt.show()
    plt.close()

In [19]:
# Counts the entries of `l` equal to the string 'amateur'.
# NOTE(review): in the cells above `l` holds the "label" arrays, so this
# recorded result presumably comes from an earlier kernel state in which `l`
# held a per-video string field -- verify before relying on it.
np.sum(np.array(l)=='amateur')

46

In [21]:
# Shape of the first training video.
# NOTE: rebinds the name `video`, which the preprocessing loop also uses.
video = np.array(train_data[0]['video'])
video.shape

(112, 112, 334)

In [41]:
# Box of the first training video: number of True entries, then its shape.
box= np.array(train_data[0]['box'])
print(np.sum(box==True))
box.shape

1232


(112, 112)

In [53]:
# Ideas:
#   - use data augmentation
#   - pretrain on amateur data and fine-tune on expert data
#   - try plain image segmentation first
#   - use an existing video-segmentation model from an online source; challenge: labels are not available for all frames, only 3 frames per video are confirmed to be annotated.

# Inspect training record 63: label and box shapes, the box sum, and the
# label sum of every slice along the last axis (only annotated slices are non-zero).
label= np.array(train_data[63]['label'])
print(label.shape)
box= np.array(train_data[63]['box'])
print(box.shape)
print(np.sum(box))
for i in range(label.shape[2]):
    
    print(i,np.sum(label[:,:,i]))

(583, 743, 126)
(583, 743)
25868
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 2617
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
21 0
22 0
23 0
24 0
25 0
26 0
27 0
28 2892
29 0
30 0
31 0
32 0
33 0
34 0
35 0
36 0
37 0
38 0
39 0
40 0
41 0
42 0
43 0
44 0
45 0
46 0
47 0
48 0
49 0
50 0
51 0
52 0
53 0
54 0
55 2535
56 0
57 0
58 0
59 0
60 0
61 0
62 0
63 0
64 0
65 0
66 0
67 0
68 0
69 0
70 0
71 0
72 0
73 0
74 0
75 0
76 0
77 0
78 0
79 0
80 0
81 0
82 0
83 0
84 0
85 0
86 0
87 0
88 0
89 0
90 0
91 0
92 0
93 0
94 0
95 0
96 0
97 0
98 0
99 0
100 0
101 0
102 0
103 0
104 0
105 0
106 0
107 0
108 0
109 0
110 0
111 0
112 0
113 0
114 0
115 0
116 0
117 0
118 0
119 0
120 0
121 0
122 0
123 0
124 0
125 0


In [49]:
# Total label sum for each of the first 65 training records.
# NOTE: 65 is hard-coded here rather than taken from len(train_data).
for i in range(65):
    label= np.array(train_data[i]['label'])
    print(i,np.sum(label[:,:,:]))

0 420
1 430
2 319
3 344
4 319
5 411
6 298
7 571
8 300
9 254
10 453
11 323
12 376
13 245
14 339
15 443
16 393
17 338
18 568
19 240
20 450
21 276
22 382
23 308
24 312
25 348
26 240
27 505
28 539
29 345
30 427
31 338
32 282
33 256
34 280
35 448
36 335
37 322
38 389
39 359
40 293
41 380
42 320
43 549
44 494
45 251
46 8236
47 6844
48 10290
49 8467
50 10007
51 16692
52 9076
53 7716
54 11061
55 9993
56 10650
57 7896
58 6942
59 8936
60 7492
61 6897
62 10296
63 8044
64 7910


In [None]:
# make prediction for test
# Placeholder baseline: for every test video, mark a box of at most 100x100
# pixels centred in the first two axes (for all positions along the
# remaining axes). The trained network is not used here.
predictions = []
for d in test_data:
    # The `np.bool` alias was removed from NumPy; the builtin `bool` names
    # the same dtype on every NumPy version.
    prediction = np.zeros_like(d['video'], dtype=bool)
    height = prediction.shape[0]
    width = prediction.shape[1]
    # Clamp the start indices at 0: for a side shorter than 100 px a negative
    # start would be read as "counted from the end" and select the wrong rows.
    top = max(int(height/2)-50, 0)
    left = max(int(width/2)-50, 0)
    prediction[top:int(height/2+50), left:int(width/2+50)] = True

    # DATA Structure: one record per test video
    predictions.append({
        'name': d['name'],
        'prediction': prediction
        }
    )

In [None]:
# save in correct format
# Writes the list of {'name', 'prediction'} records as a gzip-compressed
# pickle into the notebook's working directory.
save_zipped_pickle(predictions, 'my_predictions.pkl')