In [1]:
# %cd test
import ast
import os
import shutil
import time

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from albumentations import (HorizontalFlip, ShiftScaleRotate, VerticalFlip, Normalize,Flip,
                            Compose, GaussNoise)
from albumentations.pytorch import ToTensorV2
from skimage import io
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler
from torchvision import utils
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from tqdm import tqdm
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is a lowercase alias bound to the very same object.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = DEVICE
# Directory holding the per-video frame folders (note the trailing slash).
BASE_DIR = "../input/tensorflow-great-barrier-reef/train_images/"

In [2]:
train_df = pd.read_csv("../input/tensorflow-great-barrier-reef/train.csv")

# The annotations column stores Python-literal strings such as
# "[{'x': 559, 'y': 213, 'width': 50, 'height': 32}]". They are parsed with
# ast.literal_eval instead of eval: the CSV is external data, and eval would
# execute any expression embedded in it, whereas literal_eval only accepts
# literals (lists, dicts, numbers, strings, ...).
train_df['annotations'] = train_df['annotations'].apply(ast.literal_eval)


def func(x):
    """Map an image_id such as "0-16" to its relative path "video_0/16.jpg"."""
    parts = x.split("-")
    return "video_" + parts[0] + "/" + parts[1] + ".jpg"


train_df["img_path"] = train_df["image_id"].apply(func)
train_df["no_of_bbox"] = train_df["annotations"].apply(len)
# Keep only the frames that carry at least one annotated bounding box.
train_df = train_df[train_df["no_of_bbox"]>0].reset_index(drop=True)
train_df.head()

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,img_path,no_of_bbox
0,0,40258,16,16,0-16,"[{'x': 559, 'y': 213, 'width': 50, 'height': 32}]",video_0/16.jpg,1
1,0,40258,17,17,0-17,"[{'x': 558, 'y': 213, 'width': 50, 'height': 32}]",video_0/17.jpg,1
2,0,40258,18,18,0-18,"[{'x': 557, 'y': 213, 'width': 50, 'height': 32}]",video_0/18.jpg,1
3,0,40258,19,19,0-19,"[{'x': 556, 'y': 214, 'width': 50, 'height': 32}]",video_0/19.jpg,1
4,0,40258,20,20,0-20,"[{'x': 555, 'y': 214, 'width': 50, 'height': 32}]",video_0/20.jpg,1


In [3]:
class ReefDataset:
    """Map-style detection dataset over the annotated reef frames.

    Indexing returns ``(image, target)``:

    * ``image`` is the frame read from ``BASE_DIR``, converted from BGR to RGB
      and scaled to [0, 1], then passed through ``ToTensorV2`` (or through
      ``transforms`` when one is given and the boxes are valid).
    * ``target`` is a dict with the keys ``boxes`` (float32,
      ``[x_min, y_min, x_max, y_max]``), ``area``, ``image_id``, ``labels``
      (all ones: a single foreground class) and ``iscrowd`` (all zeros).

    Args:
        df: DataFrame with an ``img_path`` column (path relative to
            ``BASE_DIR``) and an ``annotations`` column holding a list of
            dicts with the keys ``x``, ``y``, ``width`` and ``height``.
        transforms: optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            dict with at least ``image`` and ``bboxes``.
    """

    # Frame size in pixels; used to clip boxes and to validate them.
    IMG_WIDTH = 1280
    IMG_HEIGHT = 720

    def __init__(self, df, transforms=None):
        self.df = df
        self.transforms = transforms

    def can_augment(self, boxes):
        """Return True when no box in ``boxes`` (N x 4, xyxy) leaves the frame."""
        box_outside_image = (
            (boxes[:, 0] < 0).any() or (boxes[:, 1] < 0).any()
            or (boxes[:, 2] > self.IMG_WIDTH).any()
            or (boxes[:, 3] > self.IMG_HEIGHT).any()
        )
        return not box_outside_image

    def _boxes_from_annotations(self, annotations):
        """Convert ``[{x, y, width, height}, ...]`` into an (N, 4) float64 xyxy array.

        The far corner is clipped to the frame. An empty annotation list yields
        an array of shape (0, 4).
        """
        # np.float64 replaces the `np.float` alias, which was removed in
        # NumPy 1.24. copy=True guarantees a writable array for the in-place
        # updates below.
        boxes = pd.DataFrame(
            annotations, columns=['x', 'y', 'width', 'height']
        ).to_numpy(dtype=np.float64, copy=True)

        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max].
        boxes[:, 2] = np.clip(boxes[:, 0] + boxes[:, 2], 0, self.IMG_WIDTH)
        boxes[:, 3] = np.clip(boxes[:, 1] + boxes[:, 3], 0, self.IMG_HEIGHT)
        return boxes

    def _build_target(self, boxes, i):
        """Build the target dict for sample ``i`` from an (N, 4) xyxy array."""
        n_boxes = boxes.shape[0]
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        return {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            'area': torch.as_tensor(area, dtype=torch.float32),
            'image_id': torch.tensor([i]),
            'labels': torch.ones((n_boxes,), dtype=torch.int64),
            'iscrowd': torch.zeros((n_boxes,), dtype=torch.int64),
        }

    def __getitem__(self, i):
        """Load sample ``i`` and return ``(image, target)``.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be
                decoded (``cv2.imread`` returned ``None``).
        """
        row = self.df.iloc[i]

        image_path = f'{BASE_DIR}/{row["img_path"]}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of an opaque cvtColor error.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float64)
        image /= 255.0

        boxes = self._boxes_from_annotations(row['annotations'])
        target = self._build_target(boxes, i)

        if self.transforms and self.can_augment(boxes):
            sample = self.transforms(
                image=image,
                bboxes=target['boxes'],
                labels=target['labels'],
            )
            image = sample['image']

            # Rebuild every per-box field from the transformed boxes so that
            # boxes, area, labels and iscrowd stay the same length even when
            # the transform drops boxes (including dropping all of them).
            aug_boxes = np.array(
                [[float(v) for v in box[:4]] for box in sample['bboxes']],
                dtype=np.float64,
            ).reshape(-1, 4)
            target = self._build_target(aug_boxes, i)
        else:
            image = ToTensorV2(p=1.0)(image=image)['image']

        return image, target

    def __len__(self):
        """Number of rows (frames) in the backing DataFrame."""
        return len(self.df)

In [4]:
# df_train = train_df.iloc[0: 4427]
# df_val = train_df.iloc[4428:4918]

# ds_train = ReefDataset(df_train)
# ds_val = ReefDataset(df_val)

# Full annotated set, no augmentation: every sample is only converted to a tensor.
ds_train = ReefDataset(df=train_df, transforms=None)

In [5]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# One sample per batch, in dataset order, loaded by four worker processes.
dl_train = DataLoader(
    dataset=ds_train,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)
# dl_val = DataLoader(ds_val, batch_size=2, shuffle=False, num_workers=4, collate_fn=collate_fn)

In [6]:
# len(dl_train)

In [7]:
# from tqdm import tqdm

# mean = 0.
# std = 0.
# nb_samples = 0.
# for i in tqdm(range(len(ds_train))):
# #     for data in dl_train:
#     batch_samples = ds_train[i][0].size(0)
#     data = ds_train[i][0].view(batch_samples, ds_train[i][0].size(1), -1)
#     mean += data.mean(2).sum(0)
#     std += data.std(2).sum(0)
#     nb_samples += batch_samples

# mean /= nb_samples
# std /= nb_samples

In [8]:
# import torchvision.transforms as transforms
# import matplotlib.pyplot as plt

# img = plt.imread("../input/tensorflow-great-barrier-reef/train_images/video_0/1007.jpg")


# def get_train_transform():
#     return A.Compose([
#         ToTensorV2(p=1.0)
#     ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

# # transform = transforms.Compose([
# #     transforms.ToTensor()
# # ])

# transform =  A.Compose([
#         ToTensorV2(p=1.0)
#     ])
# # , bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}
# sample = {'image': img}
# # print(*sample)
# sample = transforms(**sample)
# # image = sample['image']
# # # transform the pIL image to tensor 
# # # image
# # img_tr = transform(img)

# # # calculate mean and std
# # mean, std = img_tr.mean([1,2]), img_tr.std([1,2])
  
# # # print mean and std
# # print("mean and std before normalize:")
# # print("Mean of the image:", mean)
# # print("Std of the image:", std)

In [9]:
# image = cv2.imread("../input/tensorflow-great-barrier-reef/train_images/video_0/1007.jpg").astype(np.float) / 255.0

# transform = A.Compose([
#      ToTensorV2(p=1.0)
# ])

# transformed = transform(image=image)["image"] # shape [chn,height,width]
# mean, std = transformed.mean([1,2]), transformed.std([1,2])

# # print mean and std
# print("mean and std before normalize:")
# print("Mean of the image:", mean)
# print("Std of the image:", std)
# # plt.imshow(transformed.permute(1,2,0).numpy())

In [10]:
# transformed.mean([1,2]).shape

In [11]:
# Kept so that any other cell still referring to `transform` keeps working;
# the statistics loop below does not use it.
transform = A.Compose([
     ToTensorV2(p=1.0)
])


def channel_stats(image):
    """Return per-channel ``(mean, std)`` lists for one channel-first image tensor.

    ``image`` is expected as [channel, height, width]; both statistics are
    reduced over the two spatial axes.
    """
    return image.mean(dim=(1, 2)).tolist(), image.std(dim=(1, 2)).tolist()


mean_list = []
std_list = []
for i in tqdm(range(len(ds_train))):
    # ds_train was built without transforms, so each image has already been
    # through ToTensorV2 and is channel-first. Feeding it to ToTensorV2 again
    # would permute the axes a second time, so reduce over H and W directly.
    mean, std = channel_stats(ds_train[i][0])
    mean_list.append(mean)
    std_list.append(std)

# print mean and std
# NOTE: the reported std is the average of the per-image stds, not the std of
# all pixels pooled across the dataset.
print("mean and std before normalize:")
print("Mean of the image:", np.mean(mean_list,axis=0))
print("Std of the image:", np.mean(std_list,axis=0))

100%|██████████| 4919/4919 [04:58<00:00, 16.48it/s]

mean and std before normalize:
Mean of the image: [0.22246136 0.57680758 0.65816097]
Std of the image: [0.17461663 0.17586531 0.1624069 ]





In [12]:
# print mean and std, rounded to three decimals
# np.round replaces np.round_, which was removed in NumPy 2.0; .tolist() turns
# the values into plain Python floats so the list prints as [0.222, ...].
print("mean and std before normalize:")
print("Mean of the image:", np.round(np.mean(mean_list, axis=0), 3).tolist())
print("Std of the image:", np.round(np.mean(std_list, axis=0), 3).tolist())

mean and std before normalize:
Mean of the image: [0.222, 0.577, 0.658]
Std of the image: [0.175, 0.176, 0.162]


In [13]:
# lst = [[0.6644307981004902, 0.5943366140727124, 0.20642771735430285],
#       [0.6644307981004902, 0.5943366140727124, 0.20642771735430285],
#       [0.6644307981004902, 0.5943366140727124, 0.20642771735430285],
#       [0.6644307981004902, 0.5943366140727124, 0.20642771735430285],
#       [0.6644307981004902, 0.5943366140727124, 0.20642771735430285]]

# np.mean(lst,axis=0)

In [14]:
# ds_train[0][0].numpy()