<a href="https://colab.research.google.com/github/lvllvl/segmentation10k/blob/main/Seg10k_050421.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [55]:
#from google.colab import drive
#drive.mount('/content/gdrive')

## Imports

In [4]:
import os 
from pathlib import Path 
import pandas as pd 
from torchvision.io import read_image

import torch 
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from collections import defaultdict 

## Create Dataset

In [38]:
class CustomImageDataset( Dataset ):

    def __init__( self, annotations_file, img_dir, mask_dir,  transform=None,
            target_transform=None ):
        self.img_labels = pd.read_csv( annotations_file ) # the whole dataframe
        self.img_dir = img_dir # directory for the images 
        self.mask_dir = mask_dir # directory for the masks 

        self.transform = transform
        self.target_transform = target_transform

    def __len__( self ):
        return len( self.img_labels )

    def __getitem__( self, idx ):
        img_path = os.path.join( self.img_dir, self.img_labels.iloc[ idx, 1 ] )
        mask_path = os.path.join( self.mask_dir, self.img_labels.iloc[ idx, 1 ]) 

        image = read_image( img_path )
        mask = read_image( mask_path )

        if self.transform:
            image = self.transform( image )
        
        if self.target_transform:
            mask = self.target_transform( mask )
        
        sample = {'image': image, "mask": mask } 
        return sample

In [6]:
def organize_files(): 
    file_dir = 'drive/MyDrive/projects/datasets/comma10k/imgs'
    file_path = Path( file_dir )
    fn_arr = [] 
    
    # loop through files 
    for fn in file_path.iterdir():
        fn_arr.insert( 0, fn.name )
    
    
    df_dic = {'filename': fn_arr, 
            'maskpath': 'drive/MyDrive/projects/datasets/comma10k/masks', 
            'imagepath': 'drive/MyDrive/projects/datasets/comma10k/imgs'}
    
    df = pd.DataFrame( df_dic ) # convert into dataframe
    filename = 'filesData.csv'
    df.to_csv( filename ) 

    return df 

In [7]:
df = organize_files() # create dataframe, save csv  
dframe = pd.read_csv( 'filesData.csv' ) # open csv 

In [39]:
# create a dataset 
dataset = CustomImageDataset('filesData.csv', 
                             'drive/MyDrive/projects/datasets/comma10k/imgs', 
                             'drive/MyDrive/projects/datasets/comma10k/masks' )

dataset

<__main__.CustomImageDataset at 0x7f1356bc0f50>

In [41]:
dataset[3]['image']

tensor([[[ 0,  1,  0,  ..., 10, 24, 24],
         [ 0,  0,  0,  ...,  9, 24, 24],
         [ 0,  0,  0,  ..., 13, 29, 29],
         ...,
         [25, 27, 26,  ..., 37, 32, 38],
         [ 2,  4, 10,  ..., 41, 40, 43],
         [ 1,  2,  6,  ..., 42, 42, 43]],

        [[ 0,  0,  6,  ..., 25, 19, 19],
         [ 0,  0,  4,  ..., 24, 19, 19],
         [ 0,  0,  2,  ..., 21, 18, 18],
         ...,
         [ 0,  0,  5,  ...,  4,  2,  8],
         [ 0,  2, 11,  ...,  0,  0,  1],
         [ 0,  0,  7,  ...,  0,  0,  1]],

        [[11, 19, 19,  ..., 11,  5,  5],
         [10, 16, 17,  ..., 10,  5,  5],
         [10, 14, 13,  ...,  9,  3,  3],
         ...,
         [ 0,  0,  0,  ...,  4,  3,  9],
         [ 0,  0,  3,  ..., 34, 29, 32],
         [ 0,  0,  0,  ..., 35, 31, 32]]], dtype=torch.uint8)

In [56]:
dataset[3]['image'].size()

torch.Size([3, 874, 1164])