In [12]:
import os
import json
import pandas as pd
import torch

from torchvision.io.image import read_image
from torch.utils.data import Dataset, DataLoader, random_split

In [13]:
# Root of the dataset and its two sub-directories.
source_path = './yolo_format/'
images_dir, labels_dir = (
    os.path.join(source_path, subdir) for subdir in ('images', 'labels')
)
print(images_dir)

# Collect the label file names, leaving out dot-prefixed entries.
label_files = [
    entry for entry in os.listdir(labels_dir) if not entry.startswith('.')
]

print(len(label_files))
print(type(label_files))
label_files[:5]

./yolo_format/images
4306
<class 'list'>


['frame_004065.txt',
 'frame_002414.txt',
 'frame_000203.txt',
 'frame_000565.txt',
 'frame_002372.txt']

In [18]:
source_path, labels_dir

('./yolo_format/', './yolo_format/labels')

In [27]:
# Target structure of the label records built over the next cells: one
# dictionary per image, where every label column holds a list with one entry
# per labelled object of that image, e.g.
#
# labels = [{'Image': 'frame_000000.jpg',
#   'Class': [0, 0, 2, 2, 1, 1],
#   'xc': [0.0143, 0.0467, 0.1238, 0.6733, 0.616, 0.9267],
#   'yc': [0.2799, 0.3797, 0.4332, 0.5037, 0.531, 0.7879999999999999],
#   'width': [0.0283, 0.0409, 0.0476, 0.0488, 0.0522, 0.0589],
#   'height': [0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]},
#  {'Image': 'frame_000001.jpg',
#   'Class': [0, 0, 2, 2, 1, 1],
#   'xc': [0.0143, 0.0467, 0.1277, 0.6719, 0.6158, 0.9285],
#   'yc': [0.2798, 0.3798, 0.4338, 0.5034, 0.531, 0.7891],
#   'width': [0.0283, 0.0408, 0.0476, 0.0488, 0.0522, 0.0589],
#   'height': [0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]}]

# One dataframe per label file; each row of a file is one labelled object.
df_lst = []

for lf in label_files:
    file = pd.read_csv(os.path.join(labels_dir, lf), header=None, sep=' ')
    # Swap only the trailing extension: str.replace('.txt', '.jpg') would
    # rewrite every '.txt' occurrence inside the file name.
    file['Image'] = os.path.splitext(lf)[0] + '.jpg'
    df_lst.append(file)

# Stack all files into one long table, name the columns and put the image
# name first.
lf_df = pd.concat(df_lst, axis=0, ignore_index=True)
lf_df.columns = ['Class', 'xc', 'yc', 'width', 'height', 'Image']
lf_df = lf_df[['Image', 'Class', 'xc', 'yc', 'width', 'height']]
lf_df.shape

(24617, 6)

In [35]:
lf_df.head(7)

Unnamed: 0,Image,Class,xc,yc,width,height
0,frame_004065.jpg,0,0.6524,0.2217,0.0509,0.0561
1,frame_004065.jpg,2,0.6948,0.3422,0.0478,0.0633
2,frame_004065.jpg,2,0.512,0.4533,0.0458,0.0524
3,frame_004065.jpg,0,0.5174,0.7258,0.0397,0.0506
4,frame_004065.jpg,1,0.48,0.5817,0.0549,0.0416
5,frame_004065.jpg,1,0.3274,0.782,0.0559,0.0452
6,frame_002414.jpg,0,0.2662,0.1213,0.0489,0.0597


* **Group the rows by `Image` (the groups come out sorted by image name) and aggregate the values of every other column into one list per image**

In [31]:
# Collapse the per-object rows into one row per image: every remaining
# column becomes the list of that image's values.
df1 = (
    lf_df
    .groupby('Image')
    .agg(lambda values: list(values))
    .reset_index()
)
print(df1.shape)
df1.head(2)

(4306, 6)


Unnamed: 0,Image,Class,xc,yc,width,height
0,frame_000000.jpg,"[0, 0, 2, 2, 1, 1]","[0.0143, 0.0467, 0.1238, 0.6733, 0.616, 0.9267]","[0.2799, 0.3797, 0.4332, 0.5037, 0.531, 0.788]","[0.0283, 0.0409, 0.0476, 0.0488, 0.0522, 0.0589]","[0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]"
1,frame_000001.jpg,"[0, 0, 2, 2, 1, 1]","[0.0143, 0.0467, 0.1277, 0.6719, 0.6158, 0.9285]","[0.2798, 0.3798, 0.4338, 0.5034, 0.531, 0.7891]","[0.0283, 0.0408, 0.0476, 0.0488, 0.0522, 0.0589]","[0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]"


* **Create a list of dictionaries for all records**

In [38]:
# One dictionary per dataframe row, keyed by column name.
df1_list = df1.to_dict(orient='records')
df1_list[:2]

[{'Image': 'frame_000000.jpg',
  'Class': [0, 0, 2, 2, 1, 1],
  'xc': [0.0143, 0.0467, 0.1238, 0.6733, 0.616, 0.9267],
  'yc': [0.2799, 0.3797, 0.4332, 0.5037, 0.531, 0.788],
  'width': [0.0283, 0.0409, 0.0476, 0.0488, 0.0522, 0.0589],
  'height': [0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]},
 {'Image': 'frame_000001.jpg',
  'Class': [0, 0, 2, 2, 1, 1],
  'xc': [0.0143, 0.0467, 0.1277, 0.6719, 0.6158, 0.9285],
  'yc': [0.2798, 0.3798, 0.4338, 0.5034, 0.531, 0.7891],
  'width': [0.0283, 0.0408, 0.0476, 0.0488, 0.0522, 0.0589],
  'height': [0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]}]

*Let's save the records in a JSON file, as a list with one record per image in sequential order*

In [39]:
# Serialise the list of records straight into the file.
with open('labels.json', 'w') as f:
    json.dump(df1_list, f)

*Let's read back the json file*

In [41]:
# Read the file back and parse it into a list of record dictionaries
with open('labels.json', 'r') as f:
    data = json.loads(f.read())
data[:2]

[{'Image': 'frame_000000.jpg',
  'Class': [0, 0, 2, 2, 1, 1],
  'xc': [0.0143, 0.0467, 0.1238, 0.6733, 0.616, 0.9267],
  'yc': [0.2799, 0.3797, 0.4332, 0.5037, 0.531, 0.788],
  'width': [0.0283, 0.0409, 0.0476, 0.0488, 0.0522, 0.0589],
  'height': [0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]},
 {'Image': 'frame_000001.jpg',
  'Class': [0, 0, 2, 2, 1, 1],
  'xc': [0.0143, 0.0467, 0.1277, 0.6719, 0.6158, 0.9285],
  'yc': [0.2798, 0.3798, 0.4338, 0.5034, 0.531, 0.7891],
  'width': [0.0283, 0.0408, 0.0476, 0.0488, 0.0522, 0.0589],
  'height': [0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]}]

*Alternatively:*

In [44]:
# pandas parses the same file directly into a dataframe, one row per record.
df = pd.read_json('labels.json')
df.head(n=2)

Unnamed: 0,Image,Class,xc,yc,width,height
0,frame_000000.jpg,"[0, 0, 2, 2, 1, 1]","[0.0143, 0.046700000000000005, 0.1238000000000...","[0.27990000000000004, 0.37970000000000004, 0.4...","[0.028300000000000002, 0.0409, 0.0476, 0.0488,...","[0.0504, 0.056400000000000006, 0.0625, 0.0585,..."
1,frame_000001.jpg,"[0, 0, 2, 2, 1, 1]","[0.0143, 0.046700000000000005, 0.1277, 0.6719,...","[0.2798, 0.3798, 0.4338, 0.5034000000000001, 0...","[0.028300000000000002, 0.0408, 0.0476, 0.0488,...","[0.0504, 0.056400000000000006, 0.0625, 0.0585,..."


In [65]:
torch.LongTensor(df.loc[0, 'Class'])

tensor([0, 0, 2, 2, 1, 1])

In [66]:
class LabelDataset(Dataset):
    """Dataset pairing each image file with the label lists of a JSON records file.

    Every sample is the tuple ``(image, classes, xcs, ycs, widths, heights)``.
    The five label tensors are built from the lists stored for that image, so
    their length can differ from one sample to the next.

    Args:
        img_dir: Directory that contains the image files.
        json_file: File read with ``pd.read_json``; it must provide the
            columns 'Image', 'Class', 'xc', 'yc', 'width' and 'height'.
        transform: Optional callable applied to the loaded image.
    """

    def __init__(self, img_dir, json_file, transform=None):
        self.img_labels = pd.read_json(json_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of records in the label table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the image and the label tensors of the record at position ``idx``.

        Returns:
            Tuple ``(image, classes, xcs, ycs, widths, heights)`` where
            ``classes`` is a LongTensor and the other labels are FloatTensors.

        Raises:
            IndexError: If ``idx`` is outside the table.
        """
        # Positional lookup, done once per sample. Unlike label-based .loc it
        # raises IndexError (not KeyError) past the end, which is what plain
        # `for sample in dataset` iteration relies on to stop.
        record = self.img_labels.iloc[idx]

        image = read_image(os.path.join(self.img_dir, record['Image']))
        classes = torch.LongTensor(record['Class'])
        xcs = torch.FloatTensor(record['xc'])
        ycs = torch.FloatTensor(record['yc'])
        widths = torch.FloatTensor(record['width'])
        heights = torch.FloatTensor(record['height'])

        if self.transform is not None:
            image = self.transform(image)
        return image, classes, xcs, ycs, widths, heights

In [75]:
def collate_fn(batch):
    """
    Merge dataset samples into one batch.

    batch : list, one entry per sample, of 6-tuples
    (image, classes, xcs, ycs, widths, heights).

    Returns a 6-tuple: the images stacked along a new leading dimension,
    followed by five lists that hold each sample's label tensors as they are
    (they are not stacked).
    """
    # Transpose the batch: fields[k] gathers element k of every sample.
    fields = [[sample[pos] for sample in batch] for pos in range(6)]

    stacked_images = torch.stack(fields[0], dim=0)

    return (stacked_images, *fields[1:])

In [73]:
# Build the dataset and look at its size and first sample.
data = LabelDataset(images_dir, 'labels.json')
print(len(data))

print(data[0])

4306
(tensor([[[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]]], dtype=torch.uint8), tensor([0, 0, 2, 2, 1, 1]), tensor([0.0143, 0.0467, 0.1238, 0.6733, 0.6160, 0.9267]), tensor([0.2799, 0.3797, 0.4332, 0.5037, 0.5310, 0.7880]), tensor([0.0283, 0.0409, 0.0476, 0.0488, 0.0522, 0.0589]), tensor([0.0504, 0.0564, 0.0625, 0.0585, 0.0605, 0.0625]))


In [76]:
type(data)

__main__.LabelDataset

In [77]:
# 80/20 train/test split; the test part takes whatever the rounding leaves over.
train_size = int(0.8 * len(data))
test_size = len(data) - train_size
# Seed the split so a re-run of the notebook yields the same partition.
split_generator = torch.Generator().manual_seed(42)
train_data, test_data = random_split(data, [train_size, test_size], generator=split_generator)

train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True, collate_fn=collate_fn)
# The test batches are served in a fixed order; only training needs reshuffling.
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False, collate_fn=collate_fn)