## Library imports

In [1]:
import os
import cv2
import sys
import numpy as np
from tqdm import tqdm
sys.path.append('/home/surya/Downloads/cam2bev-data-master/')
from utils import load_image, one_hot_encode_image

## Constants

In [3]:
# Number of samples each split is reduced to.
TRAIN_NUM_TO_KEEP = 8000
VAL_NUM_TO_KEEP = 500

# Dataset root on disk and the per-split directories below it.
DATA_ROOT_DIR = '/home/surya/Downloads/cam2bev-data-master/1_FRLR'
TRAIN_DIR = os.path.join(DATA_ROOT_DIR, 'train')
VAL_DIR = os.path.join(DATA_ROOT_DIR, 'val')

In [4]:
# Palette of the input images: one entry per class, each entry listing every
# colour triplet that is collapsed into that class (10 classes in total).
inputPalette = [
    [np.array(color) for color in classColors]
    for classColors in (
        [(128, 64, 128)],                                      # road
        [(244, 35, 232), (250, 170, 160)],                     # sidewalk
        [(255, 0, 0)],                                         # person
        [(0, 0, 142), (0, 0, 110)],                            # car
        [(0, 0, 70)],                                          # truck
        [(0, 60, 100), (0, 0, 90)],                            # bus
        [(220, 20, 60), (0, 0, 230), (119, 11, 32)],           # two-wheelers
        [(0, 0, 0), (111, 74, 0), (81, 0, 81),                 # static obstacles
         (230, 150, 140), (70, 70, 70), (102, 102, 156),
         (190, 153, 153), (180, 165, 180), (150, 100, 100),
         # (153, 153, 153) is listed twice, exactly as in the original table
         (150, 120, 90), (153, 153, 153), (153, 153, 153),
         (250, 170, 30), (220, 220, 0), (0, 80, 100)],
        [(107, 142, 35), (152, 251, 152)],                     # vegetation
        [(70, 130, 180)],                                      # sky
    )
]


# Palette of the label images. Compared with the input palette, the sky colour
# is folded into the static-obstacle class and occlusion becomes the 10th class.
outputPalette = [
    [np.array(color) for color in classColors]
    for classColors in (
        [(128, 64, 128)],                                      # road
        [(244, 35, 232), (250, 170, 160)],                     # sidewalk
        [(255, 0, 0)],                                         # person
        [(0, 0, 142), (0, 0, 110)],                            # car
        [(0, 0, 70)],                                          # truck
        [(0, 60, 100), (0, 0, 90)],                            # bus
        [(220, 20, 60), (0, 0, 230), (119, 11, 32)],           # two-wheelers
        [(0, 0, 0), (111, 74, 0), (81, 0, 81),                 # static obstacles
         (230, 150, 140), (70, 70, 70), (102, 102, 156),
         (190, 153, 153), (180, 165, 180), (150, 100, 100),
         (150, 120, 90), (153, 153, 153), (153, 153, 153),
         (250, 170, 30), (220, 220, 0), (0, 80, 100),
         (70, 130, 180)],                                      # ... including sky
        [(107, 142, 35), (152, 251, 152)],                     # vegetation
        [(150, 150, 150)],                                     # OCCLUSION CLASS
    )
]

## Helper functions

In [5]:
def getFilesInDir(directory):
    """Return the names of all entries of ``directory`` in ascending sorted order."""
    entries = os.listdir(directory)
    entries.sort()
    return entries

def fileNamesWithoutExtension(files):
    """Strip the final extension from every file name in ``files``.

    Uses ``os.path.splitext`` so that names without any dot are returned
    unchanged (instead of raising ``IndexError``) and names containing several
    dots keep everything before the last extension (``'a.b.png'`` -> ``'a.b'``).

    Args:
        files: iterable of file names.

    Returns:
        List of names with the last extension removed, in the input order.
    """
    return [os.path.splitext(name)[0] for name in files]

def checkFoldersContainSameFiles(folders):
    """Check that every folder holds the same set of file names (ignoring extensions).

    The first folder is the reference. Each other folder must contain the same
    number of entries and exactly the same extension-less names. The name sets
    are compared for equality: a one-directional difference would miss a
    missing reference name whenever two files in a folder share a stem
    (e.g. ``a.png`` next to ``a.npy``).

    Args:
        folders: non-empty sequence of directory paths.

    Returns:
        True when all folders match the reference, False at the first mismatch
        (a message naming the offending folder is printed).
    """
    assert len(folders) > 0
    refFiles = getFilesInDir(folders[0])
    refNames = set(fileNamesWithoutExtension(refFiles))
    numRefFiles = len(refFiles)

    for folder in folders[1:]:
        files = getFilesInDir(folder)
        if len(files) != numRefFiles:
            print(f"{folder} contains {len(files)} files, while numRefFiles = {numRefFiles}")
            return False
        if set(fileNamesWithoutExtension(files)) != refNames:
            print(f"{folder} file names mismatch")
            return False

    return True

def getRandomIndices(size, numToKeep):
    """Draw ``numToKeep`` distinct indices from ``range(size)`` using NumPy's global RNG."""
    return np.random.choice(size, size=numToKeep, replace=False)

def filterListByIndices(data, indices):
    """Return the elements of ``data`` found at ``indices``, in the order of ``indices``."""
    selected = []
    for idx in indices:
        selected.append(data[idx])
    return selected

def deleteFile(filePath):
    """Remove ``filePath`` from disk; do nothing when the path does not exist."""
    if not os.path.exists(filePath):
        return
    os.remove(filePath)

def getLastIndices(size, numToDelete):
    """Return the indices of the last ``numToDelete`` positions of a sequence of length ``size``.

    This function used to be defined twice in a row with different parameter
    names; only the second definition was ever effective, so its signature is
    the one kept here.

    Args:
        size: length of the sequence being indexed.
        numToDelete: how many trailing positions to select. Values <= 0 give
            an empty result.

    Returns:
        ``np.ndarray`` holding ``size - numToDelete, ..., size - 1``.

    Raises:
        ValueError: if ``numToDelete`` exceeds ``size``. The start index would
            become negative, and negative indices wrap around when used on a
            list, silently selecting the wrong elements.
    """
    if numToDelete > size:
        raise ValueError(
            f"cannot select the last {numToDelete} indices of a sequence of length {size}")
    return np.arange(size - numToDelete, size)

def reduceDataset(inputDir, numberToKeep, reduceType='random'):
    """Delete files so that ``numberToKeep`` remain in every sub-folder of ``inputDir``.

    The file list of the first sub-folder (in sorted order) is the reference.
    The same file names are then deleted from every sub-folder, so samples
    stay aligned across folders. This is destructive: files are removed from
    disk.

    Args:
        inputDir: directory whose immediate sub-directories hold the dataset.
        numberToKeep: number of files to leave in the reference folder. If the
            folder already holds this many files or fewer, nothing is deleted.
        reduceType: ``'random'`` deletes a random subset; any other value
            deletes the last files of the sorted listing.
    """
    datasetFolders = sorted(
        os.path.join(inputDir, name) for name in os.listdir(inputDir)
        if os.path.isdir(os.path.join(inputDir, name))
    )
    if not datasetFolders:
        # Previously this raised IndexError on datasetFolders[0].
        print(f"{inputDir} contains no sub-folders, nothing to reduce")
        return

    refFiles = getFilesInDir(datasetFolders[0])
    numRefFiles = len(refFiles)

    # Clamp at zero: a negative count made the random branch raise when the
    # dataset was already smaller than numberToKeep (e.g. on a re-run).
    numToDelete = max(numRefFiles - numberToKeep, 0)

    # choose the indices of the files to delete
    if reduceType == 'random':
        indicesToDelete = getRandomIndices(numRefFiles, numToDelete)
    else:
        indicesToDelete = getLastIndices(numRefFiles, numToDelete)

    filesToBeDeleted = filterListByIndices(refFiles, indicesToDelete)

    # delete extra files
    for folder in datasetFolders:
        print(folder)
        for file in tqdm(filesToBeDeleted):
            deleteFile(os.path.join(folder, file))

    # check folders for files (a mismatch is reported via print by the checker)
    checkFoldersContainSameFiles(datasetFolders)
    

def resizeDataset(inputDir, newWidth, newHeight, interpolation=None):
    """Resize every image in every sub-folder of ``inputDir`` in place.

    Each file is read with ``cv2.imread``, resized to ``(newWidth, newHeight)``
    and written back to the same path, overwriting the original.

    Args:
        inputDir: directory whose immediate sub-directories hold the images.
        newWidth: target width in pixels.
        newHeight: target height in pixels.
        interpolation: OpenCV interpolation flag. Defaults to
            ``cv2.INTER_NEAREST``.

    NOTE(review): the images handled in this notebook appear to be
    colour-coded class maps that are later matched against fixed palettes.
    Cubic interpolation blends neighbouring colours into values that belong to
    no palette entry, so nearest-neighbour is the default. Pass
    ``cv2.INTER_CUBIC`` explicitly to get the previous behaviour.
    """
    if interpolation is None:
        interpolation = cv2.INTER_NEAREST

    datasetFolders = [os.path.join(inputDir, name) for name in os.listdir(inputDir)
                      if os.path.isdir(os.path.join(inputDir, name))]

    for folder in datasetFolders:
        print(folder)
        for file in tqdm(getFilesInDir(folder)):
            absFilePath = os.path.join(folder, file)
            image = cv2.imread(absFilePath)
            if image is None:
                # cv2.imread returns None for unreadable / non-image files;
                # passing that to cv2.resize would abort the whole run.
                tqdm.write(f"skipping {absFilePath}: not a readable image")
                continue
            image = cv2.resize(image, (newWidth, newHeight), interpolation=interpolation)
            cv2.imwrite(absFilePath, image)

In [6]:
# Destructive step: permanently deletes the surplus files in every sub-folder of
# TRAIN_DIR, keeping TRAIN_NUM_TO_KEEP randomly chosen samples.
reduceDataset(TRAIN_DIR, numberToKeep=TRAIN_NUM_TO_KEEP)
# Validation-split counterpart (currently disabled); it would drop the last files
# of the sorted listing instead of a random subset.
# reduceDataset(VAL_DIR, numberToKeep=VAL_NUM_TO_KEEP, reduceType='last')

/home/surya/Downloads/cam2bev-data-master/1_FRLR/train/left


100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 16387.23it/s]


/home/surya/Downloads/cam2bev-data-master/1_FRLR/train/right


100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 16212.16it/s]


/home/surya/Downloads/cam2bev-data-master/1_FRLR/train/front


100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 15472.48it/s]


/home/surya/Downloads/cam2bev-data-master/1_FRLR/train/rear


100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 15377.33it/s]


/home/surya/Downloads/cam2bev-data-master/1_FRLR/train/bev+occlusion


100%|████████████████████████████████████| 2000/2000 [00:00<00:00, 23149.61it/s]


In [None]:
# Overwrites every image of both splits in place with a 512x256 (width x height) version.
resizeDataset(TRAIN_DIR, newWidth=512, newHeight=256)
resizeDataset(VAL_DIR, newWidth=512, newHeight=256)

In [None]:
def replaceWithOhEncoding(inputDir, labelDir, inputColorMap, outputColorMap):
    """Write a boolean one-hot ``.npy`` array next to every image of the dataset.

    Every sub-folder of ``inputDir`` is processed. Folders whose name contains
    ``labelDir`` are encoded with ``outputColorMap``, all others with
    ``inputColorMap``. The source images are left on disk.

    Args:
        inputDir: directory whose immediate sub-directories hold the images.
        labelDir: name (or part of the name) of the label sub-folder.
        inputColorMap: palette handed to ``one_hot_encode_image`` for input folders.
        outputColorMap: palette handed to ``one_hot_encode_image`` for label folders.
    """
    datasetFolders = [os.path.join(inputDir, name) for name in os.listdir(inputDir)
                      if os.path.isdir(os.path.join(inputDir, name))]

    for folder in datasetFolders:
        print(folder)
        # Match on the folder's own name only; testing the whole path would
        # treat every folder as a label folder whenever a parent directory
        # happens to contain ``labelDir``.
        if labelDir in os.path.basename(folder):
            colorMap = outputColorMap
            print('outputColorMap')
        else:
            colorMap = inputColorMap
            print('inputColorMap')

        for file in tqdm(getFilesInDir(folder)):
            if file.lower().endswith('.npy'):
                # Output of an earlier run, not an image to encode.
                continue
            absFilePath = os.path.join(folder, file)
            image = load_image(absFilePath)
            image = one_hot_encode_image(image, colorMap).astype(np.bool_)
            # Swap only the final extension; str.replace('png', 'npy') would
            # also rewrite any 'png' occurring in directory or file names.
            np.save(os.path.splitext(absFilePath)[0] + '.npy', image)

In [None]:
# One-hot encode the training split: 'bev+occlusion' uses the output palette,
# every other sub-folder the input palette.
replaceWithOhEncoding(TRAIN_DIR, labelDir='bev+occlusion', 
                      inputColorMap=inputPalette, outputColorMap=outputPalette)

In [None]:
# Same one-hot encoding for the validation split.
replaceWithOhEncoding(VAL_DIR, labelDir='bev+occlusion',
                      inputColorMap=inputPalette, outputColorMap=outputPalette)

In [None]:
def get_class_distribution(folder, palette):
    """Compute the mean per-image share of pixels assigned to each class.

    Every non-hidden file in ``folder`` is loaded with ``load_image``, one-hot
    encoded against ``palette`` and reduced to a class-index map with
    ``np.argmax``. The per-image class fractions are averaged incrementally
    over all files.

    The pixel count is taken from each image itself rather than a fixed
    ``256 * 512``, so the fractions are correct for any image size.

    NOTE(review): ``np.argmax`` returns index 0 when all entries along the
    class axis are equal. If ``one_hot_encode_image`` leaves unmatched pixels
    all-zero, they are counted as class 0 - confirm against that helper.

    Args:
        folder: directory containing the images.
        palette: list of per-class colour lists; its length is the class count.

    Returns:
        Dict mapping ``str(class_index)`` to the mean fraction of pixels of
        that class over all images (all zeros when the folder is empty).
    """
    # get filepaths
    files = [os.path.join(folder, f) for f in os.listdir(folder) if not f.startswith(".")]
    n_classes = len(palette)

    distribution = {str(k): 0 for k in range(n_classes)}

    bar = tqdm(files)
    for i, f in enumerate(bar):
        one_hot = one_hot_encode_image(load_image(f), palette)
        classes = np.argmax(one_hot, axis=-1)
        px = classes.size  # number of pixels of this particular image
        unique, counts = np.unique(classes, return_counts=True)
        occs = dict(zip(unique, counts))

        for k in range(n_classes):
            occ = occs.get(k, 0)
            # running mean over the i images seen so far plus the current one
            distribution[str(k)] = (distribution[str(k)] * i + occ / px) / (i + 1)

        bar.set_postfix(distribution)
    return distribution

In [None]:
# get_class_distribution(os.path.join(TRAIN_DIR, *['bev+occlusion']), outputPalette)