In [1]:
#!pip install opencv-python 

## dataset link
- https://www.kaggle.com/mbornoe/lisa-traffic-light-dataset

## annotation explanation
- The dataset ships two annotation files. The first, `frameAnnotationsBOX`, covers the entire traffic light (TL) area and records which state the TL is in; it is generated from the second annotation file by enlarging all annotations larger than 4x4. The second, `frameAnnotationsBULB`, marks only the area of the traffic light that is lit and records which state it is in.

In [2]:
# Libraries 
import pandas as pd 
import os
import shutil

## REGROUP FRAMES: DAY TRAIN, NIGHT TRAIN, DAY TEST, NIGHT TEST

In [3]:
# create train, test and annotation directories
# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: os.mkdir raises
# FileExistsError on a second run and FileNotFoundError when ../data is missing
for new_dir in ('../data/train', '../data/test', '../data/annotation'):
    os.makedirs(new_dir, exist_ok=True)

## copy annotated images into the train folder

In [4]:
# ---- day training frames ----
# every path ends with '/', so an image file name can be appended directly
base_day_dir = '../data/lisa-traffic-light-dataset/dayTrain/dayTrain/'
# clip folders dayClip1/ ... dayClip13/
folder_day_list = ['dayClip{}/'.format(clip_no) for clip_no in range(1, 14)]
# one frames/ directory per day clip, in the same order as folder_day_list
img_day_dir = [base_day_dir + clip_folder + 'frames/' for clip_folder in folder_day_list]


# ---- night training frames ----
base_night_dir = '../data/lisa-traffic-light-dataset/nightTrain/nightTrain/'
# clip folders nightClip1/ ... nightClip5/
folder_night_list = ['nightClip{}/'.format(clip_no) for clip_no in range(1, 6)]
# one frames/ directory per night clip, in the same order as folder_night_list
img_night_dir = [base_night_dir + clip_folder + 'frames/' for clip_folder in folder_night_list]


# ---- annotation CSVs ----
# names of the two annotation files found in every clip folder
file_list = ['frameAnnotationsBOX.csv', 'frameAnnotationsBULB.csv']

# annotation root for the day training clips
base_image_dir_daytrain = '../data/lisa-traffic-light-dataset/Annotations/Annotations/dayTrain/'
# annotation root for the night training clips
base_image_dir_nighttrain = '../data/lisa-traffic-light-dataset/Annotations/Annotations/nightTrain/'


# get image filename.jpg list from Annotation csv
def get_train_file(path, folder_name, file):
    '''
        Return the image file names listed in one training clip's annotation csv.

        path = annotation train day, annotation train night (must end with '/')
        folder_name = dayClip{}/, nightClip{}/ (must end with '/')
        file = file_list (use BOX)

        Returns a list with one bare file name (directory part removed) per
        annotation row, in csv order. Duplicates are kept; callers de-duplicate.
    '''
    # ../dayTrain/ + dayClip{}/ + annotation csv
    file_dir = path + folder_name + file
    df = pd.read_csv(file_dir, delimiter = ';')

    # keep only what follows the last '/': unlike split('/')[1] this also works
    # for a bare file name (no '/') and for paths nested more than one level.
    # Iterating the column values avoids relying on the frame's index labels.
    return [name.rsplit('/', 1)[-1] for name in df['Filename']]


# copy every annotated day frame into the train folder, one clip at a time
for clip_folder, frame_dir in zip(folder_day_list, img_day_dir):

    # file names referenced by this clip's BOX annotations (one entry per row)
    annotated = get_train_file(base_image_dir_daytrain, clip_folder, file_list[0])

    # several annotation rows can point at the same frame; the set makes sure
    # each file is copied only once
    for frame_name in set(annotated):
        shutil.copyfile(
            os.path.join(frame_dir, frame_name),
            os.path.join('../data/train', frame_name),
        )


# copy every annotated night frame into the train folder, one clip at a time
for clip_folder, frame_dir in zip(folder_night_list, img_night_dir):

    # file names referenced by this clip's BOX annotations (one entry per row)
    annotated = get_train_file(base_image_dir_nighttrain, clip_folder, file_list[0])

    # several annotation rows can point at the same frame; the set makes sure
    # each file is copied only once
    for frame_name in set(annotated):
        shutil.copyfile(
            os.path.join(frame_dir, frame_name),
            os.path.join('../data/train', frame_name),
        )


## copy annotated images into the test folder

In [6]:
# ---- day test sequences (daySequence1, daySequence2) ----
# every path ends with '/', so an image file name can be appended directly
base_day_dir = ['../data/lisa-traffic-light-dataset/daySequence{0}/daySequence{0}/'.format(seq_no) for seq_no in (1, 2)]
# frames/ directory of each day sequence, same order as base_day_dir
img_day_dir = [seq_dir + 'frames/' for seq_dir in base_day_dir]


# ---- night test sequences (nightSequence1, nightSequence2) ----
base_night_dir = ['../data/lisa-traffic-light-dataset/nightSequence{0}/nightSequence{0}/'.format(seq_no) for seq_no in (1, 2)]
# frames/ directory of each night sequence, same order as base_night_dir
img_night_dir = [seq_dir + 'frames/' for seq_dir in base_night_dir]


# ---- annotation CSVs ----
# names of the two annotation files found in every sequence folder
file_list = ['frameAnnotationsBOX.csv', 'frameAnnotationsBULB.csv']
base_image_dir_daytest = ['../data/lisa-traffic-light-dataset/Annotations/Annotations/daySequence{}/'.format(seq_no) for seq_no in (1, 2)]
base_image_dir_nighttest = ['../data/lisa-traffic-light-dataset/Annotations/Annotations/nightSequence{}/'.format(seq_no) for seq_no in (1, 2)]

# get image filename.jpg list from annotation csv
def get_test_file(path, file):
    '''
        Return the image file names listed in one test sequence's annotation csv.

        path = entry of base_image_dir_daytest, base_image_dir_nighttest (must end with '/')
        file = file_list (START WITH BOX)

        Returns a list with one bare file name (directory part removed) per
        annotation row, in csv order. Duplicates are kept; callers de-duplicate.
    '''
    file_dir = path + file
    df = pd.read_csv(file_dir, delimiter = ';')

    # keep only what follows the last '/': unlike split('/')[1] this also works
    # for a bare file name (no '/') and for paths nested more than one level.
    # Iterating the column values avoids relying on the frame's index labels.
    return [name.rsplit('/', 1)[-1] for name in df['Filename']]



# copy every annotated day frame into the test folder, one sequence at a time
for annotation_dir, frame_dir in zip(base_image_dir_daytest, img_day_dir):

    # file names referenced by this sequence's BOX annotations (one entry per row)
    annotated = get_test_file(annotation_dir, file_list[0])

    # several annotation rows can point at the same frame; the set makes sure
    # each file is copied only once
    for frame_name in set(annotated):
        shutil.copyfile(
            os.path.join(frame_dir, frame_name),
            os.path.join('../data/test', frame_name),
        )
 
    
# copy every annotated night frame into the test folder, one sequence at a time
for annotation_dir, frame_dir in zip(base_image_dir_nighttest, img_night_dir):

    # file names referenced by this sequence's BOX annotations (one entry per row)
    annotated = get_test_file(annotation_dir, file_list[0])

    # several annotation rows can point at the same frame; the set makes sure
    # each file is copied only once
    for frame_name in set(annotated):
        shutil.copyfile(
            os.path.join(frame_dir, frame_name),
            os.path.join('../data/test', frame_name),
        )


## merge train annotations into a single CSV

In [7]:
file_name = 'frameAnnotationsBOX.csv'
path_day = [f'../data/lisa-traffic-light-dataset/Annotations/Annotations/dayTrain/dayClip{i}/' for i in range(1,14)]
path_night = [f'../data/lisa-traffic-light-dataset/Annotations/Annotations/nightTrain/nightClip{i}/' for i in range(1,6)]

# read each clip's BOX annotations and concatenate them in a single call;
# growing a frame with pd.concat inside a loop re-copies every row read so far
# and starts from an empty DataFrame, which recent pandas versions warn about
df_day = pd.concat([pd.read_csv(clip_dir + file_name, delimiter = ';') for clip_dir in path_day])
df_night = pd.concat([pd.read_csv(clip_dir + file_name, delimiter = ';') for clip_dir in path_night])

# day rows first, then night rows; the per-clip row index is kept, so the
# written csv has the (repeating) index as its first, unnamed column
df_total = pd.concat([df_day, df_night])
df_total.to_csv('../data/annotation/train_annotation.csv')

## merge test annotations into a single CSV

In [8]:
file_name = 'frameAnnotationsBOX.csv'
path_day = [f'../data/lisa-traffic-light-dataset/Annotations/Annotations/daySequence{i}/' for i in [1,2]]
path_night = [f'../data/lisa-traffic-light-dataset/Annotations/Annotations/nightSequence{i}/' for i in [1,2]]

# read each sequence's BOX annotations and concatenate them in a single call;
# growing a frame with pd.concat inside a loop re-copies every row read so far
# and starts from an empty DataFrame, which recent pandas versions warn about
df_day = pd.concat([pd.read_csv(seq_dir + file_name, delimiter = ';') for seq_dir in path_day])
df_night = pd.concat([pd.read_csv(seq_dir + file_name, delimiter = ';') for seq_dir in path_night])

# day rows first, then night rows; the per-sequence row index is kept, so the
# written csv has the (repeating) index as its first, unnamed column
df_total = pd.concat([df_day, df_night])
df_total.to_csv('../data/annotation/test_annotation.csv')

## remove original folder

In [9]:
# WARNING: this irreversibly deletes the raw dataset; the cells that read from
# '../data/lisa-traffic-light-dataset' cannot be re-run afterwards without
# downloading it again
raw_dataset_dir = '../data/lisa-traffic-light-dataset'
# guard so that re-running this cell once the folder is gone is a no-op
# instead of raising FileNotFoundError
if os.path.isdir(raw_dataset_dir):
    shutil.rmtree(raw_dataset_dir)