In [1]:
import os
import pandas as pd

# Separating Datasets

This section entails separating the dataset into training and test datasets

In [2]:
base_path = '../../' # file is in the scripts/preprocessing folder
source = 'annotations/lisa-traffic-light-dataset.csv'
source = os.path.join(base_path, source)

In [3]:
df = pd.read_csv(source)
df.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,dayTest/daySequence1--00000.jpg,stop,706,478,718,500,1280,960
1,dayTest/daySequence1--00001.jpg,stop,705,475,720,497,1280,960
2,dayTest/daySequence1--00002.jpg,stop,707,476,719,494,1280,960
3,dayTest/daySequence1--00005.jpg,stop,708,474,720,492,1280,960
4,dayTest/daySequence1--00006.jpg,stop,707,470,722,492,1280,960


In [4]:
# Cleaning the filename to include only the file name and not the path
df['filename'] = df['filename'].apply(lambda x: x[8:])

In [5]:
# Creating a dataframe with 90%
# values of original dataframe
train = df.sample(frac = 0.9).reset_index(drop=True)
  
# Creating dataframe with 
# rest of the 10% values
test = df.drop(train.index).reset_index(drop=True)

In [6]:
train_dest = os.path.join(base_path, 'annotations/train.csv')
train.to_csv(train_dest, index=False)

test_dest = os.path.join(base_path, 'annotations/test.csv')
test.to_csv(test_dest, index=False)

# Separating the Images

Using the separated datasets, move the respective images into their relevant folders.

In [7]:
train.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,daySequence1--02603.jpg,go,883,0,946,95,1280,960
1,daySequence1--01080.jpg,go,607,373,625,403,1280,960
2,daySequence1--01093.jpg,go,743,425,761,455,1280,960
3,daySequence1--02197.jpg,stop,538,129,580,187,1280,960
4,daySequence1--02843.jpg,go,534,47,594,147,1280,960


In [8]:
train = pd.read_csv(os.path.join(base_path, 'annotations/train.csv'))
test = pd.read_csv(os.path.join(base_path, 'annotations/test.csv'))

In [9]:
train.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,daySequence1--02603.jpg,go,883,0,946,95,1280,960
1,daySequence1--01080.jpg,go,607,373,625,403,1280,960
2,daySequence1--01093.jpg,go,743,425,761,455,1280,960
3,daySequence1--02197.jpg,stop,538,129,580,187,1280,960
4,daySequence1--02843.jpg,go,534,47,594,147,1280,960


In [10]:
test.head()

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,height,width
0,daySequence1--03717.jpg,go,453,446,468,466,1280,960
1,daySequence1--03718.jpg,go,453,442,468,467,1280,960
2,daySequence1--03719.jpg,go,453,442,468,467,1280,960
3,daySequence1--03720.jpg,go,453,442,468,467,1280,960
4,daySequence1--03721.jpg,go,453,442,468,467,1280,960


In [11]:
from shutil import copyfile

In [12]:
train_img_dest = os.path.join(base_path, 'images/train/')
for filename in train['filename']:
    src = os.path.join(base_path, 'images/frames/', filename)
    dst = os.path.join(train_img_dest, filename)
    copyfile(src, dst)

In [13]:
test_img_dest = os.path.join(base_path, 'images/test/')
for filename in test['filename']:
    src = os.path.join(base_path, 'images/frames/', filename)
    dst = os.path.join(test_img_dest, filename)
    copyfile(src, dst)

# Creating Label Map
The label map maps all the classes to integer values. This will be saved to a .pbtxt file for easy use in Tensorflow.

In [15]:
train['class'].unique()



In [25]:
label_map = ''

for i, item in enumerate(train['class'].unique()):
    text = 'item {\n\t' + 'id: ' + str(i+1) + \
                 '\n\tname: ' + str(item) + \
                 '\n}\n\n'
    label_map += text

print(label_map)

item {
	id: 1
	name: go
}

item {
	id: 2
	name: stop
}

item {
	id: 3
}

item {
	id: 4
}

item {
	id: 5
	name: stopLeft
}




In [26]:
# Writing to .pbtxt file
label_dest = os.path.join(base_path, 'annotations/label_map.pbtxt')
with open(label_dest, "w") as text_file:
    print(label_map, file=text_file)