In [1]:
from tensorflow.python.client import device_lib

# Show every compute device TensorFlow can see in this environment.
local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10376550926337413516
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 145227776
locality {
  bus_id: 1
  links {
  }
}
incarnation: 17768915914651481093
physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7"
]


In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import timeit
import matplotlib.pyplot as plt
from preprocess_func_new import *
from matplotlib.image import imread
import random
import sys
import os
%matplotlib inline


#tf.logging.set_verbosity(tf.logging.INFO)

In [3]:
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [4]:
# Root directory of the stixels dataset, relative to this notebook.
_dataset_parts = ('..', 'datasets', 'stixels')
img_path = os.path.join(*_dataset_parts)
img_path

'../datasets/stixels'

In [5]:
# Number of examples written into each TFRecords file.
batch_size = 16

# Training TFRecords go into a directory named after the batch size.
tfrec_dir_name = 'tfrec_batch_size_' + str(batch_size)
tfrec_train_directory = os.path.join('..', 'datasets', 'stixels', 'train', tfrec_dir_name)
if os.path.exists(tfrec_train_directory):
    print('WARNIING: dir '+tfrec_train_directory+ ' already exists!')

In [6]:
# Create the training TFRecords directory on first use.
train_dir_missing = not os.path.exists(tfrec_train_directory)
if train_dir_missing:
    os.makedirs(tfrec_train_directory)

In [7]:
def add_no_obstacles_stixels(labels_df, percent = 2):
    """Flag a random subset of the "no obstacle" stixels for use.

    Rows whose ``Label`` equals 46 are the ones this function treats as
    "no obstacle" stixels. A random sample of them gets ``Use_stixel`` set
    to 1; all other rows are left as they are.

    Args:
        labels_df: DataFrame with a ``Label`` column. It is modified in place.
        percent: Size of the sample, expressed as a percentage of the number
            of rows whose label is NOT 46 (i.e. relative to the obstacle
            rows, not to the "no obstacle" rows).

    Returns:
        The same ``labels_df`` object that was passed in.
    """
    # Fixed seed so the same rows are selected on every run.
    random.seed(481)

    is_no_obstacle = labels_df['Label'] == 46
    num_stx_with_obst = int((~is_no_obstacle).sum())
    no_obst_idx = labels_df.index[is_no_obstacle].tolist()

    # random.sample raises ValueError when asked for more items than the
    # population holds (e.g. percent=100 with fewer "no obstacle" rows than
    # obstacle rows), so never request more rows than are available.
    num_requested = int(num_stx_with_obst*percent/100)
    num_to_use = min(num_requested, len(no_obst_idx))
    use_idx = random.sample(no_obst_idx, num_to_use)

    # One vectorised assignment instead of a per-row .at loop.
    if use_idx:
        labels_df.loc[use_idx, 'Use_stixel'] = 1
    return labels_df

In [8]:
# Merge all TEST csv files and flag "no obstacle" stixels for use.
test_csv_frames = []
for root, dirs, files in os.walk(os.path.join(img_path,'test')):
    # Do not descend into the summary directory: it holds the merged CSV this
    # notebook writes, which must not be merged back into itself on a re-run.
    dirs[:] = [d for d in dirs if d != 'sum_csv']
    for file in files:
        if '.csv' in file and 'labels_test.csv' not in file:
            # Join with `root` so a CSV is opened in the directory where
            # os.walk actually found it.
            tmp = pd.read_csv(os.path.join(root, file))
            if tmp.isnull().values.any():
                print('Nan in ',file)
            test_csv_frames.append(tmp)

if not test_csv_frames:
    raise RuntimeError('no label csv files found under ' + os.path.join(img_path, 'test'))

# Concatenate once instead of growing the frame with DataFrame.append per file.
labels_test = pd.concat(test_csv_frames, ignore_index=True)

# percent=100 requests as many "no obstacle" rows as there are obstacle rows
# (the percentage is relative to the obstacle count, not to the "no obstacle" count).
labels_test = add_no_obstacles_stixels(labels_df=labels_test, percent = 100)
labels_test = labels_test[labels_test['Use_stixel'] == 1]
labels_test = labels_test.reset_index(drop=True)

In [9]:
# Directory that receives the merged test-label CSV.
sum_csv_test_path = os.path.join(img_path, 'test', 'sum_csv')
if os.path.exists(sum_csv_test_path):
    print('WARNING: dir '+ sum_csv_test_path +' already exists')
else:
    os.makedirs(sum_csv_test_path)
    print('new dir created: ')
    print(sum_csv_test_path)



In [10]:
# Persist the merged test labels as a CSV in the summary directory.
labels_test_csv = os.path.join(sum_csv_test_path, 'labels_test_b16.csv')
labels_test.to_csv(labels_test_csv)

In [11]:
# Merge all VAL csv files and flag a small sample of "no obstacle" stixels.
val_csv_frames = []
for root, dirs, files in os.walk(os.path.join(img_path,'val')):
    # Do not descend into the summary directory: it holds the merged CSV this
    # notebook writes, which must not be merged back into itself on a re-run.
    dirs[:] = [d for d in dirs if d != 'sum_csv']
    for file in files:
        if '.csv' in file and 'labels_val.csv' not in file:
            # Join with `root` so a CSV is opened in the directory where
            # os.walk actually found it.
            tmp = pd.read_csv(os.path.join(root, file))
            if tmp.isnull().values.any():
                print('Nan in ',file)
            val_csv_frames.append(tmp)

if not val_csv_frames:
    raise RuntimeError('no label csv files found under ' + os.path.join(img_path, 'val'))

# Concatenate once instead of growing the frame with DataFrame.append per file.
labels_val = pd.concat(val_csv_frames, ignore_index=True)

# percent=2: the number of "no obstacle" rows requested is 2% of the number
# of obstacle rows.
labels_val = add_no_obstacles_stixels(labels_df=labels_val, percent = 2)
labels_val = labels_val[labels_val['Use_stixel'] == 1]
labels_val = labels_val.reset_index(drop=True)

In [12]:
# Directory that receives the merged validation-label CSV.
sum_csv_val_path = os.path.join(img_path, 'val', 'sum_csv')
if os.path.exists(sum_csv_val_path):
    print('WARNING: dir '+ sum_csv_val_path +' already exists')
else:
    os.makedirs(sum_csv_val_path)
    print('new dir created: ')
    print(sum_csv_val_path)



In [13]:
# Persist the merged validation labels as a CSV in the summary directory.
labels_val_csv = os.path.join(sum_csv_val_path, 'labels_val_b16.csv')
labels_val.to_csv(labels_val_csv)

In [14]:
# Merge all TRAIN csv files and flag a small sample of "no obstacle" stixels.
train_csv_frames = []
for root, dirs, files in os.walk(os.path.join(img_path,'train')):
    # Do not descend into the summary directory: it holds the merged CSV this
    # notebook writes, which must not be merged back into itself on a re-run.
    dirs[:] = [d for d in dirs if d != 'sum_csv']
    for file in files:
        if '.csv' in file and 'labels_train.csv' not in file:
            # Join with `root` so a CSV is opened in the directory where
            # os.walk actually found it.
            tmp = pd.read_csv(os.path.join(root, file))
            if tmp.isnull().values.any():
                print('Nan in ',file)
            train_csv_frames.append(tmp)

if not train_csv_frames:
    raise RuntimeError('no label csv files found under ' + os.path.join(img_path, 'train'))

# Concatenate once instead of growing the frame with DataFrame.append per file.
labels_train = pd.concat(train_csv_frames, ignore_index=True)

# percent=2: the number of "no obstacle" rows requested is 2% of the number
# of obstacle rows.
labels_train = add_no_obstacles_stixels(labels_df=labels_train, percent = 2)
labels_train = labels_train[labels_train['Use_stixel'] == 1]
labels_train = labels_train.reset_index(drop=True)


In [15]:
# Directory that receives the merged training-label CSV; created if missing.
sum_csv_path = os.path.join(img_path, 'train', 'sum_csv')
sum_csv_dir_exists = os.path.exists(sum_csv_path)
if not sum_csv_dir_exists:
    os.makedirs(sum_csv_path)
    print('new dir created: ')
    print(sum_csv_path)
    

In [16]:
# Persist the merged training labels as a CSV in the summary directory.
labels_train_csv = os.path.join(sum_csv_path, 'labels_train_b16.csv')
labels_train.to_csv(labels_train_csv)

In [None]:
# Reload the merged training labels from disk. The file name must be the one
# this notebook writes ('labels_train_b16.csv'); 'labels_train.csv' is never
# created here. to_csv stored the row index as the first column, so restore it
# with index_col=0 instead of picking up a stray 'Unnamed: 0' column.
labels_train = pd.read_csv(os.path.join(sum_csv_path, 'labels_train_b16.csv'), index_col=0)

In [17]:
# Shuffle the row positions with a fixed seed so the batch composition is
# the same on every run.
idxs = list(range(len(labels_train)))
random.seed(481)
random.shuffle(idxs)

# Cut the shuffled positions into consecutive chunks of batch_size; the last
# chunk may be shorter.
batches_idx = []
for start in range(0, len(idxs), batch_size):
    batches_idx.append(idxs[start:start + batch_size])

In [None]:
def print_progress(count, total):
    """Write an in-place progress percentage to stdout.

    Args:
        count: Progress made so far.
        total: Value of ``count`` that corresponds to 100%.

    A ``total`` of 0 is reported as 100% instead of raising
    ZeroDivisionError; this happens when the caller passes
    ``num_images - 1`` for a single image.
    """
    # Percentage completion.
    if total == 0:
        pct_complete = 1.0
    else:
        pct_complete = float(count) / total

    # Status-message.
    # Note the \r which means the line should overwrite itself.
    msg = "\r- Progress: {0:.1%}".format(pct_complete)

    # Print it.
    sys.stdout.write(msg)
    sys.stdout.flush()

In [18]:
def wrap_int64(value):
    """Wrap a single integer in a tf.train.Feature holding a one-element Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

In [19]:
def wrap_bytes(value):
    """Wrap a single bytes value in a tf.train.Feature holding a one-element BytesList."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

In [20]:
def convert(image_paths, labels, out_path):
    """Write images and their class labels to a single TFRecords file.

    Args:
        image_paths: List of file-paths for the images.
        labels: Class-labels for the images, in the same order as image_paths.
        out_path: File-path for the TFRecords output file.

    Each record stores the file's raw bytes, exactly as read from disk, under
    the 'image' key and the label as an int64 under the 'label' key. Paths
    and labels are paired with zip(), so surplus entries in the longer of
    the two sequences are silently ignored.
    """
    # Open a TFRecordWriter for the output-file.
    with tf.python_io.TFRecordWriter(out_path) as writer:

        # Iterate over all the image-paths and class-labels.
        for path, label in zip(image_paths, labels):
            with open(path, 'rb') as f:
                img_raw = f.read()

            # Data to save in the TFRecords file; more fields can be added here.
            data = {
                'image': wrap_bytes(img_raw),
                'label': wrap_int64(label),
            }

            # Wrap the data as TensorFlow Features, then as an Example.
            feature = tf.train.Features(feature=data)
            example = tf.train.Example(features=feature)

            # Serialize the Example and write it to the TFRecords file.
            writer.write(example.SerializeToString())

In [None]:
# Write one TFRecords file per training batch. Each file is named after the
# first row index in its batch, zero-padded to six digits.
train_img_dir = os.path.join(img_path, 'train')
for batch in batches_idx:
    batch_rows = labels_train.loc[batch]
    batch_labels = np.array(batch_rows['Label'])
    batch_image_paths_train = [
        os.path.join(train_img_dir, str(name)+'.png')
        for name in batch_rows['Name']
    ]
    tfrec_name = "train"+str(batch[0]).zfill(6)+".tfrecords"
    batch_path_tfrecords_train = os.path.join(tfrec_train_directory, tfrec_name)
    convert(
        image_paths=batch_image_paths_train,
        labels=batch_labels,
        out_path=batch_path_tfrecords_train,
    )

In [None]:
#VAL: make batches of different sizes

In [None]:
# Run this cell only if the sum_csv file already exists.
# The file name must be the one this notebook writes ('labels_val_b16.csv');
# 'labels_val.csv' is never created here. to_csv stored the row index as the
# first column, so restore it with index_col=0 instead of picking up a stray
# 'Unnamed: 0' column.
labels_val = pd.read_csv(os.path.join(sum_csv_val_path, 'labels_val_b16.csv'), index_col=0)

In [None]:
# Validation and test use the same batch size as training.
val_batch_size = test_batch_size = batch_size

In [None]:
# Validation rows keep their original order (no shuffling); cut them into
# consecutive chunks of val_batch_size, the last of which may be shorter.
val_idxs = list(range(len(labels_val)))
val_batches_idx = []
for start in range(0, len(val_idxs), val_batch_size):
    val_batches_idx.append(val_idxs[start:start + val_batch_size])

In [None]:
# Validation TFRecords go into a directory named after the batch size;
# warn if it is already there, otherwise create it.
val_tfrec_dir_name = 'tfrec_batch_size_' + str(val_batch_size)
tfrec_val_directory = os.path.join('..', 'datasets', 'stixels', 'val', val_tfrec_dir_name)
if os.path.exists(tfrec_val_directory):
    print('WARNIING: dir '+tfrec_val_directory+ ' already exists!')
else:
    os.makedirs(tfrec_val_directory)

In [None]:
# Write one TFRecords file per validation batch. Each file is named after the
# first row index in its batch, zero-padded to six digits.
val_img_dir = os.path.join(img_path, 'val')
for batch in val_batches_idx:
    val_batch_rows = labels_val.loc[batch]
    val_batch_labels = np.array(val_batch_rows['Label'])
    batch_image_paths_val = [
        os.path.join(val_img_dir, str(name)+'.png')
        for name in val_batch_rows['Name']
    ]
    val_tfrec_name = "val"+str(batch[0]).zfill(6)+".tfrecords"
    batch_path_tfrecords_val = os.path.join(tfrec_val_directory, val_tfrec_name)
    convert(
        image_paths=batch_image_paths_val,
        labels=val_batch_labels,
        out_path=batch_path_tfrecords_val,
    )

In [None]:
# #THIS CELL IS ONLY FOR TFRECS WITH NO BATCHES:
# val_names_list=list(labels_val['Name'])
# val_labels=np.array(labels_val['Label'])
# image_paths_val=[]
# for name in val_names_list:
#     image_paths_val.append(os.path.join(img_path, 'val', name + '.png')) 
# path_tfrecords_val = os.path.join(tfrec_val_directory, "val.tfrecords")

# convert(image_paths=image_paths_val,
#         labels=val_labels,
#         out_path=path_tfrecords_val)


In [None]:
#TEST:

In [None]:
# Run this cell only if the sum_csv file already exists.
# The file name must be the one this notebook writes ('labels_test_b16.csv');
# 'labels_test.csv' is never created here. to_csv stored the row index as the
# first column, so restore it with index_col=0 instead of picking up a stray
# 'Unnamed: 0' column.
labels_test = pd.read_csv(os.path.join(sum_csv_test_path, 'labels_test_b16.csv'), index_col=0)

In [None]:
# Test rows keep their original order (no shuffling); cut them into
# consecutive chunks of test_batch_size, the last of which may be shorter.
test_idxs = list(range(len(labels_test)))
test_batches_idx = []
for start in range(0, len(test_idxs), test_batch_size):
    test_batches_idx.append(test_idxs[start:start + test_batch_size])

In [None]:
# Test TFRecords go into a directory named after the batch size;
# warn if it is already there, otherwise create it.
test_tfrec_dir_name = 'tfrec_batch_size_' + str(test_batch_size)
tfrec_test_directory = os.path.join('..', 'datasets', 'stixels', 'test', test_tfrec_dir_name)
if os.path.exists(tfrec_test_directory):
    print('WARNIING: dir '+tfrec_test_directory+ ' already exists!')
else:
    os.makedirs(tfrec_test_directory)

In [None]:
# Write one TFRecords file per test batch. Each file is named after the
# first row index in its batch, zero-padded to six digits.
test_img_dir = os.path.join(img_path, 'test')
for batch in test_batches_idx:
    test_batch_rows = labels_test.loc[batch]
    test_batch_labels = np.array(test_batch_rows['Label'])
    batch_image_paths_test = [
        os.path.join(test_img_dir, str(name)+'.png')
        for name in test_batch_rows['Name']
    ]
    test_tfrec_name = "test"+str(batch[0]).zfill(6)+".tfrecords"
    batch_path_tfrecords_test = os.path.join(tfrec_test_directory, test_tfrec_name)
    convert(
        image_paths=batch_image_paths_test,
        labels=test_batch_labels,
        out_path=batch_path_tfrecords_test,
    )

In [None]:
# #THIS CELL IS ONLY FOR TFRECS WITH NO BATCHES:
# test_names_list=list(labels_test['Name'])
# test_labels=np.array(labels_test['Label'])
# image_paths_test=[]
# for name in test_names_list:
#     image_paths_test.append(os.path.join(img_path, 'test', name+'.png')) 
# path_tfrecords_test = os.path.join(tfrec_test_directory, "test.tfrecords")
# convert(image_paths=image_paths_test,
#         labels=test_labels,
#         out_path=path_tfrecords_test)