## Install PyDrive & Authenticate

In [7]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate through google.colab first, then hand the application-default
# credentials to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client the download cell uses to fetch the dataset archive.
drive = GoogleDrive(gauth)

[?25l[K     |▎                               | 10kB 18.6MB/s eta 0:00:01[K     |▋                               | 20kB 1.7MB/s eta 0:00:01[K     |█                               | 30kB 2.5MB/s eta 0:00:01[K     |█▎                              | 40kB 1.7MB/s eta 0:00:01[K     |█▋                              | 51kB 2.0MB/s eta 0:00:01[K     |██                              | 61kB 2.4MB/s eta 0:00:01[K     |██▎                             | 71kB 2.8MB/s eta 0:00:01[K     |██▋                             | 81kB 3.2MB/s eta 0:00:01[K     |███                             | 92kB 3.5MB/s eta 0:00:01[K     |███▎                            | 102kB 2.7MB/s eta 0:00:01[K     |███▋                            | 112kB 2.7MB/s eta 0:00:01[K     |████                            | 122kB 2.7MB/s eta 0:00:01[K     |████▎                           | 133kB 2.7MB/s eta 0:00:01[K     |████▋                           | 143kB 2.7MB/s eta 0:00:01[K     |█████                     

## Download dataset from drive

In [0]:
# Fetch the dataset archive from Drive by file id and store it locally.
# NOTE(review): 'YOUR_FILE_ID' looks like a placeholder - supply the real file id.
download = drive.CreateFile({'id': 'YOUR_FILE_ID'})
download.GetContentFile('self_driving_data.zip')

# Unpack the archive into the current working directory.
!unzip self_driving_data.zip

## Delete __MACOSX dir & List Files

In [10]:
rm -fr __MACOSX && ls

adc.json  data	sample_data  self_driving_data.zip


In [11]:
# List the contents of the data directory.
!cd data && ls

labels_car.csv	labels.csv  labels_trafficLight.csv  object-dataset


## Write the Code & Train the Model

In [0]:
# helper

import csv
import cv2
from sklearn.utils import shuffle
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
import os

# Downscale divisor: the original image size and the bounding-box coordinates
# are all divided by this value.
deliver = 15
# Size of the source images, expressed as (width, height).
origin_size = (1920, 1200)
# Working size after downscaling, also (width, height); with the values above
# this is (128, 80). It is passed to cv2.resize as the target size.
image_size = (int(origin_size[0] / deliver), int(origin_size[1] / deliver))


def augment_brightness_camera_images(image):
    """Return a copy of an RGB image with randomly scaled brightness.

    The image is converted to HSV, its third channel (V) is multiplied by a
    random factor drawn uniformly from [0.25, 1.25), and the result is
    converted back to RGB.

    Args:
        image: RGB image array accepted by ``cv2.cvtColor``.

    Returns:
        A new RGB image array; ``image`` itself is not modified.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    random_bright = .25 + np.random.uniform()

    scaled = hsv[:, :, 2] * random_bright
    if np.issubdtype(hsv.dtype, np.integer):
        # The factor can exceed 1, so bright pixels may leave the integer
        # range. Clip before the implicit cast on assignment; otherwise the
        # out-of-range values overflow and show up as dark speckles.
        scaled = np.clip(scaled, 0, np.iinfo(hsv.dtype).max)
    hsv[:, :, 2] = scaled

    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)


def filter_label(label):
    """Append the rows of ``data/labels.csv`` tagged with ``label`` to a per-label CSV.

    Each input row is read as one space-separated record whose seventh field
    is the quoted label (for example ``"car"``). Matching rows are written to
    ``data/labels_<label>.csv`` as comma-separated values, with the quotes
    stripped from the seventh and, when present, eighth field.

    The output file is opened in append mode, so calling this twice for the
    same label duplicates the rows.

    Args:
        label: Label text to keep, without the surrounding quotes.
    """
    wanted = '"{}"'.format(label)

    # Context managers guarantee both files are flushed and closed even if a
    # row fails to process.
    with open("data/labels.csv", newline='') as source, \
            open('data/labels_{}.csv'.format(label), 'a', newline='') as out:
        csv_write = csv.writer(out, dialect='excel')
        for csv_line in csv.reader(source):
            # Blank lines come back as empty lists; skip them rather than
            # failing on csv_line[0].
            if not csv_line:
                continue
            csv_lines = csv_line[0].split(' ')
            # Rows too short to carry a label field cannot match.
            if len(csv_lines) < 7 or csv_lines[6] != wanted:
                continue
            csv_lines[6] = csv_lines[6].replace("\"", "")
            if len(csv_lines) >= 8:
                csv_lines[7] = csv_lines[7].replace("\"", "")
            csv_write.writerow(csv_lines)


def filter_car():
    """Extract the rows labelled ``car`` via ``filter_label``."""
    filter_label(label="car")


def filter_tl():
    """Extract the rows labelled ``trafficLight`` via ``filter_label``."""
    filter_label(label="trafficLight")


def merge_image(img, mask, image_size=image_size):
    """Blend a mask onto a resized copy of an image.

    Args:
        img: Image to resize to ``image_size`` and use as the background.
        mask: Single-channel mask; it is scaled by 255 and cast to uint8.
            NOTE(review): presumably values in [0, 1] with the same height
            and width as the resized image - confirm against callers.
        image_size: Target size handed to ``cv2.resize``.

    Returns:
        The weighted sum ``0.4 * overlay + 1.0 * resized image``.
    """
    background = cv2.resize(img, image_size, interpolation=cv2.INTER_CUBIC)

    overlay = cv2.cvtColor(np.array(255 * mask, dtype=np.uint8), cv2.COLOR_GRAY2RGB)
    # Keep the mask in channel 0 only; channels 1 and 2 are zeroed.
    overlay[:, :, 1:3] = 0

    return cv2.addWeighted(overlay, 0.4, background, 1.0, 0)


def mask_image(bb_boxes, img, color):
    """Rasterise bounding boxes into a single-channel mask.

    Args:
        bb_boxes: Sequence of boxes, each indexed as
            ``[row_start, row_end, col_start, col_end]``.
        img: Array with at least three dimensions; only its first two
            dimensions and its dtype are used.
        color: Value written inside every box.

    Returns:
        Array of shape ``(height, width, 1)`` with the dtype of ``img``,
        zero everywhere except inside the boxes.
    """
    canvas = np.zeros_like(img[:, :, 0])

    for box in bb_boxes:
        rows = slice(box[0], box[1])
        cols = slice(box[2], box[3])
        canvas[rows, cols] = color

    # Add the trailing channel axis.
    height, width = np.shape(canvas)[0], np.shape(canvas)[1]
    return np.reshape(canvas, (height, width, 1))


def get_batch(file_name):
    """Build a batch generator over the labelled images in ``data/object-dataset``.

    Args:
        file_name: Path to a comma-separated label file whose rows start with
            the image file name followed by four integer box coordinates
            (columns 1-4).

    Returns:
        A function ``get_batch_fn(batch_size)`` that returns a generator. Each
        step covers ``batch_size`` image paths and yields the result of
        ``shuffle(origin_images, images, masked)``:

        * ``origin_images`` - resized RGB images,
        * ``images`` - the same images scaled with ``x / 127.5 - 1.0``,
        * ``masked`` - the box masks produced by ``mask_image``.

        Every labelled image contributes two samples (the image itself and a
        brightness-augmented copy), so a batch holds up to ``2 * batch_size``
        samples. Images without any label row are skipped, and a slice with no
        labelled image at all yields nothing.
    """
    data = glob("data/object-dataset/*.jpg")

    def rows_by_image_name():
        # Group the label rows by image name once per pass so that the lookup
        # for each image is a dict access instead of a scan over every row.
        grouped = {}
        with open(file_name, newline='') as label_file:
            for row in csv.reader(label_file):
                if row:  # blank lines are returned as empty lists
                    grouped.setdefault(row[0], []).append(row)
        return grouped

    def get_batch_fn(batch_size):
        label_rows = rows_by_image_name()

        for batch_i in range(0, len(data), batch_size):
            origin_images = []
            images = []
            masked = []

            for image_path in data[batch_i:batch_i+batch_size]:
                lines = label_rows.get(os.path.basename(image_path), [])
                if not lines:
                    continue

                # Boxes are stored as [row_start, row_end, col_start, col_end],
                # scaled down by the same divisor as the image.
                bb_boxes = [
                    [int(int(line[2]) / deliver), int(int(line[4]) / deliver),
                     int(int(line[1]) / deliver), int(int(line[3]) / deliver)]
                    for line in lines
                ]

                image = cv2.imread(image_path)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                image = cv2.resize(image, image_size, interpolation=cv2.INTER_CUBIC)
                image_normalizer = image / 127.5 - 1.0

                origin_images.append(image)
                images.append(image_normalizer)
                masked.append(mask_image(bb_boxes, image_normalizer, 1))

                # Second sample: same boxes on a brightness-augmented copy.
                aug_img = augment_brightness_camera_images(image)
                aug_image_normalizer = aug_img / 127.5 - 1.0

                origin_images.append(aug_img)
                images.append(aug_image_normalizer)
                masked.append(mask_image(bb_boxes, aug_image_normalizer, 1))

            # An empty batch would produce arrays with no image dimensions,
            # which cannot be fed to the model's placeholders.
            if not images:
                continue

            yield shuffle(np.array(origin_images), np.array(images), np.array(masked))

    return get_batch_fn

In [0]:
# unet model

import tensorflow as tf


def build_model():
    """Build the U-Net style segmentation graph.

    The network has four down-sampling stages (two 3x3 ReLU convolutions
    followed by 2x2 max pooling, with 8, 16, 32 and 64 filters), a 128-filter
    bottleneck, and four up-sampling stages (2x2 transposed convolution,
    concatenation with the matching down-sampling output, two 3x3 ReLU
    convolutions). A final 1x1 convolution produces one logit per pixel.

    Layers are created in the same order as a fully unrolled definition, so
    automatically generated layer names are unaffected.

    Returns:
        Tuple ``(inputs, logits, outputs)``: the float32 input placeholder of
        shape ``[None, image_size[1], image_size[0], 3]``, the per-pixel
        logits, and their sigmoid.
    """
    conv_kwargs = dict(kernel_size=(3, 3), padding='same', activation='relu')
    up_kwargs = dict(kernel_size=(2, 2), strides=(2, 2), padding='same', activation='relu')

    def conv_pair(tensor, filters):
        # Two consecutive 3x3 convolutions with the same filter count.
        first = tf.layers.conv2d(tensor, filters=filters, **conv_kwargs)
        return tf.layers.conv2d(first, filters=filters, **conv_kwargs)

    inputs = tf.placeholder(tf.float32, shape=[None, image_size[1], image_size[0], 3])

    # Contracting path: remember each stage's output for the skip connections.
    skip_outputs = []
    net = inputs
    for filters in (8, 16, 32, 64):
        net = conv_pair(net, filters)
        skip_outputs.append(net)
        net = tf.layers.max_pooling2d(net, pool_size=(2, 2), strides=(2, 2))

    # Bottleneck.
    net = conv_pair(net, 128)

    # Expanding path: consume the skip connections from deepest to shallowest.
    for filters in (64, 32, 16, 8):
        upsampled = tf.layers.conv2d_transpose(net, filters=filters, **up_kwargs)
        merged = tf.concat(values=[upsampled, skip_outputs.pop()], axis=-1)
        net = conv_pair(merged, filters)

    logits = tf.layers.conv2d(net, filters=1, kernel_size=(1, 1), padding='same')
    outputs = tf.nn.sigmoid(logits)

    return inputs, logits, outputs



In [0]:
# train unet model

import tensorflow as tf
import os
from tensorflow.python.client import device_lib

# Raise TensorFlow's C++ log threshold to silence its native logging.
# NOTE(review): tensorflow is already imported at this point, so this may be
# set too late to take effect - confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "99"
# Print the devices TensorFlow can see.
print(device_lib.list_local_devices())

# Number of image paths taken per training step; each labelled image yields
# two samples from the batch generator, so a step can hold up to twice this.
batch_size = 512
# Number of full passes over the dataset.
epochs = 200


def iou(y_pred, y_true):
    """Mean overlap score between predicted and true masks.

    Despite the name, the value computed per sample is the Dice form
    ``(2 * sum(p * t) + eps) / (sum(p) + sum(t) + eps)`` with ``eps = 1e-7``,
    averaged over the batch.

    Args:
        y_pred: Prediction tensor with a static ``[batch, H, W, C]`` shape.
        y_true: Target tensor that reshapes to the same ``[-1, H * W]`` layout.
            NOTE(review): the flattening ignores the channel count, so this
            presumably expects C == 1 - confirm.

    Returns:
        Scalar tensor holding the batch mean of the score.
    """
    height, width, _ = y_pred.get_shape().as_list()[1:]
    pixels = height * width

    pred_flat = tf.reshape(y_pred, [-1, pixels])
    true_flat = tf.reshape(y_true, [-1, pixels])

    overlap = tf.reduce_sum(pred_flat * true_flat, axis=1)
    pred_total = tf.reduce_sum(pred_flat, axis=1)
    true_total = tf.reduce_sum(true_flat, axis=1)

    # The small constant keeps the ratio defined when both masks are empty.
    score = (2 * overlap + 1e-7) / (pred_total + true_total + 1e-7)

    return tf.reduce_mean(score)


def iou_loss(y_pred, y_true):
    """Loss to minimise: the negated overlap score returned by ``iou``."""
    score = iou(y_pred, y_true)
    return -score


# Build the graph: model, label placeholder, loss and optimiser step.
inputs, logits, outputs = build_model()
labels = tf.placeholder(tf.float32, shape=(None, image_size[1], image_size[0], 1))

# Alternative loss kept for reference (pixel-wise cross entropy on the logits):
# loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
loss = iou_loss(outputs, labels)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)

batch_fn = get_batch("data/labels_car.csv")

saver = tf.train.Saver()

# The checkpoint directory is not part of the dataset archive and the saver
# does not create it, so make sure it exists before the first save; otherwise
# the run fails only after a full epoch of training.
os.makedirs("result", exist_ok=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        print("epoch:{}/{}".format(epoch, epochs))

        # The first element of each batch (the un-normalised images) is not
        # needed for training.
        for _, im, lb in batch_fn(batch_size):
            _, loss_value = sess.run([optimizer, loss], feed_dict={inputs: im, labels: lb})
            print("loss:{}".format(loss_value))

        # One checkpoint per epoch.
        saver.save(sess, "result/check_point_{}.ckpt".format(epoch))
        print("save check point {}".format(epoch))



[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14906923346909726069
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 2606429132171298281
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 6381673875727344767
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 11276946637
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7617718501660930686
physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7"
]
epoch:0/200
loss:-0.10577987134456635
loss:-0.10439885407686234
loss:-0.0925341546535492
loss:-0.10404960811138153
loss:-0.09765829145908356
loss:-0.0945257917046547
loss:-0.09531645476818085
loss:-0.09741801768541336
loss:-0.09682949632406235
loss:-0.10026245564222336
loss:-0.09767024964094162
loss:-