In [217]:
import csv
import cv2
import numpy as np
import sklearn
import os

def load_samples(file):
    '''
    Load driving_log generated by the simulator.
    '''
    samples = []
    with open(file) as csvfile:
        reader = csv.reader(csvfile)
        for line in reader:
            samples.append(line)
    
    return samples

def generator(samples, batch_size=16, grayscale=False, 
              img_folder='../../data/IMG_udacity/', sequence_length=10):
    '''
    Generate batches in a memory-efficient way. 
    '''
    num_samples = len(samples)
    count = 0
    
    while 1: # Loop forever so the generator never terminates
        
        sklearn.utils.shuffle(samples)
        
        for offset in range(0, num_samples, batch_size):
            
            images = []
            angles = []
            
            for offset_seq in range(offset, offset + batch_size):
                
                #print(offset, offset_seq)
                #count += 1
                seq_samples = samples[offset_seq:offset_seq+sequence_length]
                
                #print(len(seq_samples))
                
                images_seq = []
                angles_seq = []

                for seq_sample in seq_samples:
                    name = os.path.join(img_folder, seq_sample[0].split('/')[-1])
                    center_image = cv2.imread(name)

                    if grayscale:
                        center_image = cv2.cvtColor(center_image, cv2.COLOR_BGR2GRAY)
                        center_image = center_image.reshape(center_image.shape + (1,))

                    try:
                        #center_angle = float(seq_sample[3])
                        center_angle = int((float(seq_sample[3]) + 1.) * 49)
                    except ValueError:
                        print("value is not a number")
                        continue

                    # resize image
                    center_image = cv2.resize(center_image, None, 
                                              fx=0.125, fy=0.125, 
                                              interpolation=cv2.INTER_CUBIC)

                    '''if count % 2 == 0: # Flipping Images And Steering Measurements
                        center_image = np.fliplr(center_image)
                        center_angle = -center_angle'''

                    images_seq.append(center_image)
                    angles_seq.append(center_angle)
                
                #print(len(angles_seq))
                
                if len(angles_seq) == 0:
                    break
                
                # padding
                padding = angles_seq[-1]
                for i in range(abs(len(angles_seq) - sequence_length)):
                    angles_seq.append(padding)
                
                images.append(images_seq)
                angles.append(angles_seq)
                
            # trim image to only see section with road
            images = np.array(images)
            angles = np.array(angles)
            #print(angles.shape)
            #angles = angles.reshape(angles.shape[0], angles.shape[1], -1)
            #yield sklearn.utils.shuffle(X_train, y_train)
            #yield (images, angles)
            yield (angles, angles)

In [218]:
samples = load_samples('../../data/driving_log_udacity.csv')
angle_generator = generator(samples)
next(angle_generator)[0].shape

(16, 10)

In [238]:
from keras.layers import LSTM, RepeatVector, Input, Dropout, Embedding
from keras.models import Model

SEQUENCE_LENGTH = 5
EMBEDDING_SHAPE = 50
VOCAB_SIZE = 100

inputs = Input(shape=(SEQUENCE_LENGTH,))

embedding = Embedding(VOCAB_SIZE, EMBEDDING_SHAPE, 
                      input_length=SEQUENCE_LENGTH,
                      #weights=[embeddings], 
                      mask_zero=False)(inputs)
#embedding.trainable = False'''

encoded = LSTM(300, 
               input_shape=(None, EMBEDDING_SHAPE),
               return_sequences=True)(embedding)

encoded = LSTM(EMBEDDING_SHAPE)(encoded)

decoded = RepeatVector(SEQUENCE_LENGTH)(encoded)
decoded = LSTM(EMBEDDING_SHAPE, return_sequences=True)(decoded)

#decoded = Dropout(0.2)(decoded)

decoded = LSTM(EMBEDDING_SHAPE, return_sequences=True)(decoded)

model = Model(inputs, decoded)
encoder = Model(inputs, encoded)

model.compile(loss='cosine_proximity', optimizer='adam')
#model.compile(loss='mse', optimizer='adam')

model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_33 (InputLayer)            (None, 5)             0                                            
____________________________________________________________________________________________________
embedding_9 (Embedding)          (None, 5, 50)         5000        input_33[0][0]                   
____________________________________________________________________________________________________
lstm_93 (LSTM)                   (None, 5, 300)        421200      embedding_9[0][0]                
____________________________________________________________________________________________________
lstm_94 (LSTM)                   (None, 50)            70200       lstm_93[0][0]                    
___________________________________________________________________________________________

In [222]:
from sklearn.metrics.pairwise import cosine_similarity

def get_most_similar(query, vecs, vocabulary=None, k=3):
    cosine = cosine_similarity(query.reshape(1,-1), vecs)[0]
    max_idxs = np.array(cosine).argsort()[::-1]
    #mask = np.in1d(max_idxs, list(vocabulary.keys()))
    #max_idxs = max_idxs[mask][:k]
    max_idxs = max_idxs[:k]
    #print('result', max_idxs)
    #return ['' if vocabulary.get(idx) == '</PAD>' else vocabulary.get(idx) for idx in max_idxs]
    return max_idxs

In [239]:
from sklearn.model_selection import train_test_split
from keras.callbacks import CSVLogger

BATCH_SIZE = 32
NUM_EPOCHS = 1000

# read driving_log.csv
samples = load_samples('../../data/driving_log_udacity.csv')

# train/validation split
train_samples, validation_samples = train_test_split(samples, test_size=0.2)

# train/validation generators
train_generator = generator(train_samples, batch_size=BATCH_SIZE, sequence_length=SEQUENCE_LENGTH)
validation_generator = generator(validation_samples, batch_size=BATCH_SIZE, sequence_length=SEQUENCE_LENGTH)


print('training...')
    
#csv_logger = CSVLogger('./training.log')

for iteration in range(1, NUM_EPOCHS):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    
    X, y = next(train_generator)
    X_val, y_val = next(validation_generator)

    model.fit(X, np.array([model.layers[1].get_weights()[0][xt] for xt in X]), 
              batch_size=BATCH_SIZE, nb_epoch=1,
              validation_data=(X_val, np.array([model.layers[1].get_weights()[0][xt] for xt in X_val])), 
              verbose=1)
    
    # Select samples from the validation set at random so we can visualize errors.
    for i in range(1):

        ind = np.random.randint(0, len(X_val))
        test = np.array(X_val[ind]).reshape(1, SEQUENCE_LENGTH)

        print('Orig:', test.reshape(SEQUENCE_LENGTH).tolist())
        
        #preds = autoencoder.predict(rowx, verbose=0)
        pred = model.predict(test, verbose=0)
        
        #print(get_most_similar(pred[0][0], model.layers[1].get_weights()[0])[0])
        #print('Prediction:', pred[0].reshape(SEQUENCE_LENGTH)[:5])
        
        print('Pred:', [get_most_similar(word_vec, 
                                             model.layers[1].get_weights()[0])[0] for word_vec in pred[0]])

# training
'''history = model.fit_generator(train_generator, 
                              samples_per_epoch=len(train_samples), 
                              validation_data=validation_generator, 
                              nb_val_samples=len(validation_samples), 
                              nb_epoch=NUM_EPOCHS)'''
# save model
#model.save("model-autoencoder.h5", True)

training...

--------------------------------------------------
Iteration 1
Train on 32 samples, validate on 32 samples
Epoch 1/1
Orig: [49, 49, 49, 70, 49]
Pred: [49, 49, 49, 49, 49]

--------------------------------------------------
Iteration 2
Train on 32 samples, validate on 32 samples
Epoch 1/1
Orig: [49, 49, 49, 49, 46]
Pred: [49, 49, 49, 49, 49]

--------------------------------------------------
Iteration 3
Train on 32 samples, validate on 32 samples
Epoch 1/1
Orig: [49, 49, 49, 49, 49]
Pred: [49, 49, 49, 49, 49]

--------------------------------------------------
Iteration 4
Train on 32 samples, validate on 32 samples
Epoch 1/1
Orig: [49, 49, 50, 49, 49]
Pred: [49, 49, 49, 49, 49]

--------------------------------------------------
Iteration 5
Train on 32 samples, validate on 32 samples
Epoch 1/1
Orig: [49, 49, 49, 39, 49]
Pred: [49, 49, 49, 49, 49]

--------------------------------------------------
Iteration 6
Train on 32 samples, validate on 32 samples
Epoch 1/1
Orig: [49,

'history = model.fit_generator(train_generator, \n                              samples_per_epoch=len(train_samples), \n                              validation_data=validation_generator, \n                              nb_val_samples=len(validation_samples), \n                              nb_epoch=NUM_EPOCHS)'

In [231]:
import pandas as pd
data = pd.read_csv('../../data/driving_log_udacity.csv')

In [236]:
data.describe()

Unnamed: 0,0,0.1,0.2,22.14829
count,8035.0,8035.0,8035.0,8035.0
mean,0.00407,0.869768,0.00197,28.170589
std,0.128848,0.301188,0.036567,6.149343
min,-0.942695,0.0,0.0,0.50249
25%,0.0,0.985533,0.0,30.1831
50%,0.0,0.985533,0.0,30.1864
75%,0.0,0.985533,0.0,30.18664
max,1.0,0.985533,1.0,30.70936
