In [1]:
# encoding: utf-8

import re
import string
import numpy as np
import os
from os.path import dirname, abspath, join
from keras.layers.recurrent import GRU
from keras.layers.wrappers import TimeDistributed
from keras.models import Sequential, model_from_json
from keras.layers.core import Dense, RepeatVector
from src import feagen

Using TensorFlow backend.


In [2]:
# (start, end) date strings selecting the rows that make up each data split.
train_dates = ('2016-09-19', '2016-10-10')
train1_dates = ('2016-09-19', '2016-09-30')
train2_dates = ('2016-10-08', '2016-10-10')
valid_dates = ('2016-10-11', '2016-10-17')
test_dates = ('2016-10-18', '2016-10-24')

# Pairs built element-wise from the two tuples below (presumably
# (tollgate, direction) for the volume data -- confirm against feagen).
# Materialised with list() so the pairs support len() and repeated iteration
# even where zip() returns a one-shot iterator (Python 3).
vol_tolls = (1, 1, 2, 3, 3)
vol_dires = (0, 1, 0, 0, 1)
toll_dire = list(zip(vol_tolls, vol_dires))

# Pairs of (intersection, tollgate); build_data() uses len(inte_toll) as the
# number of paths, so this has to be a real list rather than a zip iterator.
tra_intes = ('A', 'A', 'B', 'B', 'C', 'C')
tra_tolls = (2, 3, 1, 3, 1, 3)
inte_toll = list(zip(tra_intes, tra_tolls))

try:
    # Run as a script/module: anchor paths at the directory holding this file.
    PROJECT_ROOT = dirname(abspath(__file__))
except NameError:
    # __file__ is not defined in a notebook kernel; fall back to the parent
    # of the current working directory.  Only NameError is caught so that any
    # other failure is not silently masked.
    PROJECT_ROOT = dirname(abspath(os.getcwd()))

# Where the serialized model lives, relative to PROJECT_ROOT.
MODEL_PATH = '.'
MODEL_STRUCT_FILE = 'seq2seq_struct.json'
MODEL_WEIGHTS_FILE = 'seq2seq_weights.h5'

# MAX_INPUT_LEN = 18
# MAX_OUTPUT_LEN = 20
BATCH_SIZE = 128

# Lazily populated feagen.Features instances (see build_data() and test()).
feat = None
feat_test = None

In [3]:
# Build the shared training feature source once, from the training directory
# and its weather / volume / trajectories CSV files.  build_data() reuses this
# module-level `feat` and only constructs its own copy when `feat` is unset.
# NOTE(review): the recorded output ("Reading files...") suggests the CSVs are
# read at construction time, so this cell is presumably the slow one.
feat = feagen.Features(
    '../dataSets/training/',
    'weather (table 7)_training.csv',
    'volume(table 6)_training.csv',
    'trajectories(table 5)_training.csv'
)

Reading files...
Finish reading files.


In [32]:
# am/pm separated

def build_data(ampm, use_all, predict_all):
    """Return training arrays reshaped into window sequences, plus validation arrays.

    Axis letters used in the shape comments:
    D = date, P = prediction (one per entry of ``inte_toll``), W = window,
    F = feature.

    Parameters
    ----------
    ampm
        Forwarded unchanged to ``feat.get_tra_Xy`` (the driver cell passes 'am').
    use_all : bool
        If true, P is ``len(inte_toll)``; otherwise P is 1.
    predict_all : bool
        If true, targets for all P predictions are stacked on the last axis
        and only the first D rows of X are kept; otherwise every row keeps a
        single target column.

    Returns
    -------
    tuple
        ``(X_train, X_valid, y_train, y_valid)`` where

        * ``X_train`` is ``(PD, W, P*F)``, or ``(D, W, P*F)`` when predict_all;
        * ``y_train`` is ``(PD, W, 1)``, or ``(D, W, P)`` when predict_all;
        * ``X_valid`` / ``y_valid`` are returned exactly as ``get_tra_Xy``
          produced them.
    """
    global feat
    if feat is None:
        # Load the training tables on first use; later calls reuse the instance.
        feat = feagen.Features(
            '../dataSets/training/',
            'weather (table 7)_training.csv',
            'volume(table 6)_training.csv',
            'trajectories(table 5)_training.csv'
        )

    X_train, y_train = feat.get_tra_Xy(dates=train_dates, ampm=ampm, normalize=True, window_onehot=False, use_all=use_all)
    X_valid, y_valid = feat.get_tra_Xy(dates=valid_dates, ampm=ampm, normalize=True, window_onehot=False, use_all=use_all)
    # NOTE(review): the validation arrays are returned without the slicing and
    # reshaping applied to the training arrays below, so their shapes will not
    # match X_train / y_train.  Confirm whether callers reshape them.

    # Keep only the middle columns: the first 14 and the last 6 are dropped.
    # NOTE(review): presumably those are non-window features -- confirm
    # against feagen.Features.get_tra_Xy.
    X_train = X_train[:, 14:-6]
    # PD, PWF

    PD, PWF = X_train.shape
    P = len(inte_toll) if use_all else 1
    W = 6
    # Floor division keeps the dimensions integral; "/" would produce floats
    # under true division, which reshape() and slicing reject.
    F = PWF // P // W

    # (PD, PWF) -> (PD, P, W, F) -> (PD, W, P, F) -> (PD, W, P*F): one time
    # step per window, with the features of all P predictions side by side.
    X_train = X_train.reshape((PD, P, W, F)).swapaxes(1, 2).reshape((PD, W, P * F))

    PDW = y_train.shape[0]
    D = PDW // P // W

    if not predict_all:
        y_train = y_train.reshape((PDW // W, W, 1))
        # PD, W, 1
    else:
        # One sample per date: keep the first D rows of X ...
        X_train = X_train[:D, :, :]
        # D, W, PF

        # ... and move the prediction axis last: (P, D, W) -> (D, W, P).
        y_train = y_train.reshape((P, D, W)).transpose(1, 2, 0)
        # D, W, P

    return X_train, X_valid, y_train, y_valid


def build_model_from_file(struct_file, weights_file):
    """Rebuild a compiled model from a JSON structure file and a weights file.

    Parameters
    ----------
    struct_file : str
        Path to the JSON text passed to ``model_from_json``.
    weights_file : str
        Path passed to ``model.load_weights``.

    Returns
    -------
    The model, compiled with MSE loss and the Adam optimizer, with the stored
    weights loaded.
    """
    # The context manager closes the structure file deterministically instead
    # of leaving the handle to the garbage collector.
    with open(struct_file, 'r') as struct_fh:
        model = model_from_json(struct_fh.read())

    model.compile(loss="mse", optimizer='adam')
    model.load_weights(weights_file)

    return model


def build_model(input_size, seq_len, hidden_size):
    """Assemble and compile the Dense -> RepeatVector -> GRU -> Dense network.

    Parameters
    ----------
    input_size : int
        Number of units of the final per-time-step Dense layer.
    seq_len : int
        Number of times the hidden vector is repeated, i.e. the length of the
        produced sequence.
    hidden_size : int
        Number of units of the first Dense layer and of the GRU.

    Returns
    -------
    A ``Sequential`` model compiled with MSE loss and the Adam optimizer.

    Notes
    -----
    An earlier revision used a GRU with ``return_sequences=False`` as the
    first layer; it was replaced by the Dense layer below.
    NOTE(review): no input shape is declared on the first layer -- confirm
    that the Keras version in use allows that.
    """
    model = Sequential()
    model.add(Dense(hidden_size, activation="relu"))
    # Repeat the hidden vector once per output time step for the GRU.
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    # Units are passed positionally, like the layers above, instead of the
    # legacy ``output_dim=`` keyword, which newer Keras releases reject.
    model.add(TimeDistributed(Dense(input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')

    return model


def save_model_to_file(model, struct_file, weights_file):
    """Persist a model as a JSON structure file plus a weights file.

    Parameters
    ----------
    model
        Object exposing ``to_json()`` and ``save_weights(path, overwrite=...)``.
    struct_file : str
        Destination of the JSON structure; an existing file is overwritten.
    weights_file : str
        Destination passed to ``model.save_weights`` with ``overwrite=True``.
    """
    model_struct = model.to_json()
    # Close (and therefore flush) the file explicitly; a bare
    # open(...).write(...) leaves that to the garbage collector.
    with open(struct_file, 'w') as struct_fh:
        struct_fh.write(model_struct)

    model.save_weights(weights_file, overwrite=True)


def train(epoch=100, model_path=join(PROJECT_ROOT, MODEL_PATH),
          ampm='am', use_all=True, predict_all=True, hidden_size=128, model=None):
    """Fit a model on the data from build_data() and save it under model_path.

    Parameters
    ----------
    epoch : int
        Number of training epochs (passed to ``fit`` as ``nb_epoch``).
    model_path : str
        Directory receiving MODEL_STRUCT_FILE and MODEL_WEIGHTS_FILE.
    ampm, use_all, predict_all
        Forwarded to ``build_data``, which requires all three.  The defaults
        select the "model A" configuration of the driver cell ('am', use all
        paths, predict all paths).
    hidden_size : int
        Hidden width handed to ``build_model`` when no model is supplied.
    model
        Optional pre-built, compiled model.  When omitted, one is created with
        ``build_model`` and sized from the target array.

    NOTE(review): build_data() returns the validation arrays un-reshaped, and
    build_model() starts with Dense + RepeatVector while X_train is 3-D --
    confirm that the shapes line up before relying on this function.
    """
    X_train, X_valid, y_train, y_valid = build_data(
        ampm=ampm, use_all=use_all, predict_all=predict_all)

    if model is None:
        # Previously `model` was never defined here, so fit() raised NameError
        # or picked up whatever a notebook cell had left in the global `model`.
        # Size the output side from the targets: (samples, seq_len, outputs).
        model = build_model(y_train.shape[-1], y_train.shape[1], hidden_size=hidden_size)

    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), batch_size=BATCH_SIZE, nb_epoch=epoch)

    struct_file = join(model_path, MODEL_STRUCT_FILE)
    weights_file = join(model_path, MODEL_WEIGHTS_FILE)
    save_model_to_file(model, struct_file, weights_file)


def test(model_path=join(PROJECT_ROOT, MODEL_PATH)):
    """Load the phase-1 test features and the saved model.

    Parameters
    ----------
    model_path : str
        Directory holding MODEL_STRUCT_FILE and MODEL_WEIGHTS_FILE.

    The prediction step is not implemented yet; the function currently only
    prepares ``feat_test`` and the restored model.
    """
    # feat_test is assigned below, which makes it function-local unless it is
    # declared global; without this line the check raised UnboundLocalError
    # on every call.
    global feat_test
    if feat_test is None:
        # Load the test tables on first use; later calls reuse the instance.
        feat_test = feagen.Features(
            '../dataSets/testing_phase1/',
            'weather (table 7)_test1.csv',
            'volume(table 6)_test1.csv',
            'trajectories(table 5)_test1.csv'
        )

    struct_file = join(model_path, MODEL_STRUCT_FILE)
    weights_file = join(model_path, MODEL_WEIGHTS_FILE)
    model = build_model_from_file(struct_file, weights_file)
    
#     x = np.zeros((1, MAX_INPUT_LEN, CHAR_NUM), dtype=int)
#     word = BEGIN_SYMBOL + word.lower().strip() + END_SYMBOL
#     x[0] = vectorize(word, MAX_INPUT_LEN, CHAR_NUM)

#     pred = model.predict(x)[0]
#     print ''.join([
#         INDICES_TO_CHAR[i] for i in pred.argmax(axis=1)
#         if INDICES_TO_CHAR[i] not in (BEGIN_SYMBOL, END_SYMBOL)
#     ])

def main():
    """Script entry point; currently does nothing.

    The ``train(epoch=10)`` and ``test()`` calls are commented out, so
    invoking this has no effect and returns None.
    """
    #     train(epoch=10)
    #     test()
    return None


# Call main() only when this code runs as the top-level program.
if __name__ == '__main__':
    main()

In [33]:
# (label, use_all, predict_all) for each model variant exercised below.
use_predict = (
    ('A', True, True),
    ('B', True, False),
    ('C', False, False)
)

# model A (6->6) :     use_all,     predict_all : ( 22, 6, 6*2) ( 22, 6, 6)
# model B (6->1) :     use_all, not predict_all : (132, 6, 6*2) (132, 6, 1)
# model C (1->1) : not use_all, not predict_all : (132, 6, 1*2) (132, 6, 1)

# The loop variable is `model_name`, not `model`, so this cell does not leave
# a string in the global `model` that train() would otherwise pick up.
for model_name, use_all, predict_all in use_predict:
    splits = build_data(ampm='am', use_all=use_all, predict_all=predict_all)
    n_samples, n_windows, n_inputs = splits[0].shape
    # Report the input width divided by two (the "*2" factor in the table
    # above) next to the target shape, as in the recorded output.  The single
    # parenthesised argument prints the same on Python 2 and Python 3.
    print('%s %s %s' % (model_name, (n_samples, n_windows, n_inputs // 2), splits[2].shape))


A (22, 6, 6) (22, 6, 6)
B (132, 6, 6) (132, 6, 1)
C (132, 6, 1) (132, 6, 1)
