### IMPORT REQUIRED LIBRARIES

In [4]:
import pandas as pd
import matplotlib
matplotlib.use('agg')
import datetime
import matplotlib.dates as mdates
import os 

from models import build_deep_parse_model, build_basic_model
from data_generator import DataGen
from utils import train, create_graphs

Using TensorFlow backend.


### 1. Preparation

#### Training parameters

In [9]:
# --- Locations ---------------------------------------------------------------
experiment_dir = "../experiments/experiment_1"  # root folder for everything this run writes
data_dir = '../data'                            # folder holding the input CSV files

# --- Training parameters -----------------------------------------------------
experiment_type = "derived"   # selects the model builder: "derived" or "basic"
loss_function = "mape"
windows = [2, 6, 12]          # one full experiment is run per window value
model_names = ['LSTM', 'RNN', 'GRU', 'BLSTM', 'BRNN', 'BGRU']
resume = False                # True: reuse results / weights that are already on disk
dropout = 0.2
verbose = 0

# --- Graph styling -----------------------------------------------------------
# `labels` and `graph_data` are positional: entry 0 is the ground truth and
# entries 1..6 follow the order of `model_names`.
labels = ['Actual'] + model_names

# Each entry is presumably [marker, colour, line style] -- confirm against
# utils.create_graphs, which is the only consumer of this dict.
# NOTE(review): '-' (BRNN) is a matplotlib line style, not a marker; if
# create_graphs forwards it as `marker=`, matplotlib will reject it -- TODO confirm.
graph_data = {
    0: ['*', 'black', 'solid'],     # Actual
    1: ['+', 'green', 'dashed'],    # LSTM
    2: ['.', 'yellow', 'dashed'],   # RNN
    3: ['^', 'black', 'dashdot'],   # GRU
    4: ['o', 'orange', 'dashdot'],  # BLSTM
    5: ['-', 'red', 'dotted'],      # BRNN
    6: ['x', 'blue', 'dotted'],     # BGRU
}

# Human-readable summary of the run; written to info.txt by the next cell.
# Built from adjacent f-strings (instead of a backslash continuation inside one
# literal) so every field is separated by exactly one ", ".
info = (
    f"Performed experiment with following parameters: {experiment_type} features, "
    f"{loss_function} loss function, dropout {dropout}, "
    f"Time_stamps {windows}, models {model_names}, resume {resume} "
    f"on {datetime.datetime.now()}"
)

#### Making directories

In [7]:
# Each kind of artefact gets its own sub-folder of `experiment_dir`.
weights_dir = os.path.join(experiment_dir, 'weights')
plots_dir = os.path.join(experiment_dir, 'plots')
results_dir = os.path.join(experiment_dir, 'results')
graphs_dir = os.path.join(experiment_dir, 'graphs')

# exist_ok=True makes the cell safe to re-run: existing folders are left alone
# and missing parents (including `experiment_dir` itself) are created.
for output_dir in (weights_dir, plots_dir, results_dir, graphs_dir):
    os.makedirs(output_dir, exist_ok=True)

# Record the run parameters next to the outputs. Mode "w" replaces any
# info.txt left by an earlier run in the same experiment folder.
with open(os.path.join(experiment_dir, "info.txt"), "w") as f:
    f.write(info)

### 2. Running experiments

In [None]:
# Train (or re-evaluate) every model for every window size and write one
# "<window>.csv" of MAPE scores per window into `results_dir`.
for window in windows:
    # NOTE(review): the file name always contains "derived", even when
    # experiment_type == "basic" -- confirm this is the intended input file.
    data_path = os.path.join(data_dir, f'{window} derived.csv')

    # (window, model_name) pairs, both as strings, that already have a weight
    # file on disk. Stays empty unless we are resuming.
    models_done = set()

    if resume:
        # A result file "<window>.csv" is only written after every model of
        # that window has finished, so its presence marks the window as done.
        # File stems are strings, hence the comparison against str(window).
        windows_done = {name.split('.')[0] for name in os.listdir(results_dir)}

        if str(window) in windows_done:
            print('{0} already done'.format(window))
            continue

        # Weight files are named "<window>_<model_name>.h5". Names that do not
        # follow the pattern yield tuples that simply never match below.
        models_done = {
            tuple(name.split('.')[0].split('_', 1))
            for name in os.listdir(weights_dir)
        }

    print(f'Working on timestamp: {window}')

    # DataGen receives the split fractions [0.9, 0.1]; only the 'train' and
    # 'test' partitions are requested here.
    dataset = DataGen(data_path, [0.9, 0.1])

    train_x, train_y, train_d = dataset.get_data('train', window)
    test_x, test_y, test_d = dataset.get_data('test', window)

    # Collected as plain records and turned into a DataFrame once at the end;
    # DataFrame.append was removed in pandas 2.0.
    records = []

    for model_name in model_names:
        already_trained = (str(window), model_name) in models_done
        if already_trained:
            print(f'{model_name} model on timestamp {window} already trained')

        if experiment_type == "derived":
            model = build_deep_parse_model(model_name, window, train_x.shape[2], train_d.shape[2], loss_function, dropout)
            train_data = [[train_x, train_d], train_y]
            test_data = [[test_x, test_d], test_y]

        elif experiment_type == "basic":
            model = build_basic_model(model_name, window, train_x.shape[2], loss_function, dropout)
            train_data = [train_x, train_y]
            test_data = [test_x, test_y]

        else:
            raise ValueError(f'{experiment_type} not defined')

        weight_file = os.path.join(weights_dir, f'{window}_{model_name}.h5')
        plot_file = os.path.join(plots_dir, f'{window}_{model_name}.png')

        # Only models that already have weights skip training; a model whose
        # weights are missing is trained even when resuming. With
        # resume == False nothing is marked as trained, so everything trains.
        # Presumably train(..., doTrain=False) loads `weight_file` and only
        # evaluates -- confirm in utils.train.
        mape = train(model, train_data, test_data, weight_file, plot_file,
                     doTrain=not already_trained, verbose=verbose)
        print(f"MAPE on {model_name} is {mape}")

        records.append({'MODEL_NAME': model_name, 'MAPE': mape})

    # The CSV keeps an index column; rows are now numbered 0..n-1 instead of
    # the all-zero index the repeated append used to produce.
    results = pd.DataFrame(records, columns=['MODEL_NAME', 'MAPE'])
    results.to_csv(os.path.join(results_dir, f'{window}.csv'))

Working on timestamp: 2
MAPE on LSTM is 25.577459051775502
MAPE on RNN is 25.62366256978083
MAPE on GRU is 25.48197845515432
MAPE on BLSTM is 25.447076462859876
MAPE on BRNN is 24.77983662554445
MAPE on BGRU is 25.085131510647766
Working on timestamp: 6
MAPE on LSTM is 26.000486536390582
MAPE on RNN is 25.3243698772961
MAPE on GRU is 25.335073448628847
MAPE on BLSTM is 25.98402111310999
MAPE on BRNN is 25.471511472390834
MAPE on BGRU is 24.642263992524132
Working on timestamp: 12
MAPE on LSTM is 25.817969588973284
MAPE on RNN is 25.871915061306943
MAPE on GRU is 25.17749569508923
MAPE on BLSTM is 26.62648853253311
MAPE on BRNN is 25.354322686080128
MAPE on BGRU is 25.196431729653558


### 3. Generate Graphs

In [None]:
# Re-fetch the test partition. Relies on `dataset` and `window` still holding
# the values left behind by the final iteration of the experiment loop above,
# so that cell must have been run first in this kernel.
test_x, test_y, test_d = dataset.get_data('test', window)

In [None]:
# Display the dimensions of the test inputs fetched in the previous cell.
# Presumably (samples, window, features) -- TODO confirm against DataGen.
test_x.shape

(1736, 12, 58)

In [13]:
# Shared x-axis for every graph: half-hourly ticks starting 2013-02-24 00:00
# and stopping before 2013-03-02 00:00.
dates = mdates.drange(
    datetime.datetime(2013, 2, 24, 0, 0),
    datetime.datetime(2013, 3, 2, 0, 0),
    datetime.timedelta(minutes=30),
)

# Plot as many leading test samples as there are ticks (288 for the range
# above) so the series and the x-axis cannot drift apart if the range changes.
n_points = len(dates)

for window in windows:
    data_path = os.path.join(data_dir, f'{window} derived.csv')
    dataset = DataGen(data_path, [0.9, 0.1])

    test_x, test_y, test_d = dataset.get_data('test', window)
    test_x = test_x[:n_points]
    test_d = test_d[:n_points]
    test_y = test_y[:n_points]

    # Series in the order expected by `labels`: ground truth first, then one
    # prediction per entry of `model_names`.
    data = [test_y]

    for model_name in model_names:
        if experiment_type == "derived":
            model = build_deep_parse_model(model_name, window, test_x.shape[2], test_d.shape[2], loss_function, dropout)
            test_in = [test_x, test_d]

        elif experiment_type == "basic":
            model = build_basic_model(model_name, window, test_x.shape[2], loss_function, dropout)
            # The basic model is fed test_x alone, mirroring how the training
            # cell builds its inputs for this experiment type.
            test_in = test_x

        else:
            # Same failure mode as the training loop for an unknown type.
            raise ValueError(f'{experiment_type} not defined')

        # Weights were saved by the training cell as "<window>_<model_name>.h5".
        weight_file = os.path.join(weights_dir, f'{window}_{model_name}.h5')
        model.load_weights(weight_file)

        data.append(model.predict(test_in))

    create_graphs(data, labels, graph_data, dates, os.path.join(graphs_dir, f'{window}.pdf'))