In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import sys
import re
import warnings
import itertools
from tensorflow.keras.regularizers import l2
from sklearn.metrics import mean_absolute_error, explained_variance_score, mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler, QuantileTransformer
warnings.simplefilter(action='ignore', category=FutureWarning)
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv1D, MaxPooling1D, Reshape
from tensorflow.keras.layers import AveragePooling1D, SeparableConv2D, Activation, concatenate, Conv2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import KLDivergence
from tensorflow.keras.preprocessing.sequence import pad_sequences
warnings.resetwarnings()

In [None]:
def country_groupby(df):
    """Return one row-index object per distinct value of ``df.location``.

    The result is a list ordered by first appearance of each location; each
    element is the index of the rows belonging to that location.
    """
    per_country_indices = []
    for country in df.location.unique():
        belongs_to_country = df.location == country
        per_country_indices.append(df.loc[belongs_to_country].index)
    return per_country_indices

def country_search(df, country):
    """Return the index of the rows of ``df`` whose ``location`` equals ``country``."""
    is_requested_country = df.location == country
    return df.loc[is_requested_country].index

def column_search(df, name, return_style='loc', threshold='contains'):
    """Find the columns of ``df`` whose names match ``name``.

    Parameters
    ----------
    df : DataFrame whose column labels are searched.
    name : pattern handed to ``df.columns.str.contains`` or ``df.columns.str.match``.
    return_style : ``'loc'`` returns the matching column labels, ``'iloc'`` returns
        their integer positions; any other value yields ``None``.
    threshold : ``'contains'`` selects ``str.contains``; any other value selects
        ``str.match``.
    """
    if threshold == 'contains':
        matcher = df.columns.str.contains
    else:
        matcher = df.columns.str.match

    # Unknown styles return None without ever evaluating the pattern.
    if return_style not in ('loc', 'iloc'):
        return None

    hits = matcher(name)
    if return_style == 'loc':
        return df.columns[hits]
    return np.where(hits)[0]

def create_Xy(model_data, start_date, frame_size, n_days_into_future, n_countries):
    """Build sliding-window feature frames ``X`` and the matching targets ``y``.

    One frame is produced for every window end date in
    ``range(start_date, model_data.time_index.max() - n_days_into_future + 1)``.
    Each frame holds the rows whose ``time_index`` lies in
    ``(end_date - frame_size, end_date]``, reshaped to
    ``(n_countries, frame_size, n_features)``.

    Parameters
    ----------
    model_data : DataFrame with at least ``time_index`` and ``new_cases_weighted``
        columns. The reshape calls assume rows are grouped by country and ordered
        by time within each country, with the same dates for every country
        -- TODO confirm against the data preparation code.
    start_date : first window end date (a ``time_index`` value).
    frame_size : number of time steps per window.
    n_days_into_future : forecast horizon; the last ``n_days_into_future`` dates
        cannot end a window because no target would exist for them.
    n_countries : number of countries, used to reshape each window.

    Returns
    -------
    X : float64 array of shape ``(n_frames, n_countries, frame_size, n_features)``.
    y : array of shape ``(n_frames, n_countries)`` taken from the last ``n_frames``
        dates of ``new_cases_weighted``.

    Raises
    ------
    ValueError
        If the requested date range contains no window at all.
    """
    # The max date cannot be included: at least n_days_into_future days must remain
    # to predict. The +1 compensates for range() excluding its endpoint.
    last_window_end = model_data.time_index.max() - n_days_into_future
    frames = []
    for max_date_in_window in range(start_date, last_window_end + 1):
        in_window = ((model_data.time_index <= max_date_in_window)
                     & (model_data.time_index > max_date_in_window - frame_size))
        # Each element along axis 0 becomes the time series of all features of one country.
        per_country = model_data[in_window].values.reshape(n_countries, frame_size, -1)
        # Truncate / pad along the time axis. pad_sequences converts to integers by
        # default, so the dtype must be requested explicitly.
        frames.append(pad_sequences(per_country, maxlen=frame_size, dtype=np.float64))

    if not frames:
        raise ValueError(
            'No frames can be built: start_date={} exceeds the last usable window end {}.'.format(
                start_date, last_window_end))

    # Stack once instead of concatenating inside the loop (which re-copies X every iteration).
    X = np.stack(frames, axis=0)
    y = model_data.new_cases_weighted.values.reshape(-1, model_data.time_index.nunique()).transpose()[-X.shape[0]:,:]
    return X, y


def split_and_normalize_Xy(X, y, n_time_steps, n_validation_frames, n_test_frames, date_normalization=True,
                          train_test_only=False):
    """Split frames into train / (validation) / test sets and standardize the features.

    Frames are split along axis 0 in order: the last ``n_test_frames`` frames are the
    test set, the ``n_validation_frames`` before them are the validation set (unless
    ``train_test_only``), and everything earlier is the training set.

    Each training frame is standardized with its own per-feature mean and standard
    deviation (computed over axes 1 and 2). Validation and test frames are
    standardized with the statistics of the most recent training frame.

    Parameters
    ----------
    X : 4-D array, indexed as ``(frame, country, time step, feature)``.
    y : 2-D array whose first axis is aligned with the frames of ``X``.
    n_time_steps : length of axis 2 of ``X``; used to tile the training statistics.
    n_validation_frames : number of validation frames (ignored if ``train_test_only``).
    n_test_frames : number of test frames.
    date_normalization : if ``False``, the first two features are left unscaled
        (mean 0, std 1).
    train_test_only : if ``True``, no validation split is produced.

    Returns
    -------
    splits : ``(X_train, y_train, X_test, y_test)`` or
        ``(X_train, y_train, X_validate, y_validate, X_test, y_test)``.
    normalizing_values : the tiled means / stds used for each X split, in the order
        ``(train_means, train_stds, [validate_means, validate_stds,] test_means, test_stds)``.

    Raises
    ------
    ValueError
        If the split sizes leave no training frames.
    """
    # Taken from the data instead of a notebook-level global so the function is self-contained.
    n_countries = X.shape[1]

    # Note that the last frame that exists in X has already been determined by the choice of
    # the number of steps to predict in the future; this is only slicing the frames.
    n_held_out = n_test_frames if train_test_only else n_validation_frames + n_test_frames
    X_train = X[:-n_held_out, :, :, :]
    y_train = y[:-n_held_out, :]
    X_test = X[-n_test_frames:, :, :, :]
    y_test = y[-n_test_frames:, :]
    if not train_test_only:
        X_validate = X[-n_held_out:-n_test_frames, :, :, :]
        y_validate = y[-n_held_out:-n_test_frames, :]

    if X_train.shape[0] == 0:
        raise ValueError(
            'No training frames left: X has {} frames and {} are held out.'.format(
                X.shape[0], n_held_out))

    X_means = X_train.mean(axis=(1,2))
    X_stds = X_train.std(axis=(1,2))

    # To avoid division by zero. This typically occurs when a frame's feature value is
    # constant, so (x - x_mean) is 0 and the replacement value of the std does not matter.
    X_stds[X_stds == 0.] = 1

    # First two features are the date-like features.
    if date_normalization == False:  # noqa: E712 - keeps the original comparison semantics
        X_means[:, :2] = 0
        X_stds[:, :2] = 1

    # Validation and test frames are rescaled by the most recent training frame's values only.
    # There is a single value per feature, so it is repeated for each frame, country and time step.
    latest_training_mean = X_means[-1, :][np.newaxis, np.newaxis, np.newaxis, :]
    latest_training_std = X_stds[-1, :][np.newaxis, np.newaxis, np.newaxis, :]

    def _tile_like(statistic, frames):
        """Repeat a (1, 1, 1, n_features) statistic to the full shape of ``frames``."""
        return np.tile(statistic, (frames.shape[0], frames.shape[1], frames.shape[2], 1))

    # Normalize the training data by each frame's specific mean and std deviation.
    X_train_means = np.tile(X_means[:, np.newaxis, np.newaxis, :], (1, n_countries, n_time_steps, 1))
    X_train_stds = np.tile(X_stds[:, np.newaxis, np.newaxis, :], (1, n_countries, n_time_steps, 1))
    X_test_means = _tile_like(latest_training_mean, X_test)
    X_test_stds = _tile_like(latest_training_std, X_test)

    X_train = (X_train - X_train_means) / X_train_stds
    X_test = (X_test - X_test_means) / X_test_stds

    if train_test_only:
        splits = (X_train, y_train, X_test, y_test)
        normalizing_values = (X_train_means, X_train_stds, X_test_means, X_test_stds)
    else:
        X_validate_means = _tile_like(latest_training_mean, X_validate)
        X_validate_stds = _tile_like(latest_training_std, X_validate)
        X_validate = (X_validate - X_validate_means) / X_validate_stds

        splits = (X_train, y_train, X_validate, y_validate, X_test, y_test)
        normalizing_values = (X_train_means, X_train_stds, X_validate_means, X_validate_stds,
                              X_test_means, X_test_stds)

    return splits, normalizing_values

def concatenate_4d_into_3d(splits, train_test_only=False):
    """Merge the leading axis of every array in ``splits`` into the axis after it.

    Each array is passed to ``np.concatenate(..., axis=0)``, which joins its
    sub-arrays along their first axis (a 4-D ``X`` becomes 3-D, a 2-D ``y``
    becomes 1-D).

    ``splits`` must hold exactly 4 arrays when ``train_test_only`` is true and
    exactly 6 otherwise; the returned tuple has the same length and order.
    """
    def _merge_leading_axes(stacked):
        return np.concatenate(stacked, axis=0)

    if train_test_only:
        X_train, y_train, X_test, y_test = splits
        ordered = (X_train, y_train, X_test, y_test)
    else:
        X_train, y_train, X_validate, y_validate, X_test, y_test = splits
        ordered = (X_train, y_train, X_validate, y_validate, X_test, y_test)

    return tuple(_merge_leading_axes(part) for part in ordered)

def transpose_for_separable2d(splits, train_test_only=False):
    """Swap axes 1 and 2 of every ``X`` array in ``splits``; ``y`` arrays pass through.

    ``splits`` must hold exactly 4 items ``(X_train, y_train, X_test, y_test)``
    when ``train_test_only`` is true, and exactly 6 items (with the validation
    pair in the middle) otherwise. The returned tuple keeps the same order.
    """
    swapped_axes = [0, 2, 1, 3]

    if train_test_only:
        X_train, y_train, X_test, y_test = splits
        return (np.transpose(X_train, axes=swapped_axes), y_train,
                np.transpose(X_test, axes=swapped_axes), y_test)

    X_train, y_train, X_validate, y_validate, X_test, y_test = splits
    return (np.transpose(X_train, axes=swapped_axes), y_train,
            np.transpose(X_validate, axes=swapped_axes), y_validate,
            np.transpose(X_test, axes=swapped_axes), y_test)

    
def true_predict_plot(y_test, y_naive, y_predict, title=''):
    """Scatter true values against the naive and the CNN predictions, side by side.

    Parameters
    ----------
    y_test : array of true values (x axis of both panels).
    y_naive : array of naive-baseline predictions (left panel).
    y_predict : array of model predictions (right panel).
    title : optional figure title; nothing is drawn when it is empty.

    Each panel also shows the diagonal from 0 to the larger of the maxima of
    ``y_test`` and ``y_predict``. The figure is shown with ``plt.show()`` and
    ``None`` is returned.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20,5))
    ymax = np.max([y_test.max(), y_predict.max()])

    panels = ((axes[0], y_naive, 'Naive model'),
              (axes[1], y_predict, 'CNN model'))
    for ax, y_model, panel_title in panels:
        ax.scatter(y_test, y_model, s=5)
        ax.plot([0, ymax], [0, ymax])
        ax.set_xlabel('True value')
        ax.set_ylabel('Predicted value')
        ax.set_title(panel_title)

    # The title argument used to be accepted but silently ignored.
    if title:
        fig.suptitle(title)

    plt.show()
    return None

def residual_plot(y_test,y_predict,title='', ax=None):
    """Scatter the residuals ``y_test - y_predict`` against the true values.

    Parameters
    ----------
    y_test : array of true values.
    y_predict : array of predictions; it is flattened before subtraction.
    title : optional axes title; nothing is set when it is empty.
    ax : matplotlib axes to draw on; a new figure and axes are created when ``None``.

    The function does not call ``plt.show()``; the caller decides when to display.
    """
    if ax is None:
        fig, ax = plt.subplots()
    ax.scatter(y_test, y_test-y_predict.ravel(), s=5)
    ax.set_ylabel('Residual')
    ax.set_xlabel('True value')
    # The title argument used to be accepted but silently ignored.
    if title:
        ax.set_title(title)
    ax.grid(True)
    return None

def residual_diff_plots(y_naive, y_predict, y_true, n_test_frames,n_days_into_future, n_countries):
    """Draw a 2x2 figure comparing naive and CNN errors for one forecast horizon.

    Top row: for each of the ``n_test_frames`` frames, the per-country difference
    ``true - naive`` (left) and ``true - CNN`` (right), with frame ``i`` plotted
    over x positions ``n_countries*i .. n_countries*(i+1)-1``.
    Bottom row: residual scatter plots produced by ``residual_plot``.
    """
    def _by_frame(values):
        # One row per frame, one column per country.
        return values.reshape(-1, n_countries)

    fig, axes = plt.subplots(2, 2, figsize=(20,5), sharey=True)
    naive_ax, cnn_ax, naive_resid_ax, cnn_resid_ax = axes.flatten()

    for frame in range(n_test_frames):
        x_positions = range(n_countries * frame, n_countries * (frame + 1))
        naive_ax.plot(x_positions, _by_frame(y_true)[frame, :] - _by_frame(y_naive)[frame, :])
        cnn_ax.plot(x_positions, _by_frame(y_true)[frame, :] - _by_frame(y_predict)[frame, :])

    fig.suptitle('{}-day-into-future predictions'.format(n_days_into_future))
    naive_ax.set_title('True minus Naive baseline')
    cnn_ax.set_title('True minus CNN')
    residual_plot(y_true, y_naive, title='Naive residual', ax=naive_resid_ax)
    residual_plot(y_true, y_predict, title='CNN residual', ax=cnn_resid_ax)
    plt.show()

def n_step_model_predictions(model_data, model_generator, frame_size, start_date, n_countries,
                             n_validation_frames, n_test_frames, predict_steps, f, k, epochs, batch_size,
                             train_test_only=False, Xy_truncation=None):
    """Train and evaluate one model per forecast horizon in ``predict_steps``.

    Note: a second definition with the same name appears later in this cell and
    is the one in effect after the cell runs.

    For every horizon the data is windowed with ``create_Xy``, split and
    standardized with ``split_and_normalize_Xy``, flattened to per-country samples
    with ``concatenate_4d_into_3d``, and a fresh model from ``model_generator`` is
    compiled (MAE loss, Adam) and fitted. The model is compared against a naive
    baseline that takes the last time step of ``new_cases_weighted`` in each
    (unnormalized) test frame as the prediction.

    Parameters
    ----------
    model_data : DataFrame accepted by ``create_Xy``; must contain a column matching
        ``new_cases_weighted``.
    model_generator : callable ``(X_train, f, k) -> model`` returning an uncompiled
        Keras model.
    frame_size, start_date, n_countries : forwarded to ``create_Xy``.
    n_validation_frames, n_test_frames : forwarded to ``split_and_normalize_Xy``.
    predict_steps : iterable of forecast horizons (days into the future).
    f, k : filter and kernel-size specifications forwarded to ``model_generator``.
    epochs, batch_size : forwarded to ``model.fit``.
    train_test_only : if ``True`` there is no validation split and the test split
        is used as validation data during fitting.
    Xy_truncation : optional number of leading frames to keep.

    Returns
    -------
    test, naive, prediction : lists with one flattened array per horizon (true
        values, naive predictions, model predictions).
    mae_naive_list, mae_predict_list : lists with one MAE per horizon.
    model_list : list with one fitted model per horizon.
    """
    new_cases_weighted_index = column_search(model_data, 'new_cases_weighted', return_style='iloc')[0]

    # Looked up lazily so that a missing SeparableConv2D_model definition does not
    # raise NameError when a different generator is used.
    separable_generator = globals().get('SeparableConv2D_model')
    uses_frame_input = separable_generator is not None and model_generator == separable_generator

    def _with_frame_input(X_samples, X_frames):
        # The flattened samples are ordered frame-major (all countries of frame 0, then
        # frame 1, ...), so each frame must be repeated n_countries times in a row.
        # np.tile would cycle through the frames and misalign the two inputs.
        return [X_samples, np.repeat(X_frames, n_countries, axis=0)]

    prediction = []
    naive = []
    test = []
    mae_naive_list = []
    mae_predict_list = []
    model_list = []

    for n_days_into_future in predict_steps:
        X, y = create_Xy(model_data, start_date, frame_size, n_days_into_future, n_countries)
        if Xy_truncation is not None:
            X = X[:Xy_truncation,:,:,:]
            y = y[:Xy_truncation,:]
        splits, _ = split_and_normalize_Xy(X, y, frame_size, n_validation_frames,
                                           n_test_frames, train_test_only=train_test_only)
        A_splits = concatenate_4d_into_3d(splits, train_test_only=train_test_only)

        if uses_frame_input:
            if train_test_only:
                X_train_A, y_train, X_test_A, y_test = A_splits
                X_train_B, _, X_test_B, _ = splits
                X_train = _with_frame_input(X_train_A, X_train_B)
                X_test = _with_frame_input(X_test_A, X_test_B)
                X_validate, y_validate = X_test, y_test
            else:
                X_train_A, y_train, X_validate_A, y_validate, X_test_A, y_test = A_splits
                X_train_B, _, X_validate_B, _, X_test_B, _ = splits
                X_train = _with_frame_input(X_train_A, X_train_B)
                # Validation must use the validation split, not the test split.
                X_validate = _with_frame_input(X_validate_A, X_validate_B)
                X_test = _with_frame_input(X_test_A, X_test_B)
        else:
            if train_test_only:
                X_train, y_train, X_test, y_test = A_splits
                X_validate, y_validate = X_test, y_test
            else:
                X_train, y_train, X_validate, y_validate, X_test, y_test = A_splits

        model = model_generator(X_train, f, k)
        model.compile(loss='mae', optimizer='adam')
        # fit network
        model.fit(X_train, y_train, epochs=epochs, validation_data=(X_validate, y_validate),
                  batch_size=batch_size)

        ### analysis
        y_test = y_test.ravel()
        # Naive baseline: last observed value in each unnormalized test frame.
        y_naive = X[-n_test_frames:, :, -1, new_cases_weighted_index].ravel()
        y_predict = model.predict(X_test).ravel()

        # evaluate model
        mae_naive = mean_absolute_error(y_test, y_naive)
        mae_predict = mean_absolute_error(y_test, y_predict)
        # These are explained-variance scores, printed under the existing "R^2" label.
        r2_naive = explained_variance_score(y_test, y_naive)
        r2_predict = explained_variance_score(y_test, y_predict)

        # Record per-horizon results; these lists used to be returned empty.
        test.append(y_test)
        naive.append(y_naive)
        prediction.append(y_predict)
        mae_naive_list.append(mae_naive)
        mae_predict_list.append(mae_predict)
        model_list.append(model)

        print('{}-step MAE [Naive, CNN] = [{},{}]'.format(
        n_days_into_future, mae_naive, mae_predict))
        print('{}-step R^2 [Naive, CNN] = [{},{}]'.format(
        n_days_into_future, r2_naive, r2_predict))

        true_predict_plot(y_test, y_naive, y_predict, title='')
        residual_diff_plots(y_naive, y_predict, y_test, n_test_frames, n_days_into_future, n_countries)

    return test, naive, prediction, mae_naive_list, mae_predict_list, model_list

def parallel_Conv1D_model(X_train, f, k):
    """Build a regression model that convolves one input in six parallel Conv1D branches.

    Every branch receives the same input and applies two ``Conv1D`` layers
    (``padding='valid'``), each followed by a ReLU activation, then flattens.
    Branches differ in kernel size and filter counts. The flattened branch outputs
    are concatenated and passed through a dense layer with half as many units as
    the concatenated width, then a single-unit ReLU output.

    Parameters
    ----------
    X_train : array whose ``shape[1:]`` defines the model input shape.
    f : 12 filter counts. ``f[0:6]`` are the first-layer filters of branches 1-6,
        ``f[6:12]`` the second-layer filters of the same branches.
    k : 6 kernel sizes, one per branch (used for both layers of that branch).
        Values of ``f`` and ``k`` are cast with ``int``.

    Returns
    -------
    An uncompiled Keras ``Model``.

    Raises
    ------
    ValueError
        If ``f`` does not contain 12 values or ``k`` does not contain 6.
    """
    n_branches = 6
    f = tuple(f)
    k = tuple(k)
    if len(f) != 2 * n_branches:
        raise ValueError('f must contain {} filter counts, got {}'.format(2 * n_branches, len(f)))
    if len(k) != n_branches:
        raise ValueError('k must contain {} kernel sizes, got {}'.format(n_branches, len(k)))
    first_layer_filters = f[:n_branches]
    second_layer_filters = f[n_branches:]

    model_input = Input(shape=X_train.shape[1:])

    # Each branch: Conv1D -> ReLU -> Conv1D -> ReLU -> Flatten, all fed by the same input.
    # Pooling and dilation variants were tried and did not help (see the notes cell below).
    branch_outputs = []
    for n_first, n_second, kernel_size in zip(first_layer_filters, second_layer_filters, k):
        branch = model_input
        for n_filters in (n_first, n_second):
            branch = Conv1D(filters=int(n_filters),
                            kernel_size=int(kernel_size),
                            padding='valid')(branch)
            branch = Activation('relu')(branch)
        branch_outputs.append(Flatten()(branch))

    # combine the output of the parallel branches
    combined = concatenate(branch_outputs, axis=1)

    # apply a FC layer and then a regression prediction on the combined outputs
    FC = Flatten()(combined)
    FC = Dense(FC.shape[-1]//2, activation='relu')(FC)
    # NOTE(review): a ReLU on the single output unit restricts predictions to >= 0;
    # confirm this is intended for the target.
    FC = Dense(1, activation='relu')(FC)

    # the model accepts the single shared input and outputs a single value
    model = Model(inputs=model_input, outputs=FC)
    return model

def n_step_model_predictions(model_data, model_generator, frame_size, start_date, n_countries,
                             n_validation_frames, n_test_frames, predict_steps, f, k, epochs, batch_size,
                             train_test_only=False, Xy_truncation=None):
    """Train and evaluate one model per forecast horizon in ``predict_steps``.

    Note: this definition replaces an earlier one with the same name in this cell.

    For every horizon the data is windowed with ``create_Xy``, split and
    standardized with ``split_and_normalize_Xy``, flattened to per-country samples
    with ``concatenate_4d_into_3d``, and a fresh model from ``model_generator`` is
    compiled (MAE loss, Adam) and fitted. The model is compared against a naive
    baseline that takes the last time step of ``new_cases_weighted`` in each
    (unnormalized) test frame as the prediction.

    Parameters
    ----------
    model_data : DataFrame accepted by ``create_Xy``; must contain a column matching
        ``new_cases_weighted``.
    model_generator : callable ``(X_train, f, k) -> model`` returning an uncompiled
        Keras model.
    frame_size, start_date, n_countries : forwarded to ``create_Xy``.
    n_validation_frames, n_test_frames : forwarded to ``split_and_normalize_Xy``.
    predict_steps : iterable of forecast horizons (days into the future).
    f, k : filter and kernel-size specifications forwarded to ``model_generator``.
    epochs, batch_size : forwarded to ``model.fit``.
    train_test_only : if ``True`` there is no validation split and the test split
        is used as validation data during fitting.
    Xy_truncation : optional number of leading frames to keep.

    Returns
    -------
    test, naive, prediction : lists with one flattened array per horizon (true
        values, naive predictions, model predictions).
    mae_naive_list, mae_predict_list : lists with one MAE per horizon.
    model_list : list with one fitted model per horizon.
    """
    new_cases_weighted_index = column_search(model_data, 'new_cases_weighted', return_style='iloc')[0]

    # Looked up lazily so that a missing SeparableConv2D_model definition does not
    # raise NameError when a different generator is used.
    separable_generator = globals().get('SeparableConv2D_model')
    uses_frame_input = separable_generator is not None and model_generator == separable_generator

    def _with_frame_input(X_samples, X_frames):
        # The flattened samples are ordered frame-major (all countries of frame 0, then
        # frame 1, ...), so each frame must be repeated n_countries times in a row.
        # np.tile would cycle through the frames and misalign the two inputs.
        return [X_samples, np.repeat(X_frames, n_countries, axis=0)]

    prediction = []
    naive = []
    test = []
    mae_naive_list = []
    mae_predict_list = []
    model_list = []

    for n_days_into_future in predict_steps:
        X, y = create_Xy(model_data, start_date, frame_size, n_days_into_future, n_countries)
        if Xy_truncation is not None:
            X = X[:Xy_truncation,:,:,:]
            y = y[:Xy_truncation,:]
        splits, _ = split_and_normalize_Xy(X, y, frame_size, n_validation_frames,
                                           n_test_frames, train_test_only=train_test_only)
        A_splits = concatenate_4d_into_3d(splits, train_test_only=train_test_only)

        if uses_frame_input:
            if train_test_only:
                X_train_A, y_train, X_test_A, y_test = A_splits
                X_train_B, _, X_test_B, _ = splits
                X_train = _with_frame_input(X_train_A, X_train_B)
                X_test = _with_frame_input(X_test_A, X_test_B)
                X_validate, y_validate = X_test, y_test
            else:
                X_train_A, y_train, X_validate_A, y_validate, X_test_A, y_test = A_splits
                X_train_B, _, X_validate_B, _, X_test_B, _ = splits
                X_train = _with_frame_input(X_train_A, X_train_B)
                # Validation must use the validation split, not the test split.
                X_validate = _with_frame_input(X_validate_A, X_validate_B)
                X_test = _with_frame_input(X_test_A, X_test_B)
        else:
            if train_test_only:
                X_train, y_train, X_test, y_test = A_splits
                X_validate, y_validate = X_test, y_test
            else:
                X_train, y_train, X_validate, y_validate, X_test, y_test = A_splits

        model = model_generator(X_train, f, k)
        model.compile(loss='mae', optimizer='adam')
        # fit network
        model.fit(X_train, y_train, epochs=epochs, validation_data=(X_validate, y_validate),
                  batch_size=batch_size)

        ### analysis
        y_test = y_test.ravel()
        # Naive baseline: last observed value in each unnormalized test frame.
        y_naive = X[-n_test_frames:, :, -1, new_cases_weighted_index].ravel()
        y_predict = model.predict(X_test).ravel()

        # evaluate model
        mae_naive = mean_absolute_error(y_test, y_naive)
        mae_predict = mean_absolute_error(y_test, y_predict)
        # These are explained-variance scores, printed under the existing "R^2" label.
        r2_naive = explained_variance_score(y_test, y_naive)
        r2_predict = explained_variance_score(y_test, y_predict)

        # Record per-horizon results; these lists used to be returned empty.
        test.append(y_test)
        naive.append(y_naive)
        prediction.append(y_predict)
        mae_naive_list.append(mae_naive)
        mae_predict_list.append(mae_predict)
        model_list.append(model)

        print('{}-step MAE [Naive, CNN] = [{},{}]'.format(
        n_days_into_future, mae_naive, mae_predict))
        print('{}-step R^2 [Naive, CNN] = [{},{}]'.format(
        n_days_into_future, r2_naive, r2_predict))

        true_predict_plot(y_test, y_naive, y_predict, title='')
        residual_diff_plots(y_naive, y_predict, y_test, n_test_frames, n_days_into_future, n_countries)

    return test, naive, prediction, mae_naive_list, mae_predict_list, model_list

In [None]:
# The more complex 1-D model

Convolve a single input in parallel, using different kernel sizes. 
Send the same input to different kernel-size convolutional layers before aggregating. 
architecture from https://www.youtube.com/watch?v=nMkqWxMjWzg

    different kernel sizes are the entire point
    combinations tested: 
    
    1. equal filters
    2. weighted n_filters
    3. pooling (not good)
    4. dilation (not good)
    5. individual dense layers on top of last 2 FC layers

In [None]:
# Driver cell: train and evaluate the parallel Conv1D model for several forecast horizons.
# NOTE(review): `data`, `n_dates` and `n_countries` are not defined in the cells shown
# here; this cell relies on them already existing in the kernel -- confirm they are
# created earlier so the notebook survives "Restart & Run All".

# Drop the first column of `data` before modelling.
model_data = data.iloc[:,1:]

frame_size = 32
# Start two frame lengths before the end of the date range.
start_date = n_dates - 2 * frame_size

# One kernel size per parallel branch.
k = 1,2,3,4,5,6
# The first 6 values are the filter counts of the first Conv1D layer in each of the 6
# parallel branches; the last 6 are the filter counts of the second layer.
f = (8,8,8,8,8,8,16,16,16,16,16,16)

n_validation_frames = 1
n_test_frames = 3
epochs = 3
batch_size = 32
# Forecast horizons, in days into the future.
predict_steps = [1, 7, 14]
# NOTE(review): naive_scores and predict_scores are not used anywhere in this cell.
naive_scores = []
predict_scores = []
# With train_test_only=True, n_validation_frames is not used for splitting and the test
# split doubles as validation data inside n_step_model_predictions.
train_test_only=True
model_generator = parallel_Conv1D_model
results = n_step_model_predictions(model_data, model_generator, frame_size, start_date, n_countries,
                             n_validation_frames, n_test_frames, predict_steps, f, k, epochs, batch_size,
                             train_test_only=train_test_only)
# The last element is the list of fitted models (one per horizon), despite the singular name.
test, naive, prediction, mae_naive_list, mae_predict_list,model = results