In [1]:
from models import RNNModel
import numpy as np
import pandas as pd
import joblib
from data import readLocally
from sklearn.metrics import confusion_matrix
from visualization import plot_roc_auc, pr_curve, format_vertical_headers, print_confusion_matrix, printModelData

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
X_train, y_train, X_test, y_test, X_val, y_val, labels_hash, scaler = readLocally()




_ _ _ _ _ _ _ _ _ _   READ DATA LOCALLY  _ _ _ _ _ _ _ _ _ _ 




SHAPES & KEYS:
    X_train          : (285428, 25, 12)   -> 48%
    y_train          : (285428,)
    X_test           : (237858, 25, 12)   -> 40%
    y_test           : (237858,)
    X_val            : (71357, 25, 12)   -> 12%
    y_val            : (71357,)
    ______________________
    Total Data Size  : 594643
    labels_hash Keys : dict_keys(['customer', 'age', 'gender', 'merchant', 'category'])
    
    TRAIN DATA
    ______________________
    Positives        : 3461   -> 1.21%
    Negatives        : 281967   -> 98.79%
    
    TEST DATA
    ______________________
    Positives        : 2839   -> 1.19%
    Negatives        : 235019   -> 98.81%   
    
    VAL DATA
    ______________________
    Positives        : 900   -> 1.26%
    Negatives        : 70457   -> 98.74%
    


In [4]:
#DIVIDE X_TEST IN HALF AND ADD IT TO X_TRAIN
test_half_pos   = int(len(X_test)/2)
total_data_size = len(X_train) + len(X_test) + len(X_val)

X_train = np.vstack( ( X_train ,  X_test[0: test_half_pos ] ))
y_train = np.append( y_train ,  y_test[0: test_half_pos ]  )

X_test  = X_test[ test_half_pos : ]
y_test  = y_test[ test_half_pos : ]

print("""INCREASED TRAIN SET -> NEW SIZES
X_train: {}
y_train: {}         {:0.0f}% {:0.2f}P% {:0.2f}N%
X_test : {}
y_test : {}         {:0.0f}% {:0.2f}P% {:0.2f}N%
X_val  : {}
y_val  : {}         {:0.0f}% {:0.2f}P% {:0.2f}N%
""".format(
    X_train.shape, y_train.shape, len(X_train) * 100 / total_data_size,  len(y_train[y_train==1]) * 100 / len(y_train) , len(y_train[y_train==0]) * 100 / len(y_train) ,
    X_test.shape, y_test.shape,   len(X_test)  * 100  / total_data_size, len(y_test[y_test==1])   * 100 / len(y_test)  , len(y_test[y_test==0])   * 100 / len(y_test) ,
    X_val.shape,  y_val.shape,    len(X_val)   * 100  / total_data_size, len(y_val[y_val==1])     * 100 / len(y_val)   , len(y_val[y_val==0])     * 100 / len(y_val) ,
))

#Reduce Data to half the size per batch

X_train1 = X_train[:,12:]
X_test1 = X_test[:,  12:]
X_val1 = X_val[:,  12: ]

X_train2 = X_train[:,-1].reshape( len(X_train), 1, 12 )
X_test2  = X_test[:,  -1].reshape( len(X_test), 1, 12 )
X_val2   = X_val[:,  -1].reshape( len(X_val), 1, 12 )

print("X_train 1: {}->{}".format(X_train.shape, X_train1.shape))
print("X_test  1: {}->{}".format(X_test.shape,  X_test1.shape))
print("X_val   1: {}->{}\n\n".format(X_val.shape,   X_val1.shape))

print("X_train 2: {}->{}".format(X_train.shape, X_train2.shape))
print("X_test  2: {}->{}".format(X_test.shape,  X_test2.shape))
print("X_val   2: {}->{}\n\n".format(X_val.shape,   X_val2.shape))

print("X_train 1 Sample: \n\n")
print(pd.DataFrame(X_train1[-1]))

print("X_train 2 Sample: \n\n")
print(pd.DataFrame(X_train2[-1]))

INCREASED TRAIN SET -> NEW SIZES
X_train: (404357, 25, 12)
y_train: (404357,)         68% 1.20P% 98.80N%
X_test : (118929, 25, 12)
y_test : (118929,)         20% 1.23P% 98.77N%
X_val  : (71357, 25, 12)
y_val  : (71357,)         12% 1.26P% 98.74N%

X_train 1: (404357, 25, 12)->(404357, 13, 12)
X_test  1: (118929, 25, 12)->(118929, 13, 12)
X_val   1: (71357, 25, 12)->(71357, 13, 12)


X_train 2: (404357, 25, 12)->(404357, 1, 12)
X_test  2: (118929, 25, 12)->(118929, 1, 12)
X_val   2: (71357, 25, 12)->(71357, 1, 12)


X_train 1 Sample: 


          0      1    2     3         4         5         6         7   \
0   0.633333  0.875  0.5  0.38  0.866667  0.005721  0.333333  0.005527   
1   0.638889  0.875  0.5  0.38  0.866667  0.002008  0.333333  0.005551   
2   0.644444  0.875  0.5  0.38  0.866667  0.001388  0.333333  0.005500   
3   0.650000  0.875  0.5  0.38  0.866667  0.000975  0.333333  0.005439   
4   0.655556  0.875  0.5  0.38  0.866667  0.006718  0.333333  0.005371   
5   0.661111  

In [5]:
n_batches        = X_train1.shape[0]
batch_size       = X_train1.shape[1]
n_features       = X_train1.shape[2]

print(n_batches, batch_size, n_features)

404357 13 12


In [6]:
gru_param_grid = {
    'modelType': ['GRU'], 
    'dropout': [True],
    'dropout_rate': [0.2], 
    'epochs': [50], 
    'hidden_layer_activation': ['sigmoid'], 
    'hidden_layers': [2], 
    'hidden_layers_neurons': [300], 
    'loss': ['binary_crossentropy'], 
    'optimizer': ['adam'], 
    'output_layer_activation': ['sigmoid'], 
    'rnn_hidden_layers': [0], 
    'rnn_hidden_layers_neurons': [50], 
    'rnn_layer_activation': ['sigmoid']
}

In [None]:
gru_model_1 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_double_data_half_batch_checkpoint_no_l1.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)
gru_history = gru_model_1.train( X_train1, y_train, X_test1, y_test )
gru_model_1.model.best_estimator_.model.save( "gru_double_data_half_batch_no_l1.h5" )

In [None]:
y_pred_val = gru_model_1.model.predict_proba(X_val1)
print(y_pred_val.shape, y_pred_val)
print("\n\nCONFUSION MATRIX OVER TEST DATA\n\n")
tn, fp, fn, tp = confusion_matrix(
    y_val, y_pred_val[:, 1].round()).ravel()
print_confusion_matrix( tn, fp, fn, tp  )

print("\n\nPLOTS OVER TEST DATA\n\n")
plot_roc_auc(y_val, y_pred_val[:, 1])
pr_curve(y_val, y_pred_val[:, 1])

In [None]:
n_batches        = X_train2.shape[0]
batch_size       = X_train2.shape[1]
n_features       = X_train2.shape[2]

In [None]:
gru_model_2 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_double_data_one_val_batch_checkpoint_no_l1.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)
gru_history_2 = gru_model_2.train( X_train2, y_train, X_test2, y_test )
gru_model_2.model.best_estimator_.model.save( "gru_double_data_one_val_batch_no_l1.h5" )

In [None]:
y_pred_val = gru_model_2.model.predict_proba(X_val2)
print(y_pred_val.shape, y_pred_val)
print("\n\nCONFUSION MATRIX OVER TEST DATA\n\n")
tn, fp, fn, tp = confusion_matrix(
    y_val, y_pred_val[:, 1].round()).ravel()
print_confusion_matrix( tn, fp, fn, tp  )

print("\n\nPLOTS OVER TEST DATA\n\n")
plot_roc_auc(y_val, y_pred_val[:, 1])
pr_curve(y_val, y_pred_val[:, 1])

In [None]:
n_batches        = X_train3.shape[0]
batch_size       = X_train3.shape[1]
n_features       = X_train3.shape[2]

In [None]:
gru_model_3 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_double_data_double_batch_checkpoint_no_l1.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)
gru_history_3 = gru_model_3.train( X_train3, y_train3, X_test3, y_test3 )
gru_model_3.model.best_estimator_.model.save( "gru_double_data_double_batch_no_l1.h5" )

In [None]:
n_batches        = X_train2.shape[0]
batch_size       = X_train2.shape[1]
n_features       = X_train2.shape[2]
n_batches,batch_size,n_features

In [None]:
gru_model_4 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_double_data_one_batch_double_weights_no_l1_checkpoint.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)


from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train.flatten()), y_train.flatten())
modified_weights = np.array(class_weights) 
modified_weights[1] = class_weights[1] * 2
print("Original Wieghts {}\nModified Weights: {}".format( dict(enumerate(class_weights)), dict(enumerate(modified_weights)) ))

In [None]:
gru_history_4 = gru_model_4.train( X_train2, y_train, X_test2, y_test, class_weights=modified_weights )
gru_model_4.model.best_estimator_.model.save( "gru_double_data_one_batch_double_weights_no_l1.h5" )

In [None]:
gru_param_grid_2 = gru_param_grid.copy()
gru_param_grid_2['hidden_layers'] = [1]
gru_param_grid_2

In [None]:
gru_model_5 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid_2,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_double_data_one_batch_1_layer_no_l1_checkpoint.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)
gru_history_5 = gru_model_5.train( X_train2, y_train, X_test2, y_test, class_weights=None )
gru_model_5.model.best_estimator_.model.save( "gru_double_data_one_batch_1_layer_no_l1.h5" )

In [None]:
gru_param_grid_3 = gru_param_grid.copy()
gru_param_grid_3['modelType']= ['SimpleRNN']

gru_model_6 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid_3,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_double_data_one_batch_SimpleRNN_no_l1_checkpoint.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)

In [None]:
gru_history_6 = gru_model_6.train( X_train2, y_train, X_test2, y_test, class_weights=None )
gru_model_6.model.best_estimator_.model.save( "gru_double_data_one_batch_SimpleRNN_no_l1.h5" )

In [17]:
n_batches        = X_train.shape[0]
batch_size       = X_train.shape[1]
n_features       = X_train.shape[2]
n_batches, batch_size, n_features

(404357, 25, 12)

In [7]:
n_batches        = X_train2.shape[0]
batch_size       = X_train2.shape[1]
n_features       = X_train2.shape[2]

print(n_batches, batch_size, n_features)

404357 1 12


In [8]:
from imblearn.over_sampling import SMOTE 

X_train_2D = X_train2.reshape(X_train2.shape[0], X_train2.shape[2])
print('Original dataset shape X {} => {}, y {}'.format( X_train2.shape, X_train_2D.shape, y_train.shape ))
print('P {} N {}'.format( len(y_train[y_train == 1]), len(y_train[y_train == 0]) ))
sm = SMOTE(random_state=42)
X_train_ov_2D, y_train_ov = sm.fit_resample(X_train_2D, y_train)
X_train_ov = X_train_ov_2D.reshape(X_train_ov_2D.shape[0], 1, X_train_ov_2D.shape[1])

print('Resampled dataset shape ',X_train_ov.shape,  X_train_ov_2D.shape, y_train_ov.shape, )
print('P {} N {}'.format( len(y_train_ov[y_train_ov == 1]), len(y_train_ov[y_train_ov == 0]) ))


Using TensorFlow backend.


Original dataset shape X (404357, 1, 12) => (404357, 12), y (404357,)
P 4838 N 399519
Resampled dataset shape  (799038, 1, 12) (799038, 12) (799038,)
P 399519 N 399519


In [11]:

%load_ext autoreload
%autoreload 2

gru_model_8 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=1,
  output_file= "gru_oversampled_data_one_batch_no_l1_checkpoints.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


_ _ _ _ _ _ _ _ _ _  CREATING RNN MODEL WITHOUT L1 REGULARIZATION _ _ _ _ _ _ _ _ _ _ 




_ _ _ _ _ _ _ _ _ _  INITIALIZING GRID SEARCH RNN MODEL _ _ _ _ _ _ _ _ _ _ 



        PARAMETERS:
        ________________________________
        input_shape :  (1, 12)
        output_dim  :  1
        main scoring:  recall
        all scoring :  ['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision']
        early_stopping_monitor   : val_recall
        model_checkpoint_monitor : val_recall
        verbose: 1
        callbacks: 

[<tensorflow.python.keras.callbacks.EarlyStopping object at 0x7fee2c7d6390>, <tensorflow.python.keras.callbacks.ModelCheckpoint object at 0x7fee2c7d64e0>, <tensorflow.python.keras.callbacks.CSVLogger object at 0x7fee2c7d6518>, <tensorflow.python.keras.callbacks.TensorBoard object at 0x7fee2c7d6550>, <tensorflow.python.keras.callbacks.ProgbarLogger object at 0x7

In [12]:
gru_history_8 = gru_model_8.train( X_train_ov, y_train_ov, X_test2, y_test, class_weights=None )
print("SAVING..")
gru_model_8.model.best_estimator_.model.save( "gru_oversampled_data_one_batch_no_l1.h5" )



_ _ _ _ _ _ _ _ _ _  TRAINING RNN _ _ _ _ _ _ _ _ _ _ 


Generating Class Weights.

        Class weights: 
[1. 1.]
{0: 1.0, 1: 1.0}

        for classes: 
[0. 1.]

        # Frauds: 399519
        # of Non-Frauds: 399519
        
INPUTS
        X:      (799038, 1, 12)
        y:      (799038,)
        X_test: (118929, 1, 12)
        y_test: (118929,)
        
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.




_ _ _ _ _ _ _ _ _ _  CREATING ML MODEL _ _ _ _ _ _ _ _ _ _ 



        PARAMETERS:
        ________________________________ 
          rnn_hidden_layers:         0 
          rnn_hidden_layers_neurons: 50 
          hidden_layers:             2 
          hidden_layers_neurons:     300
          loss:                      binary_crossentropy
          optimizer:                 adam
          modelType:                 GRU
          dropout:                   True
          dropout_rate:              0.2
          input_shape:               (1, 12)
          output_dim:                1
          output_layer_activation:   sigmoid
          rnn_layer_activation:      sigmoid
          hidden_layer_activation:   sigmoid
          keras_eval_metric:         [[<tensorflow.python.keras.metrics.TruePositives object at 0x7fee2c7d60b8>, <tensorflow.python.keras.metrics.FalsePositives object at 0x7fee2c7d6c50>, <tensorflow.python.keras.metrics.TrueNegatives object at 0x7fee2c7d6ef0>, <tensor

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [None]:
%load_ext autoreload
%autoreload 2

gru_param_grid_4 = gru_param_grid.copy()

gru_model_7 = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_param_grid_4,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=1,
  output_file= "gru_double_data_2_rnn_layers_one_batch_1_layer_no_l1_checkpoints.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)

gru_history_7 = gru_model_7.train( X_train, y_train, X_test, y_test, class_weights=None )
print("SAVING..")
gru_model_7.model.best_estimator_.model.save( "gru_double_data_2_rnn_layers_one_batch_1_layer_no_l1.h5" )

# TRAINING WITH DOUBLE DATA

In [None]:
#MODIFIED DATASET
from models import RNNModel
import numpy as np
import pandas as pd
import joblib
from data import readLocally
from sklearn.metrics import confusion_matrix
from visualization import plot_roc_auc, pr_curve, format_vertical_headers, print_confusion_matrix, printModelData

In [None]:
X_train, y_train, X_test, y_test, X_val, y_val, labels_hash, scaler = readLocally()

#DIVIDE X_TEST IN HALF AND ADD IT TO X_TRAIN
test_half_pos   = int(len(X_test)/2)
total_data_size = len(X_train) + len(X_test) + len(X_val)

X_train = np.vstack( ( X_train ,  X_test[0: test_half_pos ] ))
y_train = np.append( y_train ,  y_test[0: test_half_pos ]  )

X_test  = X_test[ test_half_pos : ]
y_test  = y_test[ test_half_pos : ]

print("""INCREASED TRAIN SET -> NEW SIZES
X_train: {}
y_train: {}         {:0.0f}% {:0.2f}P% {:0.2f}N%
X_test : {}
y_test : {}         {:0.0f}% {:0.2f}P% {:0.2f}N%
X_val  : {}
y_val  : {}         {:0.0f}% {:0.2f}P% {:0.2f}N%
""".format(
    X_train.shape, y_train.shape, len(X_train) * 100 / total_data_size,  len(y_train[y_train==1]) * 100 / len(y_train) , len(y_train[y_train==0]) * 100 / len(y_train) ,
    X_test.shape, y_test.shape,   len(X_test)  * 100  / total_data_size, len(y_test[y_test==1])   * 100 / len(y_test)  , len(y_test[y_test==0])   * 100 / len(y_test) ,
    X_val.shape,  y_val.shape,    len(X_val)   * 100  / total_data_size, len(y_val[y_val==1])     * 100 / len(y_val)   , len(y_val[y_val==0])     * 100 / len(y_val) ,
))

In [None]:
gru_3_param_grid = {
    'dropout': [True], 
    'dropout_rate': [0.2], 
    'epochs': [50], 
    'hidden_layer_activation': ['sigmoid'], 
    'hidden_layers': [0],  #[2], 
    'hidden_layers_neurons': [300], #[300], 
    'loss': ['binary_crossentropy'], 
    'modelType': ['GRU'], 
    'optimizer': ['adam'], 
    'output_layer_activation': ['sigmoid'], 
    'rnn_hidden_layers': [0], 
    'rnn_hidden_layers_neurons': [50], 
    'rnn_layer_activation': ['sigmoid']
}


n_batches        = X_train.shape[0]
batch_size       = X_train.shape[1]
n_features       = X_train.shape[2]

gru_3_model = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_3_param_grid,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_3_double_data_checkpoint.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)
gru_3_history = gru_3_model.train( X_train, y_train, X_test, y_test )

In [None]:
gru_3_model.model.best_estimator_.model.save( "gru3_0_200_double_data.h5" )

In [None]:
gru_4_param_grid = {
    'dropout': [True], 
    'dropout_rate': [0.2], 
    'epochs': [25], 
    'hidden_layer_activation': ['sigmoid'], 
    'hidden_layers': [2],  #[2], 
    'hidden_layers_neurons': [300], #[300], 
    'loss': ['binary_crossentropy'], 
    'modelType': ['GRU'], 
    'optimizer': ['adam'], 
    'output_layer_activation': ['sigmoid'], 
    'rnn_hidden_layers': [0], 
    'rnn_hidden_layers_neurons': [50], 
    'rnn_layer_activation': ['sigmoid']
}


n_batches        = X_train.shape[0]
batch_size       = X_train.shape[1]
n_features       = X_train.shape[2]

gru_4_model = RNNModel(
  input_shape=( batch_size , n_features  ),
  output_dim = 1,
  param_grid=gru_4_param_grid,
  scoring=['accuracy', 'precision', 'recall', 'roc_auc', 'f1', 'average_precision' ],  
  refit= "recall",   
  verbose=2,
  output_file= "gru_4_double_data_checkpoint.h5",
  early_stopping_monitor="val_recall",
  model_checkpoint_monitor="val_recall"
)
gru_4_history = gru_4_model.train( X_train, y_train, X_test, y_test )

In [None]:
gru_4_model.model.best_estimator_.model.save( "gru_4_2_300_nol1_double_data.h5" )

In [None]:
import tensorflow
tensorflow.keras.models.load_model("gru_4_2_300_nol1_double_data.h5").summary()

In [None]:
tensorflow.keras.models.load_model("gru_3_0_200_lr_double_data.h5").summary()

# DOUBLE SIZE BATCHES

In [None]:
from data import downloadFromKaggle, normalizing_data, generating3DRNNInput
from data import generateNewFeatures, separateInBatches, separateLabel, separatingTrainTest, normalize3DInput 
from data import read_data, readLocally, saveLocally, readDataFromCloud, saveToCloud 
from visualization import plot_roc_auc, pr_curve, print_confusion_matrix, visualize_data, printModelData, acc_plot, loss_plot, format_vertical_headers

import joblib
import pickle

In [None]:
empty_padding_value                = -1

data                               = read_data(input_file_path="bs140513_032310.csv")
visualize_data(data)
rnn_data, smaller_batches_rnn_data = generating3DRNNInput(data) 
rnn_mod_data                       = generateNewFeatures(rnn_data)
X, grouped_X, y, grouped_y         = separateInBatches(rnn_mod_data, min_batch_size=50)

# print("CT", data['customer'].value_counts() > 200 )
# print("CT", data['merchant'].value_counts())
# data.head()

In [None]:
X_norm                             = normalize3DInput(X)
y[y==empty_padding_value] = 0
X_train3, X_test3, y_train3, y_test3, X_val3, y_val3 = separatingTrainTest(X_norm, y, test_size=0.1, val_size=0.1)

In [None]:
print("""SHAPES & KEYS:
X_train          : {}
y_train          : {}
________________________
X_test           : {}
y_test           : {}
________________________
X_val            : {}
y_val            : {}
________________________
labels_hash Keys : 
""".format(
    X_train3.shape, y_train3.shape,
    X_test3.shape,  y_test3.shape,
    X_val3.shape, y_val3.shape, 
#     labels_hash.keys() 
))

In [None]:
pickle.dump( rnn_data      , open( "rnn_data_db.data"      , "wb" ) ) 
pickle.dump( rnn_mod_data  , open( "rnn_mod_data_db.data"  , "wb" ) ) 
pickle.dump( X_train3      , open( "X_train_db.data"       , "wb" ) ) 
pickle.dump( X_test3       , open( "X_test_db.data"        , "wb" ) )
pickle.dump( X_val3        , open( "X_val_db.data"         , "wb" ) )
pickle.dump( y_train3      , open( "y_train_db.data"       , "wb" ) )
pickle.dump( y_test3       , open( "y_test_db.data"        , "wb" ) )
pickle.dump( y_val3        , open( "y_val_db.data"         , "wb" ) )