In [None]:
# Load packages
import tensorflow as tf
import pandas as pd
from tensorflow import keras
import numpy as np
import pandas as pd
import os
import scipy as scp
import scipy.stats as scps
import time
from datetime import datetime


# Load my own functions
import dnnregressor_train_eval_keras as dnnk
from kde_training_utilities import kde_load_data
from kde_training_utilities import kde_make_train_test_split
import make_data_wfpt as mdw

In [25]:
# CUDA setup: order devices by PCI bus id and expose only device "1" to this
# process, then print the devices TensorFlow reports.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "1",
})

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 517495730777844184
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 2763179331810708776
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 12048773940
locality {
  bus_id: 1
  links {
  }
}
incarnation: 11339983372263015286
physical_device_desc: "device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:03:00.0, compute capability: 5.2"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 384169191045836116
physical_device_desc: "device: XLA_GPU device"
]


In [3]:
# Folder in which the dataset lies
data_folder = '/media/data_cifs/afengler/data/kde/linear_collapse/train_test_data/'

# Trainer object from the local dnnk module (cpm = choice probability model)
cpm = dnnk.dnn_trainer()

In [None]:
# Make train test split
# Create the train/test split for the data in `data_folder`, with p_train = 0.8.
# NOTE(review): presumably this writes the split to disk and is random; if so,
# re-running it replaces the split earlier models were trained on -- confirm.
kde_make_train_test_split(folder = data_folder, p_train = 0.8)

In [4]:
# Load the stored train/test split and place its four parts in the trainer's
# data dictionary (features and labels for train and test, in that order).
(cpm.data['train_features'],
 cpm.data['train_labels'],
 cpm.data['test_features'],
 cpm.data['test_labels']) = kde_load_data(folder = data_folder)

In [None]:
# Sanity check: dimensions of the test feature set
cpm.data['test_features'].shape

In [None]:
# Display the training features as loaded
cpm.data['train_features']

In [None]:
# Inspect a single training row by position.
# NOTE(review): the training log in this notebook reports 143268157 training
# samples, so positional index 171247010 looks out of range for .iloc and would
# raise IndexError -- confirm whether a label lookup (.loc) or another row was
# intended.
cpm.data['train_features'].iloc[171247010, :]

In [None]:
# Attach the training labels to the feature frame as column 'log_l' for inspection.
# NOTE(review): this modifies cpm.data['train_features'] in place. If the
# parameter/training cells further down run afterwards, shape[1] (used there for
# 'input_shape') counts this extra column and the labels would presumably be fed
# to the network as an input -- remove the column again before training.
cpm.data['train_features']['log_l'] = cpm.data['train_labels']

In [None]:
# Display the training rows ordered by 'log_l' (requires the 'log_l' column to
# have been attached first). The sorted frame is only displayed, not stored.
cpm.data['train_features'].sort_values(by = 'log_l')

In [None]:
# Display the training feature frame again (unsorted, as stored in cpm.data)
cpm.data['train_features']

In [None]:
# Inspect the training row at position 22428
cpm.data['train_features'].iloc[22428, :]

In [None]:
# Inspect the label at index 22428 (same index as the feature row looked up above).
# NOTE(review): the tuple-style index `[22428, ]` assumes train_labels is a
# NumPy array -- confirm against kde_load_data.
cpm.data['train_labels'][22428, ]

In [17]:
# Make all parameters we can specify explicit.
# Model parameters: show the trainer's current model settings (input/output
# shape, hidden layers and activations, regularisation coefficients, optimizer,
# loss and metrics).
cpm.model_params

{'input_shape': 7,
 'output_shape': 1,
 'output_activation': 'linear',
 'hidden_layers': [20, 40, 60, 80, 100, 120],
 'hidden_activations': ['relu', 'relu', 'relu', 'relu', 'relu', 'relu'],
 'l1_activation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'l2_activation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'l1_kernel': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'l2_kernel': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'optimizer': 'Nadam',
 'loss': 'mse',
 'metrics': ['mse']}

In [18]:
# Parameters governing training: show the trainer's current settings for
# callbacks, patience values, learning-rate reduction, checkpointing, epochs
# and batch size.
cpm.train_params

{'callback_funs': ['ReduceLROnPlateau', 'EarlyStopping', 'ModelCheckpoint'],
 'plateau_patience': 10,
 'min_delta': 0.0001,
 'early_stopping_patience': 15,
 'callback_monitor': 'loss',
 'min_learning_rate': 1e-07,
 'red_coef_learning_rate': 0.1,
 'ckpt_period': 10,
 'ckpt_save_best_only': True,
 'ckpt_save_weights_only': True,
 'max_train_epochs': 200,
 'batch_size': 200000,
 'warm_start': False,
 'checkpoint': 'ckpt',
 'model_cnt': 0}

In [19]:
# Parameters concerning data storage: show the trainer's current settings for
# data type, model directory, model naming, timestamp and training-set size.
# NOTE(review): the displayed dict contains both a 'timestamp' and a 'timestep'
# key -- check whether 'timestep' is a stale/misspelled entry.
cpm.data_params

{'data_type': 'kde',
 'model_directory': '/media/data_cifs/afengler/data/kde/linear_collapse/keras_models',
 'checkpoint': 'ckpt',
 'model_name': 'dnnregressor',
 'data_type_signature': '_ddm_linear_collapse_',
 'timestamp': '06_22_19_23_20_30',
 'training_data_size': 143268157,
 'timestep': '06_22_19_23_16_39'}

In [22]:
# If necessary, specify new set of parameters here:

# Guard against label leakage: an exploratory cell above attaches the training
# labels to the feature frame as column 'log_l'. If that cell has been run, drop
# the column again (in place, no copy of the frame) so that it is neither counted
# in 'input_shape' below nor passed to the network as an input. No-op otherwise.
if 'log_l' in getattr(cpm.data['train_features'], 'columns', ()):
    del cpm.data['train_features']['log_l']

# Model params
cpm.model_params['output_activation'] = 'linear'
cpm.model_params['hidden_layers'] = [20, 40, 60, 80, 100, 120]
cpm.model_params['hidden_activations'] = ['relu', 'relu', 'relu', 'relu', 'relu', 'relu']
# Input width is taken from the data (number of feature columns)
cpm.model_params['input_shape'] = cpm.data['train_features'].shape[1]
# cpm.model_params['l1_activation'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
# cpm.model_params['l2_activation'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
cpm.model_params['l1_kernel'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
cpm.model_params['l2_kernel'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

# Train params
cpm.train_params['batch_size'] = 1000000
cpm.train_params['max_train_epochs'] = 250
cpm.train_params['min_delta'] = 0.00001

# Data params
cpm.data_params['data_type'] = 'kde'
cpm.data_params['data_type_signature'] = '_ddm_linear_collapse_'
# Training-set size is the number of rows in the training features
cpm.data_params['training_data_size'] = cpm.data['train_features'].shape[0]
# Timestamp of this configuration, formatted month_day_year_hour_minute_second
cpm.data_params['timestamp'] = datetime.now().strftime('%m_%d_%y_%H_%M_%S')
cpm.data_params['model_directory'] = '/media/data_cifs/afengler/data/kde/linear_collapse/keras_models'

In [27]:
# Make model: generate the Keras model through the trainer object.
# save_model = True is passed; presumably the generated model is then written
# to the configured model directory -- confirm in dnnregressor_train_eval_keras.
cpm.keras_model_generate(save_model = True)

In [None]:
# Train the model from scratch (warm_start = False), with save_history = True
cpm.run_training(save_history = True, warm_start = False)

Train on 143268157 samples, validate on 35823203 samples
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: val_loss improved from inf to 0.00936, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_10

Consider using a TensorFlow optimizer from `tf.train`.
Instructions for updating:
Use tf.train.CheckpointManager to manage checkpoints rather than manually editing the Checkpoint proto.
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 00020: val_loss improved from 0.00936 to 0.00392, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_20

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 2

Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 00040: val_loss did not improve from 0.00318
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 00048: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 49/250
Epoch 50/250
Epoch 00050: val_loss improved from 0.00318 to 0.00216, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_50

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 00060: val_loss improved from 0.00216 to 0.00188, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_60

Consider using a TensorFlow optimizer from `tf.train`

Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 00070: val_loss improved from 0.00188 to 0.00157, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_70

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78/250
Epoch 79/250
Epoch 80/250
Epoch 00080: val_loss improved from 0.00157 to 0.00126, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_80

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 81/250
Epoch 82/250
Epoch 83/250
Epoch 84/250
Epoch 85/250
Epoch 86/250
Epoch 87/250
Epoch 88/250
Epoch 89/250
Epoch 90/250
Epoch 00090: val_loss improved from 0.00126 to 0.00104, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_9

Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250
Epoch 110/250
Epoch 00110: val_loss improved from 0.00089 to 0.00081, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_110

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 111/250
Epoch 112/250
Epoch 113/250
Epoch 114/250
Epoch 115/250
Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 00118: ReduceLROnPlateau reducing learning rate to 2.0000000949949027e-05.
Epoch 119/250
Epoch 120/250
Epoch 00120: val_loss improved from 0.00081 to 0.00074, saving model to /media/data_cifs/afengler/data/kde/linear_collapse/keras_models/dnnregressor_ddm_linear_collapse_06_22_19_23_27_28/ckpt_0_120

Consider using a TensorFlow optimizer from `tf.train`.
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 00128: Reduce

Epoch 131/250
Epoch 132/250
Epoch 133/250