# Classification of MACHO VS - example 1

- Data-type     : MACHO VS (normalized, phase-folded, fixed-length light-curves)
- Network type  : Composite network
- Layer type    : tCNN
- Configuration : (nFilters=16, nLayers=1)

***
__Note__
- Autoencoders and composite networks require fixed-length data to compile/fit the model. This limitation arises from the type of bottleneck layers used in the decoder modules.
- Direct classifiers can process either fixed-length data or the initial time series. The latter are fed through generator functions (`fit_generator`) to account for the differing lengths of the observed light curves.
***
### DATA

__Source__ : MACHO VS multiband photometry [red and blue bands] (Alcock et al., 1996)

__Preprocessed data__ 
- Fixed-length LCs   [raw/normalized], [phase-folded/time-series]
- Initial-length LCs [raw], [phase-folded/time-series]


__Processing of multiband light-curves__ (Jamal and Bloom, 2020)
- '_Bband_'  &emsp;&nbsp; `data_id='blue'`
- '_merged_' &emsp;`data_id='rb'`
- '_hybrid_' &emsp;&nbsp; `data_id='multiple'`

<br>


## Settings

In [1]:
%run ../setup_notebook

Using TensorFlow backend.


In [2]:
import script_MACHO as m_runs

## Seed NumPy's global random generator so repeated runs draw the same numbers.
SEED = 0
np.random.seed(SEED)
# SEED_tf=42; tf.compat.v1.set_random_seed(SEED_tf)   ## set in "functions_keras.py"

## [1] Network hyperparameters





        dict_nruns = {0 : 'classifier_MLP_meta',
                      #
                      1 : 'classifier_direct_RNN',
                      2 : 'classifier_direct_tCNN',
                      3 : 'classifier_direct_dTCN',
                      #
                      4 : 'autoencoder_RNN',
                      5 : 'autoencoder_tCNN',
                      6 : 'autoencoder_dTCN',
                      #
                      7 : 'composite_net_RNN',
                      8 : 'composite_net_tCNN',
                      9 : 'composite_net_dTCN',
                     }

In [3]:
## ---------------------------------------------- ##
## RUN SELECTION
## ---------------------------------------------- ##
run_id             = 8             ## key of m_runs.dict_nruns (0 to 9)
model_type         = 'tCNN'
data_id            = 'blue'

## Network size
num_layers         = 1
sizenet            = 16

## Input-data switches
m_meta             = True          ## TRUE: metadata as ancillary input,  FALSE: none
m_fold             = True          ## TRUE: phase-folded LCs,  FALSE: time-series
m_padding          = True          ## TRUE: fixed-length data, FALSE: initial lengths
## FALSE: normalized data (this configuration loads 'Xnorm_fold_blue.pkl'),
## TRUE: presumably the initial, un-normalized observations.
## NOTE(review): this flag was previously documented the other way round;
## confirm against the loader in script_MACHO.
m_raw              = False


## ---------------------------------------------- ##
## SETTINGS SHARED BY EVERY RUN
## ---------------------------------------------- ##
sim_type           = m_runs.dict_nruns[run_id]   ## run label looked up from run_id

## Optimisation
nb_epoch           = 200
batch_size         = 128
learning_rate      = 5e-4
drop_frac          = 0.25
validation_split   = 0.20

## Misc. switches
gpu_frac           = 0.00
m_reductionfactor  = 2
m_categorical      = True
m_causal           = True

## Layer-specific settings: the three run_id groups are disjoint, so at most
## one branch executes and only that branch's names get defined.
if run_id in (3, 6, 9):            ## dTCN
    n_stacks       = num_layers
    kernel_wavenet = 1
    max_dilation   = 2
    kernel_size    = 3
    m_activation   = 'wavenet'
elif run_id in (2, 5, 8):          ## tCNN
    max_dilation   = 2
    kernel_size    = 5
    m_activation   = 'tanh'
elif run_id in (1, 4, 7):          ## RNN (LSTM; GRU)
    bidirectional  = True

## Embedding size: 8 when run_id is listed in m_runs.list_ae or
## m_runs.list_composite, otherwise None.
if run_id in np.r_[m_runs.list_ae, m_runs.list_composite]:
    m_embedding    = 8
else:
    m_embedding    = None


## Machine-specific absolute paths, kept disabled:
#data_store         = '/Users/sjamal/git/deepnets_vs/example_data/MACHO/'
#output_store       = '/Users/sjamal/git/deepnets_vs/outputs/trained_models/'

## Relative locations of the input data and of the trained-model outputs.
## NOTE(review): the session printout shows these appended to the notebooks
## directory, so the leading '/' appears intentional — confirm.
data_store         = '/../example_data/MACHO/'
output_store       = '/../trained_models/'

In [4]:
## Collect the session settings into one dictionary. It is built in stages,
## but the keys are inserted in the same order as a single literal would give.
arg_dict = {
    'data_id'           : data_id,
    'run_id'            : run_id,
    ## storage locations
    'data_store'        : data_store,
    'output_store'      : output_store,
    ## network and optimisation
    'sizenet'           : sizenet,
    'num_layers'        : num_layers,
    'drop_frac'         : drop_frac,
    'batch_size'        : batch_size,
    'nb_epoch'          : nb_epoch,
    'model_type'        : model_type,
    'learning_rate'     : learning_rate,
    ##
    'embedding'         : m_embedding,
    'add_dense'         : True,
    'validation_split'  : validation_split,
    ##
    'categorical'       : m_categorical,
    'causal'            : m_causal,
    'sim_type'          : sim_type,
    'gpu_frac'          : gpu_frac,
    ## reconstruction loss
    'loss_AE'           : 'mae',
}

## Classification loss and metric follow the label encoding.
if m_categorical:
    arg_dict['loss_CLF']    = 'categorical_crossentropy'
    arg_dict['metrics_CLF'] = 'categorical_accuracy'
else:
    arg_dict['loss_CLF']    = 'logcosh'
    arg_dict['metrics_CLF'] = 'accuracy'

arg_dict.update({
    ## FALSE: normalized data (as loaded for this configuration), TRUE: presumably
    ## the initial observations. NOTE(review): previously documented the other
    ## way round; confirm against script_MACHO.
    'use_raw'           : m_raw,
    'padding'           : m_padding,    ## TRUE: fixed-length data, FALSE: initial lengths
    'period_fold'       : m_fold,       ## TRUE: phase-folded LCs,  FALSE: time-series
    'add_metadata'      : m_meta,       ## TRUE: metadata as ancillary input,  FALSE: none
    ##
    'no_train'          : False,
})

## Layer-specific entries: the run_id groups are disjoint, so at most one
## branch contributes keys.
if run_id in (1, 4, 7):            ## RNN (LSTM; GRU)
    arg_dict.update({
        'bidirectional'     : bidirectional,
    })
elif run_id in (2, 5, 8):          ## tCNN
    arg_dict.update({
        'm_activation'      : m_activation,
        'kernel_size'       : kernel_size,
        'max_dilation'      : max_dilation,
    })
elif run_id in (3, 6, 9):          ## dTCN
    arg_dict.update({
        'm_reductionfactor' : m_reductionfactor,
        'm_activation'      : m_activation,
        'kernel_size'       : kernel_size,
        'kernel_wavenet'    : kernel_wavenet,
        'n_stacks'          : n_stacks,
        'max_dilation'      : max_dilation,
    })
#


In [5]:
## Pass the settings through m_func.parse_model_args, then through
## m_runs.set_params_cline; the result replaces arg_dict.
## NOTE(review): arg_dict is rebuilt from its own previous value, so re-running
## this cell without first re-running the cell that assembles arg_dict feeds
## already-processed settings back in — confirm that this is safe.
## NOTE(review): m_func is not defined in the visible cells; presumably it is
## provided by `%run ../setup_notebook` — verify.
arg_dict = m_runs.set_params_cline(m_func.parse_model_args(arg_dict))


	# ------------------------------------------ #
	# --------[ SESSION - HYPERPARAMS ] -------- # 
	# ------------------------------------------ #

	 data_id 	: blue
	 run_id 	: 8
	 sim_type 	: composite_net_tCNN_fixedlength_06182020
	 data_store 	: /Users/sjamal/git/deepnets_vs/notebooks/../example_data/MACHO/
	 output_store 	: /Users/sjamal/git/deepnets_vs/notebooks/../trained_models/
	 nb_passbands 	: 1
	 sizenet 	: 16
	 embedding 	: 8
	 num_layers 	: 1
	 drop_frac 	: 0.25
	 batch_size 	: 128
	 nb_epoch 	: 200
	 model_type 	: tCNN
	 learning_rate 	: 0.0005
	 decode_type 	: None
	 decode_layers 	: None
	 bidirectional 	: False
	 output_size_cw 	: None
	 n_stacks 	: None
	 max_dilation 	: 2
	 m_reductionfactor 	: 2
	 kernel_size 	: 5
	 kernel_wavenet 	: 1
	 m_activation 	: tanh
	 do_featurizer 	: False
	 config_wavenet 	: False
	 use_skip_connections 	: False
	 add_dense 	: True
	 use_raw 	: False
	 add_metadata 	: True
	 causal 	: True
	 aux_in 	: False
	 categorical 	: True
	 loss_we

## [2] Load stored datastructures

In [14]:
## Reset both inputs before loading (presumably to drop data left over from
## an earlier load).
input_lcs = None
input_metadata = None

## Load the light curves, the metadata and the output structure for this session.
input_lcs, input_metadata, output_dict = m_runs.get_data(arg_dict)

/Users/sjamal/git/deepnets_vs/notebooks/../example_data/MACHO/preprocessed_data/pkl_fileformat/phasefold/fixed_lengths/Xnorm_fold_blue.pkl


In [None]:
## Show the column labels of the 'selected' metadata entry.
## Assumes input_metadata['selected'] exposes `.columns` (e.g. a pandas DataFrame) — TODO confirm.
print('Metadata :', input_metadata['selected'].columns)

## [3] Train networks & store logs

In [None]:
## Run the selected network and report how long the call took.
## time.perf_counter() is a monotonic clock, so the measured duration cannot be
## skewed by system clock adjustments during a long training run.
stime = time.perf_counter()

m_runs.run_network(arg_dict, input_lcs, input_metadata, output_dict)

## Break the elapsed seconds down into hours / minutes / seconds for display.
hours, rem = divmod(time.perf_counter() - stime, 3600)
minutes, seconds = divmod(rem, 60)
print("\n*Execution time : {:0>2} h {:0>2} min {:05.2f} s".format(int(hours), int(minutes), seconds))