In [1]:
import numpy as np

### General parameters
run_on_cpu = False
nEpochs = [1e2]
# Baseline batch size; applies whenever batch_size is not the tested parameter.
batch_size = [1e4]
# Number of examples drawn for training+validation+testing.
train_set_size = [3e5]
input_features = ['Halo_mass', 'Halo_mass_peak', 'Concentration', 'Halo_spin']
output_features = ['Stellar_mass']

# Hyper-parameter swept by the benchmark: batch_size, nLayers, neurons_per_layer
# or actfun (the benchmark loop matches the lowercase spelling 'actfun').
tested_param = 'neurons_per_layer'
tested_values = [10, 50, 100, 500, 1000]

### Network parameters
# Baselines below apply whenever the respective quantity is not the tested parameter.
nLayers = [10]
activation_func = 'tanh'
neurons_per_layer = [1000]

# Column index of each quantity in the galaxy array (names listed in column order).
data_dict = dict(zip(
    [
        'X_pos', 'Y_pos', 'Z_pos',
        'X_vel', 'Y_vel', 'Z_vel',
        'Halo_mass', 'Stellar_mass', 'SFR', 'Intra_cluster_mass',
        'Halo_mass_peak', 'Stellar_mass_obs', 'SFR_obs', 'Halo_radius',
        'Concentration', 'Halo_spin',
        'Scale_peak_mass', 'Scale_half_mass', 'Scale_last_MajM',
        'Type',
    ],
    range(20),
))

# Label string per quantity, empty where none is given.
# Not used in the cells visible here -- presumably unit labels for plots; TODO confirm.
unit_dict = dict.fromkeys(data_dict, '')
unit_dict.update({
    'Halo_mass': 'log($M_{G}/M_{S}$)',
    'Stellar_mass': 'log($M_{G}/M_{S}$)',
    'Halo_mass_peak': 'log($M_{G}/M_{S}$)',
    'Scale_peak_mass': 'a',
    'Scale_half_mass': 'a',
    'Scale_last_MajM': 'a',
})

In [2]:
# Environment setup: device visibility, paths, imports and RNG seeding.
import os
# For a CPU-only run, CUDA_VISIBLE_DEVICES is blanked here, ahead of the
# tensorflow/keras imports below -- presumably it has to stay above them to
# take effect, so do not reorder these lines without checking.
if run_on_cpu:
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
from os.path import expanduser
# Home directory; used later to locate ~/.keras/keras.json.
home_dir = expanduser("~")
# Relative directory that the benchmark log and JSON result files are written to.
result_dir = 'results/'
import datetime
import codecs, json
import time
import random
import tensorflow as tf
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import stats
# Seed NumPy and the stdlib RNG so the random training subset is repeatable.
# NOTE(review): TensorFlow's own RNG is not seeded here, so weight
# initialisation may still differ between runs -- confirm whether that matters.
np.random.seed(999)
random.seed(999)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [13]:
# Read the user's Keras configuration to record which float precision ('floatx')
# is configured. The file is opened in a `with` block so the handle is closed
# deterministically instead of being left to the garbage collector.
with open(home_dir + '/.keras/keras.json') as keras_cfg_file:
    keras_info = json.load(keras_cfg_file)
float_prec = keras_info['floatx']
print(float_prec)


float16


In [4]:
# Snapshot of the run configuration; it is appended to the timing results
# when they are dumped to JSON.
json_info_dict = dict(
    On_CPU_only=run_on_cpu,
    number_of_epochs=nEpochs,
    training_set_size=train_set_size,
    input_features=input_features,
    output_features=output_features,
    tested_parameter=tested_param,
    tested_values=tested_values,
    batch_size=batch_size,
    nr_of_layers=nLayers,
    activation_function=activation_func,
    neurons_per_layer=neurons_per_layer,
)

In [5]:
# Load the galaxy catalogue and expose it as a plain 2-D array.
# NOTE(review): the path is absolute and machine-specific; consider moving it
# to the configuration cell.
galfile = pd.read_hdf('/scratch/data/galcats/P200/galaxies.Z01.h5')
# `.values` replaces DataFrame.as_matrix(), which is deprecated and was removed
# in pandas 1.0; with no column selection both return the same array.
galaxies = galfile.values
gal_header = galfile.keys().tolist()

### Keep only data points with halo mass above 10.5
# The column is looked up by name (data_dict['Halo_mass'] == 6) rather than by
# a bare index, matching how the feature columns are selected further down.
galaxies = galaxies[galaxies[:, data_dict['Halo_mass']] > 10.5, :]

In [6]:
# Draw a random subset of rows (without replacement) to train on.
n_data_points = galaxies.shape[0]
train_indices = np.random.choice(n_data_points, int(train_set_size[0]), replace=False)


def _gather_features(feature_names):
    """Return a (len(train_indices), len(feature_names)) float array.

    Column k holds the catalogue column named feature_names[k], restricted
    to the sampled rows in train_indices.
    """
    block = np.zeros((len(train_indices), len(feature_names)))
    for col, name in enumerate(feature_names):
        block[:, col] = galaxies[train_indices, data_dict[name]]
    return block


x_train = _gather_features(input_features)
y_train = _gather_features(output_features)

In [7]:
# Baseline hyper-parameters; the benchmark loop overwrites whichever one is under test.
bSize, nLay, neurPerLay = batch_size[0], nLayers[0], neurons_per_layer[0]
actFun = activation_func

In [8]:
# Benchmark: for every entry of tested_values, build a dense network with that
# value substituted for the tested hyper-parameter, time model.fit() and log
# progress to a text file in result_dir.

# Label used in the log text and in the result file names.
pu_string = 'CPU' if run_on_cpu else 'GPU'

# Fail fast on an unknown parameter name instead of printing 'error' and then
# timing the unchanged baseline network for every tested value.
# 'actFun' (matching the variable name) and 'actfun' both select the
# activation function.
known_tested_params = ('batch_size', 'nLayers', 'neurons_per_layer', 'actfun', 'actFun')
if tested_param not in known_tested_params:
    raise ValueError('Unknown tested_param %r; expected one of: %s'
                     % (tested_param, ', '.join(known_tested_params)))

# Training time in minutes, one slot per tested value.
timing_array = np.zeros(len(tested_values))
tot_nr_comb = np.size(timing_array)
comb_tried = 0
glob_start = time.time()

# One timestamp for both the file name and the header line, so they cannot
# disagree if the clock rolls over between two separate now() calls.
run_started = datetime.datetime.now()
date_string = run_started.strftime("%Y-%m-%d")
date_string_proper = run_started.strftime("%H:%M, %Y-%m-%d")

with open(result_dir + pu_string + '_' + tested_param + '_' + date_string + '.txt', 'w+') as f:

    f.write('Benchmark done on ' + pu_string + ' at ' + date_string_proper + '\n')
    f.write('Parameter checked is %s\n\n' % (tested_param))
    f.flush()

    for i_value, value in enumerate(tested_values):

        # Substitute the current value for the hyper-parameter under test.
        if tested_param == 'batch_size':
            bSize = value
        elif tested_param == 'nLayers':
            nLay = value
        elif tested_param == 'neurons_per_layer':
            neurPerLay = value
        else:  # 'actfun' / 'actFun' -- validated above
            actFun = value

        comb_tried += 1

        # create model
        # One input-facing Dense layer plus nLay further Dense layers, i.e.
        # nLay + 1 hidden layers in total, followed by a linear output layer.
        # NOTE(review): confirm that nLay + 1 (rather than nLay) is intended.
        model = Sequential()
        model.add(Dense(neurPerLay, input_dim=len(input_features), activation=actFun))

        for _ in range(0, nLay):
            model.add(Dense(neurPerLay, activation=actFun))

        model.add(Dense(len(output_features), activation=None))

        # Compile model
        model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

        # Fit the model; only the fit call itself is timed.
        start = time.time()
        history = model.fit(x_train, y_train, epochs=int(nEpochs[0]),
                            batch_size=int(bSize), verbose=0)
        end = time.time()

        timing_array[i_value] = (end - start) / 60

        # Remaining-time estimate: average wall time per finished combination
        # multiplied by the number of combinations still to run.
        progress_end = time.time()
        elapsed_so_far = (progress_end - glob_start) / 60
        time_remaining = elapsed_so_far / comb_tried * (tot_nr_comb - comb_tried)

        f.write('%s      ' % (datetime.datetime.now().strftime("%H:%M:%S")))
        f.write('Combinations tried: %d/%d     ' % (comb_tried, tot_nr_comb))
        f.write('Elapsed time: %dmin     ' % (elapsed_so_far))
        f.write('Time remaining: %dmin.\n' % (time_remaining))
        # Flush after every combination so progress can be followed while the run is ongoing.
        f.flush()
# The `with` block closes the log file; no explicit f.close() is needed.
                


In [14]:
# Persist the results as a two-element JSON list:
# [timings in minutes (one per tested value), run configuration dict].
json_data = [timing_array.tolist(), json_info_dict]

json_path = (result_dir + pu_string + '_' + tested_param + '_' + float_prec
             + '_' + date_string + '.json')
# The `with` block closes the file on exit, so no explicit f.close() follows.
with open(json_path, 'w+') as f:
    json.dump(json_data, f)