In [1]:
# Inject a CSS rule so the notebook's `.container` element uses 90% of the
# browser width. `display` and `HTML` are imported from the public
# `IPython.display` module: importing `display` from `IPython.core.display`
# has been deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import os
import sys
from os.path import expanduser

# All absolute locations are derived from the current user's home directory.
home_dir = expanduser("~")
module_path = "".join([home_dir, '/code/modules/'])
models_path = "".join([home_dir, '/models/'])
bp_network_dir = "".join([home_dir, '/trained_networks/backprop_trained/'])

# Relative directory name (resolved against the working directory when used).
fig_dir = 'figures/'

# Extend the import search path with module_path before the remaining imports
# run (presumably this is where the project-local modules live -- verify).
sys.path.append(module_path)
import time
import random
from loading_datasets import *
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import stats
from scipy.special import comb
import multiprocessing as mp
import datetime
import codecs, json
import corner
import pickle
from itertools import combinations
%load_ext autoreload
%autoreload 1
%aimport data_processing
%aimport multiprocessing_modules
%aimport plotting
%aimport model_setup
from data_processing import *
from observational_data_management import binned_loss, csfrd_loss, clustering_loss
from multiprocessing_modules import train_net, init
from plotting import *
from model_setup import *

# Seed both Python's and NumPy's global random number generators with the
# same fixed value so that repeated runs draw the same random numbers.
random.seed(999)
np.random.seed(999)

# Set CUDA_VISIBLE_DEVICES to the empty string for this process.
# NOTE(review): presumably intended to hide all GPUs and force CPU execution
# -- confirm this takes effect given that it is set after the imports above.
os.environ.update({"CUDA_VISIBLE_DEVICES": ""})

Using TensorFlow backend.


In [2]:
# ---------------------------------------------------------------------------
# Data set size and split
# ---------------------------------------------------------------------------
# Number of examples used for training + validation + testing: 'all' or a number.
tot_nr_points = 'all'
# Fractions handed to divide_train_data; here every example goes to training.
train_frac, val_frac, test_frac = 1, 0, 0

# ---------------------------------------------------------------------------
# Snapshots and features
# ---------------------------------------------------------------------------
redshifts = [0, 0.1, 0.2, 0.5, 1, 2, 3, 4, 6, 8]
same_n_points_per_redshift = False
input_features = [
    'Halo_mass_peak',
    'Scale_peak_mass',
    'Halo_growth_rate',
    'Halo_radius',
    'Redshift',
]
output_features = ['Stellar_mass', 'SFR']
outputs_to_weigh = ['Stellar_mass']
weigh_by_redshift = True

# Normalisation per side; options: 'none', 'zero_mean_unit_std', 'zero_to_one'.
norm = {
    'input': 'zero_mean_unit_std',
    'output': 'none',
}

# ---------------------------------------------------------------------------
# Network description
# ---------------------------------------------------------------------------
network_args = {
    'nr_hidden_layers': 8,
    'nr_neurons_per_lay': 8,
    'input_features': input_features,
    'output_features': output_features,
    # activation options: 'tanh', 'leaky_relu'
    'activation_function': 'tanh',
    'output_activation': {'SFR': None, 'Stellar_mass': None},
    'reg_strength': 0
}

# ---------------------------------------------------------------------------
# Loss parameters
# ---------------------------------------------------------------------------
# Bin width shared by the smf, fq and ssfr losses.
stellar_mass_bin_width = 0.2
loss_dict = {
    'fq_weight': 1,
    'ssfr_weight': 1,
    'smf_weight': 1,
    # shm is only available when using mock observations
    'shm_weight': 2,
    'csfrd_weight': 1,
    'clustering_weight': 1,
    # a number or the string 'all'
    'nr_redshifts_per_eval': 'all',
    'stellar_mass_bins': np.arange(7, 12.5, stellar_mass_bin_width),
    'stellar_mass_bin_width': stellar_mass_bin_width
}

# ---------------------------------------------------------------------------
# Load the galaxy files and build the (normalised) training data dictionary
# ---------------------------------------------------------------------------
galaxies, data_keys = load_galfiles(
    redshifts=redshifts,
    equal_numbers=same_n_points_per_redshift,
)

# Split the data first, then normalise the result according to `norm`.
training_data_dict = normalise_data(
    divide_train_data(
        galaxies,
        data_keys,
        network_args,
        redshifts,
        outputs_to_weigh=outputs_to_weigh,
        weigh_by_redshift=weigh_by_redshift,
        total_set_size=tot_nr_points,
        train_frac=train_frac,
        val_frac=val_frac,
        test_frac=test_frac,
        real_observations=True,
        emerge_targets=True,
        loss_dict=loss_dict,
    ),
    norm,
)

max resolvable stellar masses are, for redshifts 0.00, 0.10, 0.20, 0.50, 1.00, 2.00, 3.00, 4.00, 6.00, 8.00:  [11.917698348869193, 11.919560018029657, 11.919560018029657, 12.008884738658642, 11.950992202758789, 11.871992912521591, 11.946076673215574, 11.745095703480121, 10.917188149815923, 9.582553102113343]


In [5]:
# Evaluate the weighted observational loss using the training-set outputs
# stored in training_data_dict['output_train_dict'] as the "predictions".
# Both outputs are exponentiated with base 10 below, i.e. they are treated as
# log10 quantities.

data_type = 'train'

# Weights that enter the final normalisation (same terms and order as the sum
# that divides the accumulated loss at the end of the cell).
normalised_weight_keys = ('ssfr_weight', 'smf_weight', 'fq_weight', 'clustering_weight', 'csfrd_weight')
total_weight = sum(loss_dict[key] for key in normalised_weight_keys)
if total_weight == 0:
    # Fail early with a clear message instead of a bare division-by-zero
    # error after all loss terms have been evaluated.
    raise ValueError('the loss weights sum to zero, the combined loss cannot be normalised')

predicted_sfr = np.power(10, training_data_dict['output_train_dict']['SFR'])

predicted_stellar_mass_log = training_data_dict['output_train_dict']['Stellar_mass']
predicted_stellar_mass = np.power(10, predicted_stellar_mass_log)

# NOTE: NumPy raises FloatingPointError for overflow / division by zero only
# when its error state is set to 'raise' (np.seterr / np.errstate); with the
# default settings it emits a RuntimeWarning and returns inf/nan instead, in
# which case the handlers below never fire. Only FloatingPointError is caught
# so that unrelated problems (KeyboardInterrupt, shape mismatches, ...) are
# not misreported as numerical errors.
try:
    ssfr = np.divide(predicted_sfr, predicted_stellar_mass)
except FloatingPointError as err:
    print(np.asarray(predicted_sfr).dtype, np.asarray(predicted_stellar_mass).dtype)
    print('predicted_sfr: ', predicted_sfr)
    print('predicted_stellar_mass: ', predicted_stellar_mass)
    # Raise a regular exception rather than sys.exit(): SystemExit is meant for
    # terminating a process, not for reporting a failed notebook cell.
    raise FloatingPointError('overflow error while dividing') from err

try:
    ssfr_log = np.log10(ssfr)
except FloatingPointError as err:
    print(np.asarray(ssfr).dtype)
    print('ssfr: ', ssfr)
    raise FloatingPointError('divide by zero error while taking log') from err

loss = 0

############### mean SSFR ###############

if loss_dict['ssfr_weight'] > 0:
    loss_ssfr = binned_loss(
        training_data_dict, predicted_stellar_mass_log, ssfr, 'ssfr', data_type, loss_dict, True
    )
    loss += loss_dict['ssfr_weight'] * loss_ssfr

############### SMF ###############

if loss_dict['smf_weight'] > 0:
    # The stellar mass serves both as the binning variable and as the binned quantity.
    loss_smf = binned_loss(
        training_data_dict, predicted_stellar_mass_log, predicted_stellar_mass_log, 'smf', data_type, loss_dict, True
    )
    loss += loss_dict['smf_weight'] * loss_smf

############### FQ ###############

if loss_dict['fq_weight'] > 0:
    loss_fq = binned_loss(
        training_data_dict, predicted_stellar_mass_log, ssfr_log, 'fq', data_type, loss_dict, True
    )
    loss += loss_dict['fq_weight'] * loss_fq

############### CSFRD ###############

if loss_dict['csfrd_weight'] > 0:
    loss_csfrd = csfrd_loss(
        training_data_dict, predicted_sfr, loss_dict, data_type
    )
    loss += loss_dict['csfrd_weight'] * loss_csfrd

############### Clustering ###############

if loss_dict['clustering_weight'] > 0:
    loss_clustering = clustering_loss(
        training_data_dict, predicted_stellar_mass_log, loss_dict, data_type
    )
    loss += loss_dict['clustering_weight'] * loss_clustering

# Normalise by the summed weights so the result is a weighted mean of the
# individual loss terms.
loss /= total_weight

print(loss)

4.04092176373937
