In [1]:
import os
from os.path import expanduser
home_dir = expanduser("~")
module_path = home_dir + '/code/modules/'
models_path = home_dir + '/models/'
import sys
sys.path.append(module_path)
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from scipy import stats
import json
import pandas as pd
%load_ext autoreload
%autoreload 1
%aimport data_processing
from data_processing import *

Using TensorFlow backend.


## Construct mock SMF

In [125]:
original_directory = '/home/magnus/data/galcats_nonzero_sfr_no_density_with_growth_rate_no_lastMajM/'
destination_directory = '/home/magnus/data/mock_data/stellar_mass_functions/'

redshifts = [0, .1, .2, .5, 1, 2, 3, 4, 6, 8]
bin_width = .1
error = .3

for redshift in redshifts:

    file_name = 'galaxies.Z{:02.0f}'.format(redshift*10)

    galfile = pd.read_hdf(original_directory + file_name + '.h5')
    galaxies = galfile.values
    gal_header = galfile.keys().tolist()

    data_keys = {}
    for col_nr, key in enumerate(gal_header):
        data_keys[key] = col_nr

    # Remove the smaller haloes
    galaxies = galaxies[galaxies[:,data_keys['Halo_mass']] > 10.5, :]

  #  print(np.amax(galaxies[:, data_keys['Halo_mass']]))
  #  print(np.amin(galaxies[:, data_keys['Halo_mass']]))

    max_stellar_mass = np.amax(galaxies[:, data_keys['Stellar_mass']])
    min_stellar_mass = np.amin(galaxies[:, data_keys['Stellar_mass']])
    lower_bin_edge = np.floor(min_stellar_mass * 10)/10
    upper_bin_edge = np.ceil(max_stellar_mass* 10)/10
   # print(min_stellar_mass, max_stellar_mass)
   # print(lower_bin_edge, upper_bin_edge)

    bin_edges = np.arange(lower_bin_edge, upper_bin_edge + bin_width, bin_width)
    n_bins = len(bin_edges)-1
    bin_stats = stats.binned_statistic(galaxies[:, data_keys['Stellar_mass']], galaxies[:, data_keys['Stellar_mass']], 
                                            bins=bin_edges)

    # separate the data, based on redshift, into separate lists 
    bin_counts = [np.sum(bin_stats[2] == i) for i in range(1, n_bins+1)]

    bin_centers = [(bin_edges[i] + bin_edges[i+1])/2 for i in range(len(bin_edges)-1)]
  #  print(bin_centers)

    #plt.hist(galaxies[:, data_keys['Stellar_mass']], bins = bin_edges, density = False, label = 'Stellar_mass')
  #  plt.scatter(bin_centers, bin_counts)

  #  print(bin_counts)
    bin_counts = [float('nan') if count == 0 else count for count in bin_counts]
    bin_counts_arr = np.array(bin_counts, dtype=np.float)
    bin_counts_per_mpc3 = bin_counts_arr / 200**3 / bin_width
    bin_counts_per_mpc3[bin_counts_per_mpc3 > 0] = np.log10(bin_counts_per_mpc3[bin_counts_per_mpc3 > 0])
  #  print(bin_counts_per_mpc3)

    full_data_list = [[stell_mass, phi, error] for (stell_mass, phi) in zip(bin_centers, bin_counts_per_mpc3) 
                      if (not np.isnan(phi)) ]
    full_data_list.insert(0, {'bin_widths': bin_width, 'min_bin_edge': lower_bin_edge, 'max_bin_edge': upper_bin_edge})
  #  print(full_data_list)

    with open(destination_directory + file_name + '.json', 'w+') as f:
        json.dump(full_data_list, f)
    f.close()

In [126]:
### Try loading a data file
file_name = 'galaxies.Z00'
with open(destination_directory + file_name + '.json', 'r') as f:
    test = json.load(f)
f.close()
print(test)

[{'bin_widths': 0.1, 'min_bin_edge': 7.0, 'max_bin_edge': 12.2}, [7.05, -1.7528556280340475, 0.3], [7.1499999999999995, -1.7619225183457112, 0.3], [7.249999999999999, -1.7735612131331673, 0.3], [7.349999999999999, -1.7900495689041231, 0.3], [7.449999999999998, -1.8018242020006086, 0.3], [7.549999999999998, -1.8287856429905087, 0.3], [7.649999999999998, -1.841788330785899, 0.3], [7.749999999999997, -1.8611843477682943, 0.3], [7.849999999999997, -1.880414225038216, 0.3], [7.949999999999997, -1.8964528279233575, 0.3], [8.049999999999997, -1.9220872970505438, 0.3], [8.149999999999995, -1.9472579767595217, 0.3], [8.249999999999996, -1.9627727654177256, 0.3], [8.349999999999994, -1.9897000433601881, 0.3], [8.449999999999996, -2.0125013192864563, 0.3], [8.549999999999994, -2.021135015652343, 0.3], [8.649999999999995, -2.0415555761329056, 0.3], [8.749999999999993, -2.050000457140078, 0.3], [8.849999999999994, -2.0637376580965223, 0.3], [8.949999999999992, -2.0768207675982175, 0.3], [9.04999999

## Construct mock SSFR

In [2]:
original_directory = '/home/magnus/data/galcats_nonzero_sfr_no_density_with_growth_rate_no_lastMajM/'
destination_directory = '/home/magnus/data/mock_data/ssfr/'
file_name = 'galaxies.Z00'
bin_width = .2
error = .3

galfile = pd.read_hdf(original_directory + file_name + '.h5')
galaxies = galfile.values
gal_header = galfile.keys().tolist()

data_keys = {}
for col_nr, key in enumerate(gal_header):
    data_keys[key] = col_nr

# Remove the smaller haloes
galaxies = galaxies[galaxies[:,data_keys['Halo_mass']] > 10.5, :]

print(np.amax(galaxies[:, data_keys['Halo_mass']]))
print(np.amin(galaxies[:, data_keys['Halo_mass']]))

NameError: name 'original_directory' is not defined

In [137]:
max_stellar_mass = np.amax(galaxies[:, data_keys['Stellar_mass']])
min_stellar_mass = np.amin(galaxies[:, data_keys['Stellar_mass']])
lower_bin_edge = np.floor(min_stellar_mass * 10)/10
upper_bin_edge = np.ceil(max_stellar_mass* 10)/10
print((lower_bin_edge - upper_bin_edge) % bin_width)
if (upper_bin_edge - lower_bin_edge) % bin_width < 1e-10:
    upper_bin_edge += (upper_bin_edge - lower_bin_edge) % bin_width
print(min_stellar_mass, max_stellar_mass)
print(lower_bin_edge, upper_bin_edge)

9.992007221626409e-16
7.0 12.185381
7.0 12.2


In [None]:
bin_edges = np.arange(lower_bin_edge, upper_bin_edge + bin_width, bin_width)
n_bins = len(bin_edges)-1
bin_stats = stats.binned_statistic(galaxies[:, data_keys['Stellar_mass']], galaxies[:, data_keys['Stellar_mass']], 
                                        bins=bin_edges)

# separate the data, based on redshift, into separate lists 
bin_counts = [np.sum(bin_stats[2] == i) for i in range(1, n_bins+1)]

bin_centers = [(bin_edges[i] + bin_edges[i+1])/2 for i in range(len(bin_edges)-1)]
#  print(bin_centers)

#plt.hist(galaxies[:, data_keys['Stellar_mass']], bins = bin_edges, density = False, label = 'Stellar_mass')
#  plt.scatter(bin_centers, bin_counts)

#  print(bin_counts)
bin_counts = [float('nan') if count == 0 else count for count in bin_counts]
bin_counts_arr = np.array(bin_counts, dtype=np.float)
bin_counts_per_mpc3 = bin_counts_arr / 200**3 / bin_width
bin_counts_per_mpc3[bin_counts_per_mpc3 > 0] = np.log10(bin_counts_per_mpc3[bin_counts_per_mpc3 > 0])
#  print(bin_counts_per_mpc3)

full_data_list = [[stell_mass, phi, error] for (stell_mass, phi) in zip(bin_centers, bin_counts_per_mpc3) 
                  if (not np.isnan(phi)) ]
full_data_list.insert(0, {'bin_widths': bin_width, 'min_bin_edge': lower_bin_edge, 'max_bin_edge': upper_bin_edge})
#  print(full_data_list)

with open(destination_directory + file_name + '.json', 'w+') as f:
    json.dump(full_data_list, f)
f.close()

### Real data

In [15]:
filename = '/home/magnus/data/observational_data/all_data.h5'
file = h5py.File(filename, 'r')
#file?
# List all groups
print("Keys: ", list(file.keys()))

Keys:  ['Universe_0']


In [16]:
# Get the universe
universe_0 = file['Universe_0']
#universe_0?
# List all objects in the universe
print("Keys: ", list(universe_0.keys()))

Keys:  ['CSFRD', 'Clustering', 'FQ', 'Model_Parameters', 'SMF', 'SSFR']


In [17]:
# Get the SMF objects from the universe
smf = universe_0['SMF']
#smf?
# List all objects
print("Keys: ", list(smf.keys()))
# Get the dataset 'Data'
data = smf['Data']
print("Keys of Data: ", list(data.keys()))
data_keys = list(data.keys())
model = smf['Model']
sets = smf['Sets']

Keys:  ['Data', 'Model', 'Sets']
Keys of Data:  ['000 Li & White 2009 (z = 0.00 - 0.20)', '001 Baldry 2012 (z = 0.00 - 0.06)', '002 Bernardi 2013 (z = 0.00 - 0.20)', '003 Perez-Gonzalez 2008 (z = 0.00 - 0.20)', '004 Perez-Gonzalez 2008 (z = 0.20 - 0.40)', '005 Perez-Gonzalez 2008 (z = 0.40 - 0.60)', '006 Perez-Gonzalez 2008 (z = 0.60 - 0.80)', '007 Perez-Gonzalez 2008 (z = 0.80 - 1.00)', '008 Perez-Gonzalez 2008 (z = 1.00 - 1.30)', '009 Perez-Gonzalez 2008 (z = 1.30 - 1.60)', '010 Perez-Gonzalez 2008 (z = 1.60 - 2.00)', '011 Perez-Gonzalez 2008 (z = 2.00 - 2.50)', '012 Perez-Gonzalez 2008 (z = 2.50 - 3.00)', '013 Perez-Gonzalez 2008 (z = 3.00 - 3.50)', '014 Perez-Gonzalez 2008 (z = 3.50 - 4.00)', '015 Santini 2012 (z = 0.60 - 1.00)', '016 Santini 2012 (z = 1.00 - 1.40)', '017 Santini 2012 (z = 1.40 - 1.80)', '018 Santini 2012 (z = 1.80 - 2.50)', '019 Santini 2012 (z = 2.50 - 3.50)', '020 Santini 2012 (z = 3.50 - 4.50)', '021 Ilbert 2013 (z = 0.20 - 0.50)', '022 Ilbert 2013 (z = 0.50 - 

In [None]:
print(list(sets))
#sets?

In [None]:
print(list(model))
#model?

In [18]:
#print(list(data['000 Li & White 2009 (z = 0.00 - 0.20)']))
print(list(data))
#data?

['000 Li & White 2009 (z = 0.00 - 0.20)', '001 Baldry 2012 (z = 0.00 - 0.06)', '002 Bernardi 2013 (z = 0.00 - 0.20)', '003 Perez-Gonzalez 2008 (z = 0.00 - 0.20)', '004 Perez-Gonzalez 2008 (z = 0.20 - 0.40)', '005 Perez-Gonzalez 2008 (z = 0.40 - 0.60)', '006 Perez-Gonzalez 2008 (z = 0.60 - 0.80)', '007 Perez-Gonzalez 2008 (z = 0.80 - 1.00)', '008 Perez-Gonzalez 2008 (z = 1.00 - 1.30)', '009 Perez-Gonzalez 2008 (z = 1.30 - 1.60)', '010 Perez-Gonzalez 2008 (z = 1.60 - 2.00)', '011 Perez-Gonzalez 2008 (z = 2.00 - 2.50)', '012 Perez-Gonzalez 2008 (z = 2.50 - 3.00)', '013 Perez-Gonzalez 2008 (z = 3.00 - 3.50)', '014 Perez-Gonzalez 2008 (z = 3.50 - 4.00)', '015 Santini 2012 (z = 0.60 - 1.00)', '016 Santini 2012 (z = 1.00 - 1.40)', '017 Santini 2012 (z = 1.40 - 1.80)', '018 Santini 2012 (z = 1.80 - 2.50)', '019 Santini 2012 (z = 2.50 - 3.50)', '020 Santini 2012 (z = 3.50 - 4.50)', '021 Ilbert 2013 (z = 0.20 - 0.50)', '022 Ilbert 2013 (z = 0.50 - 0.80)', '023 Ilbert 2013 (z = 0.80 - 1.10)', '02

In [20]:
(1 + 1/(1+.2))/2

0.9166666666666667

In [19]:
data_obj_0 = data[data_keys[0]]
print(list(data_obj_0))
#data_obj_0?

[(8.387413, -1.7221187, 0.04560975, 0.9166667, -1.6924936, 0.00676285), (8.487412, -1.8181188, 0.05197114, 0.9166667, -1.7416298, 0.00715322), (8.587413, -1.8221188, 0.04463463, 0.9166667, -1.7860864, 0.00752563), (8.687413, -1.8331188, 0.04171629, 0.9166667, -1.8232217, 0.00785141), (8.787413, -1.8501188, 0.03881046, 0.9166667, -1.8558966, 0.00814962), (8.887413, -1.8651187, 0.03688156, 0.9166667, -1.8878008, 0.00845157), (8.987412, -1.8801187, 0.03162275, 0.9166667, -1.9226518, 0.00879406), (9.087413, -1.9241188, 0.03400365, 0.9166667, -1.9586033, 0.00916183), (9.187413, -1.9451188, 0.03067571, 0.9166667, -1.9911925, 0.00950832), (9.287413, -1.9841188, 0.03020344, 0.9166667, -2.0195405, 0.00982022), (9.387413, -2.0201187, 0.02926173, 0.9166667, -2.0455432, 0.0101152), (9.487412, -2.0801187, 0.02926173, 0.9166667, -2.0771618, 0.01048569), (9.587413, -2.1051188, 0.02832406, 0.9166667, -2.122887, 0.01104533), (9.687413, -2.1401186, 0.03067577, 0.9166667, -2.1666489, 0.01160853), (9.7874

### Using pandas

In [14]:
file_name = '/home/magnus/data/observational_data/all_data.h5'

file = pd.read_hdf(file_name, key='SMF')
file_header = file.keys().tolist()
print(file_header)

KeyError: 'No object named SMF in the file'