In [1]:
import numpy as np
import matplotlib.pylab as plt
np.random.seed(10)

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_probability as tfp
tfd = tfp.distributions
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, Callback

# Activate TF2 behavior:
from tensorflow.python import tf2
if not tf2.enabled():
  # TF2 behaviour is not active yet: rebind `tf` to the v2 compatibility
  # module, switch v2 behaviour on, and fail fast if that did not take effect.
  import tensorflow.compat.v2 as tf
  tf.enable_v2_behavior()
  assert tf2.enabled()

  # Single-field structured NumPy dtypes. Nothing in the visible cells reads
  # them. NOTE(review): they look like copies of TensorFlow's quantized-dtype
  # definitions -- confirm whether anything still needs them.
  #
  # Each dtype was previously declared twice with identical values; the
  # duplicates are removed. The trailing field shape `1` is dropped as well:
  # NumPy >= 1.17 raises a FutureWarning for `(name, type, 1)` and will later
  # read it as a shape-(1,) field, whereas `(name, type)` is the scalar field
  # that the old spelling currently means.
  _np_qint8 = np.dtype([("qint8", np.int8)])
  _np_quint8 = np.dtype([("quint8", np.uint8)])
  _np_qint16 = np.dtype([("qint16", np.int16)])
  _np_quint16 = np.dtype([("quint16", np.uint16)])
  _np_qint32 = np.dtype([("qint32", np.int32)])
  np_resource = np.dtype([("resource", np.ubyte)])


In [2]:
#### FLAGS ####
# Notebook-level boolean switches. None of these names is read by the cells
# visible in this part of the notebook, so the notes below are inferred from
# the names only -- TODO confirm against the cells that consume them.

train_mode = False  # presumably: train a model instead of loading a saved one -- verify
reSampleTest = False  # presumably: re-sample the test set -- verify
logTrue = False  # presumably: work in log space -- verify
DEEP2_SDSSTrue = False  # dataset-selection switch (exact meaning not visible here)
SDSSTrainTrue = False  # dataset-selection switch (exact meaning not visible here)
COSMOSTrue = False  # dataset-selection switch (exact meaning not visible here)

FSPSTrue = True

# The names below only exist when FSPSTrue is True; with FSPSTrue = False any
# later cell that reads them would hit a NameError.
if FSPSTrue:
    linear_z = True  # ReadFSPS_raw does not read this global

    # Exactly one of the following is True as written (OnlyCOSMOS).
    CombineTrue = False
    OnlySDSS = False
    OnlyDEEP2 = False
    OnlyCOSMOS = True

In [3]:
def minmax_cuts(X, y, min_col=-5, max_col=5, min_mag=12, max_mag=25, verbose=True):
    """Keep only the rows of ``X`` (and matching entries of ``y``) inside a box cut.

    A row is kept when each of its first four columns lies strictly between
    ``min_col`` and ``max_col`` and its fifth column lies strictly between
    ``min_mag`` and ``max_mag``. Rows containing NaN in any tested column are
    dropped, because every comparison with NaN is False.

    Parameters
    ----------
    X : ndarray, 2-D with at least 5 columns
        Feature table; columns 0-3 are tested against the colour limits and
        column 4 against the magnitude limits.
    y : ndarray
        Target values, indexable along axis 0 with the same row indices as ``X``.
    min_col, max_col : float, optional
        Exclusive limits for columns 0-3 (defaults: -5 and 5).
    min_mag, max_mag : float, optional
        Exclusive limits for column 4 (defaults: 12 and 25).
    verbose : bool, optional
        When True (default) print the input shape, the shape of the index
        array, and the output shape, exactly as the function always did.

    Returns
    -------
    X_new, y_new : ndarray
        The selected rows of ``X`` and the corresponding entries of ``y``.
    """
    if verbose:
        print(X.shape)

    colours = X[:, :4]
    magnitude = X[:, 4]

    # One vectorised test for the four colour columns instead of four
    # copy-pasted comparisons.
    inside = (
        np.all((colours < max_col) & (colours > min_col), axis=1)
        & (magnitude < max_mag) & (magnitude > min_mag)
    )

    # np.where returns a 1-tuple of row indices; indexing with that tuple
    # selects whole rows of X and entries of y.
    mask_cond = np.where(inside)

    if verbose:
        print( np.array(mask_cond).shape)

    X_new = X[mask_cond]
    y_new = y[mask_cond]
    if verbose:
        print(X_new.shape)
    return X_new, y_new

In [4]:
def rescale(X, y, y_min=None, y_max=None):
    """Min-max scale the five feature columns and the target.

    Each column of ``X`` is mapped with ``(X - xmin) / (xmax - xmin)`` using
    fixed per-column limits ([-5, 5] for columns 0-3 and [12, 25] for
    column 4); ``y`` is mapped the same way with ``y_min`` / ``y_max``.

    Parameters
    ----------
    X : ndarray, last axis of length 5
        Feature table.
    y : ndarray or scalar
        Target values.
    y_min, y_max : float, optional
        Limits used to scale ``y``. When omitted, the notebook-level globals
        ``ymin`` / ``ymax`` are used, as the function did originally.

    Returns
    -------
    X, y : ndarray
        The rescaled features and target.

    Raises
    ------
    NameError
        If ``y_min`` / ``y_max`` is omitted and the corresponding global
        ``ymin`` / ``ymax`` has not been defined in the notebook.
    """
    # The limits must be arrays: subtracting two Python lists (as the previous
    # version did with `xmax - xmin`) raises TypeError.
    xmin = np.array([-5, -5, -5, -5, 12], dtype=float)
    xmax = np.array([5, 5, 5, 5, 25], dtype=float)

    if y_min is None:
        y_min = ymin  # notebook-level global, not defined in the visible cells
    if y_max is None:
        y_max = ymax  # notebook-level global, not defined in the visible cells

    X = (X - xmin) / (xmax - xmin)
    y = (y - y_min) / (y_max - y_min)
    return X, y

In [5]:
def print_limits(X, y):
    """Print a short summary of a dataset.

    Writes four lines to stdout: the number of entries in ``y``, the minimum
    and maximum of ``y``, and the per-column minimum and maximum of ``X``.
    Returns None.
    """
    n_points = y.shape[0]
    print('number of datapoints: ', str(n_points))

    z_low, z_high = y.min(), y.max()
    print('z-minmax: ', z_low, z_high)

    # Same reduction along axis 0 for both extremes; only the label differs.
    for label, reduce_fn in (('ColMag-min: ', np.min), ('ColMag-max: ', np.max)):
        print(label, reduce_fn(X, axis=0))

In [6]:

def _with_z_first(z, colmag):
    """Return one 2-D table whose column 0 is ``z`` and whose other columns are ``colmag``."""
    return np.append(z[:, None], colmag, axis=1)


def ReadFSPS_raw(path_program = '../../Data/fromGalaxev/photozs/datasets/', linear_z=True):
    """Load the FSPS training set and the SDSS, DEEP2 and COSMOS test sets.

    Every set is read from ``.npy`` files below ``path_program``, summarised
    with ``print_limits``, filtered with ``minmax_cuts`` and summarised again.
    The FSPS rows are shuffled with a fixed seed.

    Parameters
    ----------
    path_program : str, optional
        Dataset root directory. It is concatenated directly with the
        sub-directory names, so it must end with a path separator. The FSPS
        files are now also resolved against this root (they used to be read
        from a hard-coded copy of the default path, which silently ignored
        the argument).
    linear_z : bool, optional
        Selects which FSPS rows are used: rows whose flag equals 0 when True
        (the default, and the only behaviour available before), rows whose
        flag equals 1 when False.

    Returns
    -------
    X_trainFSPS, y_trainFSPS, X_testSDSS, y_testSDSS, X_testDEEP2, y_testDEEP2, X_testCOSMOS, y_testCOSMOS
        For each set, the colour/magnitude columns (``X_*``) and the redshift
        column (``y_*``) after the cuts.

    Notes
    -----
    * Calls ``np.random.seed(12211)``, i.e. it reseeds NumPy's *global*
      generator. This is kept so the shuffle order and any later
      ``np.random`` use in the notebook stay as they were.
    * The COSMOS redshift file is loaded with ``allow_pickle=True``;
      unpickling can execute arbitrary code, so only use trusted files.
    """
    np.random.seed(12211)

    march_dir = path_program + 'data_march_2020/'
    um_dir = path_program + 'Training_data_UM_random/'

    # ---------------- FSPS (training) ----------------
    # Original comment gives the layout as (ngal, nprop, nredshift, ncol).
    # Only the last axis is kept as columns; all leading axes are flattened.
    fsps_colmag = np.load(march_dir + 'fsps_sdss_join_col_mag.npy')
    fsps_colmag = fsps_colmag.reshape(-1, fsps_colmag.shape[-1])
    fsps_z = np.load(march_dir + 'fsps_sdss_join_zz.npy').reshape(-1)
    fsps_flag = np.load(march_dir + 'fsps_sdss_join_flag_zz.npy').reshape(-1)

    z_index = (fsps_flag == 0) if linear_z else (fsps_flag == 1)

    train_table = _with_z_first(fsps_z[z_index], fsps_colmag[z_index])

    # Shuffle the rows with the seeded global generator.
    shuffle_order = np.arange(train_table.shape[0])
    np.random.shuffle(shuffle_order)
    train_table = train_table[shuffle_order]

    # ---------------- SDSS (test) ----------------
    sdss_colmag = np.load(um_dir + 'SDSS_col_val.npy')
    sdss_z = np.load(um_dir + 'SDSS_zz_val.npy')
    sdss_table = _with_z_first(sdss_z, sdss_colmag)

    # ---------------- DEEP2 (test) ----------------
    deep2_colmag = np.load(march_dir + 'DEEP2_updated_colors_new.npy')
    deep2_z = np.load(march_dir + 'DEEP2_updated_zz_new.npy')
    deep2_table = _with_z_first(deep2_z, deep2_colmag)

    # ---------------- COSMOS (test) ----------------
    cosmos_colmag = np.load(march_dir + 'COSMOS_updated_SDSScolors.npy')
    # This file holds a pickled dict; the redshifts live under 'zspec'.
    cosmos_z = np.load(march_dir + 'COSMOS_updated_SDSSzz.npy', allow_pickle=True).item()['zspec']
    cosmos_table = _with_z_first(cosmos_z, cosmos_colmag)

    # Split every table back into (label, colour/magnitude columns, redshift).
    datasets = [
        (label, table[:, 1:], table[:, 0])
        for label, table in (
            ('FSPS', train_table),
            ('SDSS', sdss_table),
            ('DEEP2', deep2_table),
            ('COSMOS', cosmos_table),
        )
    ]

    def _report(named_sets):
        # Print the label followed by the limits of each set, in order.
        for label, X, y in named_sets:
            print(label)
            print_limits(X, y)

    _report(datasets)
    datasets = [(label,) + tuple(minmax_cuts(X, y)) for label, X, y in datasets]
    _report(datasets)

    (
        (_, X_trainFSPS, y_trainFSPS),
        (_, X_testSDSS, y_testSDSS),
        (_, X_testDEEP2, y_testDEEP2),
        (_, X_testCOSMOS, y_testCOSMOS),
    ) = datasets

    return X_trainFSPS, y_trainFSPS, X_testSDSS, y_testSDSS, X_testDEEP2, y_testDEEP2, X_testCOSMOS, y_testCOSMOS

In [7]:
# Load the FSPS training set and the three test sets (already box-cut by
# ReadFSPS_raw) into notebook-level names.
(
    X_trainFSPS, y_trainFSPS,
    X_testSDSS, y_testSDSS,
    X_testDEEP2, y_testDEEP2,
    X_testCOSMOS, y_testCOSMOS,
) = ReadFSPS_raw(path_program='../../Data/fromGalaxev/photozs/datasets/')

# Optional min-max rescaling of every set; switched off as written.
# NOTE(review): rescale() depends on notebook-level `ymin` / `ymax`, which are
# not defined in the visible cells -- define them before enabling this.
rescaleTrue = False
if rescaleTrue:
    X_trainFSPS, y_trainFSPS = rescale(X_trainFSPS, y_trainFSPS)
    X_testSDSS, y_testSDSS = rescale(X_testSDSS, y_testSDSS)
    X_testDEEP2, y_testDEEP2 = rescale(X_testDEEP2, y_testDEEP2)
    X_testCOSMOS, y_testCOSMOS = rescale(X_testCOSMOS, y_testCOSMOS)

(178300, 200, 5)
(35660000, 5)
(178300, 200)
(35660000,)
(178300, 200)
(35660000,)
number of datapoints:  17709557
z-minmax:  0.002000010024108306 1.2499997501080744
ColMag-min:  [-0.1784067  -0.58599727 -1.46125335 -1.77934106  5.55541849]
ColMag-max:  [  3.79654048   3.23352432   1.89477147   2.41539175 102.21784816]
number of datapoints:  839680
z-minmax:  0.0010009037796407938 0.9959535002708435
ColMag-min:  [ -4.13276291  -9.40818405  -5.60798359 -10.65641022  12.00018215]
ColMag-max:  [13.32734489  5.73001671  5.97406387  6.33528423 21.46190643]
number of datapoints:  13163
z-minmax:  0.00853208638727665 0.9998403191566467
ColMag-min:  [-16.02178146  -8.86878939  -0.87539692 -16.55613166  15.02469534]
ColMag-max:  [17.02883987 20.22452066  8.5757395   2.03815475 23.20179149]
number of datapoints:  7297
z-minmax:  0.0059 0.9997
ColMag-min:  [-3.10955813 -1.82954357 -1.88992152 -1.96569271 18.86137697]
ColMag-max:  [ 3.45250798  3.80604549  3.47295741  3.19488478 24.69223614]
(1770

In [19]:
PairPlotTrue = True

if PairPlotTrue:
    import seaborn as sns
    import pandas as pd
    from IPython.display import Image, display

    sns.set(font_scale=1.5)
    sns.set_style(style="ticks",
                  rc={'axes.grid' : False, 'axes.facecolor': 'white',
                      'xtick.bottom': True,
                      'xtick.top': False,
                      'ytick.left': True,
                      'ytick.right': False,
                      'axes.spines.left': True,
                      'axes.spines.bottom': True,
                      'axes.spines.right': True,
                      'axes.spines.top': True,
                      'axes.linewidth': 2,
                      'axes.edgecolor':'black'})

    # Rows plotted per test set; the FSPS training set uses 5x as many.
    num_data = 2000
    pairplot_png = "pairplot_best_fsps_fullData_raw_cuts.png"
    pairplot_features = ['u-g', 'g-r', 'r-i', 'i-z', 'mag(i)']

    def _pairplot_frame(X, y, n_rows):
        """Build a frame from the first ``n_rows`` rows: five feature columns plus 'redshift'."""
        frame = pd.DataFrame(X[:n_rows, :len(pairplot_features)], columns=pairplot_features)
        frame['redshift'] = y[:n_rows]
        return frame

    # One frame per dataset (names kept so later cells can still use them).
    df_train = _pairplot_frame(X_trainFSPS, y_trainFSPS, 5*num_data)
    df_testSDSS = _pairplot_frame(X_testSDSS, y_testSDSS, num_data)
    df_testDEEP2 = _pairplot_frame(X_testDEEP2, y_testDEEP2, num_data)
    df_testCOSMOS = _pairplot_frame(X_testCOSMOS, y_testCOSMOS, num_data)

    # ignore_index avoids duplicated row labels in the combined frame (each
    # per-dataset frame starts its index at 0).
    df_all = pd.concat(
        [df_train.assign(dataset='FSPS'),
         df_testSDSS.assign(dataset='SDSS'),
         df_testDEEP2.assign(dataset='DEEP2'),
         df_testCOSMOS.assign(dataset='COSMOS')],
        ignore_index=True)

    #### plotting ####
    # NOTE(review): `distplot`, `n_levels`, `shade` and `shade_lowest` are
    # deprecated in seaborn >= 0.11; they are kept because the installed
    # seaborn version is not visible here -- confirm before modernising.
    g = sns.PairGrid(df_all, hue='dataset')
    g = g.map_upper(sns.scatterplot, alpha=0.3, size=2, marker='+')
    g = g.map_lower(sns.kdeplot, n_levels=10, shade=True, shade_lowest=False, alpha=0.7)
    g = g.map_diag(sns.distplot, hist=True, kde=False)
    g = g.add_legend()

    g.savefig(pairplot_png)

    # Close the live figure so it is not rendered inline, then show the saved
    # PNG. display() is required: a bare expression inside an `if` block is
    # not the cell's last top-level expression, so it would not be rendered.
    plt.close(g.fig)
    display(Image(filename=pairplot_png))


<Figure size 1212.9x1080 with 0 Axes>

In [20]:
# Show the seaborn axes-style parameters currently in effect.
# Depends on `sns` having been imported by the pair-plot cell, which only
# happens when PairPlotTrue is True.
sns.axes_style()

{'axes.facecolor': 'white',
 'axes.edgecolor': 'black',
 'axes.grid': False,
 'axes.axisbelow': True,
 'axes.labelcolor': '.15',
 'figure.facecolor': 'white',
 'grid.color': '.8',
 'grid.linestyle': '-',
 'text.color': '.15',
 'xtick.color': '.15',
 'ytick.color': '.15',
 'xtick.direction': 'out',
 'ytick.direction': 'out',
 'lines.solid_capstyle': 'round',
 'patch.edgecolor': 'w',
 'image.cmap': 'rocket',
 'font.family': ['sans-serif'],
 'font.sans-serif': ['Arial',
  'DejaVu Sans',
  'Liberation Sans',
  'Bitstream Vera Sans',
  'sans-serif'],
 'patch.force_edgecolor': True,
 'xtick.bottom': True,
 'xtick.top': False,
 'ytick.left': True,
 'ytick.right': False,
 'axes.spines.left': True,
 'axes.spines.bottom': True,
 'axes.spines.right': True,
 'axes.spines.top': True}