Importing necessary libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from scipy import optimize
import pandas as pd
import glob
import math
import os

import multiprocessing
from multiprocessing import Pool

Given a dataset $X, Y$ each having size $n$ and a fitting function $f$, we define our fit error measure as -

$$J = \sum_{i=1}^{n} (y_i - f(x_i))^2$$

In [2]:
def fit_error(y, f):
    """Return the sum of squared residuals between the data `y` and the fit `f`."""
    residuals = y - f
    return np.sum(residuals**2)

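Fitting function 1, Gaussian (the code evaluates it on peak-centred coordinates $z - \mu$, so `fit_func_1` takes no $\mu$ argument) - 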

$$ p(z)= \frac{A}{\sqrt{2 \pi \sigma^2}} e^{-\frac{(z-\mu)^2}{2 \sigma^2}}$$

We want this to obey the same normalization as the numerical PDF -

$$ \int_{-\infty}^{\infty} p(z) dz = \sum_{i = 1}^{n} w_i$$

In [3]:
def fit_func_1(Z, A, sigma):
    """Gaussian of area `A` and width `sigma`, centred on Z = 0.

    `Z` is the abscissa measured from the peak, so no mean parameter is taken.
    """
    prefactor = A/np.sqrt(2*np.pi*sigma**2)
    exponent = -Z**2/(2*sigma**2)

    return prefactor*np.exp(exponent)

Fitting function 2, normalized, exponential decay - 

$$ p(z)=  B e^{- \alpha z} $$

In [4]:
def fit_func_2(Z, B, alpha):
    """Exponential decay with amplitude `B` at Z = 0 and decay rate `alpha`."""
    decay = np.exp(-alpha*Z)

    return B*decay

Constructing fitted PDF

In [5]:
def fit_PDF(centers, heights):
    """Fit a Gaussian, optionally followed by an exponential tail, to a numerical PDF.

    The bins up to and including the peak are fitted with `fit_func_1` on an
    abscissa shifted so that the peak sits at zero. Every bin from the peak
    onwards is then tried as a transition point beyond which `fit_func_2`
    replaces the Gaussian; the candidate with the lowest `fit_error` over the
    whole PDF is kept, and the pure Gaussian is kept if no candidate beats it.

    Fitting is best-effort: failures are swallowed and reported through the
    returned values rather than raised.

    Parameters
    ----------
    centers : numpy.ndarray
        Bin centers of the numerical PDF.
    heights : numpy.ndarray
        Bin heights of the numerical PDF (indexed in parallel with `centers`).

    Returns
    -------
    fit : numpy.ndarray
        Fitted PDF evaluated at `centers`; all zeros if the Gaussian fit failed.
    fit_params : numpy.ndarray
        `[A, mu, sigma, alpha, z_T]`. All entries are NaN if the Gaussian fit
        failed; `alpha` and `z_T` stay NaN when no exponential tail improved
        on the pure Gaussian.

    Raises
    ------
    ValueError
        If `heights` is empty (raised by `np.max`).
    IndexError
        If no bin equals the maximum, which happens when `heights` contains NaN.
    """

    # Information about the peak in the numerical PDF (first bin at the maximum)
    peak_height = np.max(heights)
    peak_ind = np.flatnonzero(heights == peak_height)[0]

    # mu is where the numerical PDF peaks
    mu = centers[peak_ind]

    # Recentering so that peak is at z = 0; the Gaussian only sees the bins
    # up to and including the peak
    del_centers = centers - mu
    gauss_centers = del_centers[:peak_ind + 1]
    gauss_heights = heights[:peak_ind + 1]

    fit = np.zeros(len(centers))
    fit_err = np.inf
    fit_params = np.full(5, np.nan)

    try:

        # Estimating sigma from the half width at half maximum: the first bin
        # on the rising side that reaches half of the peak height
        sigma = 0

        for i in range(len(gauss_centers)):
            if gauss_heights[i] >= peak_height/2:
                sigma = (-gauss_centers[i])/np.sqrt(2*np.log(2))
                break

        # Estimating A accordingly, by using the peak height
        A = np.sqrt(2*np.pi*sigma**2)*peak_height

        # First fit a Gaussian

        gauss_bounds = ([0, 0], [np.max(heights), np.max(np.abs(del_centers))])

        # curve_fit rejects a starting point outside the bounds with a
        # ValueError, which previously discarded the whole fit silently.
        # Clipping leaves an already feasible guess untouched.
        guess_gauss_params = np.clip([A, sigma], gauss_bounds[0], gauss_bounds[1])

        fit_gauss_params, _ = optimize.curve_fit(fit_func_1, gauss_centers, gauss_heights,
                                                 p0 = guess_gauss_params,
                                                 bounds = gauss_bounds, method = 'trf')

        fit_params[0] = fit_gauss_params[0]
        fit_params[1] = mu
        fit_params[2] = fit_gauss_params[1]

        fit = fit_func_1(del_centers, *fit_gauss_params)
        fit_err = fit_error(heights, fit)

        # See if an exponential decay tail exists and is a better fit

        exp_bounds = ([0, 0], [np.max(heights), np.inf])

        for i in range(peak_ind, len(centers)):

            try:

                # Candidate transition point between Gaussian and exponential
                v_T = centers[i]

                # Tail abscissa measured from the transition point
                exp_centers = del_centers[i:] - del_centers[i]

                exp_heights = heights[i:]
                init_height = exp_heights[0]

                # Estimating alpha using half-life decay
                alpha = 0

                for j in range(len(exp_centers)):
                    if exp_heights[j] <= init_height/2:
                        alpha = np.log(2)/(exp_centers[j])
                        break

                # NOTE(review): the exp(alpha*v_T) factor matches the unshifted
                # form B*exp(-alpha*z), while the fit below uses an abscissa
                # that starts at 0 at the transition point - confirm intent.
                B = (fit_params[0]*np.exp(alpha*v_T))/np.sqrt(2*np.pi*fit_params[2]**2)*np.exp(-(v_T-mu)**2/(2*fit_params[2]**2))

                # Keep the starting point feasible (see the Gaussian fit above)
                guess_exp_params = np.clip([B, alpha], exp_bounds[0], exp_bounds[1])

                # A NaN/inf guess (e.g. from a zero-height starting bin) cannot
                # be fitted; skip this candidate explicitly
                if not np.all(np.isfinite(guess_exp_params)):
                    continue

                fit_exp_params, _ = optimize.curve_fit(fit_func_2, exp_centers, exp_heights,
                                                       p0 = guess_exp_params,
                                                       bounds = exp_bounds, method = 'trf')

                # Gaussian below the transition point, exponential from it onwards
                gauss_fit = fit_func_1(del_centers[:i], *fit_gauss_params)
                exp_fit = fit_func_2(exp_centers, *fit_exp_params)
                curr_fit = np.concatenate([gauss_fit, exp_fit])
                curr_err = fit_error(heights, curr_fit)

                if curr_err < fit_err:
                    fit = curr_fit
                    fit_err = curr_err
                    fit_params[3] = fit_exp_params[1]
                    fit_params[4] = v_T

            except Exception:
                # Best effort: a failed candidate is simply not considered.
                # Exception (not a bare except) lets KeyboardInterrupt through.
                continue

    except Exception:
        # Best effort: keep whatever was fitted before the failure
        pass

    return fit, fit_params

In [6]:
def fit_snap(info):
    """Fit the numerical abundance PDF of every metal for a single snapshot.

    For each metal, reads `data/num/<snap>-num_<metal>_data.csv` (columns
    'abundance' and 'num_val') under that metal's directory, fits it with
    `fit_PDF` after scaling the peak to 1, and writes the rescaled fit to
    `data/fit/<snap>-fit_<metal>_data.csv`.

    On success, one dict with the keys 'snap_dict', 'A', 'mu', 'sigma',
    'alpha' and 'z_T' (each parameter keyed by metal) is put on the queue.
    On failure nothing is put on the queue, and the rows for this snapshot
    are removed from every metal's `data/fit/fit_<metal>_params.csv`.

    Parameters
    ----------
    info : sequence
        `(snap_dict, metals, spath_metals, q)`: the snapshot record (must
        contain 'snap'), the metal names, a mapping from metal name to its
        directory path (used as a string prefix, so it must end with a path
        separator), and a queue-like object exposing `put`.

    Returns
    -------
    None
    """

    snap_dict = info[0]
    metals = info[1]
    spath_metals = info[2]
    q = info[3]

    snap = snap_dict['snap']

    try:

        A = {}
        mu = {}
        sigma = {}
        alpha = {}
        z_T = {}

        for m in metals:

            num_df = pd.read_csv(spath_metals[m] + 'data/num/' + str(snap) + '-num_' + m + '_data.csv')

            # Read as float: an integer-typed column would make the
            # normalisation below fail if it were done in place
            centers = num_df['abundance'].to_numpy(dtype = float)
            heights = num_df['num_val'].to_numpy(dtype = float)

            # Fit on heights scaled so that the peak is 1
            mass_norm = np.max(heights)

            # Compute the fitted PDF

            fit, fit_params = fit_PDF(centers, heights / mass_norm)

            # Rescaling range to achieve desired normalization

            fit = fit * mass_norm
            fit_params[0] *= mass_norm

            A[m] = fit_params[0]
            mu[m] = fit_params[1]
            sigma[m] = fit_params[2]
            alpha[m] = fit_params[3]
            z_T[m] = fit_params[4]

            datafile =  str(snap) + '-fit_' + m + '_data' + '.csv'

            fit_dict = {'abundance': centers, 'fit_val': fit}
            fit_df = pd.DataFrame(fit_dict)
            fit_df.to_csv(spath_metals[m] + 'data/fit/' + datafile, index = False)

        q.put({'snap_dict': snap_dict, 'A': A, 'mu': mu, 'sigma': sigma, 'alpha': alpha, 'z_T': z_T})

    except Exception:

        # The snapshot could not be fitted: drop it from every metal's
        # parameter table so that the table only lists fitted snapshots.
        # NOTE(review): this is an unsynchronised read-modify-write of a file
        # shared between snapshots; if several worker processes fail at the
        # same time they can overwrite each other's changes.
        for m in metals:

            datafile =  'fit_{}_params.csv'.format(m)
            fit_df = pd.read_csv(spath_metals[m] + 'data/fit/' + datafile)
            fit_df = fit_df[fit_df.snap != snap]
            fit_df.to_csv(spath_metals[m] + 'data/fit/' + datafile, index = False)

Importing dataset

In [7]:
# Specifying simulation directory and the directory to save results in
wdir = str(input('Enter simulation directory path: ')).strip()

# Directory holding the temporal analysis data. os.path.join inserts the
# separators itself, so wdir may be entered with or without a trailing slash;
# the empty last component keeps the trailing separator that later string
# concatenations rely on.
sdir = os.path.join(wdir, 'temporal_analysis', '')

Enter simulation directory path: /Users/thepoetoftwilight/Documents/CASSI2020/CASSI2020-Results/m10q_res250/


In [8]:
# Statistics of the rendered snapshots, one record (dict) per snapshot

rendered_df = pd.read_csv(sdir + 'rendered_snap_stats.csv')
snap_dicts = rendered_df.to_dict(orient = 'records')

# Names of the rendered metals

metal_df = pd.read_csv(sdir + 'metal_list.csv')
metals = metal_df['metals'].tolist()

# Map every metal to its own directory under sdir
spath_metals = {m: sdir + m + '/' for m in metals}

In [9]:
if __name__ == "__main__":

    # Fit every rendered snapshot in parallel, then gather the per-snapshot
    # results into the snapshot statistics table and the per-metal fit
    # parameter tables.

    n_proc = multiprocessing.cpu_count()

    print("About to start:")

    # Workers report their results through a manager queue
    manager = multiprocessing.Manager()
    q = manager.Queue()

    with Pool(processes = n_proc) as pool:

        pool.map(fit_snap, [(snap_dict, metals, spath_metals, q) for snap_dict in snap_dicts])

    # pool.map has returned, so every snapshot has been processed and the
    # queue can simply be drained
    info_dicts = []

    while not q.empty():
        info_dicts.append(q.get())

    # The queue holds results in worker-completion order. Sort them by
    # snapshot so the parameter rows written below follow the same order as
    # the snap-sorted statistics table instead of an arbitrary one.
    info_dicts.sort(key = lambda info_dict: info_dict['snap_dict']['snap'])

    rendered_dicts = [info_dict['snap_dict'] for info_dict in info_dicts]
    A_dicts = [info_dict['A'] for info_dict in info_dicts]
    mu_dicts = [info_dict['mu'] for info_dict in info_dicts]
    sigma_dicts = [info_dict['sigma'] for info_dict in info_dicts]
    alpha_dicts = [info_dict['alpha'] for info_dict in info_dicts]
    z_T_dicts = [info_dict['z_T'] for info_dict in info_dicts]

    num_rendered_snaps = len(rendered_dicts)

    # One list per statistic, in the same (snap-sorted) order as rendered_dicts
    rendered_indices = [d['snap'] for d in rendered_dicts]
    redshifts = [d['redshift'] for d in rendered_dicts]
    times = [d['time'] for d in rendered_dicts]

    halo_masses = [d['halo_mass'] for d in rendered_dicts]

    SFRs_10 = [d['SFR@10Myr'] for d in rendered_dicts]
    SFRs_100 = [d['SFR@100Myr'] for d in rendered_dicts]
    SFRs_1000 = [d['SFR@1000Myr'] for d in rendered_dicts]

    velocities_mass = [d['velocity_mass'] for d in rendered_dicts]
    velocities_vol = [d['velocity_vol'] for d in rendered_dicts]
    velocities_rms_mass = [d['velocity_rms_mass'] for d in rendered_dicts]
    velocities_rms_vol = [d['velocity_rms_vol'] for d in rendered_dicts]
    velocities_spread = [d['velocity_spread'] for d in rendered_dicts]

    sounds_mass = [d['sound_mass'] for d in rendered_dicts]
    sounds_vol = [d['sound_vol'] for d in rendered_dicts]
    sounds_rms_mass = [d['sound_rms_mass'] for d in rendered_dicts]
    sounds_rms_vol = [d['sound_rms_vol'] for d in rendered_dicts]
    sounds_spread = [d['sound_spread'] for d in rendered_dicts]

    thermals_mass = [d['thermal_mass'] for d in rendered_dicts]
    thermals_vol = [d['thermal_vol'] for d in rendered_dicts]
    thermals_rms_mass = [d['thermal_rms_mass'] for d in rendered_dicts]
    thermals_rms_vol = [d['thermal_rms_vol'] for d in rendered_dicts]
    thermals_spread = [d['thermal_spread'] for d in rendered_dicts]

    mach_numbers_mass = [d['mach_number_mass'] for d in rendered_dicts]
    mach_numbers_vol = [d['mach_number_vol'] for d in rendered_dicts]
    mach_numbers_rms_mass = [d['mach_number_rms_mass'] for d in rendered_dicts]
    mach_numbers_rms_vol = [d['mach_number_rms_vol'] for d in rendered_dicts]
    mach_numbers_spread = [d['mach_number_spread'] for d in rendered_dicts]

    # Fit parameters per metal, each list ordered like rendered_dicts.
    # z_Ts is built from z_T_dicts (it was previously filled from
    # alpha_dicts, which duplicated alpha into the z_T column).
    As = {m: [A_dict[m] for A_dict in A_dicts] for m in metals}
    mus = {m: [mu_dict[m] for mu_dict in mu_dicts] for m in metals}
    sigmas = {m: [sigma_dict[m] for sigma_dict in sigma_dicts] for m in metals}
    alphas = {m: [alpha_dict[m] for alpha_dict in alpha_dicts] for m in metals}
    z_Ts = {m: [z_T_dict[m] for z_T_dict in z_T_dicts] for m in metals}

    rendered_stats_dict = {'snap': rendered_indices,  'redshift': redshifts, 'time': times,
                           'halo_mass': halo_masses,
                           'SFR@10Myr': SFRs_10, 'SFR@100Myr': SFRs_100, 'SFR@1000Myr': SFRs_1000,
                           'velocity_mass': velocities_mass, 'velocity_vol': velocities_vol,
                           'velocity_rms_mass': velocities_rms_mass, 'velocity_rms_vol': velocities_rms_vol,
                           'velocity_spread': velocities_spread,
                           'sound_mass': sounds_mass, 'sound_vol': sounds_vol,
                           'sound_rms_mass': sounds_rms_mass, 'sound_rms_vol': sounds_rms_vol,
                           'sound_spread': sounds_spread,
                           'thermal_mass': thermals_mass, 'thermal_vol': thermals_vol,
                           'thermal_rms_mass': thermals_rms_mass, 'thermal_rms_vol': thermals_rms_vol,
                           'thermal_spread': thermals_spread,
                           'mach_number_mass': mach_numbers_mass, 'mach_number_vol': mach_numbers_vol,
                           'mach_number_rms_mass': mach_numbers_rms_mass,
                           'mach_number_rms_vol': mach_numbers_rms_vol,
                           'mach_number_spread': mach_numbers_spread}

    # Overwrites the statistics file that was read as input; only snapshots
    # that reported a fit result are kept
    rendered_stats_df = pd.DataFrame(rendered_stats_dict)
    rendered_stats_df = rendered_stats_df.sort_values(by = 'snap')
    rendered_stats_df = rendered_stats_df.reset_index(drop = True)
    rendered_stats_df.to_csv(sdir + 'rendered_snap_stats.csv', index = False)

    print(rendered_stats_df)
    print()

    for m in metals:
        param_df_init = pd.read_csv(spath_metals[m] + 'data/fit/fit_{}_params.csv'.format(m))
        param_dict_append = {'A': As[m], 'mu': mus[m], 'sigma': sigmas[m],
                      'alpha': alphas[m], 'z_T': z_Ts[m]}
        param_df_append = pd.DataFrame(param_dict_append)
        # NOTE(review): both frames carry a default index, so the columns are
        # paired row by row. This assumes the params file has one row per
        # fitted snapshot, ordered by snap - confirm against the code that
        # writes it.
        param_df = pd.concat([param_df_init, param_df_append], axis = 1)
        param_df.to_csv(spath_metals[m] + 'data/fit/fit_{}_params.csv'.format(m), index = False)

    print("All done!")

About to start:


  upper_dist = ub - x
  (lower_dist <= np.minimum(upper_dist, lower_threshold)))
  upper_dist = ub - x
  (upper_dist <= np.minimum(lower_dist, upper_threshold)))
  (lower_dist <= np.minimum(upper_dist, lower_threshold)))
  (upper_dist <= np.minimum(lower_dist, upper_threshold)))
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  upper_dist = ub - x
  (lower_dist <= np.minimum(upper_dist, lower_threshold)))
  (upper_dist <= np.minimum(lower_dist, upper_threshold)))
  This is separate from the ipykernel package so we can avoid doing imports until
  upper_dist = ub - x
  (lower_dist <= np.minimum(upper_dist, lower_threshold)))
  (upper_dist <= np.minimum(lower_dist, upper_threshold)))
  This is separate from the ipykernel package so we can avoid doing imports until
  return np.all((x >= lb) & (x <= ub))
  return np.all((x >= lb) & (x <= ub))
  return np.all((x >= lb) & (x <= 

     snap  redshift       time     halo_mass  SFR@10Myr  SFR@100Myr  \
0     110  3.266667   2.008803  2.786898e+09   0.001611    0.001169   
1     111  3.238411   2.028696  2.959412e+09   0.008081    0.002210   
2     112  3.210526   2.048648  3.048775e+09   0.015893    0.004517   
3     113  3.183007   2.068659  3.119460e+09   0.007753    0.006225   
4     114  3.155844   2.088728  3.178012e+09   0.006455    0.007100   
..    ...       ...        ...           ...        ...         ...   
447   596  0.000637  13.789874  8.534205e+09   0.000000    0.000000   
448   597  0.000478  13.792092  8.534372e+09   0.000000    0.000000   
449   598  0.000319  13.794310  8.534627e+09   0.000000    0.000000   
450   599  0.000159  13.796529  8.534806e+09   0.000000    0.000000   
451   600  0.000000  13.798747  8.534961e+09   0.000000    0.000000   

     SFR@1000Myr  velocity_mass  velocity_vol  velocity_rms_mass  ...  \
0       0.001025      35.478512     55.402683          40.630207  ...   
1