## Star formation history toolkit

In [2]:
import pandas as pd
# this program uses holoviews, to install: conda install -c pyviz pyviz
import holoviews as hv
hv.extension('bokeh')
import hvplot.pandas
import datashader as ds
from holoviews.operation.datashader import datashade, shade, dynspread, rasterize
from holoviews.operation import decimate
import numpy as np
import os
import subprocess

### Setup variables

In [3]:
# Location of the ZVAR code.
# The defaults below are the original author's local paths. Each one can be
# overridden without editing the notebook by setting the corresponding
# environment variable (ZVAR_DIRECTORY, ZVAR_PROGRAM, ZVAR_TEMPLATES) before
# starting the kernel.
zvarDirectory = os.environ.get('ZVAR_DIRECTORY', '/Users/lrizzi/Work/SIMUL_LINUX/zvar_linux')
zvarProgram = os.environ.get('ZVAR_PROGRAM', 'zvar02_evh.exe')
# directory that holds simul_template.par and simul.sh
zvarTemplates = os.environ.get('ZVAR_TEMPLATES', '/Users/lrizzi/Python_Projects/pyZVAR/templates')

### Chemical evolution law

In [4]:
# Chemical evolution law: metallicity ([Fe/H]) tabulated against age (Gyr).
# Age 0 would mean "now", but the youngest age that can be used is 0.023.
cehAges = [16, 0.023]
cehFeH  = [-1.67, -1.57]

# Build the table in one go and order it by increasing age, which is what the
# interpolation performed later in the notebook needs.
cehLaw = pd.DataFrame({'age': cehAges, 'metallicity': cehFeH}).sort_values(by='age')

cehLaw

Unnamed: 0,age,metallicity
1,0.023,-1.57
0,16.0,-1.67


### Age bins definition

In [5]:
# Definition of the age bins, in Gyr. The numbers are the bin edges: every pair of consecutive
# values gives the lower and upper limit of one bin, such that:
# ageBins = [1,2,3]
# creates 2 bins, one from 1 to 2 Gyr, and one from 2 to 3 Gyr

ageBins = [0,1,2,3,4,6,8,10,12,15]

In [6]:
# plot age and metallicity for diagnostics, with a vertical line at every age-bin boundary

plot = hv.Curve(cehLaw).options(width=600)
for boundary in ageBins:
    plot = plot * hv.VLine(boundary)
# attach units to the two dimensions; as the last expression this is what the cell displays
plot.redim.unit(age='Gyr', metallicity='[Fe/H]')


In [7]:
# Evaluate the chemical evolution law at every age-bin boundary by linear interpolation.
# np.interp holds the end values constant outside the tabulated age range.
law_ages = cehLaw.age.values.astype(float)
law_feh = cehLaw.metallicity.values.astype(float)
new_val = np.interp(ageBins, law_ages, law_feh)

# Convert [Fe/H] to Z.
# NOTE(review): 1.7212 is presumably -log10 of the adopted solar Z -- confirm.
new_val_Z = 10**(new_val-1.7212)

# Collect boundaries, [Fe/H] and Z in a single table ...
cehLawInterpolated = pd.DataFrame({'age': ageBins, 'metallicity': new_val, 'Z': new_val_Z})

# ... and show it for diagnostics
cehLawInterpolated

Unnamed: 0,age,metallicity,Z
0,0,-1.57,0.000511
1,1,-1.576115,0.000504
2,2,-1.582374,0.000497
3,3,-1.588633,0.00049
4,4,-1.594892,0.000483
5,6,-1.60741,0.000469
6,8,-1.619928,0.000456
7,10,-1.632446,0.000443
8,12,-1.644964,0.00043
9,15,-1.663741,0.000412


### Generation of stellar populations

In [8]:
def generate_par_file(age_initial, age_final, cehLaw, new_template):
    """
    Write a ZVAR parameter file for one age bin.

    The text of simul_template.par (looked up in zvarTemplates) is copied to
    new_template with its placeholders filled in:
      age1 / age2 -> age_initial / age_final followed by the literal "e9"
      met1 / met2 -> the Z values tabulated in cehLaw at those two ages

    Both ages must appear in cehLaw's 'age' column; otherwise an error message
    is printed and nothing is written. The function always returns None.
    """
    template = os.path.join(zvarTemplates, 'simul_template.par')

    # both bin edges must be tabulated, otherwise there is no Z to look up
    known_ages = cehLaw.age.values
    if age_initial not in known_ages or age_final not in known_ages:
        print("ERROR:The specified ages are not in the original age bins")
        return

    Z_initial = cehLaw.loc[cehLaw['age'] == age_initial, 'Z'].values[0]
    Z_final = cehLaw.loc[cehLaw['age'] == age_final, 'Z'].values[0]

    with open(template) as template_file:
        par_text = template_file.read()

    # placeholders are substituted one after the other, in this order
    substitutions = (
        ('age2', f"{age_final}e9"),
        ('age1', f"{age_initial}e9"),
        ('met2', f"{Z_final}"),
        ('met1', f"{Z_initial}"),
    )
    for placeholder, replacement in substitutions:
        par_text = par_text.replace(placeholder, replacement)

    with open(new_template, "w") as par_file:
        par_file.write(par_text)
        
def extract_mass(output_file):
    """
    Extract the value of the `mtot=` field from an output file created by ZVAR.

    Only the last 9 lines of the file are searched. Each of them is split on
    whitespace and every resulting token on '='; a token whose name part is
    'mtot' provides the value. If several such tokens exist, the last one wins.

    Parameters
    ----------
    output_file : path of the ZVAR output file.

    Returns
    -------
    str : the text that follows 'mtot=' (the caller converts it to a number).

    Raises
    ------
    ValueError : if no 'mtot=' field is present in the last 9 lines.
    """
    with open(output_file) as f:
        tail = f.readlines()[-9:]

    mtot = None
    for line in tail:
        for field in line.split():
            subfield = field.split('=')
            # a bare 'mtot' token without '=' carries no value: skip it
            if subfield[0] == 'mtot' and len(subfield) > 1:
                mtot = subfield[1]

    if mtot is None:
        # fail with an explicit message instead of an UnboundLocalError
        raise ValueError(f"no 'mtot=' field found in the last 9 lines of {output_file}")
    return mtot

def run_simulation(template_file_name):
    """
    Run the simulation for one parameter file and load its results.

    simul.sh (looked up in zvarTemplates) is invoked through `sh` with the
    parameter file and the output file name as arguments. The output file is
    named after the parameter file, with the '.par' suffix replaced by
    '.output'.

    Parameters
    ----------
    template_file_name : name of the .par file to simulate.

    Returns
    -------
    (population, mtot) : the output file parsed as a whitespace-separated
        table (first row is the header, lines starting with '#' are ignored,
        rows containing missing values are dropped), and the 'mtot' value
        returned by extract_mass for the same file.
    """
    # Strip the extension only when it is really the suffix, so that a '.par'
    # occurring elsewhere in the name or path is left untouched
    suffix = '.par'
    if template_file_name.endswith(suffix):
        base_name = template_file_name[:-len(suffix)]
    else:
        base_name = template_file_name

    # using the base name, build the name of the output file
    temporary_population_file = f"{base_name}.output"

    # find the simul.sh file to use to run the simulation
    simulation_driver = os.path.join(zvarTemplates, 'simul.sh')

    # actually run the simulation; a non-zero exit status is reported (not raised)
    # because an output file left over from an earlier run would otherwise be
    # read back without any hint that the simulation failed
    status = subprocess.call(['sh', simulation_driver, template_file_name, temporary_population_file])
    if status != 0:
        print(f"WARNING: {simulation_driver} exited with status {status} for {template_file_name}")

    # read the results in a pandas frame (sep=r'\s+' replaces the deprecated delim_whitespace=True)
    population = pd.read_csv(temporary_population_file, header=0, sep=r'\s+', comment="#").dropna()

    # extract mass information
    mtot = extract_mass(temporary_population_file)
    return population, mtot

In [9]:
### MAIN ROUTINE TO GENERATE STELLAR POPULATIONS
# For every age bin: write its parameter file, run the simulation, and store the
# resulting population and star formation rate under the key "<age_initial>-<age_final>".

# initialize containers
populations_dataframes = {}
star_formation_rates = {}

# loop through the ages, taking the bin edges two by two
# (the loop no longer reuses its own counter as the dictionary key)
bin_edges = cehLawInterpolated.age.values
for age_initial, age_final in zip(bin_edges[:-1], bin_edges[1:]):
    # generate the template relative to this population
    print(f"Computing stellar populations between {age_initial} and {age_final}")
    new_template = f"simulate_{age_initial}_{age_final}.par"
    generate_par_file(age_initial, age_final, cehLawInterpolated, new_template)

    # run the simulation and extract the mass
    population, mtot = run_simulation(new_template)

    # populate the containers
    bin_key = f"{age_initial}-{age_final}"
    populations_dataframes[bin_key] = population
    # total mass divided by the bin width; the factor 1e9 matches the "e9"
    # appended to the ages when the parameter file is written
    star_formation_rates[bin_key] = float(mtot)/(float(age_final-age_initial)*1e9)
        

Computing stellar populations between 0 and 1
Computing stellar populations between 1 and 2
Computing stellar populations between 2 and 3
Computing stellar populations between 3 and 4
Computing stellar populations between 4 and 6
Computing stellar populations between 6 and 8
Computing stellar populations between 8 and 10
Computing stellar populations between 10 and 12
Computing stellar populations between 12 and 15


In [10]:
# diagnostics if something looks odd
#print(populations_dataframes.keys())
#print(star_formation_rates)


In [11]:
# Plot of computed stellar populations: one decimated CVI-vs-MI scatter per age bin,
# overlaid on a single 600x600 figure with the y axis inverted.

# NOTE(review): the two-point hv.Scatter([0,1]) presumably only serves as a seed element
# carrying the figure size -- confirm it is not meant to be visible.
cmd = hv.Scatter([0,1]).options(height=600, width=600)
for key in populations_dataframes.keys():
    cmd = cmd * decimate(hv.Scatter(populations_dataframes[key],'CVI','MI').options(invert_yaxis=True))

cmd

## Shaker

In [12]:
# Parameters read by setup1 when it builds the error-free magnitudes:
# ebv is scaled by 3.1 to obtain the V-band term av (presumably E(B-V) -- confirm)
ebv = 0.02
# constant added to the MI and MV magnitudes of every simulated star
distance_modulus = 21.68

In [13]:
def doBVI(key):
    """Placeholder: only prints a progress message for the population `key`."""
    print(f"Preparing shell file for population {key}")
    
def setup1(key):
    """
    Add the error-free magnitude columns to the population stored under `key`.

    Reads the module-level `ebv` and `distance_modulus` and adds two columns to
    populations_dataframes[key], modifying that frame in place:
      i_no_error = MI + ai + distance_modulus
      v_no_error = MV + av + distance_modulus
    where av = 3.1 * ebv and ai = 0.587 * av.
    """
    av = 3.1*ebv
    ai = 0.587 * av
    # The aj, ah and ak terms (0.282, 0.175 and 0.112 times av) were computed here
    # but never used, so they are no longer evaluated.
    population = populations_dataframes[key]
    population['i_no_error'] = population['MI'] + ai + distance_modulus
    population['v_no_error'] = population['MV'] + av + distance_modulus
    

In [14]:
# add the i_no_error / v_no_error columns to every simulated population
for key in populations_dataframes.keys():
    setup1(key)
    #doBVI(key)

In [15]:
# quick look at the first five rows of the 0-1 bin, which now carry the *_no_error columns
populations_dataframes['0-1'][:5]

Unnamed: 0,L/Lo,LogTe,M/Mo,G,INDEV,WR,MU,MB,MV,MR,...,CVI,t,z,D?,Mbol,MJ,MH,MK,i_no_error,v_no_error
1,-0.219,3.786,0.821,4.662,1.0,1.0,5.709,5.929,5.446,5.122,...,0.653,8.0252,0.000505,0.0,5.317,4.433,4.136,4.096,26.508394,27.188
2,-0.23,3.784,0.808,4.657,1.0,1.0,5.748,5.963,5.474,5.147,...,0.66,8.8162,0.000509,0.0,5.344,4.449,4.149,4.109,26.530394,27.216
3,0.592,3.914,1.217,4.532,1.0,1.0,3.496,3.527,3.36,3.273,...,0.183,8.1688,0.000505,0.0,3.291,3.085,3.02,3.01,24.894394,25.102
4,1.614,4.096,1.98,4.451,1.0,1.0,1.128,1.457,1.548,1.588,...,-0.099,8.6153,0.000507,0.0,0.735,1.769,1.809,1.839,23.363394,23.29
5,0.551,3.903,1.178,4.515,1.0,1.0,3.592,3.645,3.46,3.349,...,0.223,8.6036,0.000507,0.0,3.392,3.12,3.038,3.029,24.953394,25.202


In [16]:
# Parse the input error and completeness tables. They are interpolated further down
# in this cell to give every simulated star an error sigma and a completeness level.
# Each file is read as two whitespace-separated columns without a header row
# (sep=r'\s+' replaces the deprecated delim_whitespace=True).
errorsI = pd.read_csv('erroriI.dat', sep=r'\s+', header=None, names=['mag','error'])
errorsV = pd.read_csv('erroriV.dat', sep=r'\s+', header=None, names=['mag','error'])
completI = pd.read_csv('completI.dat', sep=r'\s+', header=None, names=['mag','complet'])
completV = pd.read_csv('completV.dat', sep=r'\s+', header=None, names=['mag','complet'])

# for ease of use, extract columns into arrays
# NOTE(review): np.interp needs the magnitude columns in increasing order -- confirm the
# .dat files are sorted that way.
errorsI_magnitudes = errorsI['mag'].values
errorsV_magnitudes = errorsV['mag'].values
errorsI_errors = errorsI['error'].values
errorsV_errors = errorsV['error'].values
completI_magnitudes = completI['mag'].values
completV_magnitudes = completV['mag'].values
completI_complet = completI['complet'].values
completV_complet = completV['complet'].values

def pick_random_error(sigma):
    """
    Draw the random error to assign to one star.

    The value is a single sample from a normal distribution centred on zero
    whose standard deviation is `sigma`.
    """
    return np.random.normal(loc=0, scale=sigma)

def flag_for_compleness(completeness):
    """
    Decide at random whether a star is kept.

    A number is drawn uniformly from [0, 1); the star is kept (1) when the draw
    falls below `completeness` (a level between 0 and 1) and rejected (0)
    otherwise.
    """
    draw = np.random.random_sample()
    return 1 if draw < completeness else 0
    

# NOTE(review): no random seed is set in the visible part of the notebook, so the
# errors and completeness flags drawn below differ from run to run.
for key in populations_dataframes.keys():

    print(f'Adding errors and completeness columns to population {key}')

    # work directly on the frame stored in the dictionary: the new columns are added in place
    population = populations_dataframes[key]

    # error-free magnitudes as plain float arrays, as needed by np.interp
    i_mags = population['i_no_error'].values.astype(float)
    v_mags = population['v_no_error'].values.astype(float)

    # The sigma / completeness of each star is interpolated from the input tables at its
    # error-free magnitude; the random draws then use the "apply" method of pandas, which
    # calls a function on each element of a column and returns a new column.

    # randomize I errors
    population['sigmaIerr'] = np.interp(i_mags, errorsI_magnitudes, errorsI_errors)
    population['Ierr'] = population['sigmaIerr'].apply(pick_random_error)

    # randomize V errors
    population['sigmaVerr'] = np.interp(v_mags, errorsV_magnitudes, errorsV_errors)
    population['Verr'] = population['sigmaVerr'].apply(pick_random_error)

    # add I completeness
    population['completI'] = np.interp(i_mags, completI_magnitudes, completI_complet)
    population['I_retrieved'] = population['completI'].apply(flag_for_compleness)

    # add V completeness, from the V-band table (the I-band table was used here by mistake)
    population['completV'] = np.interp(v_mags, completV_magnitudes, completV_complet)
    population['V_retrieved'] = population['completV'].apply(flag_for_compleness)

    # add errors to input magnitudes: once a random error column has been computed, add the error
    # to the error-free magnitude and generate a new column
    population['i'] = population['i_no_error'] + population['Ierr']
    population['v'] = population['v_no_error'] + population['Verr']

    # generate new vi column
    population['vi'] = population['v'] - population['i']


Adding errors and completeness columns to population 0-1
Adding errors and completeness columns to population 1-2
Adding errors and completeness columns to population 2-3
Adding errors and completeness columns to population 3-4
Adding errors and completeness columns to population 4-6
Adding errors and completeness columns to population 6-8
Adding errors and completeness columns to population 8-10
Adding errors and completeness columns to population 10-12
Adding errors and completeness columns to population 12-15


In [17]:
# PLOT THE "OBSERVED" simulated diagram

# the two-point scatter carries the figure size; one decimated vi-vs-i scatter per
# age bin is overlaid on it
cmd = hv.Scatter([(-0.5,27),(1.5,18)]).options(height=600, width=600)
for key in populations_dataframes.keys():
    # drop the rows where a retrieved flag is set to 0, i.e. keep only the stars
    # retrieved in both bands (same selection as the one used to write opt_pop.*.dat)
    population = populations_dataframes[key][(populations_dataframes[key]['I_retrieved'] > 0) &
                                                   (populations_dataframes[key]['V_retrieved'] > 0)]
    forplot = population[population['i']>10]
    cmd = cmd * decimate(hv.Scatter(forplot,'vi','i').options(invert_yaxis=True))

cmd

## Diagnostic plots

### Compare errors with expected distribution

In [18]:
# population used for the diagnostic plots of this section
key = '0-1'
# random I error assigned to each star, plotted against its magnitude with errors ('i')
plt1 = hv.Scatter(populations_dataframes[key],'i','Ierr')

In [19]:
# Bin the stars of population `key` in error-free I magnitude (0.5 mag wide bins from 14
# to 26.5) and measure, in every bin, the rms of the assigned errors and the fraction of
# stars flagged as retrieved, to compare them with the input tables.
bins = np.arange(14,27,0.5)
myerrors = []
mycompleteness = []
diagnostic_population = populations_dataframes[key]
for k in range(len(bins)-1):
    lower, upper = bins[k], bins[k+1]
    in_this_bin = diagnostic_population[(diagnostic_population['i_no_error']>lower) &
                                        (diagnostic_population['i_no_error']<upper)]

    # an empty bin has neither an rms nor a retrieved fraction (the latter would
    # divide by zero), so it contributes no point to the curves
    if len(in_this_bin) == 0:
        continue

    bin_centre = (lower+upper)/2
    rms = np.std(in_this_bin['Ierr'].values)
    myerrors.append((bin_centre,rms))
    retrieved = in_this_bin[in_this_bin['I_retrieved']>0]
    mycompleteness.append((bin_centre,len(retrieved)/len(in_this_bin)))

# measured curves overlaid on the input tables (shown as scatter points)
plt2 = hv.Curve(myerrors,'I','Ierr_rms')
plt2 = plt2 * hv.Scatter(errorsI,'mag','error')
plt3 = hv.Curve(mycompleteness, 'I', 'Completeness') * hv.Scatter(completI, 'mag', 'complet')
plt1 + plt2 + plt3

### Convert Pandas structures into a simplified format

This is the same as producing the files called opt_pop.?.dat

In [20]:
# For every age bin keep only the stars retrieved in both bands, reduce them to the
# 'i' and 'vi' columns and write them to opt_pop.<key>.dat (space separated, no
# header, no index).
populations = {}
for key, frame in populations_dataframes.items():
    retrieved_in_both = (frame['I_retrieved']>0) & (frame['V_retrieved']>0)
    populations[key] = frame.loc[retrieved_in_both, ['i','vi']]
    output_name = f'opt_pop.{key}.dat'
    populations[key].to_csv(output_name, sep=" ", index=False, header=None)

In [21]:
# Observed photometry: whitespace-separated columns, lines starting with '#' are ignored
# (sep=r'\s+' replaces the deprecated delim_whitespace=True)
photometry = pd.read_csv('newHST.cal', sep=r'\s+', comment='#', names = ['id','x','y','v','ev','i','ei','chi','sharp'])

In [22]:
# write the observed 'v' and 'i' columns, space separated, without header or index
# NOTE(review): the simulated opt_pop.*.dat files are written with the columns
# ('i', 'vi') while this file holds 'v' and 'i' -- confirm this is the layout the
# minimizer expects for each file.
photometry.to_csv('opt_cmd_dati.dat', columns=['v','i'], sep=" ", index=False, header=None)

## Calculate the cmd_mixer pandas structure

The old cmd mixer had the format:
agemin agemax  ageavg zavg zsig sfr name frac

In [23]:
# list the columns available in a population frame, using the 0-1 bin as an example
populations_dataframes['0-1'].columns

Index(['L/Lo', 'LogTe', 'M/Mo', 'G', 'INDEV', 'WR', 'MU', 'MB', 'MV', 'MR',
       'MI', 'CUB', 'CBV', 'CVR', 'CVI', 't', 'z', 'D?', 'Mbol', 'MJ', 'MH',
       'MK', 'i_no_error', 'v_no_error', 'sigmaIerr', 'Ierr', 'sigmaVerr',
       'Verr', 'completI', 'I_retrieved', 'completV', 'V_retrieved', 'i', 'v',
       'vi'],
      dtype='object')

In [24]:
# Build the cmd_mixer table, one row per age bin.
# The rows are collected in a list and turned into a frame in a single step:
# DataFrame.append, used previously, no longer exists in pandas >= 2.0.
cmd_mixer_columns = ['agemin','agemax','ageavg','zavg','zsig','sfr','key','fraction']
cmd_mixer_rows = []
for key in populations_dataframes.keys():
    bin_population = populations_dataframes[key]
    # Every value goes through a format string so that it is rounded to the number of
    # digits of that format.
    # NOTE(review): 't' is presumably log10 of the age in years (it is exponentiated
    # here) -- confirm against the ZVAR output description.
    agemin = float('%1.3e' % (10**bin_population['t'].min()))
    agemax = float('%1.3e' % (10**bin_population['t'].max()))
    ageavg = float('%1.3e' % (10**bin_population['t'].mean()))
    zavg = float('%9.5f' % bin_population['z'].mean())
    zsig = float('%9.6f' % bin_population['z'].std())
    sfr = float('%10.3e' % star_formation_rates[key])
    fraction = 1.0
    cmd_mixer_rows.append({'agemin': agemin, 'agemax': agemax, 'ageavg': ageavg,
                           'zavg': zavg, 'zsig': zsig, 'sfr': sfr, 'key': key,
                           'fraction': fraction})

# the explicit column list fixes the column order and keeps the frame well formed
# even when there are no populations
cmd_mixer = pd.DataFrame(cmd_mixer_rows, columns=cmd_mixer_columns)
    

In [25]:
# display the assembled mixer table
cmd_mixer

Unnamed: 0,agemin,agemax,ageavg,zavg,zsig,sfr,key,fraction
0,6633.0,1000000000.0,338400000.0,0.00051,2e-06,0.000205,0-1,1.0
1,1000000000.0,2000000000.0,1456000000.0,0.0005,2e-06,0.000252,1-2,1.0
2,2000000000.0,3000000000.0,2473000000.0,0.00049,2e-06,0.00029,2-3,1.0
3,3000000000.0,4000000000.0,3480000000.0,0.00049,2e-06,0.000323,3-4,1.0
4,4000000000.0,5999000000.0,4941000000.0,0.00048,4e-06,0.000183,4-6,1.0
5,6001000000.0,8000000000.0,6954000000.0,0.00046,4e-06,0.000212,6-8,1.0
6,8000000000.0,10000000000.0,8959000000.0,0.00045,4e-06,0.000239,8-10,1.0
7,10000000000.0,12000000000.0,10970000000.0,0.00044,4e-06,0.000267,10-12,1.0
8,12000000000.0,15000000000.0,13430000000.0,0.00042,5e-06,0.000202,12-15,1.0


In [26]:
def add_opti(key):
    """Return the file name 'opt_pop.<key>.dat' for the age-bin label `key`."""
    return f'opt_pop.{key}.dat'

# Write the mixer table with the 'key' column replaced by the matching file name.
# A copy is modified so that cmd_mixer itself keeps the plain bin labels.
# (The former `tmp = []` initialisation was dead code and has been removed.)
tmp = cmd_mixer.copy()
tmp['key'] = tmp['key'].apply(add_opti)
tmp.to_csv('cmd_mixer.dat', header=None, index=False, sep=" ")

In [30]:
# run the minimizer: executes the zvar_opti script (looked up in the current working
# directory) through sh; the value displayed by the cell is the script's exit status
subprocess.call(['sh','zvar_opti'])

0