<h2>Multiple fitting with Differential Evolution (DE) and uncertainty estimation via Metropolis-Hastings</h2>

In [47]:
import numpy as np
import random
from collections import namedtuple
from typing import NamedTuple

In [48]:
# Starting values of the four fit parameters, in the order (R, S, P, D).
# R is the scale factor `r` of func_obj; S, P and D are handed to it as the
# cor1/cor2 level corrections.
# NOTE(review): presumably the Rydberg constant and the s/p/d quantum
# defects -- confirm.
R, S, P, D = 1.0e7, 1.5, 0.5, 0.0
guess = (R, S, P, D)

# Allowed (lower, upper) interval for each parameter, same order as `guess`.
boundR, boundS, boundP, boundD = (1e6, 1e8), (0.5, 2.0), (0.3, 1.5), (0.0, 0.1)
bounds = (boundR, boundS, boundP, boundD)

The experimental data below are grouped by the spectral series to which each line belongs.

In [49]:
# Each record is (level, wavelength, uncertainty). Within a series every line
# shares the same uncertainty, so it is attached while the table is built.
# NOTE(review): wavelengths look like metres -- confirm against the data source.
principal = tuple((level, wavelength, 1e-8) for level, wavelength in (
    (3, 5.86175e-7),
    (4, 3.3003e-7),
))

sharp = tuple((level, wavelength, 1e-8) for level, wavelength in (
    (5, 6.1435e-7),
    (5, 6.1381e-7),
    (6, 5.1326e-7),
    (6, 5.1284e-7),
    (7, 4.7319e-7),
    (7, 4.7294e-7),
    (8, 4.5707e-7),
))

diffuse = tuple((level, wavelength, 1e-8) for level, wavelength in (
    (4, 5.6722e-7),
    (4, 5.6661e-7),
    (5, 4.9618e-7),
    (5, 4.9576e-7),
    (6, 4.6486e-7),
    (6, 4.6450e-7),
    (7, 4.4856e-7),
))

balmer = tuple((level, wavelength, 1e-9) for level, wavelength in (
    (3, 6.5930e-7),
    (4, 4.8580e-7),
    (5, 4.3415e-7),
    (6, 4.1056e-7),
    (7, 3.9760e-7),
))

In [50]:
# Turn every table of records into a 2-D array (one row per record) so the
# columns can be sliced out below.
principal, sharp, diffuse, balmer = map(
    np.array, (principal, sharp, diffuse, balmer)
)

In [51]:
# Number of records (rows) in each series table.
Np, Ns, Nd, Nb = (len(table) for table in (principal, sharp, diffuse, balmer))

In [52]:
# Container for one spectral series: a label plus three parallel columns.
series = namedtuple('series', 'name level wavelength uncertainty')


def _to_series(label, table):
    """Wrap a 2-D record table as a `series`, using columns 0, 1 and 2."""
    level, wavelength, uncertainty = (table[:, col] for col in range(3))
    return series(label, level, wavelength, uncertainty)


principal_data = _to_series('principal', principal)
sharp_data = _to_series('sharp', sharp)
diffuse_data = _to_series('diffuse', diffuse)
balmer_data = _to_series('balmer', balmer)

In [53]:
# Draw a (4 parameters x 10 individuals) block of uniform numbers in [0, 1)
# and stretch each parameter row onto its (lower, upper) interval.
unit_draws = np.random.rand(4, 10)
lower = np.array([bounds[k][0] for k in range(len(unit_draws))])
upper = np.array([bounds[k][1] for k in range(len(unit_draws))])

# Transpose so that each row is one individual (R, S, P, D).
pop = (lower[:, np.newaxis] + unit_draws * (upper - lower)[:, np.newaxis]).T

# When a starting guess is supplied it replaces the first individual.
if guess:
    pop[0] = np.array(guess)

In [54]:
# Show the initial population: one row per individual, columns ordered as in `guess`.
pop

array([[1.00000000e+07, 1.50000000e+00, 5.00000000e-01, 0.00000000e+00],
       [6.85305555e+07, 1.59506440e+00, 1.20311518e+00, 9.67177849e-02],
       [8.14217099e+07, 1.86617464e+00, 1.37312497e+00, 4.51586849e-02],
       [6.96573414e+07, 1.61143587e+00, 1.15020368e+00, 8.95367685e-02],
       [8.16835629e+07, 1.26569832e+00, 7.19223290e-01, 7.17533459e-02],
       [2.49449585e+07, 9.03665658e-01, 8.35522567e-01, 9.51728757e-02],
       [4.23813003e+07, 1.12658956e+00, 3.51013983e-01, 5.05323253e-02],
       [8.60475445e+07, 1.23828466e+00, 1.08666516e+00, 7.30517183e-02],
       [9.10351805e+07, 1.37436019e+00, 1.49173725e+00, 2.05752698e-02],
       [8.09422978e+07, 1.85518994e+00, 1.30324881e+00, 9.74749901e-02]])

In [55]:
def func_obj(r, cor1, cor2, n, yraw, stdraw):
    """Squared residual of one line, scaled by its squared uncertainty.

    The model value is ``1 / (r * ((3 - cor1)**-2 - (n - cor2)**-2))``.

    Parameters
    ----------
    r : scale factor multiplying the difference of the two level terms.
    cor1 : correction subtracted from the fixed level 3.
    cor2 : correction subtracted from the level ``n``.
    n : level of the record (scalar or array).
    yraw : measured value to compare the model against.
    stdraw : uncertainty of ``yraw``.

    Returns
    -------
    ``(yraw - model)**2 / stdraw**2`` -- one chi-square term.
    """
    fixed_term = (3 - cor1) ** (-2)
    level_term = (n - cor2) ** (-2)
    model = 1 / (r * (fixed_term - level_term))
    squared_residual = np.square(yraw - model)
    return squared_residual / np.square(stdraw)

In [56]:
data_collection = (principal_data, sharp_data, diffuse_data, balmer_data)
sizes = (Np, Ns, Nd, Nb)

In [57]:
def least_error_idx(pop: np.ndarray, data_collection: list[NamedTuple], sizes: tuple[int, ...]) -> tuple[int, float]:
    '''
    Identify the element of the population with the least error.

    Every individual in ``pop`` is scored with ``compute_error``.

    Parameters
    ----------
    pop : population, iterated row by row (one individual per row).
    data_collection : series records forwarded unchanged to ``compute_error``.
    sizes : record counts forwarded unchanged to ``compute_error``.

    Returns
    -------
    (index, error) : position of the best individual in ``pop`` and its error.

    Raises
    ------
    ValueError : if ``pop`` is empty (raised by ``np.argmin``).
    '''
    error = np.array(
        [compute_error(ind, data_collection, sizes) for ind in pop],
        dtype=float,
    )
    best_idx = np.argmin(error)
    # Read the value at the returned index so the pair is always consistent;
    # builtin min() can disagree with np.argmin when a NaN is present.
    return best_idx, error[best_idx]

In [58]:
def compute_error(ind, data_collection, sizes):
    """Total misfit of one parameter set against every series.

    Parameters
    ----------
    ind : indexable parameter set; ``ind[0]`` is passed as ``r`` to
        ``func_obj`` and ``ind[1]``..``ind[3]`` supply the corrections.
    data_collection : iterable of records exposing ``name``, ``level``,
        ``wavelength`` and ``uncertainty``. Records whose name is not one of
        'principal', 'sharp', 'diffuse' or 'balmer' are ignored.
    sizes : number of records to use per series, in the fixed order
        (principal, sharp, diffuse, balmer).

    Returns
    -------
    Square root of the sum of all ``func_obj`` terms.
    """
    # series name -> (slot in `sizes`, picker returning the (cor1, cor2) pair)
    layout = {
        'principal': (0, lambda p: (p[1], p[2])),
        'sharp': (1, lambda p: (p[2], p[1])),
        'diffuse': (2, lambda p: (p[2], p[3])),
        'balmer': (3, lambda p: (1, 0)),  # fixed corrections, nothing fitted
    }

    total = 0
    for block in data_collection:
        entry = layout.get(block.name)
        if entry is None:
            continue
        slot, pick = entry
        for k in range(sizes[slot]):
            cor1, cor2 = pick(ind)
            total += func_obj(ind[0],
                              cor1,
                              cor2,
                              block.level[k],
                              block.wavelength[k],
                              block.uncertainty[k])
    return np.sqrt(total)

In [59]:
def diff_evolution(pop, niter, kmut, kcross, data_collection, sizes, param_bounds=None):
    """Differential-evolution search, written as a generator.

    For every individual a trial vector is built as
    ``bestfit + kmut * (pop[a] - pop[b])`` with two distinct random rows
    ``a`` and ``b``. Each parameter except the last takes the trial value
    with probability ``kcross`` (otherwise it keeps the individual's value);
    the last parameter always takes the trial value. A trial component that
    falls outside its bounds is redrawn uniformly within the range that
    parameter currently spans in the population. The trial replaces the
    individual when its error is lower.

    Parameters
    ----------
    pop : 2-D array, one individual per row. It is modified IN PLACE.
    niter : number of passes over the whole population.
    kmut : factor applied to the difference vector.
    kcross : crossover threshold compared against uniform draws in [0, 1).
    data_collection, sizes : forwarded unchanged to ``compute_error``.
    param_bounds : optional sequence of (lower, upper) pairs, one per
        parameter. When omitted, the module-level ``bounds`` is used.

    Yields
    ------
    (bestfit, lst_err) each time a trial improves on the best error so far.

    Notes
    -----
    Nothing runs until the generator is iterated. ``tqdm`` must already be
    imported in the calling namespace at that point.
    """
    limits = bounds if param_bounds is None else param_bounds
    n_params = pop.shape[1]
    idx_list = tuple(range(len(pop)))

    # Score every individual once and keep the scores up to date, instead of
    # re-evaluating the current individual on every comparison.
    errors = np.array(
        [compute_error(ind, data_collection, sizes) for ind in pop],
        dtype=float,
    )
    lsterr_idx = np.argmin(errors)
    lst_err = errors[lsterr_idx]
    # Copy so the reference vector is detached from later in-place row updates.
    bestfit = pop[lsterr_idx].copy()

    for _ in tqdm(range(niter)):
        for i, ind in enumerate(pop):
            rng_idx = np.random.choice(idx_list, 2, replace=False)
            trial = bestfit + kmut * (pop[rng_idx[0]] - pop[rng_idx[1]])
            # Random mask for all parameters but the last, which is always taken.
            cross = np.concatenate(
                (np.random.rand(n_params - 1) <= kcross, np.array([True]))
            )
            trial = np.where(cross, trial, ind)
            for j, param in enumerate(trial):
                lower, upper = limits[j][0], limits[j][1]
                if not (lower <= param <= upper):
                    column = pop[:, j]
                    trial[j] = column.min() + np.random.uniform() * (column.max() - column.min())
            trial_err = compute_error(trial, data_collection, sizes)
            if trial_err < errors[i]:
                pop[i] = trial
                errors[i] = trial_err
                if trial_err < lst_err:
                    bestfit = trial
                    lst_err = trial_err
                    yield bestfit, lst_err

In [60]:
from tqdm import tqdm

# Differential-evolution settings:
#   niter  - passes over the population
#   kmut   - factor applied to the difference vector
#   kcross - crossover threshold
niter, kmut, kcross = 50000, 0.2, 0.6

# diff_evolution is a generator function, so this call only creates the
# generator; the search itself runs when `res` is consumed.
res = diff_evolution(
    pop=pop,
    niter=niter,
    kmut=kmut,
    kcross=kcross,
    data_collection=data_collection,
    sizes=sizes,
)

In [61]:
# Drain the generator (this is where the search loop actually runs) and split
# the yielded (bestfit, error) pairs into two parallel tuples.
# NOTE: if the generator yields nothing, zip(*res) is empty and this
# unpacking raises ValueError.
best, err = zip(*res)

100%|███████████████████████████████████████████████████████████████████████████| 50000/50000 [02:01<00:00, 411.46it/s]


In [62]:
# History of every improved best-fit vector, in the order found; the last
# entry is the best one reached.
best

(array([1.24052443e+07, 1.59342108e+00, 1.37312497e+00, 2.45121946e-03]),
 array([1.13592741e+07, 1.57468209e+00, 1.36073954e+00, 4.47784149e-02]),
 array([1.09845835e+07, 1.59591329e+00, 1.41545862e+00, 4.50653651e-02]),
 array([1.07876668e+07, 1.57673063e+00, 1.38301071e+00, 4.57468811e-02]),
 array([1.10523500e+07, 1.54895199e+00, 1.36073954e+00, 4.37581716e-02]),
 array([1.09496008e+07, 1.57587459e+00, 1.38263907e+00, 4.47510920e-02]),
 array([1.09811075e+07, 1.57894382e+00, 1.38263907e+00, 4.48293313e-02]),
 array([1.05828734e+07, 1.71134362e+00, 1.30702964e+00, 4.47926302e-02]),
 array([1.09845835e+07, 1.73519700e+00, 1.32373750e+00, 4.58945932e-02]),
 array([1.09852786e+07, 1.71518016e+00, 1.31195719e+00, 4.61076455e-02]),
 array([1.11961692e+07, 1.71012385e+00, 1.19560876e+00, 4.61136204e-02]),
 array([1.10735101e+07, 1.57673063e+00, 1.21789293e+00, 4.58494224e-02]),
 array([1.10524006e+07, 1.58553891e+00, 1.21590851e+00, 4.57928256e-02]),
 array([1.09319214e+07, 1.58377726e+00

TODO: implement Metropolis-Hastings

<h2>Get params from file</h2>

In [10]:
# Load the parameter file as a list of raw lines (trailing newlines kept).
with open('exemplo.par', 'r') as param_file:
    filetxt = list(param_file)

In [16]:
# Show the raw lines read from the parameter file.
filetxt

['Np= 2             numero de linhas de Principal, Nitida, Difusa, Balmer (max 15)\n',
 'Nn= 7\n',
 'Nd= 7\n',
 'Nb= 0\n',
 'R= 1.0e7          valores iniciais de R, S, P, D\n',
 'S= 1.5\n',
 'P= 0.5\n',
 'D= 0.0\n',
 'minR= 1e6         limites inferior e superior de R, S, P, D\n',
 'maxR= 1e8\n',
 'minS= 0.5\n',
 'maxS= 2.0\n',
 'minP= 0.3\n',
 'maxP= 1.5\n',
 'minD= 0.0\n',
 'maxD= 0.1\n',
 'Niteracao= 500000   numero máximo de iteracoes\n',
 'Nfilhos= 20        numero de conjuntos de parametros em cada geracao  (max 40)\n',
 'Kmutacao= 0.2      parametro de mutacao  (entre 0 e 1)\n',
 'Kconverge= 0.6     fator de convergencia para as geracoes sucessivas (entre 0 e 1)\n',
 'crit_erro= 1e-5    criterio de parada, variacao minima do erro relativo (ainda não está em uso) \n',
 'crit_iter= 10000    criterio de parada, numero maximo de iteracoes sem evolucao\n',
 'Principal=  \n',
 '3 5.86175E-7  1e-8           Np linhas da serie principal e incertezas (em m)\n',
 '4 3.3003E-7  1e-8   \n'