Skip to content

Commit

Permalink
Merge pull request #14 from mcyc/aic_fix
Browse files Browse the repository at this point in the history
Successfully implemented a fix on how AIC & AICc is calculated. Also fixed the QhullError for guess interpolation.
  • Loading branch information
mcyc committed Jul 20, 2022
2 parents 88bace9 + b0ac1ec commit e1e70c1
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 119 deletions.
91 changes: 80 additions & 11 deletions mufasa/UltraCube.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ def __init__(self, cubefile=None, cube=None, snr_min=None, rmsfile=None, cnv_fac
self.Tpeak_maps = {}
self.chisq_maps = {}
self.rchisq_maps = {}
self.rss_maps = {}
self.NSamp_maps = {}
self.AICc_maps = {}
self.master_model_mask = None
self.snr_min = None
self.snr_min = 0.0
self.cnv_factor = cnv_factor
self.n_cores = multiprocessing.cpu_count()

Expand Down Expand Up @@ -148,6 +149,14 @@ def get_rms(self, ncomp):
self.rms_maps[compID] = get_rms(self.residual_cubes[compID])
return self.rms_maps[compID]

def get_rss(self, ncomp, mask=None):
# residual sum of squares
if mask is None:
mask = self.master_model_mask
# note: a mechanism is needed to make sure NSamp is consistient across the models
self.rss_maps[str(ncomp)], self.NSamp_maps[str(ncomp)] = \
calc_rss(self, ncomp, usemask=True, mask=mask, return_size=True)


def get_Tpeak(self, ncomp):
compID = str(ncomp)
Expand Down Expand Up @@ -176,12 +185,12 @@ def get_AICc(self, ncomp, update=True, **kwargs):

compID = str(ncomp)
if not compID in self.chisq_maps:
self.get_chisq(ncomp, **kwargs)
self.get_rss(ncomp, **kwargs)

# note that zero component is assumed to have no free-parameter (i.e., no fitting)
p = ncomp*4

AICc_map = get_aic(chisq=self.chisq_maps[compID], p=p, N=self.NSamp_maps[compID])
AICc_map = aic.AICc(rss=self.rss_maps[compID], p=p, N=self.NSamp_maps[compID])

if update:
self.AICc_maps[compID] = AICc_map
Expand Down Expand Up @@ -282,6 +291,25 @@ def convolve_sky_byfactor(cube, factor, savename=None, **kwargs):
#======================================================================================================================#
# UltraCube based methods


def calc_rss(ucube, compID, usemask=True, mask=None, return_size=True):
# calculate residual sum of squares

if isinstance(compID, int):
compID = str(compID)

cube = ucube.cube

if compID == '0':
# the zero component model is just a y = 0 baseline
modcube = np.zeros(cube.shape)
else:
modcube = ucube.pcubes[compID].get_modelcube(multicore=ucube.n_cores)

gc.collect()
return get_rss(cube, modcube, expand=20, usemask=usemask, mask=mask, return_size=return_size)


def calc_chisq(ucube, compID, reduced=False, usemask=False, mask=None):

if isinstance(compID, int):
Expand Down Expand Up @@ -330,12 +358,55 @@ def calc_AICc_likelihood(ucube, ncomp_A, ncomp_B, ucube_B=None):
#======================================================================================================================#
# statistics tools

'''
def get_aic(chisq, p, N=None):
# calculate AIC or AICc values
if N is None:
return aic.AIC(chisq, p)
else:
return aic.AICc(chisq, p, N)
'''


def get_rss(cube, model, expand=20, usemask = True, mask = None, return_size=True):
'''
Calculate residual sum of squares (RSS)
cube : SpectralCube
model: numpy array
expand : int
Expands the region where the residual is evaluated by this many channels in the spectral dimension
reduced : boolean
Whether or not to return the reduced chi-squared value or not
mask: boolean array
A mask stating which array elements the chi-squared values are calculated from
'''

if usemask:
if mask is None:
mask = model > 0
else:
mask = ~np.isnan(model)

residual = get_residual(cube, model)

# creating mask over region where the model is non-zero,
# plus a buffer of size set by the expand keyword.
mask = expand_mask(mask, expand)
mask = mask.astype(np.float)

# note: using nan-sum may walk over some potential bad pixel cases
rss = np.nansum((residual * mask)**2, axis=0)

if return_size:
return rss, np.nansum(mask, axis=0)
else:
return rss



def get_chisq(cube, model, expand=20, reduced = True, usemask = True, mask = None):
Expand All @@ -358,10 +429,6 @@ def get_chisq(cube, model, expand=20, reduced = True, usemask = True, mask = Non
A mask stating which array elements the chi-squared values are calculated from
'''

#model = np.zeros(cube.shape)

#cube = cube.with_spectral_unit(u.Hz, rest_value = freq_dict['oneone']*u.Hz)

if usemask:
if mask is None:
mask = model > 0
Expand All @@ -376,13 +443,14 @@ def get_chisq(cube, model, expand=20, reduced = True, usemask = True, mask = Non
mask = mask.astype(np.float)

# note: using nan-sum may walk over some potential bad pixel cases
chisq = np.nansum((residual * mask)**2, axis=0)
chisq = np.nansum((residual * mask) ** 2, axis=0)

if reduced:
# assuming n_size >> n_parameters
chisq /= np.nansum(mask, axis=0)

rms = get_rms(residual)
chisq /= rms**2
chisq /= rms ** 2

gc.collect()

Expand All @@ -394,6 +462,7 @@ def get_chisq(cube, model, expand=20, reduced = True, usemask = True, mask = Non
return chisq, np.nansum(mask, axis=0)



def get_masked_moment(cube, model, order=0, expand=10, mask=None):
'''
create masked moment using either the model
Expand Down Expand Up @@ -470,9 +539,9 @@ def expand_mask(mask, expand):
def get_rms(residual):
# get robust estimate of the rms from the fit residual
diff = residual - np.roll(residual, 2, axis=0)
print("finite diff cube size: {}".format(np.sum(np.isfinite(diff))))
#print("finite diff cube size: {}".format(np.sum(np.isfinite(diff))))
rms = 1.4826 * np.nanmedian(np.abs(diff), axis=0) / 2**0.5
print("finite rms map size: {}".format(np.sum(np.isfinite(rms))))
#print("finite rms map size: {}".format(np.sum(np.isfinite(rms))))
gc.collect()
return rms

Expand Down
18 changes: 10 additions & 8 deletions mufasa/aic.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,32 +83,34 @@ def get_comp_AICc(cube, model1, model2, p1, p2):
return aicc1, aicc2


def AIC(chisq, p):
def AIC(rss, p, N):
'''
Calculate the Akaike information criterion based on the provided chi-squared values
:param chisq:
Chi-squared values
:param rss:
Residual sum of squares
:param p:
Number of parameters
:param N:
Number of samples
:return:
'''
return chisq + 2*p
return N * np.log(rss/N) + 2*p


def AICc(chisq, p, N):
def AICc(rss, p, N):
'''
Calculate the corrected Akaike information criterion based on the provided chi-squared values
corrected AIC (AICc) approaches that of the AIC value when chisq >> p^2
:param chisq:
Chi-squared values
:param rss:
Residual sum of squares
:param p:
Number of parameters
:param N:
:return:
'''
top = 2*p*(p+1)
bottom = N - p - 1
return AIC(chisq, p) + top/bottom
return AIC(rss, p, N) + top/bottom


def likelihood(aiccA, aiccB):
Expand Down
136 changes: 85 additions & 51 deletions mufasa/guess_refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from scipy.interpolate import CloughTocher2DInterpolator as intp
from scipy.interpolate import griddata
from FITS_tools.hcongrid import get_pixel_mapping
from astropy.convolution import Gaussian2DKernel, convolve

from scipy.spatial.qhull import QhullError

#=======================================================================================================================

Expand Down Expand Up @@ -71,70 +74,86 @@ def guess_from_cnvpara(data_cnv, header_cnv, header_target, mask=None):
data_cnv[data_cnv == 0] = np.nan
data_cnv = data_cnv[0:npara*ncomp]

def tautex_renorm(taumap, texmap, tau_thresh = 0.3, tex_thresh = 10.0):

# attempt to re-normalize the tau & text values at the optically thin regime (where the two are degenerate)
isthin = np.logical_and(taumap < tau_thresh, np.isfinite(taumap))
texmap[isthin] = texmap[isthin]*taumap[isthin]/tau_thresh
taumap[isthin] = tau_thresh

# optically thin gas are also unlikely to have high spatial density and thus high Tex
tex_thin = 3.5 # note: at Tk = 30K, n = 1e3, N = 1e13, & sig = 0.2 km.s --> Tex = 3.49 K, tau = 0.8
hightex = np.logical_and(texmap > tex_thresh, np.isfinite(texmap))
texmap[hightex] = tex_thin
taumap[hightex] = texmap[hightex]*taumap[hightex]/tex_thin

# note, tau values that are too low will be taken care of by refine_each_comp()
return taumap, texmap


def refine_each_comp(guess_comp, mask=None):
# refine guesses for each component, with values outside ranges specified below removed

Tex_min = 3.0
Tex_max = 8.0
Tau_min = 0.2
Tau_max = 8.0

disksize = 1.0

if mask is None:
mask = master_mask(guess_comp)

guess_comp[0] = refine_guess(guess_comp[0], min=None, max=None, mask=mask, disksize=disksize)
guess_comp[1] = refine_guess(guess_comp[1], min=None, max=None, mask=mask, disksize=disksize)

# re-normalize the degenerated tau & text for the purpose of estimate guesses
guess_comp[3], guess_comp[2] = tautex_renorm(guess_comp[3], guess_comp[2], tau_thresh = 0.1)

# place a more "strict" limits for Tex and Tau guessing than the fitting itself
guess_comp[2] = refine_guess(guess_comp[2], min=Tex_min, max=Tex_max, mask=mask, disksize=disksize)
guess_comp[3] = refine_guess(guess_comp[3], min=Tau_min, max=Tau_max, mask=mask, disksize=disksize)
return guess_comp

for i in range (0, ncomp):
data_cnv[i*npara:i*npara+npara] = refine_each_comp(data_cnv[i*npara:i*npara+npara], mask)

# regrid the guess back to that of the original data
hdr_final = get_celestial_hdr(header_target)

kernel = Gaussian2DKernel(1)

guesses_final = []

# regrid the guesses
for gss in data_cnv:

newmask = np.isfinite(gss)
# removal holes with areas that smaller than a 5 by 5 square
newmask = remove_small_holes(newmask, 25)
# create a mask to regrid over
newmask = regrid(newmask, hdr_conv, hdr_final, dmask=None, method='nearest')
newmask = newmask.astype('bool')
guesses_final.append(regrid(gss, hdr_conv, hdr_final, dmask=newmask))

new_guess = regrid(gss, hdr_conv, hdr_final, dmask=newmask)

# expand the interpolation a bit, since regridding can often miss some pixels due to aliasing
newmask_l = dilation(newmask)
newmask_l = dilation(newmask_l)
new_guess_cnv = convolve(new_guess, kernel, boundary='extend')

new_guess_cnv[~newmask_l] = np.nan
# retrain the originally interpolataed values within the original mask
mask_finite = np.isfinite(new_guess)
new_guess_cnv[mask_finite] = new_guess[mask_finite]
guesses_final.append(new_guess_cnv)

return np.array(guesses_final)



def tautex_renorm(taumap, texmap, tau_thresh = 0.3, tex_thresh = 10.0):

# attempt to re-normalize the tau & text values at the optically thin regime (where the two are degenerate)
isthin = np.logical_and(taumap < tau_thresh, np.isfinite(taumap))
texmap[isthin] = texmap[isthin]*taumap[isthin]/tau_thresh
taumap[isthin] = tau_thresh

# optically thin gas are also unlikely to have high spatial density and thus high Tex
tex_thin = 3.5 # note: at Tk = 30K, n = 1e3, N = 1e13, & sig = 0.2 km.s --> Tex = 3.49 K, tau = 0.8
hightex = np.logical_and(texmap > tex_thresh, np.isfinite(texmap))
texmap[hightex] = tex_thin
taumap[hightex] = texmap[hightex]*taumap[hightex]/tex_thin

# note, tau values that are too low will be taken care of by refine_each_comp()
return taumap, texmap


def refine_each_comp(guess_comp, mask=None):
# refine guesses for each component, with values outside ranges specified below removed

Tex_min = 3.0
Tex_max = 8.0
Tau_min = 0.2
Tau_max = 8.0

disksize = 1.0

if mask is None:
mask = master_mask(guess_comp)

guess_comp[0] = refine_guess(guess_comp[0], min=None, max=None, mask=mask, disksize=disksize)
guess_comp[1] = refine_guess(guess_comp[1], min=None, max=None, mask=mask, disksize=disksize)

# re-normalize the degenerated tau & text for the purpose of estimate guesses
guess_comp[3], guess_comp[2] = tautex_renorm(guess_comp[3], guess_comp[2], tau_thresh = 0.1)

# place a more "strict" limits for Tex and Tau guessing than the fitting itself
guess_comp[2] = refine_guess(guess_comp[2], min=Tex_min, max=Tex_max, mask=mask, disksize=disksize)
guess_comp[3] = refine_guess(guess_comp[3], min=Tau_min, max=Tau_max, mask=mask, disksize=disksize)
return guess_comp



def simple_para_clean(pmaps, ncomp, npara=4):
# clean parameter maps based on their error values

Expand Down Expand Up @@ -214,15 +233,30 @@ def refine_guess(map, min=None, max=None, mask=None, disksize=1):
mask = np.isfinite(map)
mask = mask_cleaning(mask)

# interpolate over the dmask footprint
xline = np.arange(map.shape[1])
yline = np.arange(map.shape[0])
X,Y = np.meshgrid(xline, yline)
itpmask = np.isfinite(map)
C = intp((X[itpmask],Y[itpmask]), map[itpmask])

# interpolate over the dmask footprint
zi = C(X*mask,Y*mask)
def interpolate(map, mask):
# interpolate over the dmask footprint
xline = np.arange(map.shape[1])
yline = np.arange(map.shape[0])
X,Y = np.meshgrid(xline, yline)
itpmask = np.isfinite(map)
C = intp((X[itpmask],Y[itpmask]), map[itpmask])

# interpolate over the dmask footprint
zi = C(X*mask,Y*mask)
return zi

try:
# interpolate the mask
zi = interpolate(map, mask)
except QhullError as e:
print("[WARNING]: qhull input error found; astropy convolve will be used instead")
#print(e)
# use astropy convolve as a proxi for interpolation
kernel = Gaussian2DKernel(3)
zi = convolve(map, kernel, boundary='extend')
# retrain the original median_filtered map over its original positions
map_finite = np.isfinite(map)
zi[map_finite] = map[map_finite]

return zi

Expand Down
Loading

0 comments on commit e1e70c1

Please sign in to comment.