In [294]:
# adds parent dir to python path
import sys
sys.path.insert(0, '..')

#system lib
import os
import json
from importlib import reload
from multiprocessing import Pool
import warnings
warnings.filterwarnings('ignore')

# 3rd party lib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib auto

# astro lib
from astropy.io import fits
import sep

# my lib
import ImageTools as it
import DataTools as dt
import gphelper
from gphelper import GPHelper

Using matplotlib backend: Qt5Agg


In [10]:
def make_nonnegative(img):
    """Shift ``img`` so that its smallest value becomes strictly positive.

    When the minimum of ``img`` is zero or negative, ``|min| + 1e-3`` is
    added to every element *in place*; otherwise the array is left untouched.

    Args:
        img: numpy array of pixel values. It is modified in place when a
            shift is applied.

    Returns:
        The same array object that was passed in.
    """
    lowest = img.min()
    if lowest <= 0:
        # in-place add: the caller's array is shifted as well
        img += np.abs(lowest) + 1e-3
    return img

def get_spheroids(num=10, band='h'):
    """Load the image and segmentation map of every source listed in ``../spheroids``.

    Each line of ``../spheroids`` is a source name. For a name ``s`` the files
    ``GDS_<s>_<band>.fits`` and ``GDS_<s>_segmap.fits`` are read from
    ``$HOME/Documents/astro_data/orig_images``.

    Args:
        num: accepted for call compatibility but not used; every listed
            source is always loaded.
        band: band tag substituted into the image file name.

    Returns:
        A list of ``(img, segmap, img_id)`` tuples, one per listed source.
        ``img`` and ``segmap`` are copies of the FITS data and ``img_id`` is
        the second ``_``-separated field of the source name, parsed as int.
    """
    with open('../spheroids', 'r') as listing:
        names = [line.strip() for line in listing.readlines()]

    # full path template; the two slots are (source name, band or 'segmap')
    template = os.path.join(
        os.getenv('HOME'), 'Documents/astro_data/orig_images', 'GDS_{}_{}.fits'
    )

    def load(name):
        # copies detach the arrays from whatever astropy returned
        image = fits.getdata(template.format(name, band))
        segmentation = fits.getdata(template.format(name, 'segmap'))
        return image.copy(), segmentation.copy(), int(name.split('_')[1])

    return [load(name) for name in names]
        
def process_img(args):
    """Measure a smoothed radial profile and Petrosian quantities for one source.

    Steps: replace the pixels of other sources with sky pixels, subtract a
    ``sep`` background, sort pixels by distance from the source centre, smooth
    the flux-vs-radius relation with ``dt.loessc_p``, resample it on 100
    evenly spaced radii, and locate the radius where the ratio
    ``eta = fs * area / enclosed_flux`` equals 0.2.

    Args:
        args: ``(img, segmap, img_id)`` tuple. ``img`` is the image array,
            ``segmap`` the segmentation map (0 is treated as sky) and
            ``img_id`` the segmap value of the target source. The input
            image is not modified.

    Returns:
        Tuple ``(rs, fs, Rp, Ip, etas, img_id)``: the 100 resampled radii,
        the smoothed non-negative flux at those radii, the radius where
        eta = 0.2, the flux interpolated at that radius, the eta curve, and
        the source id.

    Exceptions raised by numpy, ``sep`` or the project helpers (for example
    when no usable part of the eta curve remains) propagate to the caller.
    """
    def denoise(img, segmap, img_id):
        # Work on a copy so np.place does not overwrite the caller's image.
        img = img.copy()
        noise_bank = img[segmap == 0].flatten()
        other_source = np.logical_and(segmap != 0, segmap != img_id)
        # overwrite neighbouring sources with sky pixel values
        np.place(img, other_source, noise_bank)

        # sep needs native byte order. astype converts the values correctly
        # whatever the input byte order is, and does not rely on
        # ndarray.newbyteorder, which was removed in NumPy 2.
        sep_arr = img.astype(img.dtype.newbyteorder('='))
        bkg = sep.Background(sep_arr, mask=segmap == img_id, bw=10, bh=10)

        return img - bkg

    def get_rs_and_fs(img, src_map):
        cx, cy = it.img_center(img, src_map)
        # With the default 'xy' indexing the first argument spans the columns,
        # so the grids have the same shape as img even for non-square images.
        xs, ys = np.meshgrid(np.arange(img.shape[1]), np.arange(img.shape[0]))
        rs = np.sqrt(np.square(xs - cx) + np.square(ys - cy))

        rs = rs.flatten()
        fs = img.flatten()
        order = np.argsort(rs)

        return rs[order], fs[order]

    img, segmap, img_id = args
    src_map = segmap == img_id
    img = denoise(img, segmap, img_id)

    rs, fs = get_rs_and_fs(img, src_map)

    # smooth flux vs radius; the window is 5% of the radial range
    dr = 0.05 * (rs.max() - rs.min())
    fs = dt.loessc_p(rs, fs, dr, pnum=2)
    fs[fs < 0] = 0

    # resample onto an even radial grid
    num_interp = 100
    _rs = np.linspace(rs.min(), rs.max(), num_interp)
    fs = np.interp(_rs, rs, fs)
    rs = _rs

    # Enclosed flux: the central disc plus annuli weighted by the mean flux
    # of their two bounding radii (same running sum as an explicit loop).
    annuli = np.pi * np.diff(rs ** 2) * ((fs[1:] + fs[:-1]) / 2)
    L = np.cumsum(np.concatenate(([np.pi * rs[0] ** 2 * fs[0]], annuli)))

    A = np.pi * rs ** 2
    # NOTE(review): if rs[0] == 0 both A[0] and L[0] are 0, so etas[0] is NaN.
    etas = fs * A / L
    r_lim = rs < 25.0

    # Keep only the leading stretch where eta is strictly decreasing.
    # np.argmax returns 0 when nothing is True, which would throw away the
    # whole profile, so "never rises" is handled explicitly.
    rising = np.diff(etas) >= 0
    first_rise = np.argmax(rising) if rising.any() else len(etas)
    mono_dec = np.arange(len(etas)) < first_rise
    r_lim = np.logical_and(r_lim, mono_dec)

    # np.interp needs increasing sample points, hence the flips
    Rp = np.interp(0.2, np.flipud(etas[r_lim]), np.flipud(rs[r_lim]))
    Ip = np.interp(Rp, rs, fs)

    return (rs, fs, Rp, Ip, etas, img_id)

In [64]:
# Load the (img, segmap, img_id) triples of every source in the H, J, V and Z bands.
spheroids_h, spheroids_j, spheroids_v, spheroids_z = (
    get_spheroids(-1, band) for band in ('h', 'j', 'v', 'z')
)

In [4]:
# Read the H-band results saved earlier as JSON (a list of per-source records).
with open('h_vals.json', 'r') as f:
    processed_h = json.loads(f.read())

In [5]:
# ids of the sources present in the saved H-band results
ids = [h['id'] for h in processed_h]
kept_ids = set(ids)  # set membership instead of scanning the list per source

# Drop every source that was excluded from the H-band results.  All four band
# lists are filtered together (H included): leaving spheroids_h unfiltered
# would make later zip(spheroids_h, spheroids_j, ...) loops pair different
# sources with each other.
kept = [
    bands
    for bands in zip(spheroids_h, spheroids_j, spheroids_v, spheroids_z)
    if bands[1][-1] in kept_ids
]

spheroids_h = [bands[0] for bands in kept]
spheroids_j = [bands[1] for bands in kept]
spheroids_v = [bands[2] for bands in kept]
spheroids_z = [bands[3] for bands in kept]

In [100]:
%%time
# Run process_img on every source in all four bands.  A source is kept only
# when all four bands succeed, so the four result lists stay index-aligned.
processed_h, processed_j, processed_v, processed_z = [], [], [], []
# (img_id, error text) for each source dropped because a band failed
skipped = []

total = len(spheroids_j)
for count, (h, j, v, z) in enumerate(zip(spheroids_h, spheroids_j, spheroids_v, spheroids_z)):
    print(count/total, end='\r')
    img_id = h[-1]

    try:
        h = process_img(h)
        j = process_img(j)
        v = process_img(v)
        z = process_img(z)
    except Exception as e:
        # best effort: keep going, but remember what was dropped and why
        skipped.append((img_id, repr(e)))
        continue

    processed_h.append(h)
    processed_j.append(j)
    processed_v.append(v)
    processed_z.append(z)

print(f'processed {len(processed_h)} sources, skipped {len(skipped)}')
for img_id, reason in skipped:
    print(f'  skipped {img_id}: {reason}')

CPU times: user 9min, sys: 2min 59s, total: 11min 59s
Wall time: 1h 14min 47s


In [94]:
%%time
# Re-run process_img on the H-band sources whose id is listed in `ids`.
processed_h = []
# (img_id, error text) for each H-band source that failed
skipped_h = []

# progress is measured against the list actually being iterated
total = len(spheroids_h)
for count, h in enumerate(spheroids_h):
    print(count/total, end='\r')
    img_id = h[-1]

    if img_id not in ids:
        continue

    try:
        result = process_img(h)
    except Exception as e:
        # best effort: keep going, but remember what was dropped and why
        skipped_h.append((img_id, repr(e)))
        continue

    processed_h.append(result)

print(f'processed {len(processed_h)} sources, skipped {len(skipped_h)}')
for img_id, reason in skipped_h:
    print(f'  skipped {img_id}: {reason}')


CPU times: user 2min 4s, sys: 42.2 s, total: 2min 46s
Wall time: 17min 24s


In [85]:
# ids of the sources that made it through J-band processing (last tuple field)
ids = [img_id for *_, img_id in processed_j]

In [159]:
# Save: one '<band>_vals.json' file per band, each holding a list of
# per-source records.  Array fields go through dt._nmpy_encode first.
results_by_band = {
    'h': processed_h,
    'j': processed_j,
    'v': processed_v,
    'z': processed_z,
}

for b, data in results_by_band.items():
    vals = [
        {
            'rs' : dt._nmpy_encode(rs),
            'fs' : dt._nmpy_encode(fs),
            'rp' : rp,
            'ip' : ip,
            'etas' : dt._nmpy_encode(etas),
            'id' : img_id
        }
        for rs, fs, rp, ip, etas, img_id in data
    ]

    with open(b+'_vals.json', 'w') as f:
        json.dump(vals, f)

In [96]:
# Load: rebuild the per-band result lists from the '<band>_vals.json' files.
# Each record becomes a (rs, fs, rp, ip, etas, id) tuple, with the array
# fields passed through dt._nmpy_decode.
processed_h, processed_j, processed_v, processed_z = [], [], [], []
targets = {
    'h': processed_h,
    'j': processed_j,
    'v': processed_v,
    'z': processed_z,
}

for b, l in targets.items():
    with open(b+'_vals.json', 'r') as f:
        vals = json.load(f)

    l.extend(
        (
            dt._nmpy_decode(v['rs']),
            dt._nmpy_decode(v['fs']),
            v['rp'],
            v['ip'],
            dt._nmpy_decode(v['etas']),
            v['id'],
        )
        for v in vals
    )


In [393]:
def rms(a):
    """Root mean square of the values in ``a``."""
    return np.sqrt(np.mean(np.square(a)))


# For every source and band: mean and std of the sky pixels (segmap == 0) and
# the summed flux of the source pixels divided by the sky rms.
noise = []
for bands in zip(spheroids_h, spheroids_j, spheroids_v, spheroids_z):
    per_band = []
    for img, segmap, img_id in bands:
        sky = img[segmap == 0]
        source_flux = np.sum(img[segmap == img_id])
        per_band.append((np.mean(sky), np.std(sky), source_flux/rms(sky)))
    noise.append(tuple(per_band))

# One comma-separated row per source: mean,std,snr for H, then J, V and Z.
with open('signal_to_noise', 'w') as f:
    for params in noise:
        f.write(','.join(str(i) for var in params for i in var) + "\n")

In [234]:
# Common radial grid: 50 points on [0, 1) followed by 950 points on [1, 20].
interped_marks = np.concatenate([np.linspace(0, 1, 50, endpoint=False), np.linspace(1, 20, 950)])
h_ratios, j_ratios, v_ratios, z_ratios = [], [], [], []
h_vals, j_vals, v_vals, z_vals = [], [], [], []

vals_by_band = (h_vals, j_vals, v_vals, z_vals)
ratios_by_band = (h_ratios, j_ratios, v_ratios, z_ratios)

for bands in zip(processed_h, processed_j, processed_v, processed_z):
    radii = [band[0] for band in bands]
    fluxes = [band[1] for band in bands]

    # Pad all four profiles by the same offset when any of them has a value
    # <= 0, so the ratios below never divide by zero.  The offset uses
    # |min| + 1e-3 (as make_nonnegative does); adding a negative minimum
    # would push the profiles further down instead of up.
    if any(np.any(f <= 0) for f in fluxes):
        epsilon = abs(min(f.min() for f in fluxes)) + 1e-3
        # out-of-place add: the arrays stored in processed_* stay untouched
        fluxes = [f + epsilon for f in fluxes]

    # Normalisation (radii by the H-band rp, fluxes by each band's own ip)
    # is intentionally disabled; profiles are compared in their stored units.

    # Resample each band on the common grid; NaN outside the measured range.
    interped = [
        np.interp(interped_marks, r, f, left=np.nan, right=np.nan)
        for r, f in zip(radii, fluxes)
    ]
    h_interp = interped[0]

    for vals, ratios, f in zip(vals_by_band, ratios_by_band, interped):
        vals.append(f)
        # ratio of every band to H (the H entry is therefore 1 or NaN)
        ratios.append(f/h_interp)

In [377]:
# Stack the per-source band/H ratio curves: rows are sources, columns are
# the radii in interped_marks.
stacked_j = np.array(j_ratios)
stacked_v = np.array(v_ratios)
stacked_z = np.array(z_ratios)

gp_vals = {}
for b, coll in zip(['J', 'V', 'Z'],[stacked_j, stacked_v, stacked_z]):
    # NaN-aware median and standard deviation across sources at each radius
    f_50 = np.nanpercentile(coll, 50, axis=0)
    f_std = np.nanstd(coll, axis=0)
    # radii where at least one source has a finite ratio
    valid = np.isfinite(coll).sum(axis=0) >= 1

    # smooth both curves; the window is 5% of the radial range
    xs = interped_marks.copy()
    dx = 0.05*(xs.max()-xs.min())
    f_50 = dt.loessc_p(xs[valid], f_50[valid], dx, pnum=2)
    f_std = dt.loessc_p(xs[valid], f_std[valid], dx, pnum=2)

    # dt.pad_line returns the (x, y) pair of the padded line; xs is only
    # replaced by the second call so both calls see the same input radii.
    _, f_50 = dt.pad_line(xs[valid], f_50, 30, 10, append=False)
    xs, f_std = dt.pad_line(xs[valid], f_std, 30, 10, append=False)

    # inputs for the GP fit: radii, median ratio and its spread
    gp_vals[b] = {'x': xs, 'y': f_50, 'a': f_std}

    positive = xs > 0
    plt.figure()
    plt.title(f'Ratio Distribution for {b}/H')
    plt.plot(xs[positive], f_50[positive], color='r', label='$median$', zorder=100)
    # The shaded band is median +/- one standard deviation (clipped at zero),
    # so it is labelled as such rather than as a percentile range.
    plt.fill_between(xs[positive],
                     np.maximum(f_50[positive]-f_std[positive], np.zeros_like(f_50[positive])),
                     f_50[positive]+f_std[positive], color='r', alpha=0.45, label=r"$\pm 1\sigma$", zorder=100)
    plt.legend()
    plt.show()
    

In [389]:
# Re-import gphelper so edits made to the module since kernel start are used.
reload(gphelper)
from gphelper import GPHelper


gp_j = GPHelper()
gp_v = GPHelper()
gp_z = GPHelper()

# Only the points with x <= 5 are used for the fits.
x = gp_vals['J']['x']
mask = x <= 5


def _fit_inputs(band):
    """Return the (X, y, a) fit arguments of one band, restricted to ``mask``."""
    return x[mask,np.newaxis], gp_vals[band]['y'][mask], gp_vals[band]['a'][mask]


gp_j.fit(*_fit_inputs('J'), length_scale=0.35, optimize='sigma_n')
gp_v.fit(*_fit_inputs('V'), length_scale=0.5, optimize='sigma_n')
gp_z.fit(*_fit_inputs('Z'), length_scale=0.5, optimize='sigma_n')

Optimization terminated successfully.
         Current function value: 61.932869
         Iterations: 14
         Function evaluations: 28
Optimization terminated successfully.
         Current function value: 151.801477
         Iterations: 15
         Function evaluations: 30
Optimization terminated successfully.
         Current function value: -35.186605
         Iterations: 18
         Function evaluations: 36


<gphelper.GPHelper at 0x7f6afd8f0b38>

In [392]:
# Overlay each fitted GP (mean +/- std) on the data it was fitted to
# (median ratio +/- spread), one figure per band.
for b, gp in (('J', gp_j), ('V', gp_v), ('Z', gp_z)):
    x_fit = x[mask]
    data_y = gp_vals[b]['y'][mask]
    data_a = gp_vals[b]['a'][mask]

    plt.figure()
    plt.title(b + ' Ratio GP Fit')
    plt.xlim(0,5)

    y, std = gp.predict(x[mask,np.newaxis], return_std=True)
    plt.plot(x_fit, y, label='gp')
    plt.fill_between(x_fit, y-std, y+std, alpha=0.35)

    plt.plot(x_fit, data_y, label='data')
    plt.fill_between(x_fit, data_y-data_a, data_y+data_a, alpha=0.35)
    plt.legend()
    

In [391]:
# Write the parameters of each fitted GP to its own JSON file.
for gp, save_file in ((gp_j, 'gp_j.json'), (gp_v, 'gp_v.json'), (gp_z, 'gp_z.json')):
    gp.save_params(save_file=save_file)

In [380]:
# Dump the H-band (Rp, Ip) pair of every source, one "Rp,Ip" line each.
with open('rp_ip', 'w') as f:
    f.writelines(
        f'{Rp},{Ip}\n'
        for (_rs, _fs, Rp, Ip, _etas, _img_id) in processed_h
    )

In [198]:
# One scatter figure per band showing every source's band/H ratio curve.
# NOTE(review): H and J are drawn with markersize=1 while V and Z use the
# default marker size -- confirm whether that difference is intended.
ratio_panels = (
    ('H', h_ratios, {'markersize': 1}),
    ('J', j_ratios, {'markersize': 1}),
    ('V', v_ratios, {}),
    ('Z', z_ratios, {}),
)

for title, ratios, marker_kw in ratio_panels:
    plt.figure()
    plt.title(title)
    plt.ylim(-.5, 2.0)
    plt.xlim(0, 10)
    for f in ratios:
        plt.plot(interped_marks, f, '.', **marker_kw)

In [170]:
# One figure per band with every source's interpolated profile drawn as a line.
for title, profiles in (('H', h_vals), ('J', j_vals), ('V', v_vals), ('Z', z_vals)):
    plt.figure()
    plt.title(title)
    for h in profiles:
        plt.plot(interped_marks, h)

In [189]:
# Overlaid histograms of the per-source radius stored at index 2 of each
# result tuple, one histogram per band.
plt.figure()
plt.title('Petrosian Radius Comparison by Band')
for label, results in (('H', processed_h), ('J', processed_j),
                       ('V', processed_v), ('Z', processed_z)):
    plt.hist([h[2] for h in results], bins=20, label=label, alpha=0.25)
plt.legend()


<matplotlib.legend.Legend at 0x7f6afc109b38>

In [188]:
# Overlaid histograms of each band's radius (index 2 of the result tuple)
# divided by the H-band radius of the same source.
plt.figure()
plt.title('Petrosian Radius Ratio by Band')
for label, results in (('J/H', processed_j), ('V/H', processed_v), ('Z/H', processed_z)):
    plt.hist([j[2]/h[2] for h,j in zip(processed_h, results)], bins=20, label=label, alpha=0.25)
plt.legend()


<matplotlib.legend.Legend at 0x7f6afc53c6d8>