In [3]:
import numpy as np

In [4]:
def loop_wins(images, med, sigma, rejMask, verbose=False):
    """Iteratively estimate a Winsorized sigma for every row of ``images``.

    On each pass, values outside ``med -/+ 1.5*sigma`` are replaced by the
    nearest bound, the entries selected by ``rejMask`` are set to NaN, and the
    per-row median and ``1.134 * std`` are recomputed. Only rows whose sigma
    changed by more than a relative 0.0005 on the previous pass have their
    sigma refreshed; the loop ends when no row changed by more than that.

    Parameters
    ----------
    images : 2-D float ndarray
        One series per row. MODIFIED IN PLACE (clipped and NaN-flagged);
        pass a copy if the original values must be kept.
    med, sigma : 1-D ndarray, one entry per row of ``images``
        Starting estimates. Neither input array is modified.
    rejMask : boolean ndarray with the shape of ``images``
        Entries to force to NaN on every pass.
    verbose : bool, optional
        Print the number of not-yet-converged rows after each pass.

    Returns
    -------
    med, sigma : 1-D ndarray
        Final per-row median and Winsorized sigma.
    """
    # Private copy: the masked assignment below must not overwrite the
    # caller's array.
    sigma = np.array(sigma)

    n1 = 1
    # Builtin `bool` instead of the `np.bool` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    qmask = np.ones(images.shape[0], dtype=bool)
    while n1 > 0:
        m0 = (med - 1.5*sigma)[:, np.newaxis]
        m1 = (med + 1.5*sigma)[:, np.newaxis]
        mask_min = images < m0
        mask_max = images > m1
        # Broadcast views give the per-row bound at every column without
        # materialising a full-size temporary.
        images[mask_min] = np.broadcast_to(m0, images.shape)[mask_min]
        images[mask_max] = np.broadcast_to(m1, images.shape)[mask_max]
        # Apply pixel rejection mask before calculating new sigma
        images[rejMask] = np.nan
        sigma0 = sigma.copy()
        if np.isnan(images.sum()):
            med = np.nanmedian(images, axis=1)
            sigma[qmask] = 1.134*np.nanstd(images, axis=1)[qmask]
        else:
            # No NaN present: the plain reductions are faster.
            med = np.median(images, axis=1)
            sigma[qmask] = 1.134*np.std(images, axis=1)[qmask]
        # A zero sigma0 yields NaN or inf here; NaN compares False (row is
        # treated as converged), so only the warning is silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            qmask = np.abs(sigma - sigma0)/sigma0 > 0.0005
        n1 = qmask.sum()
        if verbose: print('n1 = ', n1)
    return med, sigma

In [None]:
# Build a test cube with the requested size=(256, 256, 50).
# NOTE(review): cube_outliers is not defined or imported in any cell visible
# here — confirm it is available before this cell runs on a fresh kernel.
randimages = cube_outliers(size=(256, 256, 50))

In [None]:
# Work on a copy and collapse the first two axes so that each row of `flatc`
# holds the values along the third axis for one position.
datacube = randimages.copy()
sz = datacube.shape
flatc = datacube.reshape([sz[0]*sz[1], sz[2]])
# Builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
rej_mask = np.zeros(flatc.shape, dtype=bool)

# Starting per-row estimates for the Winsorization loop.
med = np.median(flatc, axis=1)
sigma = np.std(flatc, axis=1)

In [None]:
# r is the (median, sigma) pair returned by loop_wins.
# flatc is passed without .copy(): loop_wins clips and NaN-flags its first
# argument in place, so flatc is modified by this call.
r = loop_wins(flatc, med, sigma, rej_mask)

In [5]:
from astropy.io import fits
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import os
# Calibration folder, rooted at the directory named by the DATA environment
# variable (a KeyError is raised here if DATA is not set).
cal_dir = Path(os.environ['DATA'], 'DDS', 'Taka', 'Calibration')
# The value of this expression is discarded: it is not the last statement of
# the cell, so nothing is displayed.
str(cal_dir)

def fitsread(f, header=False):
    """Read the data of the first HDU (index 0) of a FITS file.

    Parameters
    ----------
    f : path-like
        File handed to ``fits.open``.
    header : bool, optional
        When True, also return the header of that HDU.

    Returns
    -------
    data, or the tuple ``(data, header)`` when ``header`` is True.
    """
    with fits.open(f) as hdul:
        first_hdu = hdul[0]
        data = first_hdu.data
        if not header:
            return data
        return data, first_hdu.header

In [6]:
# Collect every file matching 'Bias*.fit' found recursively under the
# Bias_bin1_Mar_2018 folder.
bias_dir = cal_dir / 'Bias_bin1_Mar_2018'
biasf = [*bias_dir.rglob('Bias*.fit')]
print(len(biasf))

# Frame geometry is read from the header of the first file.
d0, h0 = fitsread(biasf[0], header=True)
nx, ny = h0['NAXIS1'], h0['NAXIS2']
ny2 = int(ny/4)
print(nx,ny)
print(nx, ny2)

# Keep only the first quarter of the rows of each frame and stack the frames
# along the last axis.
slice1 = np.s_[0:ny2, :]
cube_slice = np.zeros([ny2, nx, len(biasf)])
for i, f in enumerate(biasf):
    im = fitsread(f)
    cube_slice[...,i] = im[slice1]

200
4524 3624
4524 906


In [15]:
# First row of the stacked slice: a 2-D view whose last axis runs over files.
flatc = cube_slice[0]
# Starting per-row estimates, taken along the last axis.
m, sigma = np.median(flatc, axis=1), np.std(flatc, axis=1)

In [14]:
# Winsorized sigma
# Nothing is rejected yet, so the rejection mask is all False. Builtin `bool`
# is used because the `np.bool` alias was removed in NumPy 1.24.
rej_mask = np.zeros(flatc.shape, dtype=bool)
# A copy is passed because loop_wins modifies its first argument in place.
m, sigma = loop_wins(flatc.copy(), m, sigma, rej_mask, verbose=True)

n1 =  4440
n1 =  2162
n1 =  1966
n1 =  1577
n1 =  854
n1 =  230
n1 =  40
n1 =  2
n1 =  0


In [None]:
def win_sigma_clip2(flatc, verbose=False):
    """Flag outliers in each row of ``flatc`` by iterated 5-sigma clipping.

    The centre and width of every row come from ``loop_wins`` (median and
    Winsorized sigma). Values further than 5 sigma from the median are
    rejected; rows that produced rejections are re-estimated with those
    values set to NaN, and the process repeats until a pass finds nothing new.

    Parameters
    ----------
    flatc : 2-D ndarray
        One series per row. Not modified: all NaN flagging happens on copies.
    verbose : bool, optional
        Print progress information, and forward the flag to ``loop_wins``.

    Returns
    -------
    rej_mask : boolean ndarray with the shape of ``flatc``
        True where a value was rejected.
    """
    # NaN flagging needs a floating dtype; integer input is converted once.
    flatc = np.asarray(flatc)
    if not np.issubdtype(flatc.dtype, np.floating):
        flatc = flatc.astype(float)

    # Make a 1st pass while there's no need to consider NaN-flagged arrays
    # as median() and std() are faster than nanmedian() and nanstd().

    if verbose: print('flattened shape = ', flatc.shape)
    # Builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
    rej_mask = np.zeros(flatc.shape, dtype=bool)

    m = np.median(flatc, axis=1)
    sigma = np.std(flatc, axis=1)
    # Winsorized sigma
    m, sigma = loop_wins(flatc.copy(), m, sigma, rej_mask, verbose=verbose)

    mask = np.abs(flatc - m[:,np.newaxis]) > 5*sigma[:,np.newaxis]
    n = mask.sum()
    if n == 0:
        return rej_mask
    # Prepare new passes only on the rows that have outliers.
    clip_rows = np.where(np.any(mask, axis=1))[0]
    mask = mask[clip_rows, :]
    rej_mask[clip_rows, :] = mask
    # orig_rows[k] is the row of the input array that row k of the next
    # working subset comes from. It is re-indexed on every pass so results are
    # always written back to the right rows of rej_mask, however many times
    # the working set has been narrowed.
    orig_rows = clip_rows
    if verbose:
        print('Total outliers n =', n)
        print('new shape = ', mask.shape)
    while n > 0:
        # Work only on the rows that have outliers. Flag them as NaN.
        # Fancy indexing returns a copy, so the caller's data is untouched.
        flatc = flatc[clip_rows, :]
        flatc[mask] = np.nan
        m = np.nanmedian(flatc, axis=1)
        sigma = np.nanstd(flatc, axis=1)
        # Winsorized sigma
        m, sigma = loop_wins(flatc.copy(), m, sigma, mask, verbose=verbose)

        # NaN entries compare False, so only newly found outliers are set.
        mask = np.abs(flatc - m[:,np.newaxis]) > 5*sigma[:,np.newaxis]
        # clip_rows indexes the current subset, not the original array.
        clip_rows = np.where(np.any(mask, axis=1))[0]
        mask = mask[clip_rows, :]
        n = mask.sum()
        if verbose: print('total outliers n =', n)
        orig_rows = orig_rows[clip_rows]
        rej_mask[orig_rows] = rej_mask[orig_rows] | mask

    return rej_mask

In [None]:
# Independent copy of the stacked slice for the clipping runs that follow.
data_slice = cube_slice.copy()

In [None]:
%%time
sz = data_slice.shape
# Builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
rejmask1 = np.zeros(sz, dtype=bool)
# Clip one 2-D plane at a time along the first axis.
for r in range(sz[0]):
    rejmask1[r,...] = win_sigma_clip2(data_slice[r,...], verbose=False)

In [None]:
# Total number of rejected values. The serial run stores its mask in
# `rejmask1`; no notebook-level `rejmask` is defined.
rejmask1.sum()

In [None]:
from multiprocessing import shared_memory
def wins_wrapper(rows, shape, shared_name, verbose=False, dtype=np.float64):
    """Run ``win_sigma_clip2`` on a range of planes of a shared-memory cube.

    Parameters
    ----------
    rows : sequence of two ints
        ``[start, stop)`` range along the first axis of the cube.
    shape : tuple of int
        Shape of the full cube stored in the shared block.
    shared_name : str
        Name of the existing ``SharedMemory`` block to attach to.
    verbose : bool, optional
        Forwarded to ``win_sigma_clip2``.
    dtype : data-type, optional
        Element type of the shared cube. Defaults to float64.

    Returns
    -------
    rejmask : boolean ndarray of shape ``(stop - start,) + shape[1:]``
        A freshly allocated array, not backed by the shared block.
    """
    existing_shm = shared_memory.SharedMemory(name=shared_name)
    shared_slice = data_slice = None
    try:
        shared_slice = np.ndarray(shape, dtype=dtype, buffer=existing_shm.buf)
        data_slice = shared_slice[rows[0]:rows[1], ...]
        sz = data_slice.shape
        # Builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
        rejmask = np.zeros(sz, dtype=bool)
        for r in range(sz[0]):
            rejmask[r, ...] = win_sigma_clip2(data_slice[r, ...], verbose=verbose)
        return rejmask
    finally:
        # The handle cannot be closed while NumPy views still export its
        # buffer, so drop the views first.
        shared_slice = data_slice = None
        try:
            existing_shm.close()
        except BufferError:
            # Views are still referenced (e.g. by the traceback of an
            # exception in flight); do not mask that exception.
            pass

In [None]:

def wins_sigma_par(datacube, nchunks=6):
    """Sigma-clip a cube in chunks of planes served from shared memory.

    The cube is copied into a shared-memory block, its first axis is split
    into ``nchunks`` contiguous ranges, and ``wins_wrapper`` is applied to each
    range. The chunks are currently mapped with the builtin ``map``, i.e. one
    after the other.

    Parameters
    ----------
    datacube : ndarray
        Cube to process; it is not modified.
    nchunks : int, optional
        Number of contiguous ranges the first axis is split into (default 6).

    Returns
    -------
    rejmask : boolean ndarray with the shape of ``datacube``
        True where a value was rejected.
    """
    from functools import partial

    datacube = np.asarray(datacube)
    if datacube.size == 0:
        # A zero-byte shared block cannot be created; nothing to reject.
        return np.zeros(datacube.shape, dtype=bool)

    # wins_wrapper reads the block as float64 by default, so the shared copy
    # is stored as float64 whatever the input dtype is.
    shared_dtype = np.dtype(np.float64)
    shm = shared_memory.SharedMemory(create=True, size=datacube.size * shared_dtype.itemsize)
    sh_cube_slice = None
    try:
        sh_cube_slice = np.ndarray(datacube.shape, dtype=shared_dtype, buffer=shm.buf)
        sh_cube_slice[:] = datacube[:]

        # Integer chunk bounds: contiguous, covering every row exactly once
        # even when the row count is not a multiple of nchunks.
        nrows = datacube.shape[0]
        bounds = [(k * nrows) // nchunks for k in range(nchunks + 1)]
        rows = [[a, b] for a, b in zip(bounds[:-1], bounds[1:])]

        wins_partial = partial(wins_wrapper, shape=datacube.shape, shared_name=shm.name, verbose=False)
        rejmask_list = list(map(wins_partial, rows))

        # Concatenate rather than stack+reshape so unequal chunks are fine.
        rejmask = np.concatenate(rejmask_list, axis=0)
    finally:
        # Release the view before closing, then free the shared block.
        sh_cube_slice = None
        shm.close()
        shm.unlink()

    return rejmask

In [None]:
# Time one run of the chunked driver on the copied slice.
%time rejmask2 = wins_sigma_par(data_slice)

6 cores: 416.48 seconds for the entire image series.