In [2]:
import time
import statistics

import numpy as np
from astropy.io import fits

In [9]:
def list_stats(data):
    """Compute the median and the mean of a collection of numbers.

    The input is left untouched: the statistics are computed on a sorted
    copy, so the caller's list keeps its original order and any iterable
    accepted by ``sorted`` (list, tuple, ...) may be passed.

    Parameters
    ----------
    data : iterable of numbers
        Must contain at least one value.

    Returns
    -------
    tuple
        ``(median, mean)``. For an even number of values the median is the
        average of the two middle values.

    Raises
    ------
    ZeroDivisionError
        If ``data`` is empty.
    """
    # Work on a sorted copy instead of sorting the caller's list in place.
    ordered = sorted(data)
    count = len(ordered)
    # Summing in sorted order keeps the floating-point result identical to
    # the previous in-place implementation.
    mean = sum(ordered)/count
    mid = count//2
    if count % 2 == 0:
        median = (ordered[mid - 1] + ordered[mid])/2
    else:
        median = ordered[mid]
    return median, mean


if __name__ == '__main__':
    # Demo: an even-length sample (median averages the two middle values)
    # followed by a single-element sample.
    for sample in ([1.3, 2.4, 20.6, 0.95, 3.1, 2.7], [1.5]):
        m = list_stats(sample)
        print(m)

(2.55, 5.175000000000001)
(1.5, 1.5)


In [10]:
def time_stat(func, size, n_trials):
    """Return the average wall-clock time of ``func`` on random input.

    For each of ``n_trials`` runs a fresh array of ``size`` uniform random
    numbers is generated (outside the timed region) and ``func`` is called
    on it. The result is ``np.mean`` of the per-run durations, in seconds.
    """
    def _timed_run():
        # Only the call to ``func`` is timed, not the data generation.
        sample = np.random.rand(size)
        began = time.perf_counter()
        func(sample)
        return time.perf_counter() - began

    durations = [_timed_run() for _ in range(n_trials)]
    return np.mean(durations)

if __name__ == '__main__':
    # Compare the pure-Python and the NumPy mean on 10**5 random values.
    # statistics.mean is given far fewer trials than np.mean.
    template = '%.6fs for %s'
    statistics_mean = time_stat(statistics.mean, 10**5, 10)
    np_mean = time_stat(np.mean, 10**5, 1000)

    timings = ((statistics_mean, 'statistics_mean'), (np_mean, 'np_mean'))
    for seconds, label in timings:
        print(template % (seconds, label))

0.158032s for statistics_mean
0.000047s for np_mean


In [30]:
def median_fits(files):
    """Compute the per-pixel median of a stack of FITS images.

    Every file is opened, the data of its first HDU is loaded into memory
    and the file is closed again before the next one is read.

    Parameters
    ----------
    files : iterable of str
        Paths of the FITS files to stack. The images are assumed to share
        one shape so that they can be stacked -- TODO confirm for new data.

    Returns
    -------
    tuple
        ``(median, elapsed, memory)`` where ``median`` is the element-wise
        median across the images, ``elapsed`` is the wall-clock time in
        seconds (loading included) and ``memory`` is the total size of the
        loaded image arrays in kB (``nbytes/1024``).
    """
    start = time.perf_counter()
    arrays = []
    for file in files:
        # The context manager closes the file handle deterministically.
        with fits.open(file) as hdul:
            # Copy the pixels so the array stays valid after the file is
            # closed (astropy may hand out memory-mapped data).
            arrays.append(np.array(hdul[0].data))
    median = np.median(arrays, axis=0)
    memory = sum(array.nbytes for array in arrays)/1024
    end = time.perf_counter() - start
    return median, end, memory


if __name__ == '__main__':
    # Demo: stack two images, then twelve, and report the median at pixel
    # (100, 100) together with the elapsed time and the memory used.
    file_sets = (
        ['data5/image0.fits', 'data5/image1.fits'],
        ['data5/image%s.fits' % i for i in range(12)],
    )
    for file_set in file_sets:
        result = median_fits(file_set)
        print(result[0][100, 100], result[1], result[2])

0.012338057160377502 0.03336585099987133 625.0
0.012102657463401556 0.19101409299992156 3750.0


In [30]:
def median_bins(data, nbins):
    """Histogram the values lying within one standard deviation of the mean.

    The interval ``[mean - std, mean + std)`` is divided into ``nbins``
    equal-width bins. Values below the interval are only counted; values at
    or above its upper edge are neither counted nor binned.

    Returns
    -------
    tuple
        ``(mean, std, ignored, bins)``: the mean and standard deviation of
        ``data``, the number of values below ``mean - std`` and an array
        with the per-bin counts.
    """
    mean, std = np.mean(data), np.std(data)
    lower, upper = mean - std, mean + std
    bin_width = 2*std/nbins
    counts = np.zeros(nbins)
    below = 0

    for value in data:
        if value < lower:
            below += 1
            continue
        if value < upper:
            # Offset from the lower edge, in whole bin widths.
            counts[int((value - lower)//bin_width)] += 1

    return mean, std, below, counts


def median_approx(data, nbins):
    """Approximate the median of ``data`` from a coarse histogram.

    Uses ``median_bins`` to count the values below ``mean - std`` and to
    histogram the rest into ``nbins`` bins, then walks the bins until the
    running count reaches ``(len(data) + 1)/2``. The midpoint of the bin
    where this happens is returned; if the count is never reached, the
    midpoint of the last bin is returned.
    """
    mean, std, below, counts = median_bins(data, nbins)
    halfway = (len(data) + 1)/2

    running = below
    position = 0
    for position, count in enumerate(counts):
        running += count
        if running >= halfway:
            break

    lower = mean - std
    bin_width = 2*std/nbins
    return lower + bin_width*(position + 0.5)
        


if __name__ == '__main__':
    # Demo: print the histogram summary and the approximate median for
    # three small samples.
    cases = (
        ([1, 1, 3, 2, 2, 6], 3),
        ([1, 5, 7, 7, 3, 6, 1, 1], 4),
        ([0, 1], 5),
    )
    for values, nbins in cases:
        print(median_bins(values, nbins))
        print(median_approx(values, nbins))

(2.5, 1.707825127659933, 0, array([2., 3., 0.]))
2.5
(3.875, 2.521780125229002, 3, array([0., 1., 1., 1.]))
4.50544503130725
(0.5, 0.5, 0, array([1., 0., 0., 0., 0.]))
0.9


In [11]:
from helper import running_stats


def median_bins_fits(files, nbins):
    """Build per-pixel histograms for a set of FITS images.

    For every pixel the interval ``[mean - std, mean + std)`` is divided
    into ``nbins`` equal-width bins, where ``mean`` and ``std`` are the
    arrays returned by ``running_stats(files)``. The files are then read one
    at a time; each pixel value below the interval is counted in
    ``ignored``, each value inside it is counted in its bin, and values at
    or above the upper edge are dropped.

    The image shape is taken from ``mean`` rather than being fixed, and each
    file is processed with whole-array operations instead of a Python loop
    over pixels.

    Parameters
    ----------
    files : sequence of str
        Paths of the FITS files; the data of the first HDU is used.
        Assumes every image has the same shape as ``mean`` -- TODO confirm
        against ``running_stats``.
    nbins : int
        Number of bins per pixel.

    Returns
    -------
    tuple
        ``(mean, std, ignored, bins)`` where ``ignored`` has the shape of
        ``mean`` and ``bins`` has that shape plus a trailing axis of length
        ``nbins``.
    """
    mean, std = running_stats(files)
    minval = mean - std
    maxval = mean + std
    width = 2*std/nbins
    shape = np.shape(mean)
    ignored = np.zeros(shape)
    bins = np.zeros(shape + (nbins,))

    for file in files:
        with fits.open(file) as fits_file:
            data = fits_file[0].data
            below = data < minval
            ignored[below] += 1
            # Pixels inside [minval, maxval); comparisons with NaN are False,
            # so such pixels are skipped exactly like in a scalar if/elif.
            in_range = ~below & (data < maxval)
            pixels = np.nonzero(in_range)
            # Only in-range pixels are divided, so a zero width elsewhere
            # never enters the computation.
            index = ((data[pixels] - minval[pixels])//width[pixels]).astype(int)
            # Each pixel appears at most once per file, so the fancy-indexed
            # in-place add cannot lose increments.
            bins[pixels + (index,)] += 1

    return mean, std, ignored, bins
    


def median_approx_fits(files, nbins):
    """Approximate the per-pixel median of a set of FITS images.

    Uses the per-pixel histograms from ``median_bins_fits``. For each pixel
    the bin counts are accumulated, starting from the number of ignored
    (too small) values, until the running total reaches
    ``(len(files) + 1)/2``; the midpoint of that bin is the estimate. If the
    total is never reached, the midpoint of the last bin is used.

    The image shape follows the arrays returned by ``median_bins_fits``
    instead of being fixed, and the search is done with a cumulative sum
    over the bin axis rather than nested Python loops. The ``ignored`` array
    is no longer modified in place.

    Parameters
    ----------
    files : sequence of str
        Paths of the FITS files.
    nbins : int
        Number of bins per pixel.

    Returns
    -------
    numpy.ndarray
        Approximate median for every pixel, same shape as the mean image.
    """
    mean, std, ignored, dbins = median_bins_fits(files, nbins)
    minval = mean - std
    width = 2*std/nbins
    threshold = (len(files) + 1)/2

    # Running total per pixel after each bin, seeded with the ignored count.
    cumulative = ignored[..., np.newaxis] + np.cumsum(dbins, axis=-1)
    reached = cumulative >= threshold
    last_bin = dbins.shape[-1] - 1
    # argmax gives the first bin reaching the threshold; pixels that never
    # reach it fall back to the last bin.
    k = np.where(reached.any(axis=-1), reached.argmax(axis=-1), last_bin)

    return minval + width*(k + 0.5)


def main():
    """Run the binned median approximation on three images and print a spot check.

    Prints, for pixel (100, 100): the mean, the standard deviation, the
    count of ignored values, the bin counts and the approximate median.
    """
    nbins = 5
    pixel = (100, 100)
    fits_files = ['data6/' + name
                  for name in ('image0.fits', 'image1.fits', 'image2.fits')]

    mean, std, left_bin, bins = median_bins_fits(fits_files, nbins)
    # NOTE: median_approx_fits builds the histograms again internally.
    median = median_approx_fits(fits_files, nbins)

    for grid in (mean, std, left_bin):
        print(grid[pixel])
    print(bins[pixel])
    print(median[pixel])


# Run the demo only when this code is executed directly (script or notebook
# cell), not when it is imported as a module.
if __name__ == '__main__':
    main()

0.018398113548755646
0.010496325561403296
0.0
[0. 2. 0. 0. 0.]
0.014199583324194326
