In [5]:
'''
Write a function called list_stats that takes a list of numbers and 
returns a tuple of the median and mean of the list (in this order).
'''
# Write your list_stats function here.
import numpy as np
def list_stats(list):
  """Return the (median, mean) of a list of numbers, in that order.

  The median is rounded to 2 decimal places and the mean to 3.
  Note that the parameter name hides the built-in `list` inside this
  function; it is kept because it is part of the function's signature.
  """
  centre = np.median(list)
  average = np.mean(list)
  return round(centre, 2), round(average, 3)



# You can use this to test your function.
# Any code inside this `if` statement will be ignored by the automarker.
if __name__ == '__main__':
  # The two examples from the question, run in order.
  examples = (
    [1.3, 2.4, 20.6, 0.95, 3.1, 2.7],
    [1.5],
  )
  for numbers in examples:
    m = list_stats(numbers)
    print(m)

(2.55, 5.175)
(1.5, 1.5)


In [6]:
'''
Write a time_stat function to time our statistic implementations.

time_stat should take three arguments: 
the func function we're timing, 
the size of the random array to test, 
and the number of experiments to perform. 
It should return the average running time for the func function.
'''

import numpy as np
import statistics
import time

def time_stat(func, size, ntrials):
  """Return the average time, in seconds, of one call to func.

  func is called ntrials times, each time on a freshly generated array of
  `size` random values from np.random.rand. Only the call to func is timed;
  building the array happens before the clock starts.

  ntrials must be at least 1, because the total is divided by it.
  """
  elapsed = 0
  for _ in range(ntrials):
    # Generate the input first so that its cost is not measured.
    sample = np.random.rand(size)

    began = time.perf_counter()
    func(sample)
    ended = time.perf_counter()

    elapsed += ended - began

  # Mean run time over all trials.
  return elapsed/ntrials

if __name__ == '__main__':
  # Time both mean implementations on arrays of 10**5 values;
  # statistics.mean is run for fewer trials than np.mean.
  stats_avg = time_stat(statistics.mean, 10**5, 10)
  print('{:.6f}s for statistics.mean'.format(stats_avg))

  numpy_avg = time_stat(np.mean, 10**5, 1000)
  print('{:.6f}s for np.mean'.format(numpy_avg))


0.069851s for statistics.mean
0.000046s for np.mean


In [9]:
import time, numpy as np
from astropy.io import fits

def median_fits(filenames):
    """Compute the per-pixel median of a set of FITS images.

    The data of the first HDU (``hdulist[0].data``) of every file is read,
    the images are stacked along a third axis with ``np.dstack`` and the
    median is taken across that axis.

    Args:
        filenames: iterable of FITS file names. It must contain at least one
            file, and the images must have shapes that ``np.dstack`` can
            stack together.

    Returns:
        A tuple ``(median, elapsed, memory)``:
        the median image, the elapsed time in seconds (reading the files is
        included) and the size of the stacked data array in kB
        (``nbytes / 1024``).
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()

    # Read the image data of every FITS file.
    images = []
    for filename in filenames:
        # The context manager closes the file even if reading the data raises.
        with fits.open(filename) as hdulist:
            images.append(hdulist[0].data)

    # Stack the images into a 3D array and take the median along the stack axis.
    stack = np.dstack(images)
    median = np.median(stack, axis=2)

    # Memory taken by the stacked data, converted from bytes to kB.
    memory = stack.nbytes / 1024

    elapsed = time.perf_counter() - start

    return median, elapsed, memory

if __name__ == '__main__':
    # Median of two images: print one pixel, the run time and the memory used.
    result = median_fits(['image0.fits', 'image1.fits'])
    median_image, elapsed, memory_kb = result
    print(median_image[100, 100], elapsed, memory_kb)

    # Larger run over image0.fits .. image10.fits (currently disabled):
    #result = median_fits(['image{}.fits'.format(str(i)) for i in range(11)])
    #print(result[0][100,100], result[1], result[2])

0.010775766335427761 0.03369307518005371 625.0


In [11]:
'''
Binapprox:
Let's implement the binapprox algorithm to approximate the median of a list of numbers.
It uses two functions:
median_bins, which calculates the mean, the standard deviation and the bin counts;
median_approx, which calls median_bins and then calculates the approximate median.
'''
import numpy as np

def median_bins(values, B):
  """Count the values that fall in B bins within one std of the mean.

  The interval [mean - std, mean + std) is divided into B bins of equal
  width. Values below the interval are only counted; values at or above
  its upper end are ignored.

  Args:
    values: a flat sequence of numbers.
    B: the number of bins.

  Returns:
    A tuple (mean, std, left_bin, bins): the mean and standard deviation of
    values, the number of values below mean - std, and a float array of
    length B with the number of values in each bin.
  """
  mean = np.mean(values)
  std = np.std(values)

  lower = mean - std
  upper = mean + std
  bin_width = 2*std/B

  left_bin = 0
  bins = np.zeros(B)

  for value in values:
    if value < lower:
      left_bin += 1
    elif value < upper:
      # Floating-point rounding can map a value just below `upper` to
      # index B; clamp so it lands in the last bin instead of raising.
      index = min(int((value - lower)/bin_width), B - 1)
      bins[index] += 1

  return mean, std, left_bin, bins

def median_approx(values, B):
  """Approximate the median of values with the binapprox algorithm.

  The bin counts from median_bins are accumulated, starting from the number
  of values below mean - std, until the running count reaches (N + 1)/2.
  The midpoint of the bin where that happens is returned; if the count is
  never reached, the midpoint of the last bin is returned.
  """
  mean, std, below, counts = median_bins(values, B)

  # Position of the median among the N sorted values.
  target = (len(values) + 1)/2

  running = below
  for index, in_bin in enumerate(counts):
    running += in_bin
    if running >= target:
      break

  bin_width = 2*std/B
  return mean - std + bin_width*(index + 0.5)

if __name__ == '__main__':
  # Each case is (values, number of bins).
  cases = (
    ([1, 1, 3, 2, 2, 6], 3),
    ([1, 5, 7, 7, 3, 6, 1, 1], 4),
  )
  for numbers, n_bins in cases:
    print(median_bins(numbers, n_bins))
    print(median_approx(numbers, n_bins))

(2.5, 1.707825127659933, 0, array([2., 3., 0.]))
2.5
(3.875, 2.521780125229002, 3, array([0., 1., 1., 1.]))
4.50544503130725
