# Loading the EAGLE simulation data

Write out the function for loading the EAGLE simulation data.  It checks whether reduced resolution versions have been created already and loads those if they have.

The main function called in this script is "loaddata".
It accepts a limited range of inputs, for the distance at which we are pretending to observe the simulated data and the "slice width" of the simulation (which corresponds to the filter width of the H$\alpha$ NB filter).  The third input is the desired resolution of the image (i.e. binned resolution), which can be anything, but there are a few standards listed below.

loaddata returns a data tuple which contains [data array, trimmed original data array size, factor by which the resolution was reduced]. <br>
The "trimmed original data array size" is the size of the original data array (32000x32000 from the 100Mpc size simulation) after trimming by the amount that was needed in order to reduce the resolution of the simulation.  E.g. if the data needs to be reduced by a factor of 7, that does not go evenly into 32000, so before the data resolution is reduced, the data array is trimmed to a slightly smaller size that is divisible by 7.  This is recorded so we can determine the transverse distances accurately for plotting and masking later on.

An accompanying script, load_data_testing.ipynb, tests the functionality of this script by providing and running some examples.

### Distance:
Distance must be one of: 50Mpc, 100Mpc, 200Mpc, or 500Mpc. <br> To expand this range, we will need to add in the cosmology calculator to calculate the distance-arcsecond transverse scaling at that distance.

### Slice Width:
The EAGLE simulations are projected into slice widths of 5Mpc, covering 20 Mpc of width in total (z = 0 to 20 Mpc).  The desired slice width must be one of: 5Mpc, 10Mpc, 15Mpc, or 20Mpc.  Roughly, this corresponds to filter widths of 1nm to 5nm.

### Resolution:
You can input any resolution (in arcseconds), but some standards are:  14 arcsec, 100 arcsec, 500 arcsec, 1000 arcsec<br>
The minimum resolution of the EAGLE simulation is 13 (6.4,3.2,1.3) arcsec per pixel at a distance of 50 (100,200,500) Mpc.  Since Dragonfly pixels are 2.8 arcsec in size, let's pick 2.8 arcsec*5 = 14 arcsec as the smallest consistent resolution for each distance.  This just makes things easier when comparing how much we have to bin at each distance (since if we can compare the same angular resolution at each distance this becomes a little easier, rather than comparing the different minimum angular resolution at each distance).

## Inputs:
homedir<br>
basedir


In [11]:
import numpy as np
import eagle_constants_and_units as c
import cosmo_utils as csu
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import mpl_toolkits.axes_grid1 as axgrid
from astropy import constants as const
from astropy import units as u

import os

#import get_halpha_SB

In [None]:
def imreduce_masked(img,factor,log=True,fillby='max'):
    """
    Placeholder for a masked variant of imreduce -- NOT IMPLEMENTED YET.

    The intended behaviour (per the original notes) is to lower the resolution
    of an image by taking the mean of blocks of elements while leaving masked
    values out of the mean.  At present the body does nothing: every argument
    is ignored and None is returned.

        img: 2D image array (currently unused)
        factor: factor by which to reduce the number of array elements along
                each axis (currently unused)
        log: currently unused
        fillby: currently unused

    Note that there is no mask argument in the signature yet.

    Notes kept from the original sketch, for whoever implements this:
        image for testing:
            image = np.array([[1,1,2,2],[1,201,2,2],[3,3,200,4],[3,3,4,4]])
        a mask could be built like this:
            clipped = sigma_clip(image,sig=3,iters=2)
            mask = clipped.mask
    """
    return None

def imreduce(img, factor, log=True, method = 'average'):
    """
    Reduce the resolution of a 2D image by combining factor x factor blocks of pixels.

    img: 2D image array
    factor: factor by which to reduce the number of array elements along each axis
            (must be a whole number >= 1; whole-valued floats such as 7.0 are accepted)
    log: whether or not the array contains log10 data values; if True the values
         are exponentiated before being combined and log10 is taken of the result
    method: 'average' divides each block sum by factor**2; any other value
            returns the plain block sum

    Returns the reduced array, of shape (img.shape[0]/factor, img.shape[1]/factor),
    or None (after printing a message) if either axis is not a multiple of factor.
    Raises ValueError if factor is not a whole number >= 1.
    """
    nfac = int(factor)
    if nfac != factor or nfac < 1:
        raise ValueError('factor must be a whole number >= 1, got %s'%factor)

    img = np.asarray(img)
    nrows, ncols = img.shape

    if nrows % nfac != 0 or ncols % nfac != 0:
        print('Output grid must have a integer number of cells: cannot reduce image pixels by a factor %i'%factor)
        return None

    # Work in linear units so that blocks are summed/averaged in flux, not in log(flux).
    linear = np.power(10., img) if log else img

    # View the image as a grid of (nfac x nfac) blocks and collapse each block.
    # Axis 1 runs over rows inside a block, axis 3 over columns inside a block.
    blocks = linear.reshape(nrows//nfac, nfac, ncols//nfac, nfac)
    reduced = blocks.sum(axis=(1, 3))

    if method == 'average':
        # plain float(): the np.float alias no longer exists in current NumPy
        reduced = reduced/float(nfac**2)
    if log:
        reduced = np.log10(reduced)
    return reduced

In [6]:
def initstuff(testing, machine=None):
    """
    Set up the raw-data directory and the lists of raw EAGLE slice files.

    testing: if true, a small fake (320 x 320) data array is written to
             homedir+'fakedata.npz' and every entry of both file lists is
             pointed at that file instead of the real simulation files
    machine: name of the computer being used, which selects the raw data
             directory ('chinook' or 'coho').  If left as None, the
             module-level variable `machine` is used when it exists.  Any
             unrecognised (or missing) name gives homedir = ''.

    Returns homedir, files_SF_28, files_noSF_28, sl where sl is a tuple of two
    full slices that selects the whole of a 2D array.

    Side effects: homedir, files_SF_28 and files_noSF_28 are also stored as
    module-level globals.
    """
    global homedir
    global files_SF_28
    global files_noSF_28

    if machine is None:
        # Historically the machine name was only ever read from a module-level
        # variable; keep honouring it, but do not crash if it was never set.
        machine = globals().get('machine')

    rawdirs = {'chinook': '/Users/lokhorst/Eagle/',
               'coho': '/Users/deblokhorst/eagle/SlicesFromNastasha/'}
    ## Add the path to where the raw data is kept on your computer here
    homedir = rawdirs.get(machine, '')

    # The four 5 Mpc slices, in the order zcen = 12.5, 17.5, 2.5, 7.5.
    stem = homedir+'emission_halpha_L0100N1504_28_test2_SmAb_C2Sm_32000pix_5.000000slice_zcen'
    zcens = ['12.5', '17.5', '2.5', '7.5']
    files_SF_28 = [stem+zcen+'__fromSFR.npz' for zcen in zcens]
    files_noSF_28 = [stem+zcen+'_noSFR.npz' for zcen in zcens]

    if testing:
        # define an array to be used for testing - this will be in log10
        size   = 320
        maxval = 10.
        # every row is the same ramp running from -maxval/2 up to (just under) +maxval/2
        ramp = np.arange(size)/(size/maxval) - maxval/2.
        fakedata = np.tile(ramp, (size, 1))
        np.savez(homedir+'fakedata.npz',fakedata)
        files_SF_28 = [homedir+'fakedata.npz']*4
        files_noSF_28 = [homedir+'fakedata.npz']*4

    # A tuple (not a list): NumPy only accepts a tuple of slices as a multi-axis index.
    sl = (slice(None,None,None), slice(None,None,None))

    return homedir,files_SF_28,files_noSF_28,sl

def changeres(distance,resolution,data):
    """
    Rebin the simulation data to a desired angular resolution at a given distance.

    distance: one of 50Mpc, 100Mpc, 200Mpc, or 500Mpc
    resolution: desired output resolution (e.g. 14 arcsec, 100 arcsec, 500 arcsec, 1000 arcsec)
    data: the raw data (size 32000 x 32000 pixels, 100 Mpc x 100 Mpc), in log10
          (it is handed to imreduce with log=True)

    Returns (reduced data, size of the data after trimming but before reducing,
    factor by which the resolution was reduced), or None if the distance is not
    supported.  If the requested resolution is no coarser than the simulation
    pixels (factor < 2), the data is returned without being rebinned.

    In testing mode (module-level `dotests` set to True, as loaddata does) the
    data is assumed to be 320 x 320 pixels covering 1 Mpc.
    """
    pixscale = {'50Mpc': 0.237/1000.*(1.+0.0115), '100Mpc': 0.477/1000.*(1.+0.0235),\
                '200Mpc': 0.928/1000.*(1.+0.047) , '500Mpc': 2.178/1000.*(1.+0.12)} ### Mpc / arcsec (comoving)
    size = 32000. # pixels
    scale = 100.  # Mpc

    # `dotests` is only created once loaddata has run; treat "never set" as False
    # so that changeres can also be called directly.
    if globals().get('dotests', False):
        print("TESTING IS TRUE in changeres")
        size  = 320. # pixels
        scale = 1.   # Mpc

    if distance not in pixscale:
        print('distance needs to be one of: 50Mpc, 100Mpc, 200Mpc, or 500Mpc.  Others not supported atm.')
        return None

    simpixsize = scale/size ### Mpc / pixel is resolution of raw data
    # Never let the factor round down to 0 (resolution finer than a simulation
    # pixel): that would divide by zero below.  A factor of 1 means "leave as is".
    factor = max(1, int(round(pixscale[distance]*resolution/simpixsize)))
    print("Will reduce resolution by a factor of %s."%factor)
    # LATER determine the current resolution of the data. FOR NOW assume current resolution is 100 Mpc/ 32000 pixels ~ 3 kpc/pixel

    # If the factor is not an integer divisor of the size (32000), trim the data first and then imreduce it
    newsize = float(int(size/factor) * factor)
    if newsize != size:
        print("Before reducing resolution, the original data was trimmed to size %s."%newsize)
        datanew = data[0:int(newsize),0:int(newsize)]
    else:
        datanew = data

    if factor < 2:
        return datanew, newsize, factor
    return imreduce(datanew, factor, log=True, method = 'average'), newsize, factor


def loadraw(files_SF_28,files_noSF_28,sl,index=0):
    """
    Load one raw 5 Mpc slice: the noSF and SF emission files summed together.

    files_SF_28: list of file names of the SF ("fromSFR") emission files
    files_noSF_28: list of file names of the noSF emission files
    sl: index (a tuple or list of slices) selecting the part of each array to keep
    index: which entry of the two file lists to load

    Both files hold log10 values in 'arr_0'; they are added in linear space and
    the log10 of the sum is returned.
    """
    # NumPy only accepts a *tuple* of slices as a multi-axis index.
    window = tuple(sl) if isinstance(sl, list) else sl

    print('Loading noSF data first ('+files_noSF_28[index]+')...')
    with np.load(files_noSF_28[index]) as npz:
        nosf = npz['arr_0'][window]
    print('Loading SF data second ('+files_SF_28[index]+')...')
    with np.load(files_SF_28[index]) as npz:
        sf = npz['arr_0'][window]
    print('Adding together to make a 5 Mpc slice...')
    data = np.log10(10**nosf+10**sf)
    print('Deleting intermediate files: noSF data, SF data...')
    del nosf
    del sf
    return data

def load5Mpcslice(files_SF_28,files_noSF_28,sl):
    """
    Load a single 5 Mpc slice (SF plus noSF emission), using the saved sum if there is one.

    files_SF_28, files_noSF_28: lists of raw file names, passed on to loadraw
    sl: index (a tuple or list of slices) selecting the part of the array to keep

    The summed data lives in the module-level `basedir` (set by loaddata).  If
    the file is already there it is loaded; otherwise the first raw slice is
    built with loadraw, saved to that file, and returned.
    NOTE: the saved file holds the data *after* `sl` was applied, and `sl` is
    applied again on reload -- harmless for the full slices used by default.
    """
    print('Loading a 5Mpc slice of data...')
    # check if the SF plus noSF data file already exists...
    total_fname = basedir+'emission_halpha_L0100N1504_28_test2_SmAb_C2Sm_32000pix_5.000000slice_zcen12.5_total.npz'

    if os.path.isfile(total_fname):
        print("Data exists, loading %s now..."%total_fname)
        # NumPy only accepts a *tuple* of slices as a multi-axis index.
        window = tuple(sl) if isinstance(sl, list) else sl
        with np.load(total_fname) as npz:
            return npz['arr_0'][window]

    print("Data not saved, loading from original files now...")
    data = loadraw(files_SF_28,files_noSF_28,sl)
    print("Saving the summed data in %s..."%total_fname)
    np.savez(total_fname,data)
    return data

def loadslice(slicewidth,numslices,files_SF_28,files_noSF_28,sl):
    """
    Load a slice thicker than 5 Mpc by adding several raw 5 Mpc slices together.

    slicewidth: total width of the slice in Mpc (only used in the file name and messages)
    numslices: how many raw 5 Mpc slices to add (entries 0 .. numslices-1 of the file lists)
    files_SF_28, files_noSF_28: lists of raw file names, passed on to loadraw
    sl: index (a tuple or list of slices) selecting the part of the array to keep

    The summed data lives in the module-level `basedir` (set by loaddata).  If
    the file is already there it is loaded; otherwise the slices are loaded one
    by one with loadraw, added in linear space (the data is log10), saved, and
    returned.

    Raises IndexError if more slices are requested than there are raw files.
    """
    total_fname = basedir+'emission_halpha_L0100N1504_28_test2_SmAb_C2Sm_32000pix_%sslice_zcen12.5_total.npz'\
                    %slicewidth

    if os.path.isfile(total_fname):
        print("Data exists, loading %s now..."%total_fname)
        # NumPy only accepts a *tuple* of slices as a multi-axis index.
        window = tuple(sl) if isinstance(sl, list) else sl
        with np.load(total_fname) as npz:
            return npz['arr_0'][window]

    # Fail before any (very large) file is read rather than part-way through the sum.
    available = min(len(files_SF_28), len(files_noSF_28))
    if numslices > available:
        raise IndexError('Asked for %s slices but only %s raw slice files are listed.'\
                         %(numslices,available))

    print('Creating data of slicewidth %s Mpc (%s does not yet exist)...'\
          %(slicewidth,total_fname))
    print('First slice: index %s of %s'%(1,numslices))
    data = loadraw(files_SF_28,files_noSF_28,sl,index=0)
    for index in range(1,numslices):
        print('Next slice: index %s of %s'%(index+1,numslices))
        rawdata = loadraw(files_SF_28,files_noSF_28,sl,index=index)
        data = np.log10(10**data+10**rawdata)
        # drop the slice straight away; it is no longer needed once added in
        del rawdata
    print("Saving the summed data in %s..."%total_fname)
    np.savez(total_fname,data)
    return data
    
def _read_reduced_cache(fname):
    """Read a saved (data, size before reducing, factor) triple, whichever way it was saved."""
    with np.load(fname) as npz:
        if 'data' in npz.files:
            return npz['data'], float(npz['newsize']), float(npz['factor'])
        try:
            legacy = npz['arr_0']
        except ValueError:
            # older files hold the whole triple as a pickled object array
            legacy = None
    if legacy is None:
        # NOTE: allow_pickle=True can run arbitrary code from the file.  Only
        # files written by this script into basedir should ever be opened here.
        with np.load(fname, allow_pickle=True) as npz:
            legacy = npz['arr_0']
    if legacy.dtype == object:
        return tuple(legacy)
    # A bare array: one of the old "special" files that hold unreduced data only.
    return legacy, float(legacy.shape[0]), 1.


def _write_reduced_cache(fname, data_tuple):
    """Save a (data, size before reducing, factor) triple as three named arrays (no pickling)."""
    data, newsize, factor = data_tuple
    np.savez(fname, data=data, newsize=newsize, factor=factor)


def loaddata(machine='coho',resolution=None, distance=None, slicewidth=5, testing=False):
    """
    Load the EAGLE data, at reduced resolution if requested, re-using saved files where possible.

    machine: name of the computer being used; selects the raw data directory in initstuff
    resolution: desired resolution in arcsec, or None for the data as it is in the simulation
    distance: one of 50Mpc, 100Mpc, 200Mpc, or 500Mpc; needed whenever a resolution is given
    slicewidth: slice width in Mpc; it is rounded to a multiple of 5 (at least 5)
    testing: if true, small fake data is used and files are kept in a testing subdirectory

    Returns a tuple containing the data in the first index
    It contains the size of the data before resolution was reduced in the second index (just its own size if raw data)
    It contains the factor by which the data was reduced in the third index (just '1' if raw data)

    Returns None if a resolution is given without a distance, or if the
    distance is not supported by changeres.

    Side effects: sets the module-level `basedir`, `dotests` and `machine`, and
    creates the basedir directory if it does not exist.
    """
    global basedir
    global dotests

    # initstuff reads the machine name from a module-level variable, so publish
    # the argument there; otherwise the `machine` parameter would be ignored.
    globals()['machine'] = machine
    homedir,files_SF_28,files_noSF_28,sl = initstuff(testing) # Simulation snapnum 28 (z = 0), xy box size: 100Mpc, z slice width: 5Mpc,

    dotests = bool(testing)
    basedir ='./intermediatedata/'
    if testing:
        basedir=basedir+'testing/'
    ## Can change the basedir of where your intermediatedata files will live here

    # Create outdir if it does not exist
    if not os.path.isdir(basedir):
        os.makedirs(basedir)

    # Work in whole 5 Mpc slices; widths that round to a single slice use the 5 Mpc files.
    numslices = max(1, int(round(float(slicewidth) / 5.)))
    slicewidth = 5 * numslices
    if numslices > 1:
        print("Loading data of slicewidth %s (after rounding), number of slices is %s"\
              %(slicewidth,numslices))

    def load_full():
        # the full-resolution data for the requested slice width
        if numslices == 1:
            return load5Mpcslice(files_SF_28,files_noSF_28,sl)
        return loadslice(slicewidth,numslices,files_SF_28,files_noSF_28,sl)

    if resolution is None:
        # Raw data requested; a distance on its own does not change anything.
        data = load_full()
        return data, data.shape[0], 1

    if distance is None:
        print('Need to input both a distance and a resolution. Exiting.')
        return None

    if numslices == 1:
        fname = basedir+'data_%s_%sarcsec.npz'%(str(distance),int(resolution))
    else:
        fname = basedir+'data_%s_%sarcsec_%sslwd.npz'%(str(distance),int(resolution),int(slicewidth))

    if os.path.isfile(fname):
        print('%s exists, loading now...'%fname)
        return _read_reduced_cache(fname)

    print('%s does not exist, making it now...'%fname)
    fulldata = load_full()
    print("Reducing the resolution to desired distance/resolution...")
    data_tuple = changeres(distance,resolution,fulldata)
    if data_tuple is None or data_tuple[0] is None:
        # nothing useful was produced, so there is nothing worth saving
        return data_tuple
    print("Saving the data to %s..."%fname)
    _write_reduced_cache(fname, data_tuple)
    return data_tuple

In [None]:
## TESTING SCRIPT ##
#machine = 'chinook'



In [34]:
### EXAMPLES ###
#machine='coho'

#distance = '50Mpc'; resolution = 100
#data_tuple = loaddata(machine=machine,resolution=resolution,distance=distance)
#data = data_tuple[0]

#data_tuple = loaddata(machine=machine)
#data = data_tuple[0]
######

Loading a 5Mpc slice of data...
data exists, loading emission_halpha_L0100N1504_28_test2_SmAb_C2Sm_32000pix_5.000000slice_zcen12.5_total.npz now...
