Exploring various programming methods (see day 3)

In [166]:
import pandas as pd
import pandas.testing as pdt
import numpy as np
import lcanalyzer.models as models
from lcanalyzer import survey
from functools import reduce
import time
import collections.abc

### Define Parameters

In [14]:
# Band names; the string is iterated one character (= one band) at a time
bands = 'ugrizy'

# Name of the magnitude column in the observations table
mag_col = 'psfMag'

# Name of the observation-time column in the observations table
time_col = 'expMidptMJD'

### Load in data

In [4]:
# Container for the loaded tables, keyed by survey name
lc_datasets = {}
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source
lc_datasets['lsst'] = pd.read_pickle('data/lsst_RRLyr.pkl')

In [66]:
# Create an empty list where we will be storing our light curves
lcs = []
# For each observed object
for obj_id in lc_datasets["lsst"]["objectId"].unique():
    # Create an empty dict for the light curves of this object
    lc = {}
    lc['objectId'] = obj_id
    for b in bands:
        # Boolean row mask: observations of this object taken in band `b`
        filt_band_obj = (lc_datasets["lsst"]["objectId"] == obj_id) & (
            lc_datasets["lsst"]["band"] == b
        )
        # The observations in each band are converted to numpy arrays and stored as dict
        # elements under '<band>_<column>' keys (e.g. 'u_psfMag' when mag_col is 'psfMag')
        lc[b+'_'+mag_col] = np.array(lc_datasets["lsst"][filt_band_obj][mag_col])
        lc[b+'_'+time_col] = np.array(lc_datasets["lsst"][filt_band_obj][time_col])
    lcs.append(lc)
# Turn the list of dicts into a DataFrame with one row per object.
# Note that `lcs` is rebound here from a list to a DataFrame.
lcs = pd.DataFrame.from_records(lcs)

### Process NaNs

In [67]:
def lightcurve_nans2zero(lcs, bands):
    """Add a zero-filled copy of every band's magnitude column.

    For each band ``b`` the column ``'<b>_psfMag'`` (one array per row) is read and a
    new column ``'cleaned_<b>_psfMag'`` is written in which every NaN is replaced by 0.

    Args:
        lcs: DataFrame holding one ``'<b>_psfMag'`` column per band.
        bands: Iterable of band names (a string is iterated character by character).

    Returns:
        The same DataFrame object that was passed in; it is modified in place.
    """

    def _zero_fill(mags):
        # Keep finite entries untouched and substitute 0 wherever the value is NaN
        return np.where(np.isnan(mags), 0, mags)

    for band in bands:
        raw_mags = lcs['{}_psfMag'.format(band)]
        lcs['cleaned_{}_psfMag'.format(band)] = raw_mags.map(_zero_fill)
    return lcs

In [79]:
def lightcurve_remove_nans(lcs, bands):
    """Add NaN-free copies of every band's magnitude and time columns.

    For each band ``b`` the magnitudes in ``'<b>_psfMag'`` decide which observations
    are kept: an observation is dropped when its magnitude is NaN. The same selection
    is applied to the times in ``'<b>_expMidptMJD'`` so both outputs stay aligned.
    Results are written to ``'removed_nans_<b>_psfMag'`` and
    ``'removed_nans_<b>_expMidptMJD'``.

    Args:
        lcs: DataFrame with one array per row in the ``'<b>_psfMag'`` and
            ``'<b>_expMidptMJD'`` columns of every requested band.
        bands: Iterable of band names (a string is iterated character by character).

    Returns:
        The same DataFrame object that was passed in; it is modified in place.
    """
    n_rows = len(lcs)
    for b in bands:
        mags_in = lcs['{}_psfMag'.format(b)]
        times_in = lcs['{}_expMidptMJD'.format(b)]

        # Fill object arrays element by element so that each cell holds one array,
        # whatever the lengths of the individual light curves are
        mags_out = np.empty(n_rows, dtype=object)
        times_out = np.empty(n_rows, dtype=object)
        for i, (mags, times) in enumerate(zip(mags_in, times_in)):
            mags = np.asarray(mags)
            times = np.asarray(times)
            # The mask comes from the magnitudes and is applied to BOTH arrays;
            # the times must not be derived from the magnitude column
            keep = ~np.isnan(mags)
            mags_out[i] = mags[keep]
            times_out[i] = times[keep]

        lcs['removed_nans_{}_psfMag'.format(b)] = mags_out
        lcs['removed_nans_{}_expMidptMJD'.format(b)] = times_out
    return lcs

In [68]:
# lightcurve_nans2zero adds its columns to `lcs` itself and returns that same object,
# so `lcs_nan2zero` is another name for `lcs`, not an independent copy
lcs_nan2zero = lightcurve_nans2zero(lcs, bands)

In [80]:
# lightcurve_remove_nans also writes into `lcs` and returns it,
# so `lcs_removed_nans` refers to the same DataFrame as `lcs`
lcs_removed_nans = lightcurve_remove_nans(lcs, bands)

In [87]:
def lightcurve_remove_nans_instructor_solution(lcs, bands, mag_col, time_col):
    """Add NaN-free magnitude and time columns for every band.

    For each band ``b`` the columns ``'<b>_<mag_col>'`` and ``'<b>_<time_col>'`` are
    read row by row; observations whose magnitude is NaN are dropped from both arrays
    and the results are stored in ``'<b>_<mag_col>_cleaned'`` and
    ``'<b>_<time_col>_cleaned'``.

    Args:
        lcs: DataFrame with one array per row in each band's magnitude and time column.
        bands: Iterable of band names (a string is iterated character by character).
        mag_col: Name of the magnitude column without the band prefix.
        time_col: Name of the time column without the band prefix.

    Returns:
        The same DataFrame object that was passed in; it is modified in place.
    """
    for band in bands:
        mag_name = band + "_" + mag_col
        time_name = band + "_" + time_col
        cleaned_names = [mag_name + "_cleaned", time_name + "_cleaned"]

        def drop_nan_pairs(row, mag_name=mag_name, time_name=time_name):
            # The mask is True for the observations to keep (magnitude is not NaN)
            keep = ~np.isnan(row[mag_name])
            return (row[mag_name][keep], row[time_name][keep])

        # `axis=1` hands one row at a time to the helper; `result_type="expand"`
        # spreads the returned 2-tuple over two columns, one per cleaned array
        lcs[cleaned_names] = lcs.apply(
            drop_nan_pairs,
            axis=1,
            result_type="expand",
        )
    return lcs

In [85]:
# Compare array lengths for the first object's u band: the raw columns, the NaN->0 column
# and the NaN-removed columns. `cleaned_u_psfMag` is reachable through `lcs_removed_nans`
# because both cleaning functions add their columns to the same `lcs` DataFrame and return it.
print(len(lcs_removed_nans['u_psfMag'][0]))
print(len(lcs_removed_nans['u_expMidptMJD'][0]))
print(len(lcs_removed_nans['cleaned_u_psfMag'][0]))
print(len(lcs_removed_nans['removed_nans_u_psfMag'][0]))
print(len(lcs_removed_nans['removed_nans_u_expMidptMJD'][0]))


39
39
39
24
24


### Practice Reduce

In [93]:
# The integers 1, 2, 3, 4 (the stop value 5 is excluded)
sequence = np.arange(1, 5)

In [94]:
# Fold the sequence from left to right with addition: ((1 + 2) + 3) + 4
reduce((lambda a, b : a + b), sequence)

10

### Time performance decorator

In [124]:
def time_performance(func):
    """Decorator that prints how much CPU time each call of `func` consumed.

    The measurement uses ``time.process_time_ns``, i.e. the CPU time of the current
    process; time spent sleeping is not included.

    Args:
        func: The callable to measure.

    Returns:
        A wrapper that forwards all positional and keyword arguments to `func`,
        prints ``"Took <seconds> seconds"`` and returns `func`'s result unchanged.
        The wrapper keeps `func`'s name and docstring.
    """
    # Imported locally: the notebook's import cell only brings in `reduce` from functools
    from functools import wraps

    @wraps(func)  # preserve __name__, __doc__, etc. of the decorated function
    def inner(*args, **kwargs):
        start = time.process_time_ns()
        result = func(*args, **kwargs)
        # Nanoseconds -> seconds
        time_took = (time.process_time_ns() - start)/1e9

        print("Took {} seconds".format(time_took))
        return result

    return inner

In [125]:
@time_performance
def measure_me(n):
    """Return 0**2 + 1**2 + ... + (n - 1)**2; used as a small workload for timing."""
    return sum(i * i for i in range(n))

In [126]:

# The decorator prints the timing line first; the cell value is the sum of squares of 0..9
measure_me(10)

Took 7e-06 seconds


285

In [164]:
class Variable:
    """An observed variable object, identified by `obj_id`, with one light curve.

    The light curve is kept in `self.lc`, a dict of numpy arrays stored under the
    keys 'mjd' and 'mag', plus 'mag_errs' once magnitude errors have been supplied.
    """

    def __init__(self, obj_id):
        """Create an object with an empty light curve.

        Args:
          obj_id: Identifier of the object; returned as a string by `str()`.
        """
        self.obj_id = obj_id
        # Start with empty arrays so that len() works before any data is added
        self.lc = {
            'mjd': np.array([]),
            'mag': np.array([]),
        }

    def __str__(self):
        return str(self.obj_id)

    def add_observations(self, mjds, mags, mag_errs=None):
        """
        Sets the observations of the light curve, replacing any stored ones.

        All inputs are converted and their lengths compared BEFORE anything is
        stored, so a rejected call leaves the existing light curve untouched.

        Args:
          mjds: Modified Julian Dates (x values); array-like, int or float.
          mags: Magnitudes (y values); array-like, int or float.
          mag_errs: Optional magnitude errors; array-like, int or float. When it is
            None, an already stored 'mag_errs' entry is left as it is.

        Raises:
          ValueError: If an input has an unsupported type or the inputs do not all
            have the same length.
        """
        # np.array(...) copies, so the stored arrays never alias the caller's data
        new_lc = {
            'mjd': np.array(self.convert_to_array(mjds)),
            'mag': np.array(self.convert_to_array(mags)),
        }
        if mag_errs is not None:
            new_lc['mag_errs'] = np.array(self.convert_to_array(mag_errs))

        n_obs = len(new_lc['mjd'])
        if any(len(values) != n_obs for values in new_lc.values()):
            raise ValueError("All observational arrays must have the same length")

        self.lc.update(new_lc)

        return

    def __len__(self):
        """
        Returns length of lightcurve
        """
        return len(self.lc['mjd'])

    def convert_to_array(self, var):
        """Return `var` as a numpy array with at least one dimension.

        Args:
          var: A numpy array, list, tuple, pandas Series, or a single int/float
            (Python or numpy scalar).

        Returns:
          A numpy array; scalars and 0-d arrays become arrays of length 1.

        Raises:
          ValueError: If `var` is of any other type.
        """
        if isinstance(var, np.ndarray):
            # A 0-d array has no len(); promote it to a one-element array
            return np.atleast_1d(var)
        if isinstance(var, (list, tuple, pd.Series)):
            return np.array(var)
        if isinstance(var, (int, float, np.integer, np.floating)):
            return np.array([var])
        raise ValueError("Data must be array-like, int, or float")

    def compare_len(self, arrays):
        """Check that every array in `arrays` is as long as the stored light curve.

        Args:
          arrays: Iterable of sized objects to compare against `len(self)`.

        Raises:
          ValueError: If any array's length differs from `len(self)`.
        """
        expected = len(self)
        for array in arrays:
            if len(array) != expected:
                raise ValueError("All observational arrays must have the same length")

    @property
    def mean_mag(self):
        """Mean of the stored magnitudes, computed with `np.mean` (NaNs propagate)."""
        return np.mean(self.lc['mag'])

In [165]:
# Pick one object (position 7 in the order returned by unique()) and one band to try the class on.
# Note: `obj_id`, `b` and `filt_band_obj` reuse names that the light-curve building loop also assigns.
obj_id = lc_datasets['lsst']['objectId'].unique()[7]
b = 'g'
# Boolean row mask: observations of this object taken in this band
filt_band_obj = (lc_datasets['lsst']['objectId'] == obj_id) & (
        lc_datasets['lsst']['band'] == b
    )
obj_obs = lc_datasets['lsst'][filt_band_obj]
star = Variable(obj_id)
# The selected columns are passed as they are; add_observations stores them as numpy arrays
star.add_observations(obj_obs[time_col],obj_obs[mag_col])
# Number of stored observations (the same value that len(star) returns)
print(star.__len__())

44


In [161]:
# Raises if any given array's length differs from len(star); produces no output when they agree
star.compare_len([star.lc['mjd'], star.lc['mag']])

In [145]:
# Check whether the selected magnitude column is a pandas Series.
# NOTE(review): the recorded output is False, which is unexpected if `obj_obs` is a DataFrame —
# possibly stale output from an earlier, out-of-order run; re-run on a fresh kernel to confirm.
isinstance(obj_obs[mag_col], pd.core.series.Series)

False

### Practicing Object Oriented Programming

In [174]:
# Reload lcanalyzer.survey so that edits made to the module since it was first imported take effect
import importlib
importlib.reload(survey)

<module 'lcanalyzer.survey' from '/Users/nsabrams/Documents/RubinScience/IntermediatePython/InterPython_Workshop_Example/lcanalyzer/survey.py'>

In [175]:
# Build a Survey from the same pickle file that is loaded into `lc_datasets['lsst']`
# (presumably Survey reads the file itself; verify against lcanalyzer.survey)
survey_test = survey.Survey('data/lsst_RRLyr.pkl')

In [176]:
# Request the 'u' light curve of one object; the recorded output is a dict with 'mjd' and 'mag' arrays
survey_test.get_lc(1251384969897480052, 'u')

{'mjd': array([60582.2471442, 60261.0782212, 61322.1785842, 60582.2776822,
        60261.0777762, 60731.0440582, 60613.1148232, 60673.1041662,
        60582.2695892, 61321.1909252, 59904.0842892, 60613.1327722,
        61321.1752202, 61321.1747752, 60998.0695872, 61297.2892992,
        61322.1869182, 60585.2017922, 60970.1382022, 60261.0786692,
        60557.2808162, 61322.1790312, 60998.0700362, 60970.1579432,
        59819.2946332, 61297.2830082, 59874.2225572, 59818.3869242,
        60970.1583922, 59851.2883982, 59584.1123332, 61297.2834572,
        60702.1126702, 60585.2004502, 61322.1864712, 60200.2522912,
        60998.0686642, 61380.1195292, 60731.0534842]),
 'mag': array([25.7412109 , 24.94350022,         nan,         nan, 25.5851343 ,
                nan, 27.54439979, 25.06830723, 27.25449578,         nan,
                nan, 28.2117262 , 25.42959483, 25.01922754, 25.46825047,
                nan, 25.1103313 , 25.42262994,         nan,         nan,
        25.40161976,       

In [177]:
# Display the table held by the Survey object (the recorded output shows the per-observation columns)
survey_test.data

Unnamed: 0,band,ccdVisitId,coord_ra,coord_dec,objectId,psfFlux,psfFluxErr,psfMag,ccdVisitId2,band2,expMidptMJD,zeroPoint
0,y,1032263018,62.462569,-44.113360,1251384969897480052,-515.183603,1697.218490,,1032263018,y,61100.069706,30.602301
1,y,1033987172,62.462569,-44.113360,1251384969897480052,3151.738459,1686.955775,22.653625,1033987172,y,61102.068464,30.606100
2,u,675163080,62.462569,-44.113360,1251384969897480052,183.449123,209.242045,25.741211,675163080,u,60582.247144,30.469101
3,y,443055067,62.462569,-44.113360,1251384969897480052,-704.848327,1624.400086,,443055067,y,60215.203585,30.612801
4,u,466722002,62.462569,-44.113360,1251384969897480052,382.472233,278.926670,24.943500,466722002,u,60261.078221,30.461201
...,...,...,...,...,...,...,...,...,...,...,...,...
11172,i,428582024,66.267960,-27.741765,2185793931646635508,544023.409292,922.343038,17.060956,428582024,i,60193.365488,31.846800
11173,i,1173720158,66.267960,-27.741765,2185793931646635508,393272.358975,768.388803,17.413266,1173720158,i,61305.324734,31.849400
11174,i,1173719142,66.267960,-27.741765,2185793931646635508,342286.052877,720.070098,17.564027,1173719142,i,61305.324286,31.846701
11175,i,238057099,66.267960,-27.741765,2185793931646635508,547265.490786,910.999082,17.054505,238057099,i,59908.119769,31.843300
