In this notebook we will load and model a single light curve, and write a new function to compute summary statistics for it.

In [33]:
import pandas as pd
import pandas.testing as pdt
import numpy as np
import lcanalyzer.models as models

### Params

In [2]:
# Photometric band names; iterating the string yields one band per character
bands = "ugrizy"

# Name of the column that holds the magnitudes
colname_mag = "psfMag"

### Load in data

In [3]:
# Datasets keyed by name; only the 'lsst' table is loaded in this notebook.
# NOTE(review): unpickling can execute arbitrary code -- only read pickle
# files that come from a trusted source.
lc_datasets = {'lsst': pd.read_pickle('data/lsst_RRLyr.pkl')}

### Inspect data

In [4]:
# Summarise the 'lsst' table: column names, dtypes and non-null counts.
# In the output below psfMag is the only column with missing values.
lc_datasets['lsst'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11177 entries, 0 to 11176
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   band         11177 non-null  object 
 1   ccdVisitId   11177 non-null  int64  
 2   coord_ra     11177 non-null  float64
 3   coord_dec    11177 non-null  float64
 4   objectId     11177 non-null  int64  
 5   psfFlux      11177 non-null  float64
 6   psfFluxErr   11177 non-null  float64
 7   psfMag       10944 non-null  float64
 8   ccdVisitId2  11177 non-null  int64  
 9   band2        11177 non-null  object 
 10  expMidptMJD  11177 non-null  float64
 11  zeroPoint    11177 non-null  float32
dtypes: float32(1), float64(6), int64(3), object(2)
memory usage: 1004.3+ KB


In [5]:
# Preview the first rows of the 'lsst' table
lc_datasets['lsst'].head()

Unnamed: 0,band,ccdVisitId,coord_ra,coord_dec,objectId,psfFlux,psfFluxErr,psfMag,ccdVisitId2,band2,expMidptMJD,zeroPoint
0,y,1032263018,62.462569,-44.11336,1251384969897480052,-515.183603,1697.21849,,1032263018,y,61100.069706,30.602301
1,y,1033987172,62.462569,-44.11336,1251384969897480052,3151.738459,1686.955775,22.653625,1033987172,y,61102.068464,30.6061
2,u,675163080,62.462569,-44.11336,1251384969897480052,183.449123,209.242045,25.741211,675163080,u,60582.247144,30.469101
3,y,443055067,62.462569,-44.11336,1251384969897480052,-704.848327,1624.400086,,443055067,y,60215.203585,30.612801
4,u,466722002,62.462569,-44.11336,1251384969897480052,382.472233,278.92667,24.9435,466722002,u,60261.078221,30.461201


### Select and prep single lightcurve

In [6]:
### Pick an object
# Take the entry at position 4 of the distinct objectId values.
# NOTE(review): the index 4 looks like an arbitrary choice -- confirm, and
# consider moving it to the params cell.
obj_id = lc_datasets['lsst']['objectId'].unique()[4]

In [7]:
### Split the selected object's observations by band
# Result: dict mapping each band name to the matching rows of the 'lsst' table
lc = {}
for b in bands:
    # Row mask: the observation belongs to obj_id AND was taken in band b
    filt_band_obj = lc_datasets['lsst']['objectId'].eq(obj_id) & lc_datasets['lsst']['band'].eq(b)
    # Keep only the rows flagged by the mask
    lc[b] = lc_datasets['lsst'].loc[filt_band_obj]

### Finding lightcurve params

In [8]:
# Evaluate models.max_mag on the g-band observations of the selected object,
# using the magnitude column chosen in the params cell
models.max_mag(lc['g'], colname_mag)

19.183367224358136

### New calc stats function

In [36]:
def calc_stats(lc, bands, mag_col):
    """Calculate max, mean and min values for all bands of a light curve.

    Parameters
    ----------
    lc : mapping
        Maps each band name to the observations in that band; each value is
        handed unchanged to the ``models`` helpers.
    bands : iterable
        Band names to summarise. Every name must be a key of ``lc``.
    mag_col : str
        Column name passed to the ``models`` helpers.

    Returns
    -------
    pandas.DataFrame
        One column per band, in the order given by ``bands``, with the rows
        ``"max"``, ``"mean"`` and ``"min"``.

    Raises
    ------
    KeyError
        If a name in ``bands`` is not a key of ``lc``.
    """
    stats = {}
    for b in bands:
        stats[b] = {
            "max": models.max_mag(lc[b], mag_col),
            "mean": models.mean_mag(lc[b], mag_col),
            "min": models.min_mag(lc[b], mag_col),
        }
    # Use the DataFrame constructor rather than ``from_records``: given a dict,
    # ``from_records`` sorts the keys, so the columns would come back in
    # alphabetical order instead of the order of ``bands``.
    return pd.DataFrame(stats)

In [22]:
# Shape shared by the three toy frames: 4 rows x 3 columns
size = (4, 3)

# Re-seed the global NumPy generator before each draw so that every frame
# is reproducible on its own
np.random.seed(0)
df1 = pd.DataFrame(np.random.randint(low=0, high=10, size=size), columns=["a", "b", "c"])
np.random.seed(1)
df2 = pd.DataFrame(np.random.randint(low=0, high=10, size=size), columns=["a", "b", "c"])
np.random.seed(2)
df3 = pd.DataFrame(np.random.randint(low=0, high=10, size=size), columns=["a", "b", "c"])

# A single newline-separated print emits the same text as printing each
# frame in turn
print(df1, df2, df3, sep="\n")

   a  b  c
0  5  0  3
1  3  7  9
2  3  5  2
3  4  7  6
   a  b  c
0  5  8  9
1  5  0  0
2  1  7  6
3  9  2  4
   a  b  c
0  8  8  6
1  2  8  7
2  2  1  5
3  4  4  5


In [29]:
# Toy stand-in for a light curve: each frame plays the role of one band
test_input = dict(df1=df1, df2=df2, df3=df3)

In [31]:
# Expected statistics of column "b" in each toy frame, worked out by hand:
# one column per frame, one row per statistic
test_output = pd.DataFrame(
    {
        'df1': [7, 4.75, 0],
        'df2': [8, 4.25, 0],
        'df3': [8, 5.25, 1],
    },
    index=['max', 'mean', 'min'],
)

In [37]:
# Check calc_stats against the hand-computed table; values are compared
# approximately (absolute tolerance 0.01) rather than exactly
pdt.assert_frame_equal(
    left=calc_stats(test_input, ["df1", "df2", "df3"], "b"),
    right=test_output,
    atol=0.01,
    check_exact=False,
)
                       