In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

import healpy as hp
from scipy.spatial import ConvexHull

import lsdb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import astropy.units as u
import glob as glob

import lsst.daf.butler as dafButler
from lsst.analysis.ap import apdb
from lsst.ap.association import AssociationTask, AssociationConfig
from lsst.dax.apdb import Apdb, ApdbCassandra, ApdbTables
import lsst.geom as geom
from lsst.afw import image as afwImage

from mpl_toolkits.axes_grid1 import make_axes_locatable
from astropy.visualization import ZScaleInterval, SqrtStretch, ImageNormalize, ManualInterval, AsinhStretch, MinMaxInterval, LogStretch
from scipy.optimize import curve_fit

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

from typing import TYPE_CHECKING, cast
from pathlib import Path

import astropy.units as u
from astropy.coordinates import SkyCoord

plt.set_loglevel('WARNING')

# Loading Vizier and functions

In [None]:

# Directory that holds the exported catalogue files
base_dir = Path("/sdf/home/n/ncaplar")

# Files to read from that directory
filenames = ["m49_period.tsv"]

# Names given to the pipe-separated columns, in file order
column_names = [
    '_RAJ2000', '_DEJ2000', 'Source', 'PF', 'P1O',
    'Gmagavg', 'R21G', 'R31G', 'phi21G','phi31G', 'FundFreq1', 'FundFreq2', 'Class'
]

# Parsed tables, keyed by the filename without its ".tsv" suffix
dfs = {}

for file in filenames:
    file_path = base_dir / file

    # Index of the first line that starts with '_RAJ2000|' (the table header),
    # or None when the file contains no such line
    with open(file_path, "r") as f:
        start_idx = next(
            (i for i, line in enumerate(f) if line.strip().startswith('_RAJ2000|')),
            None,
        )

    if start_idx is None:
        raise ValueError(f"Could not find table header in {file}")

    # Data rows begin three lines after the header line
    # (header, units and dashed separator are skipped)
    data_start = start_idx + 3

    df = pd.read_csv(
        file_path,
        sep="|",
        skiprows=data_start,
        names=column_names,
        engine="python",
    )
    df.columns = df.columns.str.strip()

    dfs[file.replace(".tsv", "")] = df

# Handle for the single table loaded by this cell
df_m49 = dfs["m49_period"]

# Report the number of rows read from each file
for name, df in dfs.items():
    print(f"{name}: {len(df)} rows loaded.")

In [None]:
import pandas as pd
from pathlib import Path

# Directory that holds the exported catalogue files
base_dir = Path("/sdf/home/n/ncaplar")

# Files to read from that directory
filenames = ["212_m7.tsv", "216_m17.tsv", "m49.tsv"]

# Names given to the pipe-separated columns, in file order
column_names = [
    '_RAJ2000', '_DEJ2000', 'Source', 'PF', 'P1O',
    'Gmagavg', 'RA_ICRS', 'DE_ICRS'
]

# Parsed tables, keyed by the filename without its ".tsv" suffix (e.g. '212_m7')
dfs = {}

for file in filenames:
    file_path = base_dir / file

    # Index of the first line that starts with '_RAJ2000|' (the table header),
    # or None when the file contains no such line
    with open(file_path, "r") as f:
        start_idx = next(
            (i for i, line in enumerate(f) if line.strip().startswith('_RAJ2000|')),
            None,
        )

    if start_idx is None:
        raise ValueError(f"Could not find table header in {file}")

    # Data rows begin three lines after the header line
    # (header, units and dashed separator are skipped)
    data_start = start_idx + 3

    df = pd.read_csv(
        file_path,
        sep="|",
        skiprows=data_start,
        names=column_names,
        engine="python",
    )
    df.columns = df.columns.str.strip()

    dfs[file.replace(".tsv", "")] = df

# One handle per table.
# NOTE(review): the previous cell bound `df_m49` to the "m49_period" table;
# this line rebinds it to the "m49" table - confirm which one is intended downstream.
df_212_m7 = dfs["212_m7"]
df_216_m17 = dfs["216_m17"]
df_m49 = dfs["m49"]

# Report the number of rows read from each file
for name, df in dfs.items():
    print(f"{name}: {len(df)} rows loaded.")

In [None]:
df_m49 

In [None]:

# Inputs: time (MJD), mag_obs, mag_err
# These must be numpy arrays of same length
def fit_constrained_fourier(time, mag_obs, mag_err, period, R21, R31, phi21, phi31):
    """Fit a three-harmonic cosine series whose harmonic ratios are held fixed.

    The model is ``A0 + sum_k A_k * cos(2*pi*k*phase + phi_k)`` for k = 1, 2, 3
    with ``A2 = R21 * A1``, ``A3 = R31 * A1``, ``phi2 = 2*phi1 + phi21`` and
    ``phi3 = 3*phi1 + phi31``, so only ``A0``, ``A1`` and ``phi1`` are fitted.

    Parameters
    ----------
    time, mag_obs, mag_err : array-like
        Observation times, observed magnitudes and their uncertainties
        (expected to have the same length).
    period : `float`
        Period used to fold ``time`` into a phase in [0, 1).
    R21, R31 : `float`
        Fixed amplitude ratios of the second and third harmonic to the first.
    phi21, phi31 : `float`
        Fixed phase offsets of the second and third harmonic.

    Returns
    -------
    popt : array
        Best-fit ``[A0, A1, phi1]`` from `scipy.optimize.curve_fit`.
    model_function : callable
        Evaluates the fitted model at a given phase (scalar or array).
    """
    folded_phase = (time % period) / period

    def constrained_model(phase, A0, A1, phi1):
        """Evaluate the constrained series at ``phase`` for the given free parameters."""
        # (harmonic number, amplitude, phase offset) for each term of the series
        harmonics = (
            (1, A1, phi1),
            (2, R21 * A1, 2 * phi1 + phi21),
            (3, R31 * A1, 3 * phi1 + phi31),
        )
        total = A0
        for k, amplitude, offset in harmonics:
            total = total + amplitude * np.cos(2 * np.pi * k * phase + offset)
        return total

    # Starting point: median magnitude, fixed first-harmonic amplitude guess, zero phase
    initial_guess = [np.median(mag_obs), 0.3, 0.0]

    popt, _ = curve_fit(
        constrained_model,
        folded_phase,
        mag_obs,
        p0=initial_guess,
        sigma=mag_err,
        absolute_sigma=False,
        maxfev=10000,
    )

    def model_function(ph):
        """Evaluate the best-fit model at phase ``ph``."""
        return constrained_model(ph, *popt)

    return popt, model_function

In [None]:
def create_mag_errors(sciFlux, sciFluxErr):
    """Convert a flux and its error into an AB magnitude and a magnitude error.

    The magnitude error is half the difference between the magnitudes of
    ``sciFlux - sciFluxErr`` and ``sciFlux + sciFluxErr``, with a floor of 0.001.

    Parameters
    ----------
    sciFlux : `float` or array-like
        Science flux, interpreted as nJy by the unit conversion.
    sciFluxErr : `float` or array-like
        Science flux error, in the same unit.

    Returns
    -------
    mag, magErr : `float` or array-like
        Magnitude and magnitude error.
    """
    def _to_abmag(flux):
        # nJy -> AB magnitude through astropy's unit conversion
        return u.nJy.to(u.ABmag, flux)

    mag = _to_abmag(sciFlux)
    upper_mag = _to_abmag(sciFlux + sciFluxErr)
    lower_mag = _to_abmag(sciFlux - sciFluxErr)

    # Half the magnitude span between flux+err and flux-err, floored at 0.001
    magErr = np.maximum(-(upper_mag - lower_mag) / 2, 0.001)

    return mag, magErr

def create_mag(sciFlux):
    """Convert a flux into an AB magnitude.

    Parameters
    ----------
    sciFlux : `float`
        Science flux, interpreted as nJy by the unit conversion.

    Returns
    -------
    mag : `float`
        Magnitude.
    """
    return u.nJy.to(u.ABmag, sciFlux)

# LSST butler

In [None]:
# Butler on the d_2025_04_23 DRP collection.
# NOTE(review): the next cell rebinds `repo`, `collection`, `instrument` and `butler`.
repo, instrument = "embargo", "LSSTCam"
collection = "LSSTCam/runs/DRP/20250415_20250422/d_2025_04_23/DM-50409"

butler = dafButler.Butler(repo, collections=collection, instrument=instrument)

In [None]:
# Shared repository and instrument for both butlers
repo, instrument = "embargo", "LSSTCam"

# One collection per processing run: w_2025_18 and w_2025_19
collection = "LSSTCam/runs/DRP/20250420_20250429/w_2025_18/DM-50628"
collection_w19 = "LSSTCam/runs/DRP/FL/w_2025_19/DM-50795"

butler = dafButler.Butler(repo, collections=collection, instrument=instrument)
butler_w19 = dafButler.Butler(repo, collections=collection_w19, instrument=instrument)

In [None]:
# Dataset references available through `butler`
obj_refs = butler.query_datasets("object")
diaobj_refs = butler.query_datasets("dia_object")
lc_refs = butler.query_datasets("dia_source")

# The same DIA dataset types through `butler_w19`
diaobj_refs_w19 = butler_w19.query_datasets("dia_object")
lc_refs_w19 = butler_w19.query_datasets("dia_source")

# Number of references returned by each query, in the order they were issued
for refs in (obj_refs, diaobj_refs, lc_refs, diaobj_refs_w19, lc_refs_w19):
    print(len(refs))

In [None]:
"""
bands = ["u", "g", "r", "i", "z", "y"]
verbose = False

for band in bands:
    try:
        datasetRefs_dia = list(
            butler.query_datasets(
                "dia_source_detector",
                where=f"band='{band}'",
                limit=None  # Remove dataset limit
            )
        )
    except Exception as e:
        print(f"Error querying band '{band}': {e}")
        datasetRefs_dia = []

    if verbose and datasetRefs_dia:
        print(f"\nDataset references for band '{band}':")
        for dr in datasetRefs_dia:
            print(dr)

    print(f"Found {len(datasetRefs_dia)} dia_source_detector datasets for band '{band}'")
"""

In [None]:
"""

# Initialize list to hold object DataFrames
object_list = []

# Loop over tracts via obj_refs
for obj_ref in tqdm(obj_refs):
    try:
        # Load full object table for this tract
        table = butler.get("object", dataId=obj_ref.dataId, parameters={'columns': ['objectId', 'tract', 'patch',
                                                                                    'u_ra', 'u_dec', 'u_psfFlux', 'u_psfFluxErr',
                                                                                    'g_ra', 'g_dec', 'g_psfFlux', 'g_psfFluxErr',
                                                                                    'r_ra', 'r_dec', 'r_psfFlux', 'r_psfFluxErr',
                                                                                    'i_ra', 'i_dec', 'i_psfFlux', 'i_psfFluxErr']})
        # table = butler.get("object", dataId=obj_ref.dataId)
        df = table.to_pandas()
        object_list.append(df)
    except FileNotFoundError:
        print(f"Warning: objectTable_tract not found for tract {obj_ref.dataId['tract']}. Skipping.")

# Concatenate into one DataFrame
objects = pd.concat(object_list, ignore_index=True) if object_list else pd.DataFrame()

"""

In [None]:
"""
objects['mean_ra'] = objects[[f'{b}_ra' for b in ['u', 'g', 'r', 'i']]].mean(axis=1, skipna=True)
objects['mean_dec'] = objects[[f'{b}_dec' for b in ['u', 'g', 'r', 'i']]].mean(axis=1, skipna=True)
objects
"""

In [None]:
# One dia_object table per dataset reference
diaobject_list = []

# Loop over the dia_object references returned by the butler query
for diaobj_ref in tqdm(diaobj_refs):
    try:
        table = butler.get("dia_object", dataId=diaobj_ref.dataId)
    except FileNotFoundError:
        # Report the reference that failed. The tract comes from the reference's
        # own dataId (the accumulator list has no `dataId` attribute).
        print(f"Warning: dia_object not found for tract {diaobj_ref.dataId['tract']}. Skipping.")
        continue
    diaobject_list.append(table)

# Concatenate into one DataFrame, keeping each table's own index
diaobjects = pd.concat(diaobject_list, ignore_index=False) if diaobject_list else pd.DataFrame()

In [None]:
# One dia_object table per w19 dataset reference
diaobject_list_w19 = []

# Loop over the dia_object references returned by the w19 butler query
for diaobj_ref in tqdm(diaobj_refs_w19):
    try:
        table = butler_w19.get("dia_object", dataId=diaobj_ref.dataId)
    except FileNotFoundError:
        # Report the reference that failed. The tract comes from the reference's
        # own dataId (the accumulator list has no `dataId` attribute).
        print(f"Warning: dia_object not found for tract {diaobj_ref.dataId['tract']}. Skipping.")
        continue
    diaobject_list_w19.append(table)

# Concatenate into one DataFrame, keeping each table's own index
diaobjects_w19 = pd.concat(diaobject_list_w19, ignore_index=False) if diaobject_list_w19 else pd.DataFrame()

In [None]:
diaobjects

In [None]:
diaobjects_w19

In [None]:
#plt.scatter(diaobjects['ra'], diaobjects['dec'], s=1, alpha=0.5)

In [None]:
# objects_m49 = objects[objects['mean_ra'].between(180, 192) & objects['mean_dec'].between(1, 13)]

# Rows with ra in [180, 192] and dec in [1, 13]; the same box is applied to both runs
in_box = diaobjects['ra'].between(180, 192) & diaobjects['dec'].between(1, 13)
diaobjects_m49 = diaobjects[in_box]

in_box_w19 = diaobjects_w19['ra'].between(180, 192) & diaobjects_w19['dec'].between(1, 13)
diaobjects_m49_w19 = diaobjects_w19[in_box_w19]

In [None]:
# Work on independent copies and expose the index as a 'diaObjectId' column
diaobjects_m49 = diaobjects_m49.assign(diaObjectId=diaobjects_m49.index)
diaobjects_m49_w19 = diaobjects_m49_w19.assign(diaObjectId=diaobjects_m49_w19.index)

# Give the reference catalogue 'ra'/'dec' columns copied from its J2000 coordinates
for new_col, source_col in (('ra', '_RAJ2000'), ('dec', '_DEJ2000')):
    df_m49[new_col] = df_m49[source_col]

In [None]:
# Right-hand tables: position and id of each DIA object, with a fresh integer index
dia_positions = diaobjects_m49[['ra', 'dec', 'diaObjectId']].reset_index(drop=True)
dia_positions_w19 = diaobjects_m49_w19[['ra', 'dec', 'diaObjectId']].reset_index(drop=True)

# w18: only coordinates and periods of the reference catalogue are carried along
RR_LSST = lsdb.crossmatch(
    df_m49[['ra', 'dec', 'PF','P1O']],
    dia_positions,
    radius_arcsec=0.10,
)
RR_LSST_computed = RR_LSST.compute()

# w19: the full reference catalogue is carried along
RR_LSST_w19 = lsdb.crossmatch(df_m49, dia_positions_w19, radius_arcsec=0.10)
RR_LSST_computed_w19 = RR_LSST_w19.compute()

In [None]:
RR_LSST_computed.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed.parquet")
RR_LSST_computed_w19.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed_w19.parquet")

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(8, 2.5), sharey=True)

# One panel per run: (axis, crossmatch table, panel title)
panels = zip(
    axes,
    (RR_LSST_computed, RR_LSST_computed_w19),
    ('RR_LSST w18', 'RR_LSST w19'),
)
for ax, matches, title in panels:
    # Histogram of the crossmatch separation column
    ax.hist(matches['_dist_arcsec'], bins=30)
    ax.set_title(title)
    ax.set_xlabel('Dist (arcsec)')

plt.tight_layout()
plt.show()

In [None]:
# Somewhat redundant: the object table already carries tract/patch information

# Fetch the skymap once
skymap = butler.get("skyMap", skymap="lsst_cells_v1")

# Tract id and patch index ("x,y" string) for every matched position
tracts, patches = [], []
for ra, dec in zip(RR_LSST_computed["ra_left"], RR_LSST_computed["dec_left"]):
    radec = geom.SpherePoint(ra, dec, geom.degrees)
    tractInfo = skymap.findTract(radec)
    pt_idx = tractInfo.findPatch(radec).getIndex()  # indexable pair like (x, y)

    tracts.append(tractInfo.getId())
    patches.append(f"{pt_idx[0]},{pt_idx[1]}")

# Store the results as new columns of the crossmatch table
RR_LSST_computed["tract"] = tracts
RR_LSST_computed["patch"] = patches

In [None]:
# Somewhat redundant: the object table already carries tract/patch information

# Fetch the skymap once
skymap = butler.get("skyMap", skymap="lsst_cells_v1")

# Tract id and patch index ("x,y" string) for every matched w19 position
tracts, patches = [], []
for ra, dec in zip(RR_LSST_computed_w19["ra_left"], RR_LSST_computed_w19["dec_left"]):
    radec = geom.SpherePoint(ra, dec, geom.degrees)
    tractInfo = skymap.findTract(radec)
    pt_idx = tractInfo.findPatch(radec).getIndex()  # indexable pair like (x, y)

    tracts.append(tractInfo.getId())
    patches.append(f"{pt_idx[0]},{pt_idx[1]}")

# Store the results as new columns of the w19 crossmatch table
RR_LSST_computed_w19["tract"] = tracts
RR_LSST_computed_w19["patch"] = patches

In [None]:
# list of unique tract values
RR_LSST_computed['tract'].unique()

In [None]:
RR_LSST_computed_w19['tract'].unique()

In [None]:
RR_LSST_computed

In [None]:
RR_LSST_computed_w19

In [None]:
# Tracts that contain at least one crossmatched object
tract_ids = RR_LSST_computed['tract'].unique()
tract_list_str = ", ".join(map(str, tract_ids))

# Restrict the forced-source query to those tracts
where_clause = f"tract IN ({tract_list_str}) AND skymap='lsst_cells_v1'"
forcedSource_refs = butler.query_datasets("dia_object_forced_source", where=where_clause)

print(len(forcedSource_refs))

In [None]:
# Tracts that contain at least one crossmatched w19 object
tract_ids = RR_LSST_computed_w19['tract'].unique()
tract_list_str = ", ".join(map(str, tract_ids))

# Restrict the w19 forced-source query to those tracts
where_clause = f"tract IN ({tract_list_str}) AND skymap='lsst_cells_v1'"
forcedSource_refs_w19 = butler_w19.query_datasets("dia_object_forced_source", where=where_clause)

print(len(forcedSource_refs_w19))

In [None]:
# rerun this cell with the new data and butler 
"""
# Step 1: Prepare your list of target objectIds
target_objectIds = set(RR_LSST_computed['diaObjectId_right'].values)

# Step 3: Loop through datasets and filter by objectId
matched_forced_sources = []

for ref in tqdm(forcedSource_refs):
    try:
        table = butler.get(ref)  # Astropy table
        df = table.to_pandas()

        # Filter only the objectIds you're interested in
        df_filtered = df[df["diaObjectId"].isin(target_objectIds)]
        if not df_filtered.empty:
            matched_forced_sources.append(df_filtered)

    except Exception as e:
        print(f"Warning: Failed to load {ref.dataId} — {e}")

# Step 4: Combine all matched rows into one DataFrame
if matched_forced_sources:
    all_forced_sources = pd.concat(matched_forced_sources, ignore_index=True)
    print(f"Retrieved {len(all_forced_sources)} forced source rows for {len(target_objectIds)} objectIds.")
else:
    all_forced_sources = pd.DataFrame()
    print("No forced sources found.")
"""

In [None]:
"""
# Step 1: Prepare your list of target objectIds
target_objectIds = set(RR_LSST_computed_w19['diaObjectId_right'].values)

# Step 3: Loop through datasets and filter by objectId
matched_forced_sources = []

for ref in tqdm(forcedSource_refs_w19):
    try:
        table = butler_w19.get(ref)  # Astropy table
        df = table.to_pandas()

        # Filter only the objectIds you're interested in
        df_filtered = df[df["diaObjectId"].isin(target_objectIds)]
        if not df_filtered.empty:
            matched_forced_sources.append(df_filtered)

    except Exception as e:
        print(f"Warning: Failed to load {ref.dataId} — {e}")

# Step 4: Combine all matched rows into one DataFrame
if matched_forced_sources:
    all_forced_sources_w19 = pd.concat(matched_forced_sources, ignore_index=True)
    # Report the size of the w19 table built on the line above
    print(f"Retrieved {len(all_forced_sources_w19)} forced source rows for {len(target_objectIds)} objectIds.")
else:
    all_forced_sources_w19 = pd.DataFrame()
    print("No forced sources found.")
"""

In [None]:
# all_forced_sources_w19.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/all_forced_sources_w19.parquet")
# all_forced_sources.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/all_forced_sources.parquet")

In [None]:
all_forced_sources = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/all_forced_sources.parquet")
all_forced_sources_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/all_forced_sources_w19.parquet")

In [None]:
target_objectIds = all_forced_sources['diaObjectId'].unique()
target_objectIds_w19 = all_forced_sources_w19['diaObjectId'].unique()

In [None]:
# Attach the light-curve length (number of forced-source rows) to each crossmatch row

# w18: rows per diaObjectId, as a two-column frame (diaObjectId, lc_length)
lc_lengths = all_forced_sources.groupby("diaObjectId").size().reset_index(name="lc_length")

# Left-join on the matched DIA object id, then drop the duplicated key column
RR_LSST_computed_with_length = RR_LSST_computed.merge(
    lc_lengths,
    how='left',
    left_on='diaObjectId_right',
    right_on='diaObjectId',
).drop(columns=["diaObjectId"])


# w19: same procedure; `lc_lengths` is reused and ends up holding the w19 counts
lc_lengths = all_forced_sources_w19.groupby("diaObjectId").size().reset_index(name="lc_length")

RR_LSST_computed_with_length_w19 = RR_LSST_computed_w19.merge(
    lc_lengths,
    how='left',
    left_on='diaObjectId_right',
    right_on='diaObjectId',
).drop(columns=["diaObjectId"])

In [None]:
# Drop rows whose 'PF_left' is empty or whitespace-only, then store the column as float.
# The `.str` accessor is used, so the column must still hold strings when this runs.
mask = RR_LSST_computed_with_length['PF_left'].str.strip() != ''
RR_LSST_computed_with_length = (
    RR_LSST_computed_with_length[mask]
    .copy()
    .astype({'PF_left': float})
)

mask = RR_LSST_computed_with_length_w19['PF_left'].str.strip() != ''
RR_LSST_computed_with_length_w19 = (
    RR_LSST_computed_with_length_w19[mask]
    .copy()
    .astype({'PF_left': float})
)

In [None]:
RR_LSST_computed_long = RR_LSST_computed_with_length[(RR_LSST_computed_with_length['lc_length']>120) ]
RR_LSST_computed_long_w19 = RR_LSST_computed_with_length_w19[(RR_LSST_computed_with_length_w19['lc_length']>120) ]


In [None]:
RR_LSST_computed_long.head(6)

In [None]:
RR_LSST_computed_long_w19.head(6)

In [None]:
# parameters of good RR Lyrae
RR_LSST_computed_long.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed_long.parquet")
RR_LSST_computed_long_w19.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed_long_w19.parquet")

In [None]:
RR_LSST_computed_long= pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed_long.parquet")
RR_LSST_computed_long_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed_long_w19.parquet")

In [None]:
RR_LSST_computed_long_w19

In [None]:
import pandas as pd

# Show up to 42 rows in notebook or terminal
pd.set_option("display.max_rows", 42)

# Now display the DataFrame
RR_LSST_computed_long_w19[["Source_left", "diaObjectId_right"]]

In [None]:
# diaObjectId of the row with the largest 'lc_length'.
# A positional lookup yields a scalar, so this keeps working when several rows
# share the maximum (the first one is taken) and avoids calling int() on a Series.
longest_row = np.nanargmax(RR_LSST_computed_long['lc_length'].to_numpy())
obj_id_longest = int(RR_LSST_computed_long['diaObjectId_right'].iloc[longest_row])

In [None]:
# example of a lightcurve
single_lc = all_forced_sources[all_forced_sources['diaObjectId'] == obj_id_longest]
single_lc

In [None]:
visit_ids = all_forced_sources['visit'].unique()
np.save("/sdf/home/n/ncaplar/rrlyrae_lightcurves/visit_ids.npy", visit_ids)

visit_ids_w19 = all_forced_sources_w19['visit'].unique()
np.save("/sdf/home/n/ncaplar/rrlyrae_lightcurves/visit_ids_w19.npy", visit_ids_w19)

In [None]:
times = np.load('/sdf/home/n/ncaplar/rrlyrae_lightcurves/times.npy')
times_w19 = np.load('/sdf/home/n/ncaplar/rrlyrae_lightcurves/times_w19.npy')

In [None]:
filtered_visits_df = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/filtered_visits_df.parquet")
filtered_visits_df_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/filtered_visits_df_w19.parquet")

In [None]:
filtered_visits_df['visit_id']

In [None]:
filtered_visits_df_w19['visit_id']

In [None]:
# Sanity check: each run has as many unique visits as loaded MJD times
for ids, mjds in ((visit_ids, times), (visit_ids_w19, times_w19)):
    assert len(ids) == len(mjds), "Mismatch in number of visits and MJD times"

# Lookup tables visit_id -> exposure-midpoint MJD, built from the filtered visit tables
visit_to_mjd = dict(zip(filtered_visits_df['visit_id'], filtered_visits_df['exp_midpt_mjd']))
visit_to_mjd_w19 = dict(zip(filtered_visits_df_w19['visit_id'], filtered_visits_df_w19['exp_midpt_mjd']))

# Add an 'mjd' column to each forced-source table; visits missing from the
# lookup table map to NaN
all_forced_sources['mjd'] = all_forced_sources['visit'].map(visit_to_mjd)
all_forced_sources_w19['mjd'] = all_forced_sources_w19['visit'].map(visit_to_mjd_w19)

In [None]:
object_id = RR_LSST_computed_long['diaObjectId_right'].unique()[11]
single_lc = all_forced_sources[all_forced_sources['diaObjectId'] == object_id].copy()
single_lc

In [None]:
id_w19 = RR_LSST_computed_long_w19['diaObjectId_right'].unique() 

In [None]:
# Count number of rows per diaObjectId
lightcurve_lengths = all_forced_sources_w19.groupby('diaObjectId').size()

# Get the 50 diaObjectIds with the longest lightcurves
# (the variable names say "top_30", but nlargest is called with 50)
top_30_ids = lightcurve_lengths.nlargest(50).index

# All forced-source rows that belong to those diaObjectIds
top_30_lcs = all_forced_sources_w19[all_forced_sources_w19['diaObjectId'].isin(top_30_ids)]
top_30_lcs

In [None]:
# Ensure both are numpy arrays
id_w19_array = np.array(id_w19)
top_30_ids_array = top_30_lcs['diaObjectId'].unique()

# Compute intersection
intersect_ids_w19 = np.intersect1d(id_w19_array, top_30_ids_array)
print(len(intersect_ids_w19), "common IDs found in both lists.")
intersect_ids_w19

In [None]:
plt.figure(figsize=(10, 5))
plt.scatter(single_lc['mjd'], single_lc['psfFluxErr'], s=1, label='w18', alpha=0.5)
# plt.scatter(single_lc_w19['mjd'], single_lc_w19['psfFluxErr'], s=1, label='w19', alpha=0.5)

# W18 fit

In [None]:
good_index =  np.array([70927777120911428, 72591613091643579, 74238475351621650, 72584053949202434, 74241224130691123, 74242667239702535, 74249126870515736, 74244522665574412,  75894133704622083, 75895508094156806 ])

In [None]:
# Forced-source rows of one example object
single_lc = all_forced_sources[all_forced_sources['diaObjectId'] == 74241224130691123].copy()

# Identify all columns that contain 'flag'
flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]

# Keep only rows where every flag column is False
flag_mask = ~(single_lc[flag_cols].any(axis=1))
single_lc = single_lc[flag_mask]

# Display the filtered light curve. It is already reduced to the unflagged rows,
# so it is not indexed with `flag_mask` a second time.
single_lc

In [None]:
# w18

# Plot colour for each band name; only u, g, r and i are listed here.
# The plotting loop below looks bands up with band_colors.get(band, 'gray'),
# so any other band is drawn in gray.
band_colors = {
    "u": "blue",
    "g": "green",
    "r": "red",
    "i": "brown"
}
def sinusoid(phase, A, phi0, mean_mag):
    """Single sine term around a mean level.

    Returns ``mean_mag + A * sin(2*pi*phase + phi0)``; one unit of ``phase``
    corresponds to one full cycle.
    """
    angle = 2 * np.pi * phase + phi0
    return mean_mag + A * np.sin(angle)

def create_mag(sciFlux):
    """Convert a flux, interpreted as nJy, into an AB magnitude.

    NOTE(review): this redefines the `create_mag` declared earlier in the
    notebook with the same conversion.
    """
    mag = u.nJy.to(u.ABmag, sciFlux)
    return mag
# for object_id in RR_LSST_computed_long['diaObjectId_right'].unique():
for object_id in good_index:
    # Forced-source rows of this object
    #object_id = RR_LSST_computed_long['objectId_right'].values[7]
    single_lc = all_forced_sources[all_forced_sources['diaObjectId'] == object_id].copy()

    # Identify all columns that contain 'flag'
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]

    # Keep only rows where every flag column is False. The result is copied so
    # that the 'psfMag' assignment below writes to an independent frame rather
    # than to a slice of the previous one.
    flag_mask = ~(single_lc[flag_cols].any(axis=1))  # True where all flags are False
    single_lc = single_lc[flag_mask].copy()

    single_lc['psfMag'] = create_mag(single_lc['psfFlux'])
    single_lc_with_columns = single_lc[['visit', 'coord_ra', 'coord_dec', 'tract', 'patch', 'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector', 'mjd', 'psfFlux', 'psfMag', 'band' ]]
    #single_lc_with_columns.to_parquet(
    #    "/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/test/w_18_objectId_"
    #    + str(int(single_lc_with_columns['diaObjectId'].values[0])) + ".parquet"
    #)

    # Period from the crossmatch table. Objects that are absent from the table,
    # or whose period is NaN, are reported and skipped instead of raising.
    period_rows = RR_LSST_computed_long[RR_LSST_computed_long['diaObjectId_right'] == int(object_id)]
    if period_rows.empty:
        print(f"No period for object {object_id}")
        continue
    period = float(period_rows['PF_left'].values[0])
    if pd.isna(period):
        print(f"No period for object {object_id}")
        continue

    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)

    # Running magnitude range over all successfully fitted bands (for the y-limits)
    global_ymin = np.inf
    global_ymax = -np.inf

    for band, group in single_lc.groupby("band"):
        time = group["mjd"].values.astype(float)
        visit = group["visit"].values.astype(int)
        flux = group["psfFlux"].values.astype(float)
        flux_err = group["psfFluxErr"].values.astype(float)

        mag, mag_err = create_mag_errors(flux, flux_err)

        # Fold the times on the period; points with non-finite values are
        # excluded from the saved table and from the fit
        phase = (time % period) / period
        valid = np.isfinite(mag) & np.isfinite(phase) & np.isfinite(mag_err)

        # Save the lightcurve (including computed mags and errors).
        # NOTE(review): the id column is spelled "diaobjectId" here while the
        # fit table below uses "objectId"; both are kept as they are because
        # they define the layout of the written files.
        lc_df = pd.DataFrame({
            "diaobjectId": object_id,
            "band": band,
            "visit": visit,
            "mjd": time,
            "psfFlux": flux,
            "psfFluxErr": flux_err,
            "mag": mag,
            "magErr": mag_err,
            "phase": phase
        })[valid]  # only keep valid rows

        lightcurve_path = f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/test/w_18_objectId_lightcurve_{int(object_id)}_{band}.parquet"
        lc_df.to_parquet(lightcurve_path, index=False)

        # Fit the sinusoid model; a band whose fit (or parameter export) fails
        # is reported and left out of the plots
        try:
            popt, _ = curve_fit(
                sinusoid, phase[valid], mag[valid],
                p0=[0.1, 0.0, np.nanmean(mag)],
                sigma=mag_err[valid],
                absolute_sigma=True
            )
            A_fit, phi0_fit, mean_mag_fit = popt

            # Save fit parameters to Parquet
            fit_df = pd.DataFrame({
                "objectId": [object_id],
                "band": [band],
                "period": [period],
                "A": [A_fit],
                "phi0": [phi0_fit],
                "mean_mag": [mean_mag_fit]
            })
            parquet_path = f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/test/w_18_objectId_popt_{int(object_id)}_{band}.parquet"
            fit_df.to_parquet(parquet_path, index=False)
        except Exception as e:
            print(f"Fit failed for band {band}: {e}")
            continue

        # Model evaluated on a phase grid and on a time grid
        phase_model = np.linspace(0, 1, 500)
        mag_model_phase = sinusoid(phase_model, A_fit, phi0_fit, mean_mag_fit)

        t_model = np.linspace(np.min(time), np.max(time), 1000)
        phase_t_model = (t_model % period) / period
        mag_model_time = sinusoid(phase_t_model, A_fit, phi0_fit, mean_mag_fit)

        # Update global min/max for zoomed y-axis
        combined_mags = np.concatenate([mag[valid], mag_model_phase, mag_model_time])
        global_ymin = min(global_ymin, np.nanmin(combined_mags))
        global_ymax = max(global_ymax, np.nanmax(combined_mags))

        # Plot: time-domain
        axes[0].errorbar(time, mag, yerr=mag_err, fmt='o', label=f"{band}-band",
                        alpha=0.6, color=band_colors.get(band, 'gray'), capsize=3)
        axes[0].plot(t_model, mag_model_time, '--',
                    label=f"Fit ({band})", color=band_colors.get(band, 'gray'))

        # Plot: phase-folded
        axes[1].errorbar(phase, mag, yerr=mag_err, fmt='o', label=f"{band}-band",
                        alpha=0.6, color=band_colors.get(band, 'gray'), capsize=3)
        axes[1].plot(phase_model, mag_model_phase, '--',
                    label=f"Fit ({band})", color=band_colors.get(band, 'gray'))

    # Final plot adjustments
    for ax in axes:
        ax.invert_yaxis()
        ax.legend()
        ax.grid(True)
        ax.set_ylabel("Magnitude")

    axes[0].set_xlabel("MJD")
    axes[0].set_title("Time Domain")

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    # Apply padded y-limits (upper value first keeps the axis inverted).
    # The range stays at +/-inf when no band was fitted; set_ylim rejects
    # non-finite limits, so the default limits are kept in that case.
    if np.isfinite(global_ymin) and np.isfinite(global_ymax):
        pad = 0.1 * (global_ymax - global_ymin)
        axes[0].set_ylim(global_ymax + pad, global_ymin - pad)
        axes[1].set_ylim(global_ymax + pad, global_ymin - pad)

    plt.suptitle(f"Object ID {object_id} / Period = {period:.4f} days")
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()


# W19 original fit

In [None]:
# Re-read the w19 products from disk

# Crossmatch table restricted to long light curves, and the saved MJD array
RR_LSST_computed_long_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed_long_w19.parquet")
times_w19 = np.load('/sdf/home/n/ncaplar/rrlyrae_lightcurves/times_w19.npy')

# Lookup table visit_id -> exposure-midpoint MJD
filtered_visits_df_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/filtered_visits_df_w19.parquet")
visit_to_mjd_w19 = dict(
    zip(filtered_visits_df_w19['visit_id'], filtered_visits_df_w19['exp_midpt_mjd'])
)

# Forced-source table, its unique visits, and an 'mjd' column looked up per visit
all_forced_sources_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/all_forced_sources_w19.parquet")
visit_ids_w19 = all_forced_sources_w19['visit'].unique()
all_forced_sources_w19['mjd'] = all_forced_sources_w19['visit'].map(visit_to_mjd_w19)

In [None]:
intersect_ids_w19 = np.array([69261123651633168, 70922760599109657, 70922898038063320,
       70927777120911374, 70930113583120389, 70934168032247856,
       72578144074203143, 72582679559667755, 72584053949202435,
       72584191388155907, 72585222180306950, 72585771936120856,
       72586871447748649, 72591613091643426, 72597866564026416,
       72598553758793772, 72601233818386462, 74238475351621651,
       74239231265865808, 74240536935923747, 74241224130691109,
       74241636447551510, 74242667239702537, 74243423153946646,
       74244522665574414, 74246446810923009, 74247477603074095,
       74247615042027587, 74249126870515732, 74249195589992487,
       74252562844352545, 74255242903945239, 74256479854526548,
       74257304488247305, 74258060402491403, 75889735658111016,
       75894133704622084, 75895508094156808, 75896813764214861,
       75897019922645015, 75897294800551970, 75897638397935633])

In [None]:
# w19
good_index_w19 =  np.array([70927777120911374, 72585222180306950, 74238475351621651, 
   74241636447551510, 74242667239702537, 74246446810923009, 74249126870515732, 
   75894133704622084, 75895508094156808, 75897638397935633])

# good_index_w19 = intersect_ids_w19

# Define band color mapping
band_colors = {
    "u": "blue",
    "g": "green",
    "r": "red",
    "i": "brown",
    "z": "purple",
    "y": "black"
}

def sinusoid(phase, A, phi0, mean_mag):
    """Single sine term around a mean level.

    Returns ``mean_mag + A * sin(2*pi*phase + phi0)``; one unit of ``phase``
    corresponds to one full cycle.
    """
    angle = 2 * np.pi * phase + phi0
    return mean_mag + A * np.sin(angle)

# Convert flux to magnitude
def create_mag(flux):
    """Convert flux value(s) expressed in nJy to AB magnitudes using astropy units."""
    source_unit, target_unit = u.nJy, u.ABmag
    return source_unit.to(target_unit, flux)

# Iterate over selected objects.
# For each diaObject: drop flagged forced-source rows, save the cleaned light
# curve, fit one sinusoid per band at the catalogue period, save the per-band
# light curve and fit parameters, and plot the data with the fitted model.
for object_id in good_index_w19:
    single_lc = all_forced_sources_w19[all_forced_sources_w19['diaObjectId'] == object_id].copy()

    # Identify and apply flag mask: keep only rows where no flag column is set
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]
    flag_mask = ~(single_lc[flag_cols].any(axis=1))
    single_lc = single_lc[flag_mask].copy()

    # Nothing to save, fit or plot if the object is absent or fully flagged
    if single_lc.empty:
        print(f"No unflagged forced sources for object {object_id}")
        continue

    single_lc['psfMag'] = create_mag(single_lc['psfFlux'])
    single_lc_with_columns = single_lc[['visit', 'coord_ra', 'coord_dec', 'tract', 'patch', 'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector', 'mjd', 'psfFlux', 'psfMag', 'band' ]]
    single_lc_with_columns.to_parquet(
        "/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/w_19_objectId_"
        + str(int(object_id)) + ".parquet"
    )

    # Period: look up PF_left for this object. A missing catalogue row is
    # handled like a missing value instead of raising IndexError.
    period_values = RR_LSST_computed_long_w19.loc[
        RR_LSST_computed_long_w19['diaObjectId_right'] == int(object_id), 'PF_left'
    ].values
    if len(period_values) == 0 or pd.isna(period_values[0]):
        print(f"No period for object {object_id}")
        continue
    period = float(period_values[0])

    # Initialize figure: left panel = time domain, right panel = phase folded
    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    global_ymin = np.inf
    global_ymax = -np.inf

    for band, group in single_lc.groupby("band"):
        time = group["mjd"].values.astype(float)
        visit = group["visit"].values.astype(int)
        flux = group["psfFlux"].values.astype(float)
        flux_err = group["psfFluxErr"].values.astype(float)

        mag, mag_err = create_mag_errors(flux, flux_err)
        phase = (time % period) / period

        # Only points with finite magnitude, error and phase enter the fit
        valid = np.isfinite(mag) & np.isfinite(phase) & np.isfinite(mag_err)

        # Save LC with computed columns
        lc_df = pd.DataFrame({
            "diaObjectId": object_id,
            "band": band,
            "visit": visit,
            "mjd": time,
            "psfFlux": flux,
            "psfFluxErr": flux_err,
            "mag": mag,
            "magErr": mag_err,
            "phase": phase
        })[valid]

        lightcurve_path = f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/w_19_objectId_lightcurve_{int(object_id)}_{band}.parquet"
        lc_df.to_parquet(lightcurve_path, index=False)

        # Fit sinusoid. The initial mean is taken over the valid points only,
        # so a single infinite magnitude cannot poison the starting guess.
        try:
            popt, _ = curve_fit(
                sinusoid, phase[valid], mag[valid],
                p0=[0.1, 0.0, np.nanmean(mag[valid])],
                sigma=mag_err[valid],
                absolute_sigma=True
            )
            A_fit, phi0_fit, mean_mag_fit = popt
            # Save fit parameters to Parquet
            fit_df = pd.DataFrame({
                "objectId": [object_id],
                "band": [band],
                "period": [period],
                "A": [A_fit],
                "phi0": [phi0_fit],
                "mean_mag": [mean_mag_fit]
            })
            parquet_path = f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/w_19_objectId_popt_{int(object_id)}_{band}.parquet"
            fit_df.to_parquet(parquet_path, index=False)
        except Exception as e:
            # Best effort: report the failure and move on to the next band
            print(f"Fit failed for band {band}: {e}")
            continue

        # Model curves
        phase_model = np.linspace(0, 1, 500)
        mag_model_phase = sinusoid(phase_model, A_fit, phi0_fit, mean_mag_fit)

        t_model = np.linspace(np.min(time), np.max(time), 1000)
        phase_t_model = (t_model % period) / period
        mag_model_time = sinusoid(phase_t_model, A_fit, phi0_fit, mean_mag_fit)

        # Update global y-limits
        combined_mags = np.concatenate([mag[valid], mag_model_phase, mag_model_time])
        global_ymin = min(global_ymin, np.nanmin(combined_mags))
        global_ymax = max(global_ymax, np.nanmax(combined_mags))

        # Plot: time-domain
        axes[0].errorbar(
            time, mag, yerr=mag_err, fmt='o', alpha=0.6,
            label=f"{band}-band", color=band_colors.get(band, 'gray'), capsize=3
        )
        axes[0].plot(
            t_model, mag_model_time, '--',
            label=f"Fit ({band})", color=band_colors.get(band, 'gray')
        )

        # Plot: phase-folded
        axes[1].errorbar(
            phase, mag, yerr=mag_err, fmt='o', alpha=0.6,
            color=band_colors.get(band, 'gray'), capsize=3
        )
        axes[1].plot(
            phase_model, mag_model_phase, '--',
            color=band_colors.get(band, 'gray')
        )

    # If no band could be fitted the limits are still +/-inf, which set_ylim
    # rejects; drop the empty figure instead of crashing the loop.
    if not np.isfinite(global_ymin):
        print(f"No successful fit for object {object_id}; skipping plot")
        plt.close(fig)
        continue

    # Final plot adjustments
    for ax in axes:
        ax.grid(True)
        ax.set_ylabel("Magnitude")

    axes[0].legend()
    axes[0].set_xlabel("MJD")
    axes[0].set_title("Time Domain")

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    # Descending limits give the inverted (bright-at-top) magnitude axis
    pad = 0.1 * (global_ymax - global_ymin)
    axes[0].set_ylim(global_ymax + pad, global_ymin - pad)
    axes[1].set_ylim(global_ymax + pad, global_ymin - pad)

    plt.suptitle(f"Object ID {object_id} / Period = {period:.4f} days")
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()

In [None]:
intersect_ids_w19

In [None]:
# np.array([70927777120911374, 72585222180306950, 74238475351621651, 
#    74241636447551510, 74242667239702537, 74246446810923009, 74249126870515732, 
#    75894133704622084, 75895508094156808, 75897638397935633])

In [None]:
np.array([70922760599109657, 70934168032247856, 72598553758793772, 74238475351621651, 74239231265865808, 74246446810923009, 74258060402491403])

In [None]:
RR_LSST_computed_long_w19[RR_LSST_computed_long_w19['diaObjectId_right'] == int(70922760599109657)]

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from astropy import units as u

# Plot color for each photometric band label
band_colors = dict(
    u="blue",
    g="green",
    r="red",
    i="brown",
    z="purple",
    y="black",
)

# Fourier model with N harmonics
def fourier_model(phase, *params):
    """Evaluate a truncated Fourier series at ``phase``.

    ``params`` is laid out as ``(mean, A1, phi1, A2, phi2, ...)`` and the
    result is ``mean + sum_k A_k * cos(2*pi*k*phase + phi_k)``.  A trailing
    amplitude without a matching phase offset is ignored.
    """
    total = params[0]
    amplitudes = params[1::2]
    offsets = params[2::2]
    # zip stops at the shorter sequence, i.e. after (len(params) - 1) // 2 harmonics
    for order, (amplitude, offset) in enumerate(zip(amplitudes, offsets), start=1):
        total += amplitude * np.cos(2 * np.pi * order * phase + offset)
    return total

# Convert flux to magnitude
def create_mag(flux):
    """Return the AB magnitude(s) corresponding to flux value(s) given in nJy."""
    ab_mag = u.nJy.to(u.ABmag, flux)
    return ab_mag

# Parameters
# Number of harmonics in the Fourier fit; the fit has 2 * N_harmonics + 1
# free parameters (mean plus one amplitude and one phase per harmonic).
N_harmonics = 4
# diaObjectIds fitted in this cell. This rebinds good_index_w19, replacing
# whatever list an earlier cell assigned to the same name.
good_index_w19 = np.array([70922760599109657, 72585771936120856, 72598553758793772, 74238475351621651, 74239231265865808, 74246446810923009, 74258060402491403])

# For each selected diaObject: drop flagged rows, save the cleaned light curve,
# fit an N_harmonics Fourier series per band at the catalogue period, save the
# per-band light curve, and plot the data with the fitted model.
for object_id in good_index_w19:
    single_lc = all_forced_sources_w19[all_forced_sources_w19['diaObjectId'] == object_id].copy()

    # Keep only rows where no flag column is set
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]
    flag_mask = ~(single_lc[flag_cols].any(axis=1))
    single_lc = single_lc[flag_mask].copy()

    # Nothing to save, fit or plot if the object is absent or fully flagged
    if single_lc.empty:
        print(f"No unflagged forced sources for object {object_id}")
        continue

    single_lc['psfMag'] = create_mag(single_lc['psfFlux'])
    single_lc_with_columns = single_lc[['visit', 'coord_ra', 'coord_dec', 'tract', 'patch', 
                                        'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector', 
                                        'mjd', 'psfFlux', 'psfMag', 'band']]
    
    single_lc_with_columns.to_parquet(
        f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/Fourier/w_19_objectId_{int(object_id)}.parquet"
    )

    # Look up PF_left for this object. A missing catalogue row is handled like
    # a missing value instead of raising IndexError.
    period_values = RR_LSST_computed_long_w19.loc[
        RR_LSST_computed_long_w19['diaObjectId_right'] == int(object_id), 'PF_left'
    ].values
    if len(period_values) == 0 or pd.isna(period_values[0]):
        print(f"No period for object {object_id}")
        continue
    period = float(period_values[0])

    # Left panel = time domain, right panel = phase folded
    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    global_ymin = np.inf
    global_ymax = -np.inf

    for band, group in single_lc.groupby("band"):
        time = group["mjd"].values.astype(float)
        visit = group["visit"].values.astype(int)
        flux = group["psfFlux"].values.astype(float)
        flux_err = group["psfFluxErr"].values.astype(float)

        mag, mag_err = create_mag_errors(flux, flux_err)
        phase = (time % period) / period

        # Only points with finite magnitude, error and phase enter the fit
        valid = np.isfinite(mag) & np.isfinite(phase) & np.isfinite(mag_err)

        lc_df = pd.DataFrame({
            "diaObjectId": object_id,
            "band": band,
            "visit": visit,
            "mjd": time,
            "psfFlux": flux,
            "psfFluxErr": flux_err,
            "mag": mag,
            "magErr": mag_err,
            "phase": phase
        })[valid]
        lightcurve_path = f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/Fourier/w_19_objectId_lightcurve_{int(object_id)}_{band}.parquet"
        lc_df.to_parquet(lightcurve_path, index=False)
        try:
            # Start from the mean magnitude, 0.1 amplitude and zero phase per harmonic
            mag_mean = np.nanmean(mag[valid])
            p0 = [mag_mean] + [0.1, 0.0] * N_harmonics

            popt, _ = curve_fit(
                fourier_model, phase[valid], mag[valid],
                p0=p0,
                sigma=mag_err[valid],
                absolute_sigma=True,
                maxfev=10000
            )

            # Fit parameters as a one-row table. It is kept in memory only;
            # this cell does not write it to disk.
            fit_df = pd.DataFrame({
                "objectId": [object_id],
                "band": [band],
                "period": [period],
                "mean_mag": [popt[0]],
                **{f"A{k}": [popt[2*k - 1]] for k in range(1, N_harmonics + 1)},
                **{f"phi{k}": [popt[2*k]] for k in range(1, N_harmonics + 1)},
            })

        except Exception as e:
            # Best effort: report the failure and move on to the next band
            print(f"Fourier fit failed for object {object_id} in band {band}: {e}")
            continue

        # Evaluate model
        phase_model = np.linspace(0, 1, 500)
        mag_model_phase = fourier_model(phase_model, *popt)

        t_model = np.linspace(np.min(time), np.max(time), 1000)
        phase_t_model = (t_model % period) / period
        mag_model_time = fourier_model(phase_t_model, *popt)

        # Update y-limits
        combined_mags = np.concatenate([mag[valid], mag_model_phase, mag_model_time])
        global_ymin = min(global_ymin, np.nanmin(combined_mags))
        global_ymax = max(global_ymax, np.nanmax(combined_mags))

        # Plot time-domain
        axes[0].errorbar(
            time, mag, yerr=mag_err, fmt='o', alpha=0.6,
            label=f"{band}-band", color=band_colors.get(band, 'gray'), capsize=3
        )
        axes[0].plot(
            t_model, mag_model_time, '--',
            label=f"Fourier fit ({band})", color=band_colors.get(band, 'gray')
        )

        # Plot phase-folded
        axes[1].errorbar(
            phase, mag, yerr=mag_err, fmt='o', alpha=0.6,
            color=band_colors.get(band, 'gray'), capsize=3
        )
        axes[1].plot(
            phase_model, mag_model_phase, '--',
            color=band_colors.get(band, 'gray')
        )

    # If no band could be fitted the limits are still +/-inf, which set_ylim
    # rejects; drop the empty figure instead of crashing the loop.
    if not np.isfinite(global_ymin):
        print(f"No successful Fourier fit for object {object_id}; skipping plot")
        plt.close(fig)
        continue

    # Final plot tweaks
    for ax in axes:
        ax.grid(True)
        ax.set_ylabel("Magnitude")

    axes[0].legend()
    axes[0].set_xlabel("MJD")
    axes[0].set_title("Time Domain")

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    # Descending limits give the inverted (bright-at-top) magnitude axis
    pad = 0.1 * (global_ymax - global_ymin)
    axes[0].set_ylim(global_ymax + pad, global_ymin - pad)
    axes[1].set_ylim(global_ymax + pad, global_ymin - pad)

    plt.suptitle(f"Object ID {object_id} / Period = {period:.4f} days", fontsize=14)
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()

# Sinusoid fit

In [None]:
# Scratch cell: magnitudes and magnitude errors for every row of `single_lc`.
# NOTE(review): `single_lc` is not defined in this cell; it is whatever a
# previously executed cell left in the kernel -- confirm which object is meant.
flux = single_lc['psfFlux'].values.astype(float)
flux_err = single_lc['psfFluxErr'].values.astype(float)

mag, mag_err = create_mag_errors(flux, flux_err)

In [None]:
# w19
# Subset of the IDs listed in intersect_ids_w19.
# NOTE(review): the loop further down in this cell does not iterate over this
# array -- confirm which ID list is intended.
good_index_w19 =  np.array([70927777120911374, 72585222180306950, 74238475351621651, 
    74241636447551510, 74242667239702537, 74246446810923009, 74249126870515732, 
    75894133704622084, 75895508094156808, 75897638397935633])
# Define band color mapping
# (band label -> matplotlib color; lookups elsewhere in this cell fall back
# to 'gray' for bands that are not listed)
band_colors = {
    "u": "blue",
    "g": "green",
    "r": "red",
    "i": "brown",
    "z": "purple",
    "y": "black"
}

# Sinusoid model
def sinusoid(phase, A, phi0, mean_mag):
    """Return ``mean_mag + A * sin(2*pi*phase + phi0)`` for scalar or array ``phase``."""
    oscillation = A * np.sin(2 * np.pi * phase + phi0)
    return mean_mag + oscillation

# Convert flux to magnitude
def create_mag(flux):
    """Map flux value(s) in nJy onto the AB magnitude scale with astropy units."""
    nanojansky = u.nJy
    return nanojansky.to(u.ABmag, flux)

# Iterate over selected objects.
# The loop reads the w19 tables and writes w_19 files, so it iterates over the
# w19 ID list (the original iterated over the un-suffixed `intersect_ids`).
# For each diaObject: drop flagged rows, save the cleaned light curve, fit one
# sinusoid per band at the catalogue period, save per-band products and plot.
for object_id in intersect_ids_w19:
    single_lc = all_forced_sources_w19[all_forced_sources_w19['diaObjectId'] == object_id].copy()

    # Identify and apply flag mask: keep only rows where no flag column is set
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]
    flag_mask = ~(single_lc[flag_cols].any(axis=1))
    single_lc = single_lc[flag_mask].copy()

    # Nothing to save, fit or plot if the object is absent or fully flagged
    if single_lc.empty:
        print(f"No unflagged forced sources for object {object_id}")
        continue

    single_lc['psfMag'] = create_mag(single_lc['psfFlux'])
    single_lc_with_columns = single_lc[['visit', 'coord_ra', 'coord_dec', 'tract', 'patch', 'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector', 'mjd', 'psfFlux', 'psfMag', 'band' ]]
    single_lc_with_columns.to_parquet(
        "/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/w_19_objectId_"
        + str(int(object_id)) + ".parquet"
    )

    # Period: look up PF_left for this object. A missing catalogue row is
    # handled like a missing value instead of raising IndexError.
    period_values = RR_LSST_computed_long_w19.loc[
        RR_LSST_computed_long_w19['diaObjectId_right'] == int(object_id), 'PF_left'
    ].values
    if len(period_values) == 0 or pd.isna(period_values[0]):
        print(f"No period for object {object_id}")
        continue
    period = float(period_values[0])

    # Initialize figure: left panel = time domain, right panel = phase folded
    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    global_ymin = np.inf
    global_ymax = -np.inf

    for band, group in single_lc.groupby("band"):
        time = group["mjd"].values.astype(float)
        visit = group["visit"].values.astype(int)
        flux = group["psfFlux"].values.astype(float)
        flux_err = group["psfFluxErr"].values.astype(float)

        mag, mag_err = create_mag_errors(flux, flux_err)
        phase = (time % period) / period

        # Only points with finite magnitude, error and phase enter the fit
        valid = np.isfinite(mag) & np.isfinite(phase) & np.isfinite(mag_err)

        # Save LC with computed columns
        lc_df = pd.DataFrame({
            "diaObjectId": object_id,
            "band": band,
            "visit": visit,
            "mjd": time,
            "psfFlux": flux,
            "psfFluxErr": flux_err,
            "mag": mag,
            "magErr": mag_err,
            "phase": phase
        })[valid]

        lightcurve_path = f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/w_19_objectId_lightcurve_{int(object_id)}_{band}.parquet"
        lc_df.to_parquet(lightcurve_path, index=False)

        # Fit sinusoid. The initial mean is taken over the valid points only,
        # so a single infinite magnitude cannot poison the starting guess.
        try:
            popt, _ = curve_fit(
                sinusoid, phase[valid], mag[valid],
                p0=[0.1, 0.0, np.nanmean(mag[valid])],
                sigma=mag_err[valid],
                absolute_sigma=True
            )
            A_fit, phi0_fit, mean_mag_fit = popt
            # Save fit parameters to Parquet
            fit_df = pd.DataFrame({
                "objectId": [object_id],
                "band": [band],
                "period": [period],
                "A": [A_fit],
                "phi0": [phi0_fit],
                "mean_mag": [mean_mag_fit]
            })
            parquet_path = f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/w_19_objectId_popt_{int(object_id)}_{band}.parquet"
            fit_df.to_parquet(parquet_path, index=False)
        except Exception as e:
            # Best effort: report the failure and move on to the next band
            print(f"Fit failed for band {band}: {e}")
            continue

        # Model curves
        phase_model = np.linspace(0, 1, 500)
        mag_model_phase = sinusoid(phase_model, A_fit, phi0_fit, mean_mag_fit)

        t_model = np.linspace(np.min(time), np.max(time), 1000)
        phase_t_model = (t_model % period) / period
        mag_model_time = sinusoid(phase_t_model, A_fit, phi0_fit, mean_mag_fit)

        # Update global y-limits
        combined_mags = np.concatenate([mag[valid], mag_model_phase, mag_model_time])
        global_ymin = min(global_ymin, np.nanmin(combined_mags))
        global_ymax = max(global_ymax, np.nanmax(combined_mags))

        # Plot: time-domain
        axes[0].errorbar(
            time, mag, yerr=mag_err, fmt='o', alpha=0.6,
            label=f"{band}-band", color=band_colors.get(band, 'gray'), capsize=3
        )
        axes[0].plot(
            t_model, mag_model_time, '--',
            label=f"Fit ({band})", color=band_colors.get(band, 'gray')
        )

        # Plot: phase-folded
        axes[1].errorbar(
            phase, mag, yerr=mag_err, fmt='o', alpha=0.6,
            color=band_colors.get(band, 'gray'), capsize=3
        )
        axes[1].plot(
            phase_model, mag_model_phase, '--',
            color=band_colors.get(band, 'gray')
        )

    # If no band could be fitted the limits are still +/-inf, which set_ylim
    # rejects; drop the empty figure instead of crashing the loop.
    if not np.isfinite(global_ymin):
        print(f"No successful fit for object {object_id}; skipping plot")
        plt.close(fig)
        continue

    # Final plot adjustments
    for ax in axes:
        ax.grid(True)
        ax.set_ylabel("Magnitude")

    axes[0].legend()
    axes[0].set_xlabel("MJD")
    axes[0].set_title("Time Domain")

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    # Descending limits give the inverted (bright-at-top) magnitude axis
    pad = 0.1 * (global_ymax - global_ymin)
    axes[0].set_ylim(global_ymax + pad, global_ymin - pad)
    axes[1].set_ylim(global_ymax + pad, global_ymin - pad)

    plt.suptitle(f"Object ID {object_id} / Period = {period:.4f} days")
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()

In [None]:
# testing loading

# Band label -> matplotlib color. Lists all six bands (u, g, r, i, z, y) like
# the other six-band color maps in this notebook, so z- and y-band data are
# not drawn in the 'gray' fallback color.
band_colors = {
    "u": "blue",
    "g": "green",
    "r": "red",
    "i": "brown",
    "z": "purple",
    "y": "black"
}

def sinusoid(phase, A, phi0, mean_mag):
    """Single-harmonic model: constant ``mean_mag`` plus a sine of amplitude ``A``.

    The sine argument is ``2*pi*phase + phi0``; ``phase`` may be a scalar or
    a NumPy array.
    """
    argument = phi0 + 2 * np.pi * phase
    return mean_mag + A * np.sin(argument)

# Directory that holds the saved w_18 light curves and fit parameters.
# It has to be defined here: the per-band paths below are built from it.
base_path = "/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/test"

# Re-load the saved products for every object and redraw data + sinusoid model.
for object_id in good_index:
    # Load this object's main lightcurve table, so the loop does not depend on
    # a `single_lc` left behind by another cell
    lc_path = f"{base_path}/w_18_objectId_{int(object_id)}.parquet"
    try:
        single_lc = pd.read_parquet(lc_path)
    except OSError as e:
        print(f"Failed for object {object_id}: {e}")
        continue

    # Apply flag filtering if needed (only when the table has flag columns)
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]
    if flag_cols:
        single_lc = single_lc[~single_lc[flag_cols].any(axis=1)]

    period = None  # Initialize period

    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    global_ymin = np.inf
    global_ymax = -np.inf

    for band, group in single_lc.groupby("band"):
        try:
            # Load per-band lightcurve
            lc_band_path = f"{base_path}/w_18_objectId_lightcurve_{int(object_id)}_{band}.parquet"
            lc_df = pd.read_parquet(lc_band_path)

            # Load fit parameters
            fit_path = f"{base_path}/w_18_objectId_popt_{int(object_id)}_{band}.parquet"
            fit_df = pd.read_parquet(fit_path)
            A, phi0, mean_mag = fit_df.loc[0, ['A', 'phi0', 'mean_mag']]
            period = fit_df.loc[0, 'period']

            # Extract data
            time = lc_df["mjd"].values
            mag = lc_df["mag"].values
            mag_err = lc_df["magErr"].values
            phase = lc_df["phase"].values

            # Model values
            phase_model = np.linspace(0, 1, 500)
            mag_model_phase = sinusoid(phase_model, A, phi0, mean_mag)

            t_model = np.linspace(np.min(time), np.max(time), 1000)
            phase_t_model = (t_model % period) / period
            mag_model_time = sinusoid(phase_t_model, A, phi0, mean_mag)

            # Update y-limits
            combined_mags = np.concatenate([mag, mag_model_phase, mag_model_time])
            global_ymin = min(global_ymin, np.nanmin(combined_mags))
            global_ymax = max(global_ymax, np.nanmax(combined_mags))

            # Plot: time domain
            axes[0].errorbar(time, mag, yerr=mag_err, fmt='o', label=f"{band}-band",
                             alpha=0.6, color=band_colors.get(band, 'gray'), capsize=3)
            axes[0].plot(t_model, mag_model_time, '--',
                         label=f"Fit ({band})", color=band_colors.get(band, 'gray'))

            # Plot: phase-folded
            axes[1].errorbar(phase, mag, yerr=mag_err, fmt='o', label=f"{band}-band",
                             alpha=0.6, color=band_colors.get(band, 'gray'), capsize=3)
            axes[1].plot(phase_model, mag_model_phase, '--',
                         label=f"Fit ({band})", color=band_colors.get(band, 'gray'))

        except Exception as e:
            # Best effort: report the band that could not be loaded and go on
            print(f"Failed for object {object_id}, band {band}: {e}")
            continue

    # If no band could be loaded the limits are still +/-inf, which set_ylim
    # rejects; drop the empty figure instead of crashing the loop.
    if not np.isfinite(global_ymin):
        print(f"Nothing to plot for object {object_id}")
        plt.close(fig)
        continue

    for ax in axes:
        ax.legend()
        ax.grid(True)
        ax.set_ylabel("Magnitude")

    axes[0].set_xlabel("MJD")
    axes[0].set_title("Time Domain")

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    if period is not None:
        plt.suptitle(f"Object ID {object_id} / Period = {period:.4f} days")
    else:
        plt.suptitle(f"Object ID {object_id} / Period unknown")

    # Descending limits give the inverted (bright-at-top) magnitude axis
    pad = 0.1 * (global_ymax - global_ymin)
    axes[0].set_ylim(global_ymax + pad, global_ymin - pad)
    axes[1].set_ylim(global_ymax + pad, global_ymin - pad)

    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()

In [None]:
# Pick the first distinct diaObjectId of the catalogue and pull its light curve
object_id = RR_LSST_computed_long['diaObjectId_right'].unique()[0]
belongs_to_object = all_forced_sources['diaObjectId'] == object_id
single_lc = all_forced_sources.loc[belongs_to_object].copy()

# Add AB magnitudes, then keep only the columns of interest
single_lc['psfMag'] = create_mag(single_lc['psfFlux'])
lc_columns = [
    'visit', 'coord_ra', 'coord_dec', 'tract', 'patch',
    'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector',
    'mjd', 'psfFlux', 'psfMag', 'band',
]
single_lc_with_columns = single_lc[lc_columns]

In [None]:
# Same extraction as in the previous cell, followed by a display of the table
object_id = RR_LSST_computed_long['diaObjectId_right'].unique()[0]
single_lc = all_forced_sources.loc[
    all_forced_sources['diaObjectId'] == object_id
].copy()
single_lc['psfMag'] = create_mag(single_lc['psfFlux'])
single_lc_with_columns = single_lc.loc[:, [
    'visit', 'coord_ra', 'coord_dec', 'tract', 'patch',
    'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector',
    'mjd', 'psfFlux', 'psfMag', 'band',
]]

single_lc_with_columns

In [None]:
all_forced_sources.columns

In [None]:
# Show every flag column for one specific forced-source measurement
flag_columns = [name for name in all_forced_sources.columns if "flag" in name.lower()]
is_outlier = all_forced_sources['forcedSourceOnDiaObjectId'] == 24580707377481276
outlier = all_forced_sources[is_outlier]

outlier[flag_columns]


In [None]:
all_forced_sources.columns

In [None]:
# Light curve of the fifth entry of good_index, with AB magnitudes added
object_id = good_index[4]
object_rows = all_forced_sources['diaObjectId'] == object_id
single_lc = all_forced_sources[object_rows].copy()
single_lc['psfMag'] = create_mag(single_lc['psfFlux'])

kept_columns = [
    'visit', 'coord_ra', 'coord_dec', 'tract', 'patch',
    'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector',
    'mjd', 'psfFlux', 'psfMag', 'band',
]
single_lc_with_columns = single_lc[kept_columns]

single_lc_with_columns

In [None]:
# Allow up to 100 rows in rendered tables
pd.set_option('display.max_rows', 100)

# u-band measurements whose PSF magnitude is above 19
u_band_lc = single_lc_with_columns[single_lc_with_columns['band'] == 'u']
u_band_lc[u_band_lc['psfMag'] > 19]

In [None]:
all_forced_sources[all_forced_sources['forcedSourceOnDiaObjectId'] == 24580707377481276]

In [None]:
create_mag_errors(86058.796875, 488.690796)

In [None]:
output_dir = "/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/test"

# Write one parquet file per object in good_index; a failure for one object is
# reported and does not stop the remaining objects.
for object_id in good_index:
    try:
        # Rows of this object, with AB magnitudes added
        object_rows = all_forced_sources['diaObjectId'] == object_id
        single_lc = all_forced_sources.loc[object_rows].copy()
        single_lc['psfMag'] = create_mag(single_lc['psfFlux'])

        # Columns that go into the saved file
        selected = single_lc.loc[:, [
            'visit', 'coord_ra', 'coord_dec', 'tract', 'patch',
            'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector',
            'mjd', 'psfFlux', 'psfMag', 'band',
        ]]

        output_path = f"{output_dir}/w_18_objectId_{int(object_id)}.parquet"
        selected.to_parquet(output_path, index=False)
    except Exception as e:
        print(f"Failed for object {object_id}: {e}")

In [None]:
# PF_left of the first catalogue row matching the current object_id
matches_object = RR_LSST_computed_long['diaObjectId_right'] == int(object_id)
period = float(RR_LSST_computed_long.loc[matches_object, 'PF_left'].values[0])

In [None]:
RR_LSST_computed_long[RR_LSST_computed_long['diaObjectId_right'] == int(object_id)]

In [None]:
# visit	coord_ra	coord_dec	tract	patch	forcedSourceOnDiaObjectId	objectId  detector, psfMag, MJD
# popt  of the sinusoid fit in output per band too  
# position of all RR Lyrae in the M49 field 

In [None]:
single_lc

In [None]:
# Add AB magnitudes and keep the columns of interest. The ID columns use the
# forced-source-on-diaObject names ('forcedSourceOnDiaObjectId', 'diaObjectId')
# that every other cell selects from this table; the previous
# 'forcedSourceId' / 'objectId' names are not among them.
single_lc['psfMag'] = create_mag(single_lc['psfFlux'])
single_lc_with_columns = single_lc[['visit', 'coord_ra', 'coord_dec', 'tract', 'patch', 'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector', 'mjd', 'psfFlux', 'psfMag', 'band' ]]

In [None]:
#single_lc_with_columns.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_18_objectId_"+str(int(single_lc_with_columns['objectId'].values[0]))).parquet")

# Locations of all RR Lyrae in M49

In [None]:
import pandas as pd
from pathlib import Path

# Directory that contains the VizieR TSV exports
base_dir = Path("/sdf/home/n/ncaplar")

# Files to parse
filenames = ["212_m7.tsv", "216_m17.tsv", "m49.tsv"]

# Names assigned to the table columns, in file order
column_names = [
    '_RAJ2000', '_DEJ2000', 'Source', 'PF', 'P1O',
    'Gmagavg', 'RA_ICRS', 'DE_ICRS'
]

# Parsed tables, keyed by file name without the ".tsv" extension
dfs = {}

for file in filenames:
    file_path = base_dir / file

    with open(file_path, "r") as f:
        lines = f.readlines()

    # Index of the first line that starts with the '_RAJ2000|' column header
    start_idx = next(
        (idx for idx, text in enumerate(lines) if text.strip().startswith('_RAJ2000|')),
        None,
    )
    if start_idx is None:
        raise ValueError(f"Could not find table header in {file}")

    # The data rows begin after the header, units and dashed-separator lines
    data_start = start_idx + 3

    df = pd.read_csv(
        file_path,
        sep="|",
        skiprows=data_start,
        names=column_names,
        engine="python",
    )
    df.columns = df.columns.str.strip()

    dfs[file.replace(".tsv", "")] = df

# Convenience names for the three tables
df_212_m7, df_216_m17, df_m49 = (dfs[key] for key in ("212_m7", "216_m17", "m49"))

# Report how many rows each table contains
for name, df in dfs.items():
    print(f"{name}: {len(df)} rows loaded.")

In [None]:
#single_lc_with_columns.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_18_objectId_"+str(int(single_lc_with_columns['objectId'].values[0]))).parquet")

In [None]:
# all the locations of the RR Lyrae stars in the M49 field 
df_m49.to_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_18_objectId_RR_Lyrae_all_positions.parquet")

# Fit with Gaia parameters

In [None]:
intersect_ids_w19

In [None]:
intersect_ids_w19

In [None]:
# Objects and bands selected for the Gaia-constrained fit.
# Key: integer formed by the last four digits of a diaObjectId (the fitting
# loop computes int(str(obj_id)[-4:]) and skips IDs whose suffix is not a key).
# Value: list of band labels to keep for that object.
allowed_suffix_band = {
    6808: ["g"],
    6548: ["r"],
    2537: ["r"],
    3772: ["r"],
    1651: ["g"],
    6416: ["u"]
}

In [None]:
# Plot color per band label (u, g, r and i only)
band_colors = dict(u="blue", g="green", r="red", i="brown")

# Loop over all object IDs.
# For each object selected through allowed_suffix_band: keep the allowed bands,
# drop flagged rows, save the cleaned light curve per band, fit the constrained
# Fourier model per band with the catalogue period and shape parameters, save
# the fit products, and plot data + model.
for obj_id in intersect_ids_w19:

    # Objects are selected by the last four digits of their ID
    obj_suffix = int(str(obj_id)[-4:])
    if obj_suffix not in allowed_suffix_band:
        continue

    single_lc = all_forced_sources_w19[all_forced_sources_w19['diaObjectId'] == obj_id].copy()

    valid_bands = allowed_suffix_band[obj_suffix]
    single_lc = single_lc[single_lc['band'].isin(valid_bands)]
    if single_lc.empty:
        continue

    # Remove flagged measurements
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]
    flag_mask = ~(single_lc[flag_cols].any(axis=1))
    single_lc = single_lc[flag_mask].copy()

    # Convert flux to magnitude
    single_lc['psfMag'] = create_mag(single_lc['psfFlux'])

    single_lc_with_columns = single_lc[['visit', 'coord_ra', 'coord_dec', 'tract', 'patch', 
                                        'forcedSourceOnDiaObjectId', 'diaObjectId', 'detector', 
                                        'mjd', 'psfFlux', 'psfMag', 'band']]

    # One file per band. The band in the file name comes from the data being
    # written, not from a `band` variable left over from an earlier loop.
    for band_name, band_lc in single_lc_with_columns.groupby("band"):
        band_lc.to_parquet(
            f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/GAIA/w_19_objectId_{int(obj_id)}_band_{band_name}.parquet"
        )

    # Keep only u, g, r, i bands
    single_lc = single_lc[single_lc['band'].isin(["u", "g", "r", "i"])]
    if single_lc.empty:
        continue

    # Get Gaia-derived period and Fourier shape parameters
    row = RR_LSST_computed_long_w19[
        RR_LSST_computed_long_w19['diaObjectId_right'] == int(obj_id)
    ]
    if row.empty:
        continue

    try:
        period = float(row['PF_left'].values[0])
        R21 = float(row['R21G_left'].values[0])
        R31 = float(row['R31G_left'].values[0])
        phi21 = float(row['phi21G_left'].values[0])
        phi31 = float(row['phi31G_left'].values[0])
    except (KeyError, TypeError, ValueError) as e:
        # Report why the object is skipped instead of dropping it silently
        print(f"Skipping {obj_id}: could not read period/shape parameters: {e}")
        continue

    # A missing period cannot be used to phase-fold the light curve
    if not np.isfinite(period):
        print(f"No period for object {obj_id}")
        continue

    # Set up plot: left panel = time domain, right panel = phase folded
    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    global_ymin = np.inf
    global_ymax = -np.inf

    # Fit and plot each band separately
    band_fit_results = {}
    for band, group in single_lc.groupby("band"):
        time = group["mjd"].values.astype(float)
        flux = group["psfFlux"].values.astype(float)
        flux_err = group["psfFluxErr"].values.astype(float)
        mag, mag_err = create_mag_errors(flux, flux_err)

        # Bands with fewer than 10 points or any NaN magnitude are not fitted
        if len(time) < 10 or np.any(np.isnan(mag)):
            continue

        try:
            popt, model_func = fit_constrained_fourier(time, mag, mag_err, period, R21, R31, phi21, phi31)
            band_fit_results[band] = (popt[0], popt[1], popt[2])
        except Exception as e:
            print(f"Fit failed for {obj_id} in band {band}: {e}")
            continue

        # Time-domain model
        t_model = np.linspace(np.min(time), np.max(time), 10000)
        mag_fit_time = model_func((t_model % period) / period)

        # Phase model
        phase = (time % period) / period
        phase_fit = np.linspace(0, 1, 5000)
        mag_fit_phase = model_func(phase_fit)

        # Evaluate model at observation phases
        mag_model_phase = model_func(phase)

        # Save full light curve (data + model evaluated at data points)
        output_df = pd.DataFrame({
            "mjd": time,
            "psfFlux": flux,
            "psfFluxErr": flux_err,
            "psfMag": mag,
            "psfMagErr": mag_err,
            "phase": phase,
            "mag_model_phase": mag_model_phase,
            "band": band,
            "diaObjectId": obj_id
        })

        output_df.to_parquet(
            f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/GAIA/w_19_objectId_phase_fit_{int(obj_id)}_band_{band}.parquet"
        )

        # Save time-domain model curve (dense)
        model_time_df = pd.DataFrame({
            "t_model": t_model,
            "mag_fit_time": mag_fit_time,
            "band": band,
            "diaObjectId": obj_id
        })
        model_time_df.to_parquet(
            f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/GAIA/w_19_time_fit_{int(obj_id)}_band_{band}.parquet"
        )

        # Save phase-domain model curve (dense)
        model_phase_df = pd.DataFrame({
            "phase_fit": phase_fit,
            "mag_fit_phase": mag_fit_phase,
            "band": band,
            "diaObjectId": obj_id
        })
        model_phase_df.to_parquet(
            f"/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/GAIA/w_19_phase_model_{int(obj_id)}_band_{band}.parquet"
        )

        color = band_colors.get(band, "gray")
        axes[0].errorbar(time, mag, yerr=mag_err, fmt='o', alpha=0.6, label=f"{band}-band", color=color)
        axes[0].plot(t_model, mag_fit_time, '--', color=color)

        axes[1].errorbar(phase, mag, yerr=mag_err, fmt='o', alpha=0.6, color=color)
        axes[1].plot(phase_fit, mag_fit_phase, '--', color=color)

        global_ymin = min(global_ymin, np.nanmin(mag))
        global_ymax = max(global_ymax, np.nanmax(mag))

    # Final plot formatting. If no band was plotted, release the empty figure
    # before moving on so it is neither rendered nor kept in memory.
    if global_ymin == np.inf:
        plt.close(fig)
        continue

    for ax in axes:
        # Descending limits give the inverted (bright-at-top) magnitude axis
        ax.set_ylim(global_ymax + 0.2, global_ymin - 0.2)
        ax.grid(True, alpha=0.3)

    axes[0].set_xlabel("MJD")
    axes[0].set_ylabel("Magnitude")
    axes[0].set_title("Time Domain")
    axes[0].legend()

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    plt.suptitle(f"diaObjectId {obj_id} | Period = {period:.5f} d", fontsize=14)
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()

    print(f"Fit parameters for diaObjectId {obj_id} (Period = {period:.5f} d):")
    for band_label, (A0, A1, phi1) in band_fit_results.items():
        print(f"  {band_label}-band: A₀ = {A0:.3f}, A₁ = {A1:.3f}, ϕ₁ = {phi1:.3f}")


In [None]:
row

## Load and visualize

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
import os

# Base directory where all parquet files are saved
base_dir = "/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/w_19/GAIA/"

# Color map
band_colors = {
    "u": "blue",
    "g": "green",
    "r": "red",
    "i": "brown"
}

# Find all phase-fit files (observed data + model at data points)
phase_obs_files = sorted(glob.glob(os.path.join(base_dir, "w_19_objectId_phase_fit_*_band_*.parquet")))

for phase_obs_path in phase_obs_files:
    # File names follow w_19_objectId_phase_fit_<id>_band_<band>.parquet, so after
    # splitting on "_" the object id is field 5 and "<band>.parquet" is field 7.
    base_name = os.path.basename(phase_obs_path)
    obj_id = base_name.split("_")[5]
    band = base_name.split("_")[7].replace(".parquet", "")

    # Construct paths to time model and phase model files
    time_model_path = os.path.join(base_dir, f"w_19_time_fit_{obj_id}_band_{band}.parquet")
    phase_model_path = os.path.join(base_dir, f"w_19_phase_model_{obj_id}_band_{band}.parquet")

    # Skip if required files are missing
    if not os.path.exists(time_model_path) or not os.path.exists(phase_model_path):
        print(f"Missing model files for object {obj_id}, band {band}")
        continue

    # Load data
    df_obs = pd.read_parquet(phase_obs_path)
    df_time_model = pd.read_parquet(time_model_path)
    df_phase_model = pd.read_parquet(phase_model_path)

    # Plotting
    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    color = band_colors.get(band, "gray")

    # Time domain panel
    axes[0].errorbar(df_obs["mjd"], df_obs["psfMag"], yerr=df_obs["psfMagErr"],
                     fmt='o', label="Observed", color=color, alpha=0.6)
    axes[0].plot(df_time_model["t_model"], df_time_model["mag_fit_time"], '--', color=color, label="Model")

    axes[0].set_xlabel("MJD")
    axes[0].set_ylabel("Magnitude")
    axes[0].set_title("Time Domain")
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Phase-folded panel
    df_obs_sorted = df_obs.sort_values("phase")
    df_phase_model_sorted = df_phase_model.sort_values("phase_fit")

    axes[1].errorbar(df_obs_sorted["phase"], df_obs_sorted["psfMag"], yerr=df_obs_sorted["psfMagErr"],
                     fmt='o', color=color, alpha=0.6, label="Observed")
    axes[1].plot(df_phase_model_sorted["phase_fit"], df_phase_model_sorted["mag_fit_phase"],
                 '--', color=color, label="Model")

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")
    axes[1].grid(True, alpha=0.3)

    # Draw smaller magnitudes higher up, matching the reversed y-limits used by the
    # fitting plots in this notebook. The panels share their y-axis (sharey=True),
    # so inverting one of them flips both.
    axes[0].invert_yaxis()

    plt.suptitle(f"diaObjectId {obj_id} | Band: {band}", fontsize=14)
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()
    # One figure is created per file; release it so they do not pile up in memory.
    plt.close(fig)

# Sinusoidal fit with parameters

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit

# Plot colour used for each photometric band
band_colors = dict(u="blue", g="green", r="red", i="brown")

# Fit shared-phase Fourier model
def fit_shared_phase_fourier(times, mags, mag_errs, bands, period):
    """Jointly fit a one-harmonic cosine to several bands with a single shared phase.

    A point observed at time ``t`` in band ``b`` is modelled as
    ``A0[b] + A1[b] * cos(2*pi*phase + phi1)`` with ``phase = (t % period) / period``.
    The offset ``A0`` and amplitude ``A1`` are fitted per band; ``phi1`` is common
    to all bands.

    Parameters
    ----------
    times, mags, mag_errs : array-like
        Observation times, magnitudes and magnitude uncertainties, one entry per
        data point (all of the same length).
    bands : array-like
        Band label of every data point.
    period : float
        Folding period, in the same units as ``times``.

    Returns
    -------
    A0_fit : dict
        Fitted offset for each band label.
    A1_fit : dict
        Fitted amplitude for each band label.
    phi1 : float
        Fitted shared phase offset (radians).

    Raises
    ------
    Any exception raised by ``scipy.optimize.curve_fit`` (for example when the
    inputs are not finite or the optimisation does not converge) is propagated.
    """
    # Coerce to arrays so that boolean/fancy indexing below is well defined even
    # when plain Python lists are passed in.
    times = np.asarray(times, dtype=float)
    mags = np.asarray(mags, dtype=float)
    mag_errs = np.asarray(mag_errs, dtype=float)
    bands = np.asarray(bands)

    # band_indices[j] is the position in unique_bands of the band of point j.
    unique_bands, band_indices = np.unique(bands, return_inverse=True)
    n_bands = len(unique_bands)

    def model(t, *params):
        # Parameter layout: [phi1, A0 per band..., A1 per band...]
        phi1 = params[0]
        A0 = np.asarray(params[1:1 + n_bands])
        A1 = np.asarray(params[1 + n_bands:])
        phase = (t % period) / period
        # Pick each point's own band parameters in one vectorised step.
        return A0[band_indices] + A1[band_indices] * np.cos(2 * np.pi * phase + phi1)

    # Start from the per-band mean magnitude, a 0.2 mag amplitude and zero phase.
    A0_init = [np.mean(mags[band_indices == i]) for i in range(n_bands)]
    A1_init = [0.2] * n_bands
    p0 = [0.0] + A0_init + A1_init

    popt, _ = curve_fit(model, times, mags, p0=p0, sigma=mag_errs, absolute_sigma=True, maxfev=10000)

    phi1 = popt[0]
    A0_fit = dict(zip(unique_bands, popt[1:1 + n_bands]))
    A1_fit = dict(zip(unique_bands, popt[1 + n_bands:]))

    return A0_fit, A1_fit, phi1


# Main loop: for every object, fit all bands jointly with one shared phase,
# then plot the light curve in the time domain and folded on the period.
for obj_id in intersect_ids_w19:  # Make sure this is defined
    single_lc = all_forced_sources_w19[all_forced_sources_w19['diaObjectId'] == obj_id].copy()

    # Remove rows where any column whose name contains "flag" is set
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]
    flag_mask = ~(single_lc[flag_cols].any(axis=1))
    single_lc = single_lc[flag_mask].copy()

    single_lc['psfMag'] = create_mag(single_lc['psfFlux'])

    # Use only u, g, r, i bands
    single_lc = single_lc[single_lc['band'].isin(["u", "g", "r", "i"])]
    if single_lc.empty:
        continue

    # Extract period
    row = RR_LSST_computed_long_w19[
        RR_LSST_computed_long_w19['diaObjectId_right'] == int(obj_id)
    ]
    if row.empty:
        continue

    try:
        period = float(row['PF_left'].values[0])
    except Exception:
        continue
    # float() accepts NaN; a missing or non-positive period would turn every
    # phase into NaN and make the fit fail, so skip such objects up front.
    if not np.isfinite(period) or period <= 0:
        continue

    # Gather all bands for shared fit
    times_all, mags_all, magerrs_all, bands_all = [], [], [], []
    for band, group in single_lc.groupby("band"):
        time = group["mjd"].values.astype(float)
        flux = group["psfFlux"].values.astype(float)
        flux_err = group["psfFluxErr"].values.astype(float)
        mag, mag_err = create_mag_errors(flux, flux_err)
        mag = np.asarray(mag, dtype=float)
        mag_err = np.asarray(mag_err, dtype=float)

        # curve_fit rejects non-finite input, so one bad point (NaN time,
        # magnitude or error) would otherwise fail the joint fit for every band.
        # Zero/negative errors are dropped as well since they cannot be used as sigma.
        good = np.isfinite(time) & np.isfinite(mag) & np.isfinite(mag_err) & (mag_err > 0)
        time, mag, mag_err = time[good], mag[good], mag_err[good]

        if len(time) < 10:
            continue

        times_all.append(time)
        mags_all.append(mag)
        magerrs_all.append(mag_err)
        bands_all.append(np.full_like(time, band, dtype=object))

    if not times_all:
        continue

    # Concatenate for joint fit
    time_all = np.concatenate(times_all)
    mag_all = np.concatenate(mags_all)
    magerr_all = np.concatenate(magerrs_all)
    band_all = np.concatenate(bands_all)

    # Fit
    try:
        A0_fit, A1_fit, phi1 = fit_shared_phase_fourier(time_all, mag_all, magerr_all, band_all, period)
    except Exception as e:
        print(f"Fit failed for {obj_id}: {e}")
        continue

    # Plotting
    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    global_ymin = np.inf
    global_ymax = -np.inf

    for band in A0_fit:
        mask = (band_all == band)
        t = time_all[mask]
        m = mag_all[mask]
        m_err = magerr_all[mask]
        phase = ((t % period) / period)

        color = band_colors.get(band, "gray")

        # Plot time-domain
        axes[0].errorbar(t, m, yerr=m_err, fmt='o', alpha=0.6, label=f"{band}-band", color=color)
        t_model = np.linspace(t.min(), t.max(), 1000)
        model_t = A0_fit[band] + A1_fit[band] * np.cos(2 * np.pi * ((t_model % period) / period) + phi1)
        axes[0].plot(t_model, model_t, '--', color=color)

        # Plot phase-folded
        phase_model = np.linspace(0, 1, 500)
        mag_model = A0_fit[band] + A1_fit[band] * np.cos(2 * np.pi * phase_model + phi1)
        axes[1].errorbar(phase, m, yerr=m_err, fmt='o', alpha=0.6, color=color)
        axes[1].plot(phase_model, mag_model, '--', color=color)

        global_ymin = min(global_ymin, np.nanmin(m))
        global_ymax = max(global_ymax, np.nanmax(m))

    if global_ymin == np.inf:
        # Nothing was drawn; drop the empty figure instead of leaving it open.
        plt.close(fig)
        continue

    # Reversed limits: smaller magnitudes are drawn higher up.
    for ax in axes:
        ax.set_ylim(global_ymax + 0.2, global_ymin - 0.2)
        ax.grid(True, alpha=0.3)

    axes[0].set_xlabel("MJD")
    axes[0].set_ylabel("Magnitude")
    axes[0].set_title("Time Domain")
    axes[0].legend()

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    plt.suptitle(f"diaObjectId {obj_id} | Period = {period:.5f} d", fontsize=14)
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()
    # One figure per object: release it once shown.
    plt.close(fig)

    # Print fit summary
    print(f"Shared-phase fit for diaObjectId {obj_id} (Period = {period:.5f} d):")
    for band in A0_fit:
        print(f"  {band}-band: A₀ = {A0_fit[band]:.3f}, A₁ = {A1_fit[band]:.3f}")
    print(f"  Shared ϕ₁ = {phi1:.3f}\n")

# Fit with Sesar templates

In [None]:
# Load the precomputed w_19 inputs. The fitting loops in this notebook read the
# period from RR_LSST_computed_long_w19 ('PF_left', matched on 'diaObjectId_right')
# and the photometry from all_forced_sources_w19.
RR_LSST_computed_long_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/RR_LSST_computed_long_w19.parquet")
all_forced_sources_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/all_forced_sources_w19.parquet")
# Distinct visit ids present in the forced-source table
visit_ids_w19 = all_forced_sources_w19['visit'].unique()
times_w19 = np.load('/sdf/home/n/ncaplar/rrlyrae_lightcurves/times_w19.npy')
filtered_visits_df_w19 = pd.read_parquet("/sdf/group/rubin/shared/notebooks/mjuric/rfl/variable_objects/filtered_visits_df_w19.parquet")
# Build a visit_id -> exp_midpt_mjd lookup and use it to add an 'mjd' column to the
# forced sources (modifies all_forced_sources_w19 in place). Visits that are not in
# the lookup end up with NaN in 'mjd'.
visit_to_mjd_w19 = dict(zip(filtered_visits_df_w19['visit_id'], filtered_visits_df_w19['exp_midpt_mjd']))
all_forced_sources_w19['mjd'] = all_forced_sources_w19['visit'].map(visit_to_mjd_w19)

In [None]:
# Hard-coded list of diaObjectId values that the fitting loops iterate over
# (each one is matched against the 'diaObjectId' column of the forced sources).
# NOTE(review): how this list was selected is not shown here — TODO document its origin.
intersect_ids_w19 = np.array([69261123651633168, 70922760599109657, 70922898038063320,
       70927777120911374, 70930113583120389, 70934168032247856,
       72578144074203143, 72582679559667755, 72584053949202435,
       72584191388155907, 72585222180306950, 72585771936120856,
       72586871447748649, 72591613091643426, 72597866564026416,
       72598553758793772, 72601233818386462, 74238475351621651,
       74239231265865808, 74240536935923747, 74241224130691109,
       74241636447551510, 74242667239702537, 74243423153946646,
       74244522665574414, 74246446810923009, 74247477603074095,
       74247615042027587, 74249126870515732, 74249195589992487,
       74252562844352545, 74255242903945239, 74256479854526548,
       74257304488247305, 74258060402491403, 75889735658111016,
       75894133704622084, 75895508094156808, 75896813764214861,
       75897019922645015, 75897294800551970, 75897638397935633])

In [None]:
# Overplot every template file matching the pattern, each repeated over two
# consecutive cycles (phase and phase + 1), all in the same colour.
files = glob.glob('/sdf/home/n/ncaplar/templates/11*g.dat')
files.sort()
plt.figure()
for f in files:
    phase, amp = np.genfromtxt(f, unpack=True)
    plt.plot(
        np.hstack([phase, phase + 1]),
        np.tile(amp, 2),
        c='tab:green',
        label=f,
    )
# Reversed limits: larger template values are drawn lower.
plt.ylim(1.1, -0.1)
plt.legend()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d
from functools import partial
import glob

# Plot colour used for each photometric band
band_colors = dict(u="blue", g="green", r="red", i="brown")

# Collect the template files in sorted order and stop early if there are none
template_files = glob.glob('/sdf/home/n/ncaplar/templates/11*g.dat')
template_files.sort()
if len(template_files) == 0:
    raise RuntimeError("No template files found!")

def empirical_model_fit(phase, A0, A1, shift, amp_template, template_phase):
    """Evaluate a shifted, scaled periodic template at the given phases.

    Returns ``A0 + A1 * T((phase + shift) mod 1)``, where ``T`` is the template
    defined by the samples ``(template_phase, amp_template)`` and linearly
    interpolated between them.

    The template is treated as periodic with period 1: phases that fall between
    the last and the first template sample are interpolated across the wrap-around
    rather than linearly extrapolated from the nearest two samples.

    Parameters
    ----------
    phase : array-like
        Phases at which to evaluate the model.
    A0 : float
        Additive offset.
    A1 : float
        Multiplicative scale applied to the template.
    shift : float
        Phase shift added to ``phase`` before the template lookup.
    amp_template : array-like
        Template values at ``template_phase``.
    template_phase : array-like
        Phases at which the template is sampled.

    Returns
    -------
    numpy.ndarray
        Model values, one per entry of ``phase``.
    """
    shifted_phase = (np.asarray(phase, dtype=float) + shift) % 1.0
    # np.interp with period=1.0 wraps and sorts the template samples itself, so no
    # interpolator object needs to be rebuilt on every optimiser call.
    return A0 + A1 * np.interp(shifted_phase, template_phase, amp_template, period=1.0)

# Read every template once up front; the files do not change between objects or
# bands, so there is no need to re-parse them inside the fitting loops.
templates = []
for template_file in template_files:
    try:
        template_phase, amp_template = np.genfromtxt(template_file, unpack=True)
    except Exception as e:
        print(f"Could not read template {template_file}: {e}")
        continue
    # Wrap the sampled phases into [0, 1)
    templates.append((template_file, template_phase % 1.0, amp_template))

# Loop over all object IDs: for each band, fit every template and keep the one
# with the lowest reduced chi-squared, then plot data and best model.
for obj_id in intersect_ids_w19:
    single_lc = all_forced_sources_w19[all_forced_sources_w19['diaObjectId'] == obj_id].copy()
    single_lc = single_lc[single_lc['band'].isin(["u", "g", "r", "i"])]
    if single_lc.empty:
        continue

    # Remove flagged measurements (any column whose name contains "flag" is set)
    flag_cols = [col for col in single_lc.columns if 'flag' in col.lower()]
    flag_mask = ~(single_lc[flag_cols].any(axis=1))
    single_lc = single_lc[flag_mask].copy()
    single_lc['psfMag'] = create_mag(single_lc['psfFlux'])

    # Get Gaia-derived period
    row = RR_LSST_computed_long_w19[
        RR_LSST_computed_long_w19['diaObjectId_right'] == int(obj_id)
    ]
    if row.empty:
        continue

    try:
        period = float(row['PF_left'].values[0])
    except Exception:
        continue
    # float() accepts NaN; a missing or non-positive period would turn every
    # phase into NaN and make all template fits fail, so skip such objects
    # before a figure is created.
    if not np.isfinite(period) or period <= 0:
        continue

    fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
    global_ymin = np.inf
    global_ymax = -np.inf

    for band, group in single_lc.groupby("band"):
        time = group["mjd"].values.astype(float)
        flux = group["psfFlux"].values.astype(float)
        flux_err = group["psfFluxErr"].values.astype(float)
        mag, mag_err = create_mag_errors(flux, flux_err)

        # Bands with too few points or any NaN magnitude are skipped entirely
        if len(time) < 10 or np.any(np.isnan(mag)):
            continue

        phase = (time % period) / period

        best_chi2 = np.inf
        best_fit = None
        best_template = None

        # Starting point and limits are the same for every template:
        # parameters are (A0, A1, shift) with 0 <= A1 <= 10 and |shift| <= 0.5.
        init_guess = [np.median(mag), (np.max(mag) - np.min(mag)) / 2, 0.0]
        bounds = ([-np.inf, 0, -0.5], [np.inf, 10, 0.5])

        for template_file, template_phase, amp_template in templates:
            fit_model = partial(empirical_model_fit, amp_template=amp_template, template_phase=template_phase)

            try:
                popt, _ = curve_fit(fit_model, phase, mag, p0=init_guess, sigma=mag_err, bounds=bounds)
            except Exception:
                # Best effort: a template that cannot be fitted is simply not a candidate
                continue

            # Compute reduced chi-squared
            residuals = mag - fit_model(phase, *popt)
            chi2 = np.sum((residuals / mag_err)**2)
            dof = len(mag) - len(popt)
            reduced_chi2 = chi2 / dof if dof > 0 else np.inf

            if reduced_chi2 < best_chi2:
                best_chi2 = reduced_chi2
                best_fit = (popt, amp_template, template_phase)
                best_template = template_file

        if best_fit is None:
            print(f"All fits failed for {obj_id} in band {band}")
            continue

        A0_fit, A1_fit, shift_fit = best_fit[0]
        amp_template, template_phase = best_fit[1], best_fit[2]
        fit_model = partial(empirical_model_fit, amp_template=amp_template, template_phase=template_phase)

        # Evaluate model
        phase_fit = np.linspace(0, 1, 500)
        mag_fit_phase = fit_model(phase_fit, *best_fit[0])
        t_model = np.linspace(np.min(time), np.max(time), 1000)
        mag_fit_time = fit_model((t_model % period) / period, *best_fit[0])

        color = band_colors.get(band, "gray")
        axes[0].errorbar(time, mag, yerr=mag_err, fmt='o', alpha=0.6, label=f"{band}-band", color=color)
        axes[0].plot(t_model, mag_fit_time, '--', color=color)

        axes[1].errorbar(phase, mag, yerr=mag_err, fmt='o', alpha=0.6, color=color)
        axes[1].plot(phase_fit, mag_fit_phase, '--', color=color)

        global_ymin = min(global_ymin, np.nanmin(mag))
        global_ymax = max(global_ymax, np.nanmax(mag))

        print(f"Best template for {band}-band (diaObjectId {obj_id}):")
        print(f"  File: {best_template}")
        print(f"  A₀ = {A0_fit:.3f}, A₁ = {A1_fit:.3f}, shift = {shift_fit:.3f}, χ²_red = {best_chi2:.3f}")

    if global_ymin == np.inf:
        # No band was plotted; drop the empty figure instead of leaving it open.
        plt.close(fig)
        continue

    # Reversed limits: smaller magnitudes are drawn higher up.
    for ax in axes:
        ax.set_ylim(global_ymax + 0.2, global_ymin - 0.2)
        ax.grid(True, alpha=0.3)

    axes[0].set_xlabel("MJD")
    axes[0].set_ylabel("Magnitude")
    axes[0].set_title("Time Domain")
    axes[0].legend()

    axes[1].set_xlabel("Phase")
    axes[1].set_xlim(0, 1)
    axes[1].set_title("Phase Folded")

    plt.suptitle(f"diaObjectId {obj_id} | Period = {period:.5f} d", fontsize=14)
    plt.tight_layout()
    plt.subplots_adjust(top=0.88)
    plt.show()
    # One figure per object: release it once shown.
    plt.close(fig)