In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.table import Table

# Notebook-wide plotting defaults: apply the white-grid style sheet first,
# then raise the figure resolution on top of it.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'figure.dpi': 150})


In [None]:
# Directory that holds the COSMOS-Web catalogs, and the master catalog inside it.
data_dir = Path('/Users/marchuertascompany/Documents/data/COSMOS-Web/catalogs')
catalog_path = data_dir.joinpath('COSMOSWeb_mastercatalog_v1.fits')

# Stop here with an explicit message rather than failing later inside the FITS reader.
if not catalog_path.exists():
    raise FileNotFoundError(f"Catalog not found at {catalog_path}")

# Echo the resolved path as the cell output.
catalog_path


In [None]:
# Columns taken from each extension of the master catalog.
# 'ra' and 'dec' are included because the group-centric distance cell further
# down reads catalog['ra'] / catalog['dec']; without them that cell raises KeyError.
# NOTE(review): assumes HDU 1 names its sky-position columns 'ra' and 'dec' — confirm.
photom_cols = ['id', 'ra', 'dec', 'warn_flag', 'mag_model_f444w', 'flag_star_hsc']
lephare_cols = ['type', 'zfinal', 'mass_med', 'ssfr_med', 'mabs_nuv', 'mabs_r', 'mabs_j']

# HDU 1 supplies the photometry columns, HDU 2 the LePhare columns.
cat_photom = Table.read(catalog_path, hdu=1)[photom_cols]
lephare_raw = Table.read(catalog_path, hdu=2)
cat_lephare = lephare_raw[lephare_cols]

# The two extensions are combined row by row (no key join), so they must at
# least have the same number of rows.
if len(cat_photom) != len(cat_lephare):
    raise ValueError(
        f"Photometry ({len(cat_photom):,} rows) and LePhare ({len(cat_lephare):,} rows) "
        "extensions differ in length; cannot combine them row by row."
    )

# Start from the LePhare columns and append the photometry columns.
catalog = cat_lephare.copy()
for col in photom_cols:
    catalog[col] = cat_photom[col]

catalog


In [None]:
# Load family morphology probabilities
# Names of the per-family probability columns used throughout the notebook.
family_cols = ['family_ELLIPTICAL', 'family_S0', 'family_EARLY_DISK', 'family_LATE_DISK']
morph_pred_path = Path('/Users/marchuertascompany/Documents/data/COSMOS-Web/zoobot/ilbert/ilbert_visual_zoobot_morphology.fits')

# Fail early with a readable message when the morphology file is absent.
if not morph_pred_path.exists():
    raise FileNotFoundError(f"Morphology catalog not found at {morph_pred_path}")

# Read the FITS table, then work with it as a DataFrame.
morph_table = Table.read(morph_pred_path)
morph_df = morph_table.to_pandas()


def normalize_ids(values):
    """Return identifiers as a Series of whitespace-trimmed strings.

    Each value is converted to ``str``, surrounding whitespace is removed and
    a trailing ``.0`` is dropped, so an id stored as ``12.0`` and one stored
    as ``12`` compare equal.  When ``values`` is already a pandas Series its
    index is kept.
    """
    as_text = pd.Series(values).astype(str)
    trimmed = as_text.str.strip()
    return trimmed.str.replace(r'\.0$', '', regex=True)


# Put both id columns into the same string form so they can be matched.
morph_df['id'] = normalize_ids(morph_df['id'])
catalog_ids = normalize_ids(catalog['id'])

# Re-order the morphology rows to follow the catalog; catalog sources without
# a morphology entry come out as all-NaN rows.
morph_by_id = morph_df.set_index('id')
family_matrix = morph_by_id.reindex(catalog_ids)[family_cols].to_numpy()

# Copy each family probability into the catalog as a float column.
for column_index, family_name in enumerate(family_cols):
    catalog[family_name] = np.asarray(family_matrix[:, column_index], dtype=float)

# A source counts as having morphology when at least one probability is finite.
family_available = np.isfinite(family_matrix).any(axis=1)
catalog['has_any_morphology'] = family_available

valid_count = int(family_available.sum())
print(f"Family morphology available for {valid_count:,} sources ({valid_count/len(catalog):.1%}).")


In [None]:
# Compute rest-frame color combinations
def _column_as_float(column):
    """Return ``column`` as a plain float ndarray with masked entries set to NaN.

    ``np.asarray`` discards the mask of a masked column, so calling
    ``np.ma.filled`` on its result has nothing left to fill.  The mask is
    therefore read first and applied to a float copy of the data.
    """
    mask = np.ma.getmaskarray(column)
    data = np.array(column, dtype=float)
    data[mask] = np.nan
    return data


mabs_nuv = _column_as_float(catalog['mabs_nuv'])
mabs_r = _column_as_float(catalog['mabs_r'])
mabs_j = _column_as_float(catalog['mabs_j'])
mass_log = _column_as_float(catalog['mass_med'])
z = _column_as_float(catalog['zfinal'])

nuv_minus_r = mabs_nuv - mabs_r
r_minus_j = mabs_r - mabs_j

catalog['nuv_minus_r'] = nuv_minus_r
catalog['r_minus_j'] = r_minus_j

# Clean-galaxy mask from the tutorial
clean_mask = (
    (np.asarray(catalog['type']) == 0) &
    (np.asarray(catalog['warn_flag']) == 0) &
    (np.abs(np.asarray(catalog['mag_model_f444w'])) < 30) &
    (np.asarray(catalog['flag_star_hsc']) == 0)
)

# Require finite colors and masses (NaN marks masked or missing values)
finite_mask = (
    np.isfinite(nuv_minus_r) &
    np.isfinite(r_minus_j) &
    np.isfinite(mass_log)
)

clean_mask &= finite_mask

# Low-mass and quiescent requirements.
# NOTE(review): mass_med is presumably log10 stellar mass — confirm units.
low_mass_mask = mass_log < 10.0
# Two-sided cut in the (NUV - r) versus (r - J) plane.
quiescent_mask = (nuv_minus_r > 3.1) & (nuv_minus_r > 3.0 * r_minus_j + 1.0)

final_mask = clean_mask & low_mass_mask & quiescent_mask

quiescent_sample = catalog[final_mask]
print(f"Total catalog sources        : {len(catalog):,}")
print(f"Clean galaxy sample          : {clean_mask.sum():,}")
print(f"Low-mass quiescent selection : {len(quiescent_sample):,}")


In [None]:
# Family morphology classification
family_cols = ['family_ELLIPTICAL', 'family_S0', 'family_EARLY_DISK', 'family_LATE_DISK']
# Human-readable label for each entry of family_cols, in the same order.
class_labels = np.array(['elliptical', 's0', 'early-disk', 'late-disk'])

# One row per source, one column per family probability.
scores_matrix = np.stack(
    [np.asarray(catalog[name], dtype=float) for name in family_cols],
    axis=1,
)

# Missing scores are pushed to -inf so they sort below every real value;
# the last column of the argsort is then the best-scoring family.
scores_for_sort = np.nan_to_num(scores_matrix, nan=-np.inf)
order = np.argsort(scores_for_sort, axis=1)
main_idx = order[:, -1]

# Only sources with at least one finite score receive a class.
finite_counts = np.isfinite(scores_matrix).sum(axis=1)
mask_main = finite_counts > 0
classified_rows = np.flatnonzero(mask_main)

main_class = np.full(len(catalog), 'unclassified', dtype='<U16')
main_class[classified_rows] = class_labels[main_idx[classified_rows]]

main_score = np.full(len(catalog), np.nan)
main_score[classified_rows] = scores_matrix[classified_rows, main_idx[classified_rows]]

# 'morph_unique_class' carries the same labels as 'morph_main_class'.
for column_name, column_values in (
    ('morph_main_class', main_class),
    ('morph_main_score', main_score),
    ('morph_unique_class', main_class),
):
    catalog[column_name] = column_values

print('Primary morphology counts (all sources with classifications):')
for label in class_labels:
    count = np.count_nonzero(main_class == label)
    if not count:
        continue
    print(f"  {label:12s}: {count:,}")


In [None]:
# Load group catalog and membership catalog (same selection as reference notebook)
group_dir = Path('/Users/marchuertascompany/Documents/data/COSMOS-Web/groups')
groups_path = group_dir / 'groups.fits'
membership_path = group_dir / 'memberships.fits'

if not groups_path.exists():
    raise FileNotFoundError(f"Group catalog not found at {groups_path}")
if not membership_path.exists():
    raise FileNotFoundError(f"Membership catalog not found at {membership_path}")

groups = Table.read(groups_path).to_pandas()
member = Table.read(membership_path).to_pandas()

# Cuts for SN, LAMBDA_STAR, MSKFRC (same as reference).
# .copy() gives an independent frame, so later column assignments do not act
# on a slice of the unfiltered table (pandas SettingWithCopyWarning).
groups = groups[
    (groups['SN_NOCL'] >= 10) &
    (groups['LAMBDA_STAR'] > 10.55) &
    (groups['MSKFRC'] < 0.2)
].copy()

# Keep only membership rows that point at a group that survived the cuts.
member = member[member['ID'].isin(groups['ID'])].copy()

# Normalize IDs for matching, using the same helper as the morphology join
member['GALID'] = normalize_ids(member['GALID'])
catalog_ids = normalize_ids(catalog['id'])

# Field selection: FIELD_PROB > 0.8, choose min ASSOC_PROB per galaxy (same as reference)
field_filtered = member[member['FIELD_PROB'] > 0.8]
idx_field = field_filtered.groupby('GALID')['ASSOC_PROB'].idxmin()
field_unique = field_filtered.loc[idx_field]
field_prob = field_unique.set_index('GALID')['FIELD_PROB']

# Group selection: ASSOC_PROB > 0.5, choose max ASSOC_PROB per galaxy (same as reference)
group_filtered = member[member['ASSOC_PROB'] > 0.5]
idx_group = group_filtered.groupby('GALID')['ASSOC_PROB'].idxmax()
group_unique = group_filtered.loc[idx_group]
assoc_prob = group_unique.set_index('GALID')['ASSOC_PROB']

# Look up each catalog source by id; sources without a membership row get NaN.
# Plain float arrays are handed to the astropy table rather than pandas Series.
catalog['field_prob'] = catalog_ids.map(field_prob).to_numpy(dtype=float)
catalog['assoc_prob'] = catalog_ids.map(assoc_prob).to_numpy(dtype=float)

# NaN compares False, so unmatched sources fall in neither environment.
field_mask = np.asarray(catalog['field_prob']) > 0.8
group_mask = np.asarray(catalog['assoc_prob']) > 0.5

print(f"Field prob > 0.8: {field_mask.sum():,}")
print(f"Group assoc prob > 0.5: {group_mask.sum():,}")


In [None]:
# Morphology fractions by redshift bin for groups vs field
z_bins = np.arange(0.0, 5.6, 0.5)
z_edges = list(zip(z_bins[:-1], z_bins[1:]))
z_labels = [f"{z_lo:.1f} <= z < {z_hi:.1f}" for z_lo, z_hi in z_edges]

# Stacking order (bottom to top) and the colour used for each class.
desired_order = ['late-disk', 'early-disk', 's0', 'elliptical']
class_palette = {
    'late-disk': '#4C72B0',
    'early-disk': '#8172B2',
    's0': '#ED680F',
    'elliptical': '#C44E52'
}

fig, axes = plt.subplots(1, 2, figsize=(16, 4.5), sharey=True)

panels = [(group_mask, 'Groups'), (field_mask, 'Field')]
for ax, (env_mask, title) in zip(axes, panels):
    # Selected sample, restricted to sources with morphology in this environment.
    subset_mask = final_mask & catalog['has_any_morphology'] & env_mask
    classes_present = set(catalog['morph_unique_class'][subset_mask])
    overall_classes = [cls for cls in desired_order if cls in classes_present]

    positions = np.arange(len(z_labels))
    bottom = np.zeros(len(z_labels))

    # Membership of every redshift bin is the same for all classes, so build it once.
    bin_masks = [
        subset_mask & (catalog['zfinal'] >= z_lo) & (catalog['zfinal'] < z_hi)
        for z_lo, z_hi in z_edges
    ]
    bin_totals = [np.count_nonzero(mask) for mask in bin_masks]

    for cls in overall_classes:
        # Fraction of the bin in this class; empty bins contribute 0.
        fractions = np.array([
            np.count_nonzero(catalog['morph_unique_class'][mask] == cls) / total if total else 0.0
            for mask, total in zip(bin_masks, bin_totals)
        ])
        ax.bar(
            positions,
            fractions,
            bottom=bottom,
            width=0.7,
            color=class_palette.get(cls, '#333333'),
            label=cls
        )
        bottom += fractions

    ax.set_xticks(positions)
    ax.set_xticklabels(z_labels, rotation=45, ha='right')
    ax.set(ylim=(0, 1), xlabel='Redshift bin', title=title)
    ax.grid(axis='y', linestyle=':', alpha=0.4)

axes[0].set_ylabel('Morphology fraction')

# Shared legend built from proxy markers, placed to the right of the last panel.
handles = []
for cls in desired_order:
    handles.append(
        plt.Line2D([0], [0], marker='s', color=class_palette.get(cls, '#333333'),
                   linestyle='', markersize=8, label=cls)
    )
axes[-1].legend(handles=handles, loc='upper left', bbox_to_anchor=(1.02, 1), frameon=False)

plt.tight_layout(rect=[0, 0, 0.88, 1])
plt.show()


In [None]:
# Join group properties onto member galaxies for group-centric metrics
# Requires catalog, catalog_path, final_mask, member, groups and normalize_ids
# defined in previous cells

# Normalize IDs. assign() returns new frames, so no column is written into a
# filtered slice (avoids pandas SettingWithCopyWarning).
member = member.assign(
    GALID=normalize_ids(member['GALID']),
    ID=member['ID'].astype(str),
)
groups = groups.assign(ID=groups['ID'].astype(str))

# Catalog ids become the lookup key for every per-galaxy quantity below.
catalog_ids = normalize_ids(catalog['id'])
catalog_index = pd.Index(catalog_ids)
if not catalog_index.is_unique:
    raise ValueError('Catalog ids are not unique; cannot look up galaxies by id.')

# Group members (ASSOC_PROB > 0.5) with the properties of their host group
member_group = (
    member[member['ASSOC_PROB'] > 0.5]
    .merge(groups[['ID', 'RA', 'DEC', 'LAMBDA_STAR', 'Z']], on='ID', how='inner')
    .set_index('GALID')
)

# Galaxy sky positions. If the catalog was assembled without 'ra'/'dec', read
# them from the photometry extension, which the catalog was built from row by row.
if all(name in catalog.colnames for name in ('ra', 'dec')):
    position_table = catalog
else:
    position_table = Table.read(catalog_path, hdu=1)

catalog_ra = pd.Series(np.asarray(position_table['ra'], dtype=float), index=catalog_index)
catalog_dec = pd.Series(np.asarray(position_table['dec'], dtype=float), index=catalog_index)

# .to_numpy() stores values positionally, so duplicate GALID labels cannot
# interfere with the assignment.
member_group['ra_gal'] = catalog_ra.reindex(member_group.index).to_numpy()
member_group['dec_gal'] = catalog_dec.reindex(member_group.index).to_numpy()

# Drop rows without galaxy positions (members not present in the catalog)
member_group = member_group.dropna(subset=['ra_gal', 'dec_gal']).copy()

# Haversine angular separation in degrees
ra1 = np.deg2rad(member_group['ra_gal'].to_numpy())
dec1 = np.deg2rad(member_group['dec_gal'].to_numpy())
ra2 = np.deg2rad(member_group['RA'].to_numpy())
dec2 = np.deg2rad(member_group['DEC'].to_numpy())

dra = ra1 - ra2
ddec = dec1 - dec2

a = np.sin(ddec / 2) ** 2 + np.cos(dec1) * np.cos(dec2) * np.sin(dra / 2) ** 2
# Clip to 1 so rounding error cannot push arcsin outside its domain.
c = 2 * np.arcsin(np.minimum(1.0, np.sqrt(a)))
member_group['theta_deg'] = np.rad2deg(c)

# Bring morphology + selection flags into the member table.
# Each flag is looked up per member row; a catalog-length mask cannot be
# assigned to the (shorter) member table directly.
selected_by_id = pd.Series(np.asarray(final_mask, dtype=bool), index=catalog_index)
morph_class_by_id = pd.Series(np.asarray(catalog['morph_unique_class']), index=catalog_index)
has_morph_by_id = pd.Series(np.asarray(catalog['has_any_morphology'], dtype=bool), index=catalog_index)

member_group['final_mask'] = selected_by_id.reindex(member_group.index, fill_value=False).to_numpy()
member_group['morph_class'] = morph_class_by_id.reindex(member_group.index).to_numpy()
member_group['has_any_morphology'] = has_morph_by_id.reindex(member_group.index, fill_value=False).to_numpy()

# Keep only quiescent low-mass group members with morphology
member_group = member_group[member_group['final_mask'] & member_group['has_any_morphology']].copy()

print(f"Group members with morphology + selection: {len(member_group):,}")


In [None]:
# Two bins in group-centric distance and richness

# Distance bins by median
valid_theta = member_group['theta_deg'].to_numpy()
if valid_theta.size == 0:
    raise RuntimeError('No group members with valid positions to compute distances.')

median_theta = np.nanmedian(valid_theta)

# Richness bins by median LAMBDA_STAR
valid_lambda = member_group['LAMBDA_STAR'].to_numpy()
median_lambda = np.nanmedian(valid_lambda)

# assign() builds a new frame carrying both labels instead of writing columns
# into a frame that came out of a boolean filter (SettingWithCopyWarning).
# Values equal to the median go to the 'inner' / 'low-richness' side.
member_group = member_group.assign(
    dist_bin=np.where(valid_theta <= median_theta, 'inner', 'outer'),
    richness_bin=np.where(valid_lambda <= median_lambda, 'low-richness', 'high-richness'),
)

print(f"Median theta_deg: {median_theta:.4f} deg")
print(f"Median LAMBDA_STAR: {median_lambda:.3f}")


In [None]:
# Morphology fractions by redshift bin for group-centric distance bins
z_bins = np.arange(0.0, 5.6, 0.5)
z_edges = list(zip(z_bins[:-1], z_bins[1:]))
z_labels = [f"{z_lo:.1f} <= z < {z_hi:.1f}" for z_lo, z_hi in z_edges]

# Stacking order (bottom to top) and the colour used for each class.
desired_order = ['late-disk', 'early-disk', 's0', 'elliptical']
class_palette = {
    'late-disk': '#4C72B0',
    'early-disk': '#8172B2',
    's0': '#ED680F',
    'elliptical': '#C44E52'
}

fig, axes = plt.subplots(1, 2, figsize=(16, 4.5), sharey=True)

panels = [('inner', 'Inner (<= median)'), ('outer', 'Outer (> median)')]
for ax, (bin_label, title) in zip(axes, panels):
    subset = member_group[member_group['dist_bin'] == bin_label]

    positions = np.arange(len(z_labels))
    bottom = np.zeros(len(z_labels))

    # Redshift-bin membership uses the 'Z' column merged in from the group table;
    # it does not depend on the class, so it is built once per panel.
    bin_masks = [(subset['Z'] >= z_lo) & (subset['Z'] < z_hi) for z_lo, z_hi in z_edges]
    bin_totals = [np.count_nonzero(mask) for mask in bin_masks]

    for cls in desired_order:
        # Fraction of the bin in this class; empty bins contribute 0.
        fractions = np.array([
            np.count_nonzero(subset['morph_class'][mask] == cls) / total if total else 0.0
            for mask, total in zip(bin_masks, bin_totals)
        ])
        ax.bar(
            positions,
            fractions,
            bottom=bottom,
            width=0.7,
            color=class_palette.get(cls, '#333333'),
            label=cls
        )
        bottom += fractions

    ax.set_xticks(positions)
    ax.set_xticklabels(z_labels, rotation=45, ha='right')
    ax.set(ylim=(0, 1), xlabel='Redshift bin', title=title)
    ax.grid(axis='y', linestyle=':', alpha=0.4)

axes[0].set_ylabel('Morphology fraction')

# Shared legend built from proxy markers, placed to the right of the last panel.
handles = []
for cls in desired_order:
    handles.append(
        plt.Line2D([0], [0], marker='s', color=class_palette.get(cls, '#333333'),
                   linestyle='', markersize=8, label=cls)
    )
axes[-1].legend(handles=handles, loc='upper left', bbox_to_anchor=(1.02, 1), frameon=False)

plt.tight_layout(rect=[0, 0, 0.88, 1])
plt.show()


In [None]:
# Morphology fractions by redshift bin for richness bins
z_bins = np.arange(0.0, 5.6, 0.5)
z_edges = list(zip(z_bins[:-1], z_bins[1:]))
z_labels = [f"{z_lo:.1f} <= z < {z_hi:.1f}" for z_lo, z_hi in z_edges]

# Stacking order (bottom to top) and the colour used for each class.
desired_order = ['late-disk', 'early-disk', 's0', 'elliptical']
class_palette = {
    'late-disk': '#4C72B0',
    'early-disk': '#8172B2',
    's0': '#ED680F',
    'elliptical': '#C44E52'
}

fig, axes = plt.subplots(1, 2, figsize=(16, 4.5), sharey=True)

panels = [('low-richness', 'Low richness'), ('high-richness', 'High richness')]
for ax, (bin_label, title) in zip(axes, panels):
    subset = member_group[member_group['richness_bin'] == bin_label]

    positions = np.arange(len(z_labels))
    bottom = np.zeros(len(z_labels))

    # Redshift-bin membership uses the 'Z' column merged in from the group table;
    # it does not depend on the class, so it is built once per panel.
    bin_masks = [(subset['Z'] >= z_lo) & (subset['Z'] < z_hi) for z_lo, z_hi in z_edges]
    bin_totals = [np.count_nonzero(mask) for mask in bin_masks]

    for cls in desired_order:
        # Fraction of the bin in this class; empty bins contribute 0.
        fractions = np.array([
            np.count_nonzero(subset['morph_class'][mask] == cls) / total if total else 0.0
            for mask, total in zip(bin_masks, bin_totals)
        ])
        ax.bar(
            positions,
            fractions,
            bottom=bottom,
            width=0.7,
            color=class_palette.get(cls, '#333333'),
            label=cls
        )
        bottom += fractions

    ax.set_xticks(positions)
    ax.set_xticklabels(z_labels, rotation=45, ha='right')
    ax.set(ylim=(0, 1), xlabel='Redshift bin', title=title)
    ax.grid(axis='y', linestyle=':', alpha=0.4)

axes[0].set_ylabel('Morphology fraction')

# Shared legend built from proxy markers, placed to the right of the last panel.
handles = []
for cls in desired_order:
    handles.append(
        plt.Line2D([0], [0], marker='s', color=class_palette.get(cls, '#333333'),
                   linestyle='', markersize=8, label=cls)
    )
axes[-1].legend(handles=handles, loc='upper left', bbox_to_anchor=(1.02, 1), frameon=False)

plt.tight_layout(rect=[0, 0, 0.88, 1])
plt.show()
