# 02 - Galaxy Main Sequence Analysis (Figure 1)

This notebook generates **Figure 1** from the paper using the original plotting code, adapted to the refactored data structure.

**Goal**: Reproduce Figure 1 exactly as in the original paper.

In [None]:
# Imports (adapted for refactored structure)
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sys
import os

# Add src to path
sys.path.append('../src')

from utils.plotting import draw_grid_A
from utils.analysis import (
    sim_name, colors, zoom_name, zoom_colors, zoom_markers,
    SM_axis_label, SFR_axis_label
)
from utils.data_processing import load_zoom_data

%matplotlib inline

In [None]:
# Read the pre-processed simulation catalogue (used in place of the raw .mat files).
# pickle can execute code on load, so this file should only ever be one
# produced by a trusted source.
with open('../data/sim_data_processed.pkl', 'rb') as pkl_file:
    sim_data = pickle.load(pkl_file)

# Report, per simulation, how many galaxies were loaded (or that nothing was).
print("Loaded processed simulation data:")
for sim in sim_name:
    sim_entry = sim_data[sim]
    if sim_entry is None:
        print(f"{sim:>15s}: No data")
        continue
    ngal = sim_entry['ngal']
    print(f"{sim:>15s}: {ngal:>8,d} galaxies")

In [None]:
# Fetch the zoom simulations through the project helper.
zoom_data = load_zoom_data()

# Report, per zoom simulation, how many galaxies were loaded (or that nothing was).
print("\nLoaded zoom simulation data:")
for zoom in zoom_name:
    zoom_entry = zoom_data[zoom]
    if zoom_entry is None:
        print(f"{zoom:>15s}: No data")
        continue
    ngal = zoom_entry['ngal']
    print(f"{zoom:>15s}: {ngal:>8,d} galaxies")

In [None]:
# Figure 1: galaxy main sequence (stellar mass vs. star formation rate), one
# panel per simulation plus a combined panel.
# Adapted from cell 11 of SFHs_figures.ipynb. Differences from the original
# are limited to warning/robustness fixes and do not change what is drawn.

LOW_MASS_CUT = 1e9                        # cut used for every simulation not listed below
HIGH_MASS_CUT = 1e10                      # cut used for Mufasa and Simba
HIGH_MASS_CUT_SIMS = ('Mufasa', 'Simba')

# x tick layout set by hand, identical for every per-simulation panel.
# NOTE(review): the 8x10^n positions (n = 8..11) are absent from the minor
# ticks; left untouched so the output matches the original code.
MAJOR_XTICKS = [1e8, 1e9, 1e10, 1e11, 1e12]
MAJOR_XTICK_LABELS = ["", "$10^9$", "", "$10^{11}$", ""]
MINOR_XTICKS = [7e7, 8e7, 9e7,
                2e8, 3e8, 4e8, 5e8, 6e8, 7e8, 9e8,
                2e9, 3e9, 4e9, 5e9, 6e9, 7e9, 9e9,
                2e10, 3e10, 4e10, 5e10, 6e10, 7e10, 9e10,
                2e11, 3e11, 4e11, 5e11, 6e11, 7e11, 9e11,
                2e12, 3e12, 4e12]

# SFR plotted for each simulation: the last slice of the transposed star
# formation history (presumably the most recent time step -- TODO confirm).
# NOTE(review): unlike the zoom loop further down, this assumes every entry
# of sim_data is populated (not None).
last_sfr = {sim: sim_data[sim]['sfh_raw'].T[-1] for sim in sim_name}

# draw grid: axis limits are shared by all panels
minsm = np.min([np.min(sim_data[sim]['sm']) for sim in sim_name[:-1]])  # exclude UM
maxsm = np.max([np.max(sim_data[sim]['sm']) for sim in sim_name])
minsfr = np.min([np.min(last_sfr[sim][last_sfr[sim] > 0.]) for sim in sim_name[:-2]])  # ignore 0s, UM, SC
maxsfr = np.max([np.max(last_sfr[sim]) for sim in sim_name])
ax = draw_grid_A(xlabel=SM_axis_label, ylabel=SFR_axis_label,
                 xlim=[minsm, maxsm], ylim=[minsfr, maxsfr])

# plot (simulations are drawn in reverse order of sim_name)
for sim in sim_name[::-1]:
    sm = sim_data[sim]['sm']
    sfr = last_sfr[sim]
    mass_cut = HIGH_MASS_CUT if sim in HIGH_MASS_CUT_SIMS else LOW_MASS_CUT
    below_cut = sm <= mass_cut
    color = colors[sim]

    # individual panel: grey at or below the cut, simulation colour above it
    panel = ax[sim]
    panel.loglog(sm[below_cut], sfr[below_cut], '.', c='grey', ms=1)
    panel.loglog(sm[~below_cut], sfr[~below_cut], '.', c=color, ms=1)
    panel.axvline(mass_cut, c='r')
    panel.text(10**8.2, 10**1.5, sim, fontsize=16,
               bbox=dict(facecolor='white', edgecolor='white', linewidth=0))

    # xticks by hand
    panel.set_xticks(MAJOR_XTICKS)
    panel.set_xticklabels(MAJOR_XTICK_LABELS)
    panel.set_xticks(MINOR_XTICKS, minor=True)

    # combined panel: same split, larger semi-transparent points
    ax['large'].loglog(sm[below_cut], sfr[below_cut], '.', c='grey', ms=3, alpha=0.2)
    ax['large'].loglog(sm[~below_cut], sfr[~below_cut], '.', c=color, ms=3, alpha=0.2)

# Add mass cut lines to combined panel
ax['large'].axvline(LOW_MASS_CUT, c='r')
ax['large'].axvline(HIGH_MASS_CUT, c='r')

# Plot zoom simulations on top, skipping any that failed to load.
# The marker is given only via the keyword: combining a '.' format string
# with marker=... makes matplotlib warn about a redundant definition (the
# keyword wins anyway). linestyle='None' keeps the points unconnected, which
# is what the format string implied.
# No legend is drawn in this cell; `label` is kept from the original code.
for zoom in zoom_name:
    if zoom_data[zoom] is not None:
        sm = zoom_data[zoom]['sm']
        sfr = zoom_data[zoom]['sfh_raw'].T[-1]
        ax['large'].loglog(sm, sfr, linestyle='None', c=zoom_colors[zoom],
                           marker=zoom_markers[zoom], mec='k', ms=10, label=zoom)

ax['large'].text(10**8, 10**1.68, 'All simulations', fontsize=16,
                 bbox=dict(facecolor='white', edgecolor='white', linewidth=0))

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('../figures', exist_ok=True)
plt.savefig('../figures/figure1_main_sequence.png', bbox_inches='tight', dpi=300)
plt.savefig('../figures/figure1_main_sequence.pdf', bbox_inches='tight')

plt.show()
plt.close()

## Summary

This notebook reproduces **Figure 1** exactly as in the original paper, showing the galaxy main sequence (stellar mass vs star formation rate) for all simulations. 

Key features:
- Points below mass cuts shown in grey, above mass cuts in simulation colors
- Red vertical lines indicate mass cuts ($10^9 M_\odot$ for most, $10^{10} M_\odot$ for Mufasa/Simba)
- Combined panel shows all simulations together, with the zoom simulations overplotted as larger, black-edged markers
- Manual tick control for precise formatting

**Files generated:**
- `../figures/figure1_main_sequence.png`
- `../figures/figure1_main_sequence.pdf`