
# Build monthly SST anomaly forcing (1955–2099) from an annual anomaly CSV

This notebook takes an **annual SST anomaly** time series (e.g., `anomaly_df.csv` with columns `year, anomaly`) and produces:

- A **monthly anomaly** CSV for **1955–2099**
- An **annual anomaly** CSV for **1955–2099** (derived from the monthly series as the mean of each year's 12 monthly values, so it can differ from the input annual values)

This is designed for ecological model forcing where you want a smooth monthly anomaly trajectory but only have annual projections for part of the period.

## Default assumptions (editable)
- Years **before the first provided year** (e.g., before 2021) are set to **0.0°C anomaly**
- Years **after the last provided year** (e.g., 2090–2099) are held at the **last available anomaly**
- Annual values are converted to monthly via **linear time interpolation** between January-1 anchors (or July-1 anchors if `ANCHOR = "jul1"`)

You can switch these behaviors in the settings cell below.


## 0) Imports

In [None]:

import pandas as pd
import numpy as np
from pathlib import Path


## 1) Settings

In [None]:

# =========================
# Input file
# =========================
# Keep anomaly_df.csv beside this notebook, or point INPUT_CSV at a full path.
INPUT_CSV = "anomaly_df.csv"   # required columns: year, anomaly

# =========================
# Years covered by the outputs (both ends inclusive)
# =========================
START_YEAR, END_YEAR = 1955, 2099

# =========================
# What to do outside the years present in INPUT_CSV
# =========================
# Anomaly assigned to every year earlier than the first input year
# (use np.nan instead of 0.0 to leave those years missing).
PRE_FIRST_YEAR_VALUE = 0.0

# Treatment of years later than the last input year:
#   "hold"               -> repeat the last available anomaly
#   "extrapolate_linear" -> continue a straight line fitted to recent years
POST_LAST_YEAR_MODE = "hold"
# Number of trailing input years used for the line fit when extrapolating.
EXTRAPOLATE_N_YEARS = 10

# =========================
# Annual -> monthly interpolation
# =========================
# Calendar date each annual value is pinned to before interpolating:
# "jan1" = January 1 of the year, "jul1" = July 1 of the year.
ANCHOR = "jan1"


## 2) Load and validate input

In [None]:

# Load the annual anomaly table and reduce it to one clean row per year.
input_path = Path(INPUT_CSV)
if not input_path.exists():
    raise FileNotFoundError(
        f"Could not find {INPUT_CSV!r}. Put it next to this notebook or set INPUT_CSV to the correct path."
    )

df_in = pd.read_csv(input_path)

# Basic validation / cleanup
required = {"year", "anomaly"}
if not required.issubset(df_in.columns):
    raise ValueError(f"Input CSV must contain columns {required}, but has {list(df_in.columns)}")

df_in = df_in[["year", "anomaly"]].copy()

# Coerce both columns to numbers first: blank or non-numeric cells become NaN
# instead of making astype(int) raise, and a NaN anomaly can no longer end up
# as the "last available" value used for the years after the input ends.
df_in["year"] = pd.to_numeric(df_in["year"], errors="coerce")
df_in["anomaly"] = pd.to_numeric(df_in["anomaly"], errors="coerce")

n_rows_read = len(df_in)
df_in = df_in.dropna(subset=["year", "anomaly"])
n_rows_dropped = n_rows_read - len(df_in)
if n_rows_dropped:
    print(f"Dropped {n_rows_dropped} row(s) with a missing or non-numeric year/anomaly")
if df_in.empty:
    raise ValueError(f"{INPUT_CSV!r} contains no rows with a numeric year and anomaly")

df_in["year"] = df_in["year"].astype(int)
# A stable sort keeps duplicate years in file order, so drop_duplicates
# deterministically keeps the first occurrence of each year.
df_in = df_in.sort_values("year", kind="mergesort").drop_duplicates("year")

first_year = int(df_in["year"].min())
last_year = int(df_in["year"].max())

print("Loaded:", input_path.resolve())
print("Input years:", first_year, "to", last_year)
df_in.head()


## 3) Build a complete annual series (START_YEAR–END_YEAR)

In [None]:

# One row per output year; years absent from the input start out as NaN.
years_full = pd.DataFrame({"year": np.arange(START_YEAR, END_YEAR + 1)})
annual = years_full.merge(df_in, on="year", how="left")

# Fill before first provided year
annual.loc[annual["year"] < first_year, "anomaly"] = PRE_FIRST_YEAR_VALUE

# Forward-fill carries the previous value across any gap inside the input
# range. It also fills the years after last_year, but those rows are
# overwritten by the extension step below.
annual["anomaly"] = annual["anomaly"].ffill()

# Extend after last provided year
if END_YEAR > last_year:
    after_last = annual["year"] > last_year
    last_value = float(df_in.loc[df_in["year"].idxmax(), "anomaly"])

    if POST_LAST_YEAR_MODE == "hold":
        annual.loc[after_last, "anomaly"] = last_value
    elif POST_LAST_YEAR_MODE == "extrapolate_linear":
        # Fit a line to the last N usable input years. NaN anomalies are
        # excluded because a single NaN makes np.polyfit return NaN.
        usable = df_in.dropna(subset=["anomaly"]).sort_values("year")
        tail = usable.tail(max(int(EXTRAPOLATE_N_YEARS), 0))
        if len(tail) < 2:
            # A straight line needs at least two points; with fewer the fit
            # is undefined, so hold the last value rather than emit garbage.
            print(
                "Not enough input years to fit a line for extrapolation; "
                "holding the last available anomaly instead."
            )
            annual.loc[after_last, "anomaly"] = last_value
        else:
            x = tail["year"].values.astype(float)
            y = tail["anomaly"].values.astype(float)
            m, b = np.polyfit(x, y, 1)
            annual.loc[after_last, "anomaly"] = m * annual.loc[after_last, "year"].values + b
    else:
        raise ValueError("POST_LAST_YEAR_MODE must be 'hold' or 'extrapolate_linear'")

annual.head(10), annual.tail(10)


## 4) Convert annual anomalies to monthly anomalies (smooth monthly series)

In [None]:

# Monthly timeline (month starts)
monthly_dates = pd.date_range(f"{START_YEAR}-01-01", f"{END_YEAR}-12-01", freq="MS")

# Choose anchor dates for annual anomalies
if ANCHOR.lower() == "jan1":
    anchor_dates = pd.to_datetime(annual["year"].astype(str) + "-01-01")
elif ANCHOR.lower() == "jul1":
    anchor_dates = pd.to_datetime(annual["year"].astype(str) + "-07-01")
else:
    raise ValueError("ANCHOR must be 'jan1' or 'jul1'")

anchors = pd.Series(annual["anomaly"].values, index=anchor_dates).sort_index()

# Ensure endpoints exist for interpolation
anchors.loc[pd.Timestamp(f"{START_YEAR}-01-01")] = anchors.loc[pd.Timestamp(f"{START_YEAR}-01-01")] if pd.Timestamp(f"{START_YEAR}-01-01") in anchors.index else PRE_FIRST_YEAR_VALUE
anchors.loc[pd.Timestamp(f"{END_YEAR}-01-01")] = anchors.loc[pd.Timestamp(f"{END_YEAR}-01-01")] if pd.Timestamp(f"{END_YEAR}-01-01") in anchors.index else anchors.iloc[-1]
anchors = anchors.sort_index()

# Interpolate to monthly
monthly = anchors.reindex(anchors.index.union(monthly_dates)).sort_index().interpolate(method="time")
monthly = monthly.reindex(monthly_dates).ffill().bfill()

monthly_df = pd.DataFrame({"time": monthly.index, "anomaly_C": monthly.values})
monthly_df["year"] = monthly_df["time"].dt.year
monthly_df["month"] = monthly_df["time"].dt.month

monthly_df.head(12), monthly_df.tail(12)


## 5) Derive annual anomalies from the monthly series

In [None]:

# Collapse the monthly series back to one value per calendar year by
# averaging that year's monthly anomalies.
annual_df = (
    monthly_df[["year", "anomaly_C"]]
    .groupby("year", as_index=False)
    .mean()
)
annual_df.head(), annual_df.tail()


## 6) Save outputs

In [None]:

# Output file names carry the year window so runs with different settings
# do not overwrite each other.
monthly_out = Path(f"insular_pacific_hawaii_sst_anomaly_monthly_{START_YEAR}_{END_YEAR}_from_paper.csv")
annual_out  = Path(f"insular_pacific_hawaii_sst_anomaly_annual_{START_YEAR}_{END_YEAR}_from_paper.csv")

outputs = ((monthly_df, monthly_out), (annual_df, annual_out))

# Write both tables first (without the index column) ...
for table, destination in outputs:
    table.to_csv(destination, index=False)

# ... then report where each one landed.
for _, destination in outputs:
    print("Wrote:", destination.resolve())


## 7) Quick plots (optional)

In [None]:

import matplotlib.pyplot as plt

# One entry per figure: x data, y data, extra line styling, title, x-axis label.
plot_specs = [
    (monthly_df["time"], monthly_df["anomaly_C"], {}, "Monthly SST anomaly forcing", "Time"),
    (
        annual_df["year"],
        annual_df["anomaly_C"],
        {"marker": "o", "linewidth": 1},
        "Annual SST anomaly (from monthly mean)",
        "Year",
    ),
]

# Each series gets its own figure; both share the same y-axis label.
for x_values, y_values, line_style, title, x_label in plot_specs:
    plt.figure()
    plt.plot(x_values, y_values, **line_style)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel("Anomaly (°C)")
    plt.show()
