In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np

# Load the raw C46204 buoy export.
file_path = '~/Desktop/Summer 2025 Python/c46204.csv'
ds = pd.read_csv(file_path)

# Parse timestamps and keep 2014-01-01 .. 2026-01-01 (both bounds inclusive).
# .copy() detaches the filtered rows from the original frame so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
ds['DATE'] = pd.to_datetime(ds['DATE'], format='%m/%d/%Y %H:%M')
ds = ds[(ds['DATE'] >= '2014-01-01') & (ds['DATE'] <= '2026-01-01')].copy()

# Coerce the measurement columns to numeric; unparseable entries become NaN.
#   WDIR = wind direction (degrees), WSPD = wind speed (m/s or knots --
#   confirm against the file's metadata), SSTP = sea surface temperature.
for col in ('WDIR', 'WSPD', 'SSTP'):
    ds[col] = pd.to_numeric(ds[col], errors='coerce')

# One scatter figure per variable. Each entry is:
# (column, legend label, y-axis label, title, marker colour, axis-label colour)
panels = [
    ('WSPD', 'Wind Speed', 'Wind Speed (m/s)',
     'Wind Speed from C46204', 'blue', 'tab:blue'),
    ('SSTP', 'Sea Surface Temperature', 'Sea Surface Temperature',
     'Sea Surface Temperature from C46204', 'red', 'red'),
]

for column, label, ylabel, title, marker_color, label_color in panels:
    fig, ax1 = plt.subplots(figsize=(12, 5))

    ax1.scatter(ds['DATE'], ds[column], label=label, color=marker_color, s=1)
    ax1.set_ylabel(ylabel, color=label_color)
    ax1.tick_params(axis='y', labelcolor=label_color)

    # Formatting. The wind panel shows speed only, so its title no longer
    # claims to show direction as well.
    ax1.set_title(title)
    ax1.set_xlabel('Date')
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d\n%Y'))

    fig.tight_layout()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np

# --- Load data ---
file_path = '~/Desktop/Summer 2025 Python/c46204.csv'
ds = pd.read_csv(file_path)

# --- Parse datetime & filter (2021-01-01 .. 2026-01-01, inclusive) ---
# .copy() makes the filtered frame independent, so the column assignments
# below write to `ds` itself rather than to a slice of the original frame
# (avoids SettingWithCopyWarning).
ds['DATE'] = pd.to_datetime(ds['DATE'], format='%m/%d/%Y %H:%M')
ds = ds[(ds['DATE'] >= '2021-01-01') & (ds['DATE'] <= '2026-01-01')].copy()

# --- Ensure numeric (unparseable entries become NaN) ---
ds['WDIR'] = pd.to_numeric(ds['WDIR'], errors='coerce')   # deg true (from)
ds['WSPD'] = pd.to_numeric(ds['WSPD'], errors='coerce')   # m/s
ds['ATMS'] = pd.to_numeric(ds['ATMS'], errors='coerce')   # mbar (hPa)
# ds.get returns None when the column is absent, which coerces to NaN.
ds['DRYT'] = pd.to_numeric(ds.get('DRYT'), errors='coerce')  # air temp (°C), if present

# --- Air density from pressure & temp (ideal gas law for dry air) ---
# ATMS: mbar -> Pa (×100). DRYT: °C -> K (+273.15)
# ρ_air = p / (R * T), R = 287.06 J kg^-1 K^-1
R = 287.06
p_Pa = ds['ATMS'] * 100.0
T_K  = 273.15 + ds['DRYT']
ds['rho_air'] = p_Pa / (R * T_K)

# No fallback density is applied here: rows with missing pressure or
# temperature keep rho_air = NaN and therefore yield a NaN index downstream.
# To substitute a standard density of 1.225 kg/m^3 instead, enable:
# ds['rho_air'] = ds['rho_air'].where(np.isfinite(ds['rho_air']), other=1.225)

# --- Function: Ekman transport projected onto a fixed OFFSHORE axis (default 135° True)
# Convention HERE (PFEL/Bakun): transport TOWARD the offshore axis => POSITIVE
def compute_ekman_transport_bakun(speed, dir_from_deg_true, rho_air,
                                  phi_for_f=51.38, Cd=1.3e-3, rho_w=1025.0,
                                  offshore_axis_deg=135.0):
    """
    Ekman volume transport component along a fixed offshore axis.

    Parameters
    ----------
    speed : scalar, array or Series
        Wind speed (m/s).
    dir_from_deg_true : scalar, array or Series
        Meteorological wind direction, degrees true the wind blows FROM.
    rho_air : scalar, array or Series
        Air density (kg/m^3).
    phi_for_f : float
        Latitude in degrees used for the Coriolis parameter. Must be
        non-zero (f = 0 at the equator makes the transport undefined).
    Cd : float
        Drag coefficient of the bulk stress formula.
    rho_w : float
        Sea-water density (kg/m^3).
    offshore_axis_deg : float
        Direction (degrees true, TOWARD) treated as offshore.

    Returns
    -------
    M_offshore in m^2/s per unit width, same shape as the inputs:
    POSITIVE = transport toward the offshore axis (upwelling-favorable),
    NEGATIVE = onshore.
    """
    # Wind (met 'from') -> east/north components of where it blows TOWARD
    th = np.deg2rad(dir_from_deg_true)
    u = -speed * np.sin(th)  # east
    v = -speed * np.cos(th)  # north
    W = np.hypot(u, v)

    # Bulk wind stress (east, north): tau = rho_air * Cd * |W| * W
    tau_u = rho_air * Cd * W * u
    tau_v = rho_air * Cd * W * v

    # Ekman transport: M = (tau × k) / (rho_w f), i.e. rotated 90° to the
    # RIGHT of the stress for f > 0 (Northern Hemisphere):
    #   M_e = +tau_n / (rho_w f),  M_n = -tau_e / (rho_w f)
    # The previous (k × tau) form rotated to the left and flipped the sign.
    f = 2.0 * 7.2921e-5 * np.sin(np.deg2rad(phi_for_f))
    M_e =  tau_v / (rho_w * f)  # east
    M_n = -tau_u / (rho_w * f)  # north

    # Unit vector TOWARD the offshore axis, as (east, north)
    theta_off = np.deg2rad(offshore_axis_deg)
    e_off = (np.sin(theta_off), np.cos(theta_off))

    # Project: positive along e_off = offshore (Bakun positive)
    M_off = M_e * e_off[0] + M_n * e_off[1]
    return M_off

# --- Bakun-style index: transport per metre scaled to a 100 m shoreline ---
# compute_ekman_transport_bakun returns m^2/s per unit width; x100 gives
# m^3/s per 100 m.
ds['M'] = 100.0 * compute_ekman_transport_bakun(ds['WSPD'], ds['WDIR'], ds['rho_air'])

# --- Signed bar chart: blue for values >= 0, red for values < 0 ---
fig, ax = plt.subplots(figsize=(12,5))

colors = np.where(ds['M'] >= 0, 'steelblue', 'tomato')
ax.bar(ds['DATE'], ds['M'], color=colors, width=1.0, align='center')

# Zero reference line separating the two regimes
ax.axhline(0, color='k', ls='--', lw=0.8)

ax.set_title('Bakun Index at C46204 (Offshore Axis = 135° True; + = Upwelling)', fontsize=14)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel(r'Bakun Upwelling Index (m$^3$ s$^{-1}$ per 100 m)', fontsize=12)

# Month + year tick labels
date_fmt = DateFormatter('%b %Y')
ax.xaxis.set_major_formatter(date_fmt)

plt.tight_layout()
plt.show()


import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np
from pathlib import Path

# -----------------------------
# Config
# -----------------------------
# Path to your PFEL daily file that contains the header and the "YYYYMMDD Index" table
pfel_path = Path('~/Desktop/p05dayac.all').expanduser()

# Optional date filter (set to None to keep all)
DATE_START = '2021-01-01'          # e.g., '2021-01-01'
DATE_END   = None          # e.g., '2026-01-01'

# Toggle a 4-week time-based rolling mean bar plot instead of daily
USE_4W_ROLLING = False     # set True for rolling bars

# -----------------------------
# Parse the PFEL file
# -----------------------------
# Find the row index where the actual two-column table starts (line after "YYYYMMDD Index")
with open(pfel_path, 'r') as f:
    lines = f.readlines()

start_idx = None
for i, line in enumerate(lines):
    if line.strip().startswith('YYYYMMDD'):
        start_idx = i + 1  # data begins after this line
        break

if start_idx is None:
    raise ValueError("Couldn't find the 'YYYYMMDD Index' header in the file.")

# Read the two columns (YYYYMMDD, Index); whitespace-delimited.
# sep=r'\s+' is the documented replacement for the deprecated
# delim_whitespace=True keyword.
df = pd.read_csv(
    pfel_path,
    sep=r'\s+',
    header=None,
    names=['DATE', 'Index'],
    skiprows=start_idx
)

# Drop any non-numeric DATE rows (e.g., trailing lines)
df = df[pd.to_numeric(df['DATE'], errors='coerce').notna()].copy()

# Convert types. Index is forced to float so the missing-value marker can be
# replaced by NaN without relying on implicit int -> float upcasting.
df['DATE'] = pd.to_datetime(df['DATE'].astype(str), format='%Y%m%d')
df['Index'] = pd.to_numeric(df['Index'], errors='coerce').astype(float)

# Replace the -9999 missing-value marker with NaN and drop those rows
df['Index'] = df['Index'].mask(df['Index'] == -9999)
df = df.dropna(subset=['Index'])

# Optional date filter (both bounds inclusive)
if DATE_START is not None:
    df = df[df['DATE'] >= pd.to_datetime(DATE_START)]
if DATE_END is not None:
    df = df[df['DATE'] <= pd.to_datetime(DATE_END)]

# -----------------------------
# Prepare series to plot
# -----------------------------
# PFEL convention: Positive = offshore (upwelling), Negative = onshore (downwelling)
# Units already: m^3 s^-1 per 100 m coastline (from PFEL docs)
df = df.sort_values('DATE').set_index('DATE')

if USE_4W_ROLLING:
    # Trailing 28-day, time-based window; min_periods=1 keeps the first days.
    series = df['Index'].rolling(window='28D', min_periods=1).mean()
    plot_title_suffix = ' (4-week rolling mean)'
else:
    series = df['Index']
    plot_title_suffix = ' (Daily)'

# -----------------------------
# Plot bars matching your C46204 style
# -----------------------------
fig, ax = plt.subplots(figsize=(12, 5))

colors = np.where(series.values >= 0, 'steelblue', 'tomato')  # upwelling (+) vs downwelling (−)
ax.bar(series.index, series.values, width=1.0, color=colors, align='center')

ax.axhline(0, color='k', ls='--', lw=0.8)
ax.set_ylabel(r'Bakun Upwelling Index (m$^3$ s$^{-1}$ per 100 m)', fontsize=12)
ax.set_xlabel('Date', fontsize=12)
ax.set_title('Bakun Index at 51°N, 131°W — + Offshore (Upwelling)' + plot_title_suffix, fontsize=14)

date_fmt = DateFormatter('%b %Y')
ax.xaxis.set_major_formatter(date_fmt)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# =========================
# Config (edit paths)
# =========================
# Inputs: raw buoy CSV and the PFEL daily index text file.
# Output: merged daily CSV written at the end of this cell and re-read by the
# plotting/regression cells that follow.
c46204_path = Path('~/Desktop/Summer 2025 Python/c46204.csv').expanduser()
pfel_path   = Path('~/Desktop/p05dayac.all').expanduser()  # PFEL daily Bakun text file
out_path    = Path('~/Desktop/Summer 2025 Python/bakun_combined_daily.csv').expanduser()

# Inclusive date window applied to the buoy record; None disables a bound.
DATE_START = '2000-01-01'   # or None
DATE_END   = '2026-01-01'   # or None
# When True, WSPD is multiplied by 0.514444 (knots -> m/s) before use.
assume_knots = False        # set True if WSPD in c46204 is in knots

# =========================
# Helpers
# =========================
def compute_ekman_transport_bakun(speed, dir_from_deg_true, rho_air,
                                  phi_for_f=51.38, Cd=1.3e-3, rho_w=1025.0,
                                  offshore_axis_deg=135.0):
    """
    Ekman volume transport component along a fixed offshore axis.

    Parameters
    ----------
    speed : scalar, array or Series
        Wind speed (m/s).
    dir_from_deg_true : scalar, array or Series
        Meteorological wind direction, degrees true the wind blows FROM.
    rho_air : scalar, array or Series
        Air density (kg/m^3).
    phi_for_f : float
        Latitude in degrees used for the Coriolis parameter. Must be
        non-zero (f = 0 at the equator makes the transport undefined).
    Cd : float
        Drag coefficient of the bulk stress formula.
    rho_w : float
        Sea-water density (kg/m^3).
    offshore_axis_deg : float
        Direction (degrees true, TOWARD) treated as offshore.

    Returns
    -------
    M_offshore (m^2/s per unit width), same shape as the inputs, with
    POSITIVE = transport toward the offshore axis (upwelling-favorable),
    NEGATIVE = onshore. Inputs can be pandas Series; vectorized operations
    are used throughout.
    """
    th = np.deg2rad(dir_from_deg_true)
    u = -speed * np.sin(th)  # eastward wind (toward)
    v = -speed * np.cos(th)  # northward wind (toward)
    W = np.hypot(u, v)

    # Bulk wind stress components (east, north)
    tau_u = rho_air * Cd * W * u
    tau_v = rho_air * Cd * W * v

    # Ekman transport is rotated 90° to the RIGHT of the stress for f > 0
    # (Northern Hemisphere): M = (tau × k) / (rho_w f), i.e.
    #   M_e = +tau_n / (rho_w f),  M_n = -tau_e / (rho_w f).
    # The previous (k × tau) form rotated to the left and flipped the sign.
    f = 2.0 * 7.2921e-5 * np.sin(np.deg2rad(phi_for_f))
    M_e =  tau_v / (rho_w * f)  # east
    M_n = -tau_u / (rho_w * f)  # north

    # Unit vector TOWARD the offshore axis, split into east/north parts
    theta_off = np.deg2rad(offshore_axis_deg)
    e_off_e = np.sin(theta_off)
    e_off_n = np.cos(theta_off)

    M_off = M_e * e_off_e + M_n * e_off_n
    return M_off  # m^2/s per unit width

def parse_pfel_daily_file(pfel_file_path):
    """
    Parse a PFEL daily Bakun index text file.

    The file is expected to contain free-form header lines, then a line that
    starts with 'YYYYMMDD', then a whitespace-delimited two-column table
    (date as YYYYMMDD, index value).

    Parameters
    ----------
    pfel_file_path : str or path-like
        Location of the PFEL text file.

    Returns
    -------
    DataFrame with columns DATE (datetime64) and PFEL (float, m^3/s per
    100 m), sorted by DATE, with -9999 / unparseable values removed.

    Raises
    ------
    ValueError
        If no line starting with 'YYYYMMDD' is found.
    """
    with open(pfel_file_path, 'r') as f:
        lines = f.readlines()

    # The data table starts on the line after the 'YYYYMMDD ...' header.
    start_idx = next(
        (i + 1 for i, line in enumerate(lines)
         if line.strip().startswith('YYYYMMDD')),
        None,
    )
    if start_idx is None:
        raise ValueError("Couldn't find the 'YYYYMMDD' header in PFEL file.")

    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True keyword.
    df = pd.read_csv(
        pfel_file_path,
        sep=r'\s+',
        header=None,
        names=['DATE', 'Index'],
        skiprows=start_idx
    )

    # Keep only rows whose DATE is numeric (drops trailing text lines), and
    # go through int so a float-typed column ('20210101.0') still parses.
    date_num = pd.to_numeric(df['DATE'], errors='coerce')
    valid = date_num.notna()
    df = df.loc[valid].copy()
    df['DATE'] = pd.to_datetime(
        date_num[valid].astype('int64').astype(str), format='%Y%m%d'
    )

    # Force float before masking so the -9999 marker can become NaN without
    # relying on implicit int -> float upcasting during assignment.
    index_vals = pd.to_numeric(df['Index'], errors='coerce').astype(float)
    df['Index'] = index_vals.mask(index_vals == -9999)

    df = df.dropna(subset=['Index']).sort_values('DATE').reset_index(drop=True)
    df = df.rename(columns={'Index': 'PFEL'})
    return df[['DATE', 'PFEL']]

# =========================
# 1) Load & process C46204 buoy -> daily Bakun-style index (per 100 m)
# =========================
ds = pd.read_csv(c46204_path)
# errors='coerce': rows whose timestamp does not match the format become NaT.
ds['DATE'] = pd.to_datetime(ds['DATE'], format='%m/%d/%Y %H:%M', errors='coerce')

if DATE_START is not None:
    ds = ds[ds['DATE'] >= pd.to_datetime(DATE_START)]
if DATE_END is not None:
    ds = ds[ds['DATE'] <= pd.to_datetime(DATE_END)]
# Detach from the unfiltered frame so the column assignments below write to
# `ds` itself (avoids SettingWithCopyWarning on a filtered slice).
ds = ds.copy()

# Ensure numeric
for col in ['WDIR', 'WSPD', 'ATMS', 'DRYT']:
    if col in ds.columns:
        ds[col] = pd.to_numeric(ds[col], errors='coerce')

# Optional: convert knots to m/s
if assume_knots and 'WSPD' in ds.columns:
    ds['WSPD'] = ds['WSPD'] * 0.514444

# Air density from pressure & dry-air temperature (fallbacks applied)
# ATMS given in mbar (hPa) -> Pa; DRYT in °C -> K; R = 287.06 J/(kg K)
R = 287.06
# Pressure is always a Series aligned with `ds`, so rho_air is a Series even
# when ATMS and/or DRYT are absent. Previously a missing ATMS together with
# a missing DRYT produced a bare float, and the `.where` fallback crashed.
if 'ATMS' in ds.columns:
    p_Pa = ds['ATMS'] * 100.0
else:
    p_Pa = pd.Series(np.nan, index=ds.index)
T_K  = (273.15 + ds['DRYT']) if 'DRYT' in ds.columns else 273.15 + 15.0
rho_air = p_Pa / (R * T_K)
rho_air = rho_air.where(np.isfinite(rho_air), other=1.225)  # fallback to 1.225 kg/m^3

# Compute per-sample M (m^2/s), then convert to per-100 m shoreline
M_off = compute_ekman_transport_bakun(
    speed=ds['WSPD'],
    dir_from_deg_true=ds['WDIR'],
    rho_air=rho_air
)
ds['C46204'] = M_off * 100.0  # m^3/s per 100 m

# Daily average: timestamps are floored to midnight as stored (no timezone
# handling is applied) and samples are averaged per calendar day.
ds['DATE_DAY'] = ds['DATE'].dt.floor('D')
buoy_daily = (ds.groupby('DATE_DAY', as_index=False)['C46204']
                .mean()
                .rename(columns={'DATE_DAY': 'DATE'}))

# =========================
# 2) Load & process PFEL daily file
# =========================
pfel_daily = parse_pfel_daily_file(pfel_path)

# Optional: clip PFEL to buoy date window to ensure overlap
start = buoy_daily['DATE'].min()
end   = buoy_daily['DATE'].max()
pfel_daily = pfel_daily[(pfel_daily['DATE'] >= start) & (pfel_daily['DATE'] <= end)]

# =========================
# 3) Merge on DATE and save
# =========================
# Inner join: only days present in BOTH records are kept.
combined = (pd.merge(buoy_daily, pfel_daily, on='DATE', how='inner')
              .sort_values('DATE')
              .reset_index(drop=True))

# Columns: DATE, C46204 (m^3/s per 100 m), PFEL (m^3/s per 100 m)
combined.to_csv(out_path, index=False)

print(f"Saved {len(combined)} daily rows to: {out_path}")
print(combined.head(5))

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
from pathlib import Path

# --- Read the merged daily table (DATE parsed as datetime) in date order ---
combined_path = Path('~/Desktop/Summer 2025 Python/bakun_combined_daily.csv').expanduser()
df = pd.read_csv(combined_path, parse_dates=['DATE'])
df = df.sort_values('DATE')

# --- Compute per-panel ranges for sizing ---
def total_range(series):
    """
    Return max - min of a pandas Series as a float, for use as a subplot
    height ratio.

    Falls back to 1.0 when the series has no valid values, or when the range
    is zero or non-finite, so the result is always a usable positive ratio.
    """
    if not series.notna().any():
        return 1.0
    span = float(series.max() - series.min())
    # A constant (or infinite) series would otherwise yield a zero or
    # non-finite height ratio for the panel.
    return span if np.isfinite(span) and span > 0 else 1.0

# Panel heights are proportional to each series' data range.
range_c = total_range(df['C46204'])
range_p = total_range(df['PFEL'])
height_ratios = [range_c, range_p]

# --- Bar colours by sign: blue for >= 0, red otherwise ---
colors_c = np.where(df['C46204'] >= 0, 'steelblue', 'tomato')
colors_p = np.where(df['PFEL']   >= 0, 'steelblue', 'tomato')

# --- Two stacked panels sharing the date axis ---
fig, (ax1, ax2) = plt.subplots(
    2, 1,
    figsize=(12, 8),
    sharex=True,
    gridspec_kw=dict(height_ratios=height_ratios),
)

# (axes, column, bar colours, y label) for the top (C46204) and bottom (PFEL) panels
panel_specs = (
    (ax1, 'C46204', colors_c, r'C46204 (m$^3$ s$^{-1}$ per 100 m)'),
    (ax2, 'PFEL',   colors_p, r'PFEL (m$^3$ s$^{-1}$ per 100 m)'),
)
for axis, column, bar_colors, ylabel in panel_specs:
    axis.bar(df['DATE'], df[column], width=1.0, color=bar_colors, align='center')
    axis.axhline(0, color='k', ls='--', lw=0.8)
    axis.set_ylabel(ylabel, fontsize=12)
    # Clamp the y-axis to the exact data extent of this panel
    axis.set_ylim(df[column].min(), df[column].max())

ax1.set_title('Daily Bakun Index — C46204 vs PFEL 51°N, 131°W', fontsize=14)

# --- Shared x-axis formatting (labels only on the bottom panel) ---
ax2.set_xlabel('Date', fontsize=12)
date_fmt = DateFormatter('%b %Y')
ax2.xaxis.set_major_formatter(date_fmt)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
from pathlib import Path

# --- Load merged daily CSV ---
combined_path = Path('~/Desktop/Summer 2025 Python/bakun_combined_daily.csv').expanduser()
df = pd.read_csv(combined_path, parse_dates=['DATE']).sort_values('DATE')

# --- Ensure daily frequency if needed (optional) ---
# df = df.set_index('DATE').asfreq('D')  # uncomment only if you want explicit daily rows
# df = df.reset_index()

# --- 15-day rolling means (time-based window) ---
# The window was '30D' while every column name, axis label and the title
# below say "15-day"; it is now 15 days so the figure matches its labels.
ROLL_WINDOW = '15D'
df_rolling = df.copy()
df_rolling = df_rolling.set_index('DATE')
# Trailing time-based window; min_periods=1 keeps the first days of the record.
roll_c = df_rolling['C46204'].rolling(window=ROLL_WINDOW, min_periods=1).mean()
roll_p = df_rolling['PFEL'].rolling(window=ROLL_WINDOW, min_periods=1).mean()

# Back to columns
roll = pd.DataFrame({'DATE': roll_c.index, 'C46204_roll15': roll_c.values, 'PFEL_roll15': roll_p.values})

# --- Data range used to size each subplot (applied to the smoothed series) ---
def total_range(series):
    """
    Return nanmax - nanmin of an array-like as a float.

    Yields 1.0 instead when there is no finite value at all, or when the
    range is zero or not finite, so the result is always a positive ratio.
    """
    if np.isfinite(series).any():
        span = np.nanmax(series) - np.nanmin(series)
        if np.isfinite(span) and span > 0:
            return float(span)
    return 1.0

# Panel heights are proportional to the range of each smoothed series.
range_c = total_range(roll['C46204_roll15'].values)
range_p = total_range(roll['PFEL_roll15'].values)
height_ratios = [range_c, range_p]

# --- Bar colours by sign of the rolling mean ---
colors_c = np.where(roll['C46204_roll15'] >= 0, 'steelblue', 'tomato')
colors_p = np.where(roll['PFEL_roll15']   >= 0, 'steelblue', 'tomato')

# --- Two stacked panels sharing the date axis ---
fig, (ax1, ax2) = plt.subplots(
    2, 1,
    figsize=(12, 8),
    sharex=True,
    gridspec_kw=dict(height_ratios=height_ratios),
)

# (axes, column, bar colours, y label): top = C46204, bottom = PFEL
panel_specs = (
    (ax1, 'C46204_roll15', colors_c,
     r'C46204 (15-day mean)  [m$^3$ s$^{-1}$ per 100 m]'),
    (ax2, 'PFEL_roll15', colors_p,
     r'PFEL 51°N, 131°W (15-day mean)  [m$^3$ s$^{-1}$ per 100 m]'),
)
for axis, column, bar_colors, ylabel in panel_specs:
    axis.bar(roll['DATE'], roll[column], width=1.0, color=bar_colors, align='center')
    axis.axhline(0, color='k', ls='--', lw=0.8)
    axis.set_ylabel(ylabel, fontsize=12)
    # Clamp the y-axis to the NaN-ignoring data extent of this panel
    axis.set_ylim(np.nanmin(roll[column]), np.nanmax(roll[column]))

ax1.set_title('Bakun Upwelling Index — 15-day Rolling Averages', fontsize=14)

# --- Shared x-axis ---
ax2.set_xlabel('Date', fontsize=12)
date_fmt = DateFormatter('%b %Y')
ax2.xaxis.set_major_formatter(date_fmt)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from scipy import stats

# --- Read the merged daily table in date order ---
combined_path = Path('~/Desktop/Summer 2025 Python/bakun_combined_daily.csv').expanduser()
df = pd.read_csv(combined_path, parse_dates=['DATE'])
df = df.sort_values('DATE')

# --- Keep only days where both indices are present ---
df_clean = df.dropna(subset=['C46204', 'PFEL'])

# Restrict to an inclusive date window
start_date = "2020-01-01"
end_date   = "2025-01-01"
mask = df_clean['DATE'].between(start_date, end_date)
df_filtered = df_clean.loc[mask]

# --- Ordinary least squares: PFEL (y) regressed on C46204 (x) ---
x = df_filtered['C46204'].values
y = df_filtered['PFEL'].values

fit = stats.linregress(x, y)
slope, intercept = fit.slope, fit.intercept
r_value, p_value, std_err = fit.rvalue, fit.pvalue, fit.stderr

for line in (
    f"Slope:      {slope:.3f}",
    f"Intercept:  {intercept:.3f}",
    f"R²:         {r_value**2:.3f}",
    f"P-value:    {p_value:.3e}",
    f"Std Error:  {std_err:.3f}",
):
    print(line)

# --- Scatter of the daily pairs with the fitted line on top ---
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(x, y, alpha=0.5, label='Daily values')
fitted = slope*x + intercept
ax.plot(x, fitted, color='red', lw=2, label=f'Fit: y = {slope:.2f}x + {intercept:.2f}')
ax.set_xlabel('C46204  [m$^3$ s$^{-1}$ per 100 m]')
ax.set_ylabel('PFEL  [m$^3$ s$^{-1}$ per 100 m]')
ax.legend()
ax.grid(True)

plt.title(f'C46204 vs PFEL ({start_date} to {end_date})', fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from scipy import stats

# --- Read the merged daily table in date order ---
combined_path = Path('~/Desktop/Summer 2025 Python/bakun_combined_daily.csv').expanduser()
df = pd.read_csv(combined_path, parse_dates=['DATE'])
df = df.sort_values('DATE')

# --- Centred 15-row rolling means, then linear interpolation of any gaps ---
# The window counts rows, not calendar days.
roll_kwargs = dict(window=15, center=True, min_periods=1)
df = df.assign(
    C46204_roll=df['C46204'].rolling(**roll_kwargs).mean().interpolate(),
    PFEL_roll=df['PFEL'].rolling(**roll_kwargs).mean().interpolate(),
)
df_filled = df

# --- Keep only rows where both smoothed series are present ---
df_clean = df.dropna(subset=['C46204_roll', 'PFEL_roll'])

# Restrict to an inclusive date window
start_date = "2020-01-01"
end_date   = "2025-01-01"
mask = df_clean['DATE'].between(start_date, end_date)
df_filtered = df_clean.loc[mask]

# --- Ordinary least squares on the smoothed series: PFEL (y) on C46204 (x) ---
x = df_filtered['C46204_roll'].values
y = df_filtered['PFEL_roll'].values

fit = stats.linregress(x, y)
slope, intercept = fit.slope, fit.intercept
r_value, p_value, std_err = fit.rvalue, fit.pvalue, fit.stderr

for line in (
    f"Slope:      {slope:.3f}",
    f"Intercept:  {intercept:.3f}",
    f"R²:         {r_value**2:.3f}",
    f"P-value:    {p_value:.3e}",
    f"Std Error:  {std_err:.3f}",
):
    print(line)

# --- Scatter of the smoothed pairs with the fitted line on top ---
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(x, y, alpha=0.6, label='15-day rolling means')
fitted = slope*x + intercept
ax.plot(x, fitted, color='red', lw=2, label=f'Fit: y = {slope:.2f}x + {intercept:.2f}')
ax.set_xlabel('C46204 15-day mean [m$^3$ s$^{-1}$ per 100 m]')
ax.set_ylabel('PFEL 15-day mean [m$^3$ s$^{-1}$ per 100 m]')
ax.legend()
ax.grid(True)

plt.title(f'C46204 vs PFEL (15-day mean, {start_date} to {end_date})', fontsize=14)
plt.tight_layout()
plt.show()

# ChatGPT wants to try some lag analysis #

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# `df` is the combined daily frame left behind by an earlier cell; only its
# 'C46204' and 'PFEL' columns are used. The window below counts rows, so it
# equals 15 days only if the rows are consecutive days.
rolling_window = 15

# Centred rolling mean of both series, then drop rows where EITHER column is NaN
df_roll = (
    df[['C46204', 'PFEL']]
    .rolling(window=rolling_window, center=True, min_periods=1)
    .mean()
    .dropna(subset=['C46204', 'PFEL'])
)

# Function to compute lag correlation
def lag_corr(x, y, lag):
    """
    Pearson correlation between x[t] and y[t + lag].

    Sign convention: a POSITIVE lag means y lags x (y responds `lag` samples
    after x); a NEGATIVE lag means y leads x. Called as
    lag_corr(C46204, PFEL, lag), positive lag therefore means "PFEL lags
    C46204". The earlier implementation used the opposite sign.

    Parameters
    ----------
    x, y : pandas Series sharing an index with evenly spaced samples.
    lag : int
        Shift in samples (rows), not in time units.

    Returns
    -------
    float : Pearson r over the overlapping, non-NaN pairs.
    """
    # y.shift(-lag)[t] == y[t + lag]; shift(0) is a no-op, so lag == 0 needs
    # no special case.
    return x.corr(y.shift(-int(lag)))

# Sweep lags from -max_lag to +max_lag (in rows of df_roll, i.e. days for a
# gap-free daily record) and record the correlation at each lag.
max_lag = 180  # days
lags = np.arange(-max_lag, max_lag+1)
correlations = []
for lag in lags:
    correlations.append(lag_corr(df_roll['C46204'], df_roll['PFEL'], lag))

# Correlation as a function of lag, with zero reference lines on both axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(lags, correlations, marker='o')
ax.axhline(0, color='k', linestyle='--')
ax.axvline(0, color='k', linestyle='--')
ax.set_xlabel('Lag (days)\nPositive = PFEL lags C46204')
ax.set_ylabel('Pearson r')
ax.set_title(f'Lag Correlation (15-day mean, ±{max_lag} days)')
ax.grid(True)
plt.show()

# Let's do some more linear regressions, this time roughly mimicking what Jackson et al. (2021) did:

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import numpy as np
import os
import cmocean as cm
import waypoint_distance as wd
import pandas as pd
from pathlib import Path
from datetime import datetime
from matplotlib.dates import DateFormatter
import gsw
import matplotlib.dates as mdates
%matplotlib widget

# Earlier cube, kept for reference only:
# cube = xr.open_dataset(os.path.expanduser('~/Desktop/Summer 2025 Python/calvert_cube.nc'))
# Open the 2024-2025 transect cube.
new_cube = xr.open_dataset(os.path.expanduser('~/Desktop/Summer 2025 Python/2024_2025_transect_cube.nc'))
# Drop the transect labelled '20250317_out'.
new_cube = new_cube.sel(transect=new_cube.transect != '20250317_out')
# Re-reference the along-track coordinate: subtract 25500, then flip the sign,
# so the old along == 25500 becomes the new along == 0 and the axis direction
# is reversed. NOTE(review): 25500 is presumably the sill position in metres
# (the regression below uses sill_along=0) -- confirm.
new_cube = new_cube.assign_coords(along=new_cube['along'] - 25500)
new_cube = new_cube.assign_coords(along = new_cube['along']* (-1))

import pandas as pd
import numpy as np
from scipy import stats

def _pick_var(ds, candidates):
    """Return first existing variable name from candidates list (or None)."""
    for name in candidates:
        if name and name in ds.variables:
            return name
    return None

def _to_named_series(obj, value_col=None, name='index'):
    """Normalize Series/DataFrame -> Series with DatetimeIndex and name."""
    if isinstance(obj, pd.Series):
        s = obj.copy()
        s.index = pd.to_datetime(s.index)
        s.name = name if s.name is None else s.name
        return s
    if isinstance(obj, pd.DataFrame):
        df = obj.copy()
        if 'DATE' in df.columns:
            df['DATE'] = pd.to_datetime(df['DATE'])
            df = df.set_index('DATE')
        else:
            if not isinstance(df.index, pd.DatetimeIndex):
                raise ValueError("Index must be datetime or provide a 'DATE' column.")
        if value_col is None:
            # choose a likely value column
            for guess in ['PFEL', 'C46204', 'Index', 'M', 'value']:
                if guess in df.columns:
                    value_col = guess
                    break
        if value_col is None or value_col not in df.columns:
            raise ValueError(f"Specify value_col (available: {df.columns.tolist()})")
        s = df[value_col].astype(float)
        s.name = name
        return s
    raise TypeError("pfel/buoy must be a pandas Series or DataFrame.")

def regression_sill_vs_indices(
    cube,
    sill_along=0.0,
    sill_depth=130.0,
    pfel=None, pfel_value_col=None,
    buoy=None, buoy_value_col=None,
    daily=True,           # average to daily means
    rolling='15D',        # None or e.g. '15D'
    print_preview=True
):
    """
    Extract T/S/O2 at the sill from an xarray cube and regress vs PFEL and C46204 indices.

    Parameters
    ----------
    cube : xarray Dataset with `along` and `depth` coordinates, a `time`
        variable, and temperature / salinity (optionally oxygen) variables.
    sill_along, sill_depth : target along-track position and depth; the
        nearest available coordinate value is used for each.
    pfel, buoy : Series or DataFrame of the two upwelling indices (either may
        be None); normalised through _to_named_series.
    pfel_value_col, buoy_value_col : value column to use when a DataFrame is
        passed.
    daily : if True, sill properties are resampled to daily means.
    rolling : time-based window (e.g. '15D') applied with min_periods=1 to
        both the sill properties and the indices; falsy disables smoothing.
    print_preview : if True, print the result table.

    Returns a DataFrame with: Variable, Index, n, slope, intercept, R2, p, stderr
    (index on x, sill property on y; one row per variable/index pair that has
    at least 3 finite pairs).

    Raises
    ------
    ValueError : no temperature or salinity variable found in the cube.
    RuntimeError : no regression could be computed.
    """

    # ---- choose variable names present in your cube ----
    v_temp = _pick_var(cube, ['potential_temperature', 'temperature'])
    v_salt = _pick_var(cube, ['salinity'])
    v_oxy  = _pick_var(cube, ['oxygen_concentration', 'dissolved_oxygen_ml_l',
                              'oxygen', 'o2'])

    # Oxygen is optional; temperature and salinity are required.
    if v_temp is None or v_salt is None:
        raise ValueError(f"Could not find required variables. "
                         f"Found temp={v_temp}, salinity={v_salt}, oxygen={v_oxy}")

    # ---- select sill slice (nearest along, then nearest depth) ----
    # Variables are (transect, depth, along); time is (transect, along)
    # NOTE(review): the dimension layout above is taken from the original
    # comment, not verified against the cube.
    sill_along_val = float(sill_along)
    ds_sill_along = cube.sel(along=sill_along_val, method='nearest')

    temp_prof = ds_sill_along[v_temp]      # (transect, depth)
    salt_prof = ds_sill_along[v_salt]
    oxy_prof  = ds_sill_along[v_oxy] if v_oxy is not None else None

    temp_sel = temp_prof.sel(depth=sill_depth, method='nearest')  # (transect,)
    salt_sel = salt_prof.sel(depth=sill_depth, method='nearest')  # (transect,)
    oxy_sel  = oxy_prof.sel(depth=sill_depth, method='nearest') if oxy_prof is not None else None

    # times from time(transect, along) at the same along location
    tvec = ds_sill_along['time'].values  # (transect,)
    times = pd.to_datetime(tvec)

    # ---- build DataFrame of sill properties ----
    data = {
        'time': times,
        'temp': np.asarray(temp_sel.values, dtype=float),
        'salinity': np.asarray(salt_sel.values, dtype=float),
    }
    if oxy_sel is not None:
        data['oxygen'] = np.asarray(oxy_sel.values, dtype=float)

    df = pd.DataFrame(data).set_index('time').sort_index()

    # optional daily averaging (PFEL & buoy are daily)
    if daily:
        df = df.resample('D').mean()

    # optional rolling mean
    # NOTE(review): after the daily resample, days without a transect are NaN
    # rows; with min_periods=1 the rolling mean fills them from neighbouring
    # days inside the window, so `n` below can exceed the number of transects.
    if rolling:
        df = df.rolling(rolling, min_periods=1).mean()

    # ---- normalize external indices to Series ----
    series_list = []
    if pfel is not None:
        s_pfel = _to_named_series(pfel, value_col=pfel_value_col, name='PFEL')
        if rolling:
            s_pfel = s_pfel.rolling(rolling, min_periods=1).mean()
        series_list.append(s_pfel)
    if buoy is not None:
        s_buoy = _to_named_series(buoy, value_col=buoy_value_col, name='C46204')
        if rolling:
            s_buoy = s_buoy.rolling(rolling, min_periods=1).mean()
        series_list.append(s_buoy)

    # ---- regressions ----
    results = []
    for s in series_list:
        # Inner join on timestamp: only dates present on both sides survive.
        merged = df.join(s.rename('index'), how='inner')
        for var in ['temp', 'oxygen', 'salinity']:
            if var not in merged.columns:
                continue
            x = merged['index'].to_numpy()
            y = merged[var].to_numpy()
            # Use only pairs where both values are finite; need at least 3.
            m = np.isfinite(x) & np.isfinite(y)
            if m.sum() < 3:
                continue
            slope, intercept, r, p, stderr = stats.linregress(x[m], y[m])
            results.append({
                'Variable': var,
                'Index': s.name,
                'n': int(m.sum()),
                'slope': slope,
                'intercept': intercept,
                'R2': r**2,
                'p': p,
                'stderr': stderr
            })

    if not results:
        raise RuntimeError("No valid regressions computed. "
                           "Check variable names, time alignment, or data coverage.")

    out = pd.DataFrame(results).sort_values(['Variable', 'Index']).reset_index(drop=True)
    if print_preview:
        print(out.to_string(index=False))
    return out

# Inputs come from the cell that builds the combined daily CSV; that cell
# must have been run in this kernel first.
# pfel_daily: columns ['DATE','PFEL']
# buoy_daily: columns ['DATE','C46204']

results = regression_sill_vs_indices(
    cube=new_cube,
    sill_along=0,                # your sill along-location (m)
    sill_depth=130,              # target depth (m), nearest used
    pfel=pfel_daily,  pfel_value_col='PFEL',
    buoy=buoy_daily,  buoy_value_col='C46204',
    daily=True,                  # average glider to daily to match indices
    rolling='15D'                # apply same 15-day smoothing to both sides
)

In [None]:
# Display the dataset summary (dimensions, coordinates, variables) for inspection.
new_cube

# i got a blank space babyyyy and i'll write your name #

In [None]:
import numpy as np
# Peak wind speed in whichever `ds` is currently loaded, scaled by 3.6
# (presumably m/s -> km/h; confirm the WSPD units). Depends on `ds` from an
# earlier cell.
np.max(ds['WSPD']) * 3.6

In [None]:
%matplotlib widget
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

import numpy as np
from matplotlib.dates import MonthLocator

# Load the data
file_path = '~/Desktop/Summer 2025 Python/c46204.csv'
ds = pd.read_csv(file_path)

# Parse datetime and keep 2023-01-01 .. 2024-01-01 (both bounds inclusive).
# .copy() detaches the filtered rows so the assignments below write to `ds`
# itself rather than to a slice (avoids SettingWithCopyWarning).
ds['DATE'] = pd.to_datetime(ds['DATE'], format='%m/%d/%Y %H:%M')
ds = ds[(ds['DATE'] >= '2023-01-01') & (ds['DATE'] <= '2024-01-01')].copy()

# Convert wind columns to numeric (unparseable entries become NaN)
ds['WDIR'] = pd.to_numeric(ds['WDIR'], errors='coerce')
ds['WSPD'] = pd.to_numeric(ds['WSPD'], errors='coerce')

# Set datetime index (required by resample and by the time-based rolling
# window used in a later cell)
ds = ds.set_index('DATE')


def _resample_wind(frame, rule):
    """
    Resample wind speed and direction to the bins defined by `rule`.

    WSPD is the arithmetic mean. WDIR is a circular quantity (0° == 360°), so
    it is averaged as unit vectors: mean of sin/cos, then atan2, mapped to
    [0, 360). An arithmetic mean would turn 350° and 10° into 180°.
    Returns a DataFrame with columns WSPD and WDIR indexed by bin label.
    """
    theta = np.deg2rad(frame['WDIR'])
    parts = pd.DataFrame({
        'WSPD': frame['WSPD'],
        '_sin': np.sin(theta),
        '_cos': np.cos(theta),
    }).resample(rule).mean()
    mean_dir = np.rad2deg(np.arctan2(parts['_sin'], parts['_cos'])) % 360.0
    return pd.DataFrame({'WSPD': parts['WSPD'], 'WDIR': mean_dir})


def _plot_wind(series, ylabel, title, color, kind='line', half_year_ticks=False):
    """
    Draw one 12x5 time-series figure and return (fig, ax).

    kind='bar' draws bars of width 0.8 days, anything else a line.
    half_year_ticks=True puts major ticks on January and July with a
    month/year label; otherwise the default locator is kept with a
    day/month/year label.
    """
    fig, ax = plt.subplots(figsize=(12, 5))
    if kind == 'bar':
        ax.bar(series.index, series, width=0.8, color=color)
    else:
        ax.plot(series.index, series, color=color)
    ax.set_ylabel(ylabel, color=color)
    ax.set_title(title)
    ax.set_xlabel('Date')
    if half_year_ticks:
        ax.xaxis.set_major_locator(MonthLocator(bymonth=[1, 7]))  # Jan, Jul
        ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
    else:
        ax.xaxis.set_major_formatter(DateFormatter('%b %d\n%Y'))
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()
    return fig, ax


# Daily / monthly / weekly means. The month-end offset object produces the
# same bins as the deprecated '1M' string alias on every pandas version.
daily = _resample_wind(ds, '1D')
monthly = _resample_wind(ds, pd.offsets.MonthEnd())
weekly = _resample_wind(ds, '1W')

# Daily: bars
_plot_wind(daily['WSPD'], 'Wind Speed (m/s)',
           'Daily Mean Wind Speed from C46204', 'blue', kind='bar')
_plot_wind(daily['WDIR'], 'Wind Direction (°)',
           'Daily Mean Wind Direction from C46204', 'orange', kind='bar')

# Monthly: lines
_plot_wind(monthly['WSPD'], 'Wind Speed (m/s)',
           'Monthly Mean Wind Speed from C46204', 'blue')
_plot_wind(monthly['WDIR'], 'Wind Direction (°)',
           'Monthly Mean Wind Direction from C46204', 'orange',
           half_year_ticks=True)

# Weekly: lines
_plot_wind(weekly['WSPD'], 'Wind Speed (m/s)',
           'Weekly Mean Wind Speed from C46204', 'blue')
fig, ax = _plot_wind(weekly['WDIR'], 'Wind Direction (°)',
                     'Weekly Mean Wind Direction from C46204', 'orange',
                     half_year_ticks=True)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.dates import MonthLocator, DateFormatter

# Weekly mean wind speed as bars: one 6-day-wide bar centred on each weekly timestamp.
fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(weekly.index, weekly['WSPD'], width=6, color='blue', align='center')
ax.set(title='Weekly Mean Wind Speed from C46204', xlabel='Date')
ax.set_ylabel('Wind Speed (m/s)', color='blue')
# Two-line date labels ("Mon DD" over the year), rotated 45 degrees.
ax.xaxis.set_major_formatter(DateFormatter('%b %d\n%Y'))
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

# Weekly mean wind direction as bars: one 6-day-wide bar centred on each weekly timestamp.
fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(weekly.index, weekly['WDIR'], width=6, color='orange', align='center')
ax.set(title='Weekly Mean Wind Direction from C46204', xlabel='Date')
ax.set_ylabel('Wind Direction (°)', color='orange')
# Major ticks in January and July only, labelled "Mon" over the year.
ax.xaxis.set_major_locator(MonthLocator(bymonth=[1, 7]))
ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

In [None]:
import numpy as np

# Compute 7-day rolling means (one value per observation; requires `ds` to
# have a datetime index).
# Wind speed is a plain scalar, so an arithmetic rolling mean is appropriate.
rolling = ds[['WSPD']].rolling('7D').mean()

# Wind direction is circular: 350° and 10° should average to 0°, not 180°.
# Average the unit-vector components (sin, cos) over the window and convert
# the mean vector back to a bearing in [0, 360). Windows with no valid
# direction stay NaN; if the components cancel exactly the direction is
# undefined and arctan2 reports 0.
_wdir_rad = np.deg2rad(ds['WDIR'])
rolling['WDIR'] = np.rad2deg(
    np.arctan2(
        np.sin(_wdir_rad).rolling('7D').mean(),
        np.cos(_wdir_rad).rolling('7D').mean(),
    )
) % 360

# 7-day rolling mean wind speed, one 1-day-wide bar per row of `rolling`.
fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(rolling.index, rolling['WSPD'], width=1, color='blue', align='center')
ax.set(title='7-Day Rolling Mean Wind Speed from C46204', xlabel='Date')
ax.set_ylabel('Wind Speed (m/s)', color='blue')
# Two-line date labels ("Mon" over the year), rotated 45 degrees.
ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

# 7-day rolling mean wind direction, one 1-day-wide bar per row of `rolling`.
fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(rolling.index, rolling['WDIR'], width=1, color='orange', align='center')
ax.set(title='7-Day Rolling Mean Wind Direction from C46204', xlabel='Date')
ax.set_ylabel('Wind Direction (°)', color='orange')
# Two-line date labels ("Mon" over the year), rotated 45 degrees.
ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

import numpy as np

# Step 1: 7-day rolling means. rolling('7D') yields one value per observation
# (it does not resample to daily) and requires `ds` to have a datetime index.
# Wind direction is circular (350° and 10° should average to 0°, not 180°),
# so it is carried through the averaging as unit-vector components and only
# converted back to a bearing in [0, 360) at the end.
_wdir_rad = np.deg2rad(ds['WDIR'])
_rolling_parts = ds[['WSPD']].assign(
    WDIR_SIN=np.sin(_wdir_rad),
    WDIR_COS=np.cos(_wdir_rad),
).rolling('7D').mean()
rolling = _rolling_parts[['WSPD']].assign(
    WDIR=np.rad2deg(
        np.arctan2(_rolling_parts['WDIR_SIN'], _rolling_parts['WDIR_COS'])
    ) % 360
)

# Step 2: Resample the rolling means to 1-week frequency. The direction
# components are averaged per week first, then turned back into degrees, so
# the weekly value is also a wrap-aware mean.
_weekly_parts = _rolling_parts.resample('1W').mean()
weekly_from_rolling = _weekly_parts[['WSPD']].assign(
    WDIR=np.rad2deg(
        np.arctan2(_weekly_parts['WDIR_SIN'], _weekly_parts['WDIR_COS'])
    ) % 360
)

# Step 3a: weekly wind speed (weekly mean of the 7-day rolling mean) as 6-day-wide bars.
fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(weekly_from_rolling.index, weekly_from_rolling['WSPD'], width=6, color='blue')
ax.set(title='Weekly Mean Wind Speed (from 7-Day Rolling) – C46204', xlabel='Date')
ax.set_ylabel('Wind Speed (m/s)', color='blue')
# Two-line date labels ("Mon" over the year), rotated 45 degrees.
ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

# Step 3b: weekly wind direction (weekly mean of the 7-day rolling mean) as 6-day-wide bars.
fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(weekly_from_rolling.index, weekly_from_rolling['WDIR'], width=6, color='orange')
ax.set(title='Weekly Mean Wind Direction (from 7-Day Rolling) – C46204', xlabel='Date')
ax.set_ylabel('Wind Direction (°)', color='orange')
# Two-line date labels ("Mon" over the year), rotated 45 degrees.
ax.xaxis.set_major_formatter(DateFormatter('%b\n%Y'))
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the C46204 CSV and parse its timestamp column.
file_path = '~/Desktop/Summer 2025 Python/c46204.csv'
ds = pd.read_csv(file_path)
ds['DATE'] = pd.to_datetime(ds['DATE'], format='%m/%d/%Y %H:%M')

# Keep observations from 2020 onward. The explicit .copy() makes the filtered
# frame independent of the full table, so the column assignments that follow
# do not trigger pandas' SettingWithCopyWarning.
ds = ds.loc[ds['DATE'] >= '2020-01-01'].copy()

# Convert wind direction and speed to numeric; unparseable entries become NaN.
ds['WDIR'] = pd.to_numeric(ds['WDIR'], errors='coerce')
ds['WSPD'] = pd.to_numeric(ds['WSPD'], errors='coerce')

# Map a wind direction (degrees) to a named regime.
def classify_wind_dir(wdir):
    """Return the wind-regime label for a single direction value.

    Parameters
    ----------
    wdir : scalar
        Wind direction in degrees; may be missing (NaN / None).

    Returns
    -------
    str
        'Upwelling' for 300 <= wdir <= 340, 'Downwelling' for
        120 <= wdir <= 160 (both ranges inclusive), and 'Other' for every
        other value, including missing ones.
    """
    # Missing values skip the range checks entirely and fall through to 'Other'.
    if not pd.isna(wdir):
        if 300 <= wdir <= 340:
            return 'Upwelling'
        if 120 <= wdir <= 160:
            return 'Downwelling'
    return 'Other'

# Label every observation with its wind regime.
ds['WindCategory'] = ds['WDIR'].map(classify_wind_dir)

# Use the timestamp as the index so rows can be grouped by calendar week.
ds = ds.set_index('DATE')

# Number of observations (rows) per week in each regime: one row per week,
# one column per regime, 0 where a regime has no observations that week.
weekly_counts = (
    ds.groupby([pd.Grouper(freq='1W'), 'WindCategory'])
    .size()
    .unstack(fill_value=0)
)

# Plot stacked bars of Upwelling vs Downwelling counts per week.
# reindex (instead of plain column selection) keeps the plot working when a
# regime never occurs in the selected period: the missing column is created
# as zeros rather than raising KeyError.
regime_counts = weekly_counts.reindex(
    columns=['Upwelling', 'Downwelling'], fill_value=0
)

fig, ax = plt.subplots(figsize=(14, 5))
regime_counts.plot(
    kind='bar',
    stacked=True,
    ax=ax,
    color=['blue', 'orange'],
    width=0.8
)

ax.set_title('Weekly Counts of Upwelling vs Downwelling-Favorable Winds')
ax.set_ylabel('Hourly Observations per Week')
ax.set_xlabel('Week')
ax.legend(title='Wind Regime')

# Label every 4th bar with its week's month and year; the tick positions and
# the labels are both taken with the same step of 4 so they stay aligned.
ax.set_xticks(range(0, len(regime_counts), 4))
ax.set_xticklabels(
    [d.strftime('%b\n%Y') for d in regime_counts.index[::4]],
    rotation=45
)
plt.tight_layout()
plt.show()