In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime

# Root folder that holds one "<depth>dbar" sub-folder per target depth.
BASE_DIR = Path("/data/wang/Result_Data/alldoxy")

# Target depth levels; each value names a "<depth>dbar" folder under BASE_DIR.
# The grid gets coarser with depth, so it is assembled from its regular
# segments instead of being typed out value by value.
TARGET_DEPTHS = [
    1,                        # shallowest level
    *range(10, 801, 10),      # 10 .. 800 in steps of 10
    *range(820, 2001, 20),    # 820 .. 2000 in steps of 20
    *range(2100, 5501, 100),  # 2100 .. 5500 in steps of 100
]

# Compute days_since_start and related temporal encodings
def calculate_temporal_encodings(df):
    """Add date-derived feature columns to ``df`` and return it.

    The frame is modified in place and the same object is returned.

    Args:
        df: DataFrame with a ``Date`` column that ``pd.to_datetime`` can
            parse and a numeric ``Month`` column (presumably 1-12 and
            consistent with ``Date`` -- this is not checked here).

    Returns:
        The same DataFrame with ``Date`` converted to datetime and these
        columns added or overwritten:

        * ``days_since_start`` -- whole days elapsed since 1950-01-01.
        * ``year_norm`` -- ``days_since_start / 365.25`` (fractional years
          since 1950), rounded to 3 decimals.
        * ``month_sin`` / ``month_cos`` -- 12-month cyclic encoding of
          ``Month`` with zero phase at month 1, rounded to 3 decimals.
        * ``Month_sin`` / ``Month_cos`` -- the same encoding with zero phase
          at month 4, rounded to 3 decimals.
        * ``month_index`` -- months counted from January 1950 (= 1), taken
          from ``Date``.
    """
    epoch = datetime(1950, 1, 1)
    df["Date"] = pd.to_datetime(df["Date"])
    df["days_since_start"] = (df["Date"] - epoch).dt.days

    # Fractional years since the 1950 epoch.
    df["year_norm"] = (df["days_since_start"] / 365.25).round(3)

    # Angle of the annual cycle with month 1 at phase zero.
    angle_m1 = 2 * np.pi * (df["Month"] - 1) / 12
    df["month_sin"] = np.sin(angle_m1).round(3)
    df["month_cos"] = np.cos(angle_m1).round(3)

    # Same cycle shifted so that month 4 is at phase zero. Both components
    # must share the shift: the previous code used (Month - 7) for the sine,
    # which made Month_sin exactly -Month_cos and so discarded the phase.
    angle_m4 = 2 * np.pi * (df["Month"] - 4) / 12
    df["Month_sin"] = np.sin(angle_m4).round(3)
    df["Month_cos"] = np.cos(angle_m4).round(3)

    # Integer-valued, so no rounding is needed.
    df["month_index"] = (df["Date"].dt.year - 1950) * 12 + df["Date"].dt.month

    return df

# Compute longitude encodings lon_cos20 and lon_sin110
def calculate_lon_encodings(df):
    """Add phase-shifted longitude encodings to ``df`` and return it.

    Reads the ``Longitude`` column (treated as degrees) and writes two
    columns in place, each rounded to 3 decimals:

    * ``lon_cos20``  -- cos(Longitude - 20 degrees)
    * ``lon_sin110`` -- sin(Longitude - 110 degrees)
    """
    # (output column, trig function, offset in degrees)
    encodings = (
        ("lon_cos20", np.cos, 20),
        ("lon_sin110", np.sin, 110),
    )
    for column, trig, offset_deg in encodings:
        shifted_rad = np.deg2rad(df["Longitude"] - offset_deg)
        df[column] = trig(shifted_rad).round(3)

    return df

# Batch-process all *_TRAIN.csv files under each target depth folder
def process_csv_files():
    """Add the encoding columns to every ``*_TRAIN.csv`` and save it in place.

    For each depth in ``TARGET_DEPTHS`` the folder ``BASE_DIR/<depth>dbar``
    is searched for files matching ``*_TRAIN.csv``. Each match is read,
    passed through ``calculate_temporal_encodings`` and
    ``calculate_lon_encodings``, and written back to the same path without
    the index column. Progress is printed per file.
    """
    for depth_folder in TARGET_DEPTHS:
        # Materialise the matches before any file in the folder is rewritten.
        train_files = list((BASE_DIR / f"{depth_folder}dbar").glob("*_TRAIN.csv"))

        for csv_file in train_files:
            print(f"Processing file: {csv_file}")

            # Read, then add temporal encodings followed by longitude encodings.
            enriched = calculate_lon_encodings(
                calculate_temporal_encodings(pd.read_csv(csv_file))
            )

            # Overwrite the source file with the enriched table.
            enriched.to_csv(csv_file, index=False)
            print(f"Updated and saved file: {csv_file}")

# Run the processing. This executes as soon as the cell/module is run and
# overwrites every matching *_TRAIN.csv file in place.
process_csv_files()