In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import sys
import os
from torch.utils.data import DataLoader
import pandas as pd

  import pynvml  # type: ignore[import]


In [2]:
# Put the project's source folders on sys.path so the local modules imported below
# resolve. Each folder is taken relative to the current working directory and is
# appended only when it is not already listed.
for subdir in ('src', 'analysis/finetuning', 'analysis/forecasting'):
    abs_dir = os.path.abspath(os.path.join(os.getcwd(), subdir))
    if abs_dir in sys.path:
        continue
    sys.path.append(abs_dir)

# Project-local imports; these depend on the sys.path entries added above.
from utils import SequentialDeepONetDataset
from helper import (
    load_model_experiment,
    convert2dim,
    fit,
    compute_metrics_region,
    plot_field_region,
)
from forecasting_analysis import create_windows_forecasting_with_index
from finetune import (
    create_contiguous_adaptation_set,
    create_eval_set_after_contiguous_adaptation,
    freeze_for_new_station_adaptation,
    expand_lstm_input_dim_correct,
    mask_new_station,
    fine_tune_adapt,
)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [None]:
# Sensor input: neutron monitor data.
input_sensor = np.load('data/neutron_data_22yrs.npy')

# Grid point locations, used as the trunk input.
trunk = np.load('data/grid_points_025.npy')

# Min-max normalise the first two trunk columns in place, each with its own
# minimum and maximum. Any further columns are left untouched.
for col in (0, 1):
    col_min = np.min(trunk[:, col])
    col_max = np.max(trunk[:, col])
    trunk[:, col] = (trunk[:, col] - col_min) / (col_max - col_min)

In [4]:
# from pathlib import Path
# 
# base_dir = Path("data/DoseNumpy025/10m")
# 
# years = range(2001, 2024)  # 2001–2023 inclusive
# 
# arrays = []
# for year in years:
#     print(f"Loading data for year: {year}")
#     fname = base_dir / f"dose_{year}_10m.npy"
#     arr = np.load(fname)          # shape e.g. (T_year, H, W) or (T_year, N_points)
#     arrays.append(arr)
# 
# # Concatenate along time axis (axis=0)
# dose_all = np.concatenate(arrays, axis=0)
# 
# print("Per-year shape:", arrays[0].shape)
# print("Combined shape:", dose_all.shape)

In [5]:
# Load the target array from disk.
# NOTE(review): judging by the file name and the commented-out per-year loader in the
# previous cell, this looks like the 2001-2023 dose arrays concatenated along the
# time axis -- confirm how this file was produced.
output = np.load('data/DoseNumpy025/dose_2001_2023_10m.npy')

In [6]:
# Daily calendar from 2001-01-01 through 2023-12-31; tgt_idx is used to index into it.
dates = pd.date_range(start="2001-01-01", end="2023-12-31", freq="D")

# W and H are forwarded to the windowing helper.
# NOTE(review): presumably window length and forecast horizon -- confirm in forecasting_analysis.
W = 30
H = 0
X_all, y_all, tgt_idx = create_windows_forecasting_with_index(input_sensor, output, W, H)
tgt_dates = dates[tgt_idx]

# Chronological split keyed on each sample's target date:
# everything up to the end of 2021 trains, 2022 validates, 2023 tests.
train_mask = tgt_dates <= pd.Timestamp("2021-12-31")
val_mask = (
    (tgt_dates >= pd.Timestamp("2022-01-01"))
    & (tgt_dates <= pd.Timestamp("2022-12-31"))
)
test_mask = (
    (tgt_dates >= pd.Timestamp("2023-01-01"))
    & (tgt_dates <= pd.Timestamp("2023-12-31"))
)

X_train, X_val, X_test = X_all[train_mask], X_all[val_mask], X_all[test_mask]
y_train, y_val, y_test = y_all[train_mask], y_all[val_mask], y_all[test_mask]

# Report the shape of every split as a quick sanity check.
for split_label, X_split, y_split in (
    ("Train set:", X_train, y_train),
    ("Validation set:", X_val, y_val),
    ("Test set:", X_test, y_test),
):
    print(split_label, X_split.shape, y_split.shape)

# Drop the name `output`, which this cell no longer needs. The underlying memory is
# only released if nothing else still references the array.
del output

Train set: torch.Size([7641, 30, 12]) torch.Size([7641, 1038961])
Validation set: torch.Size([365, 30, 12]) torch.Size([365, 1038961])
Test set: torch.Size([365, 30, 12]) torch.Size([365, 1038961])


In [7]:
# Input scaler. It is fitted on the training windows only and then reused, unchanged,
# for the validation and test windows.
scaler_input = MinMaxScaler()


def _apply_on_last_axis(scale_fn, windows):
    """Collapse all leading axes, apply `scale_fn` to the 2-D result, restore the shape."""
    flat = windows.reshape(-1, windows.shape[-1])
    return scale_fn(flat).reshape(windows.shape)


X_train_scaled = _apply_on_last_axis(scaler_input.fit_transform, X_train)
X_val_scaled = _apply_on_last_axis(scaler_input.transform, X_val)
X_test_scaled = _apply_on_last_axis(scaler_input.transform, X_test)

In [8]:
# Target scaler, fitted on the training targets only. MinMaxScaler scales each column
# independently, so every column of y_train gets its own min/max; the array is passed
# as-is and nothing is flattened here.
scaler_target = MinMaxScaler()

# Scale each split, then append a trailing singleton axis to the result.
y_train_scaled = np.expand_dims(scaler_target.fit_transform(y_train), axis=-1)
y_val_scaled = np.expand_dims(scaler_target.transform(y_val), axis=-1)
y_test_scaled = np.expand_dims(scaler_target.transform(y_test), axis=-1)

: 

: 

: 