In [38]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
import os
from pathlib import Path

import lightning.pytorch as pl
import torch
import wandb
from sdofm import utils
from sdofm.datasets import SDOMLDataModule, DegradedSDOMLDataModule
from sdofm.pretraining import MAE, SAMAE
from sdofm.finetuning import VirtualEVE

In [40]:
import omegaconf

cfg = omegaconf.OmegaConf.load("../experiments/ablation_virtualeve.yaml")

In [41]:
data_module = SDOMLDataModule(
    hmi_path=(
        os.path.join(
            cfg.data.sdoml.base_directory,
            cfg.data.sdoml.sub_directory.hmi,
        )
        if cfg.data.sdoml.sub_directory.hmi
        else None
    ),
    aia_path=(
        os.path.join(
            cfg.data.sdoml.base_directory,
            cfg.data.sdoml.sub_directory.aia,
        )
        if cfg.data.sdoml.sub_directory.aia
        else None
    ),
    eve_path=os.path.join(
        cfg.data.sdoml.base_directory, cfg.data.sdoml.sub_directory.eve
    ),
    # eve_path=None,
    components=cfg.data.sdoml.components,
    wavelengths=cfg.data.sdoml.wavelengths,
    ions=cfg.data.sdoml.ions,
    frequency=cfg.data.sdoml.frequency,
    batch_size=cfg.model.opt.batch_size,
    num_workers=cfg.data.num_workers,
    val_months=cfg.data.month_splits.val,
    test_months=cfg.data.month_splits.test,
    holdout_months=cfg.data.month_splits.holdout,
    cache_dir=os.path.join(
        cfg.data.sdoml.base_directory,
        cfg.data.sdoml.sub_directory.cache,
    ),
    min_date=cfg.data.min_date,
    max_date=cfg.data.max_date,
    num_frames=cfg.data.num_frames,
    drop_frame_dim=cfg.data.drop_frame_dim,
)
data_module.setup()

[* CACHE SYSTEM *] Found cached index data in /mnt/sdoml/cache/aligndata_HMI_FULL_AIA_FULL_EVE_FULL_12min.csv.
[* CACHE SYSTEM *] Found cached normalization data in /mnt/sdoml/cache/normalizations_HMI_FULL_AIA_FULL_EVE_FULL_12min.json.
[* CACHE SYSTEM *] Found cached HMI mask data in /mnt/sdoml/cache/hmi_mask_512x512.npy.


In [42]:
x, y = next(iter(data_module.train_ds))

In [34]:
import numpy as np

_eve_norm = np.array(data_module.normalizations["EVE"]["eve_norm"], dtype=np.float32)
eve_norm = torch.Tensor(_eve_norm).float()
norm_mean = eve_norm[0]
norm_stdev = eve_norm[1]
y = torch.Tensor(y.reshape(-1))
y = y * norm_stdev[None] + norm_mean[None]

In [43]:
y.shape

(38,)

In [24]:
norm_stdev[None]

tensor([[9.1819e-06, 5.3562e-06, 3.7777e-07, 4.8048e-06, 8.2602e-06, 8.8243e-06,
         9.0311e-06, 8.6722e-06, 1.4570e-07, 2.2082e-05, 1.4295e-05, 1.4213e-06,
         8.0152e-07, 2.2668e-07, 2.0717e-07, 1.2205e-06, 1.9483e-06, 1.0076e-05,
         6.4953e-07, 5.4268e-06, 4.4292e-05, 8.1017e-06, 4.0567e-06, 3.1560e-06,
         2.4076e-06, 1.4459e-06, 1.5991e-06, 1.9169e-07, 1.1263e-07, 1.8147e-07,
         2.1096e-06, 1.1073e-06, 9.9623e-07, 3.4215e-06, 5.8534e-06, 5.4504e-07,
         4.4998e-06, 2.1494e-06]])

In [7]:
data_module.eve_data.tree()

Tree(nodes=(Node(disabled=True, name='/', nodes=(Node(disabled=True, icon='table', name='C III (2137380,) floa…

In [9]:
data_module.eve_data["Time"][:-10]

array(['2010-05-01 00:00:10.484', '2010-05-01 00:01:10.484',
       '2010-05-01 00:02:10.484', ..., '2014-05-26 23:47:09.852',
       '2014-05-26 23:48:09.853', '2014-05-26 23:49:09.853'], dtype='<U23')

In [12]:
data_module.aligndata

Unnamed: 0_level_0,idx_131A,idx_1600A,idx_1700A,idx_171A,idx_193A,idx_211A,idx_304A,idx_335A,idx_94A,idx_Bx,idx_By,idx_Bz
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2010-05-13 00:00:00,8937,8942,8085,8934,8945,8933,8937,8945,8691,19726,19726,19726
2010-05-13 00:12:00,8938,8943,8084,8935,8946,8934,8938,8946,8692,19729,19729,19729
2010-05-13 00:24:00,8940,8946,8086,8936,8947,8935,8939,8947,8694,19735,19735,19735
2010-05-13 00:36:00,8942,8948,8089,8939,8950,8938,8942,8950,8696,19710,19710,19710
2010-05-13 00:48:00,8944,8950,8090,8941,8952,8940,8944,8952,8698,19695,19695,19695
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-02 22:00:00,28302,28205,25586,28299,28306,28291,28288,28296,28301,13309,13309,13309
2023-08-02 22:12:00,28299,28203,25566,28297,28305,28296,28293,28294,28302,13356,13356,13356
2023-08-02 22:24:00,28294,28206,25565,28294,28304,28290,28290,28299,28300,13329,13329,13329
2023-08-02 22:36:00,28295,28204,25564,28293,28310,28294,28291,28293,28309,13351,13351,13351


In [6]:
eve_data

NameError: name 'eve_data' is not defined

In [8]:
eve_data = data_module.eve_data

In [9]:
eve_data

<zarr.hierarchy.Group '/'>

In [10]:
import pandas as pd
import numpy as np

df_t_eve = pd.DataFrame(
    {
        "Time": pd.to_datetime(data_module.eve_data["Time"][:]),
        "idx_eve": np.arange(0, len(data_module.eve_data["Time"])),
    }
)
df_t_eve["Time"] = pd.to_datetime(df_t_eve["Time"]).dt.round("12min")
df_t_obs_eve = df_t_eve.drop_duplicates(subset="Time", keep="first").set_index("Time")

In [38]:
join_series = data_module.aligndata

In [40]:
join_series = join_series.join(df_t_obs_eve, how="inner")

In [76]:
join_series

Unnamed: 0_level_0,idx_131A,idx_1600A,idx_1700A,idx_171A,idx_193A,idx_211A,idx_304A,idx_335A,idx_94A,idx_Bx,idx_By,idx_Bz,idx_eve
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-05-13 00:00:00,8937,8942,8085,8934,8945,8933,8937,8945,8691,19726,19726,19726,17274
2010-05-13 00:12:00,8938,8943,8084,8935,8946,8934,8938,8946,8692,19729,19729,19729,17286
2010-05-13 00:24:00,8940,8946,8086,8936,8947,8935,8939,8947,8694,19735,19735,19735,17298
2010-05-13 00:36:00,8942,8948,8089,8939,8950,8938,8942,8950,8696,19710,19710,19710,17310
2010-05-13 00:48:00,8944,8950,8090,8941,8952,8940,8944,8952,8698,19695,19695,19695,17322
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014-05-26 23:00:00,9220,9222,8324,9208,9218,9220,9218,9217,9221,37135,37135,37135,2137314
2014-05-26 23:12:00,9222,9224,8325,9210,9220,9222,9220,9219,9223,37048,37048,37048,2137326
2014-05-26 23:24:00,9224,9226,8327,9212,9222,9224,9222,9221,9225,37057,37057,37057,2137338
2014-05-26 23:36:00,9226,9228,8329,9214,9224,9226,9224,9223,9227,37118,37118,37118,2137350


In [77]:
from sdofm.constants import ALL_IONS

available_mask = np.array([True] * join_series.shape[0])
for ion in ALL_IONS:
    ion_data = eve_data[ion]
    if available_mask is None:
        available_mask = (ion_data[join_series["idx_eve"]] > 0).astype(bool)
    else:
        available_mask = available_mask & (ion_data[join_series["idx_eve"]] > 0).astype(
            bool
        )

    # join_series = join_series.loc[ion_data[join_series["idx_eve"]] > 0]
# join_series

In [87]:
ion_data[join_series["idx_eve"]] > 0

array([ True,  True,  True, ..., False, False, False])

In [78]:
np.sum(available_mask)

16

In [83]:
type(ion_data[join_series["idx_eve"]] > 0)

numpy.ndarray

In [81]:
np.array([True, True, True]) & np.array([True, False, True])

array([ True, False,  True])

In [71]:
ion_data[join_series["idx_eve"]] > 0

array([ True,  True,  True, ..., False, False, False])

In [63]:
35299 / 142116

0.2483816037603085

In [62]:
available_mask.shape

(144766,)

In [84]:
for ion in ALL_IONS:
    check = ion_data[join_series["idx_eve"]] > 0
    print(check.shape, np.sum(check), (ion_data[join_series["idx_eve"]] > 0)[:20])

(144766,) 35299 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
(144766,) 35299 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
(144766,) 35299 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
(144766,) 35299 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
(144766,) 35299 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
(144766,) 35299 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
(144766,) 35299 [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
(144766,) 35299 [ True  Tru

In [35]:
eve_data[ion][2136594]

0.00015577368

In [27]:
ion_data

array([ 1.3445999e-04,  1.3361037e-04,  1.3478851e-04, ...,
       -1.0000000e+00, -1.0000000e+00, -1.0000000e+00], dtype=float32)

In [28]:
sum(ion_data > 0)

359262

In [21]:
df_t_obs_eve

Unnamed: 0_level_0,idx_eve
Time,Unnamed: 1_level_1
2010-05-01 00:00:00,0
2010-05-01 00:12:00,6
2010-05-01 00:24:00,18
2010-05-01 00:36:00,30
2010-05-01 00:48:00,42
...,...
2014-05-26 23:12:00,2137326
2014-05-26 23:24:00,2137338
2014-05-26 23:36:00,2137350
2014-05-26 23:48:00,2137362


In [11]:
data_module.training_years

[2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020,
 2021,
 2022,
 2023]

In [30]:
data_module.aligndata.shape

(16, 13)