In [1]:
# General imports
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, Iterable, Any
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Notebook-wide plotting defaults: large figures on a white background.
plt.rcParams.update(
    {
        "figure.figsize": (18, 10),
        "figure.facecolor": "white",
    }
)

# ML imports
import torch
from torch.utils import data
from torch import nn
import torch.nn.functional as F
import einops
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
import wandb
import sklearn.manifold

# power_perceiver imports
from power_perceiver.dataset import NowcastingDataset
from power_perceiver.consts import BatchKey
from power_perceiver.data_loader import HRVSatellite, PV, Sun
from power_perceiver.xr_batch_processor import SelectPVSystemsNearCenterOfImage, ReduceNumPVSystems, ReduceNumTimesteps
from power_perceiver.np_batch_processor import EncodeSpaceTime, Topography
from power_perceiver.transforms.satellite import PatchSatellite
from power_perceiver.transforms.pv import PVPowerRollingWindow

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root directory of the prepared ML training data (v15).
# Alternative location, kept for reference (presumably a small local dataset
# for testing -- confirm before use):
# DATA_PATH = Path("~/dev/ocf/power_perceiver/data_for_testing/").expanduser()
DATA_PATH = Path(
    "/mnt/storage_ssd_4tb/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v15/"
)
# Fail early, and say which path is missing, rather than erroring later inside the data loader.
assert DATA_PATH.exists(), f"DATA_PATH does not exist: {DATA_PATH}"

In [3]:
def get_dataloader(
    data_path: Path,
    tag: str,
    topography_path: str = "/home/jack/europe_dem_2km_osgb.tif",
    num_workers: int = 16,
) -> data.DataLoader:
    """Build a `DataLoader` over a `NowcastingDataset` rooted at `data_path`.

    The dataset loads HRV satellite imagery (patched), PV power (rolling
    window) and sun data. Every batch is restricted to PV systems near the
    centre of the image and reduced to 8 PV systems. For `tag == "train"` the
    batch is additionally reduced to 4 timesteps. Space/time encodings and
    topography are appended as numpy batch processors.

    Args:
        data_path: Directory holding the prepared batches. Must exist.
        tag: Either "train" or "validation".
        topography_path: Path of the topography GeoTIFF handed to `Topography`.
            Defaults to the location that was previously hard-coded.
        num_workers: Number of worker processes used by the `DataLoader`.
            Defaults to the value that was previously hard-coded.

    Returns:
        A `DataLoader` with `batch_size=None`, i.e. automatic batching is
        disabled and each dataset item is passed through as one batch.

    Raises:
        AssertionError: If `tag` is not recognised or `data_path` is missing.
    """
    assert tag in ["train", "validation"], f"Unknown {tag=}; expected 'train' or 'validation'."
    assert data_path.exists(), f"data_path does not exist: {data_path}"

    xr_batch_processors = [
        SelectPVSystemsNearCenterOfImage(),
        ReduceNumPVSystems(requested_num_pv_systems=8),
    ]

    # Only training batches are subsampled in time; validation keeps every timestep.
    if tag == "train":
        xr_batch_processors.append(ReduceNumTimesteps(requested_timesteps=4))

    dataset = NowcastingDataset(
        data_path=data_path,
        data_loaders=[
            HRVSatellite(transforms=[PatchSatellite()]),
            PV(transforms=[PVPowerRollingWindow()]),
            Sun(),
        ],
        xr_batch_processors=xr_batch_processors,
        np_batch_processors=[
            EncodeSpaceTime(),
            Topography(topography_path),
        ],
    )

    return data.DataLoader(
        dataset,
        batch_size=None,  # Each dataset item is passed through as a complete batch.
        num_workers=num_workers,
        pin_memory=True,
    )

# Build one loader per split. Note that the validation loader reads the "test" sub-directory.
train_dataloader = get_dataloader(DATA_PATH / "train", tag="train")
val_dataloader = get_dataloader(DATA_PATH / "test", tag="validation")

  return self._crs.to_proj4(version=version)
  return self._crs.to_proj4(version=version)
  return self._crs.to_proj4(version=version)
  return self._crs.to_proj4(version=version)


In [4]:
# Grab a single training batch for interactive inspection.
# `next(iter(...))` raises StopIteration immediately if the loader is empty,
# instead of silently leaving `batch` undefined.
batch = next(iter(train_dataloader))

In [5]:
# Shape of the PV power tensor. Presumably (example, timestep, PV system): the 4 and 8 in the
# recorded output match requested_timesteps=4 and requested_num_pv_systems=8 -- TODO confirm.
batch[BatchKey.pv].shape

torch.Size([31, 4, 8])

In [6]:
# Shape of the PV timestamps; presumably (example, timestep) -- TODO confirm.
batch[BatchKey.pv_time_utc].shape

torch.Size([31, 4])

In [7]:
# NOTE(review): this repeats the shape check from In [5]; the cell is a candidate for removal.
batch[BatchKey.pv].shape

torch.Size([31, 4, 8])

In [8]:
from power_perceiver.pytorch_modules.satellite_processor import HRVSatelliteProcessor
from power_perceiver.pytorch_modules.query_generator import QueryGenerator
from power_perceiver.pytorch_modules.self_attention import PerceiverIO


@dataclass(eq=False)  # See https://discuss.pytorch.org/t/typeerror-unhashable-type-for-my-torch-nn-module/109424/6
class Model(pl.LightningModule):
    """Perceiver IO model trained to predict PV power from HRV satellite data.

    `HRVSatelliteProcessor` turns the batch into a "byte array". `PerceiverIO`
    attends to it with a learned encoder query and is decoded with the query
    produced by `QueryGenerator`. A two-layer MLP maps every decoded element
    to a single value, which is trained against `batch[BatchKey.pv]` with MSE.

    The hyperparameters are dataclass fields, so they can be overridden by
    keyword, e.g. `Model(dropout=0.1)`. Sub-modules are built in
    `__post_init__`, which runs after the dataclass-generated `__init__`.
    """

    encoder_query_dim: int = 64
    num_encoder_query_elements: int = 64
    decoder_query_dim: int = 36  # decoder_query will be automatically padded with zeros to get to this size.
    num_fourier_features: int = 16  # TOTAL for both x and y
    pv_system_id_embedding_dim: int = 16
    byte_array_dim: int = 35
    num_encoder_heads: int = 8
    num_decoder_heads: int = 6
    dropout: float = 0.0
    share_weights_across_latent_transformer_layers: bool = False
    num_latent_transformer_encoders: int = 4

    # Other params:
    num_elements_query_padding: int = 0  # Probably keep this at zero while using MultiLayerTransformerEncoder or Perceiver IO

    def __post_init__(self):
        """Initialise the LightningModule and build all sub-modules."""
        super().__init__()
        self.hrvsatellite_processor = HRVSatelliteProcessor()

        # Learned encoder query, shared by every example. Scaled down at initialisation.
        self.encoder_query = nn.Parameter(
            torch.randn(self.num_encoder_query_elements, self.encoder_query_dim) / 5)

        self.decoder_query_generator = QueryGenerator(
            num_fourier_features=self.num_fourier_features,  # TOTAL (for both x and y)
            pv_system_id_embedding_dim=self.pv_system_id_embedding_dim,
            num_elements_query_padding=self.num_elements_query_padding)

        self.perceiver_io = PerceiverIO(
            encoder_query_dim=self.encoder_query_dim,
            decoder_query_dim=self.decoder_query_dim,
            byte_array_dim=self.byte_array_dim,
            num_encoder_heads=self.num_encoder_heads,
            num_decoder_heads=self.num_decoder_heads,
            dropout=self.dropout,
            share_weights_across_latent_transformer_layers=self.share_weights_across_latent_transformer_layers,
            num_latent_transformer_encoders=self.num_latent_transformer_encoders,
            )

        # Maps every decoded element (decoder_query_dim features) to one scalar.
        self.output_module = nn.Sequential(
            nn.Linear(in_features=self.decoder_query_dim, out_features=self.decoder_query_dim),
            nn.ReLU(),
            nn.Linear(in_features=self.decoder_query_dim, out_features=1),
        )

        # Do this at the end of __post_init__ to capture model topology to wandb:
        self.save_hyperparameters()

    def forward(self, x: dict[BatchKey, torch.Tensor]) -> torch.Tensor:
        """Run the full model on one batch.

        Args:
            x: The batch dictionary. `x[BatchKey.pv]` is read here only for its
                leading (batch) dimension; the processor and query generator
                receive the whole dictionary.

        Returns:
            The output of `output_module`, with its leading axis split back
            into `(batch_size, n_timesteps, ...)`. The last axis has size 1.
        """
        original_batch_size = x[BatchKey.pv].shape[0]
        byte_array = self.hrvsatellite_processor(x)

        # Get decoder query
        decoder_query = self.decoder_query_generator(x)
        # Pad with zeros if necessary to get up to self.decoder_query_dim:
        decoder_query = self._maybe_pad_with_zeros(decoder_query)

        # Repeat encoder query for each example in the batch:
        encoder_query = einops.repeat(
            self.encoder_query,
            "element feature -> example element feature",
            example=byte_array.shape[0],
            )

        # Run through the Perceiver IO:
        out = self.perceiver_io(
            encoder_query=encoder_query,
            byte_array=byte_array,
            decoder_query=decoder_query,
            )

        out = self.output_module(out)

        # Reshape back to (batch_size, n_timesteps, ...)
        return einops.rearrange(
            out,
            "(batch_size n_timesteps) ... -> batch_size n_timesteps ...",
            batch_size=original_batch_size)

    def _maybe_pad_with_zeros(self, tensor: torch.Tensor) -> torch.Tensor:
        """Zero-pad the last axis of `tensor` up to `self.decoder_query_dim`.

        Args:
            tensor: Tensor whose last axis holds the query features.

        Returns:
            `tensor` itself if its last axis already has `decoder_query_dim`
            entries, otherwise a new tensor padded with trailing zeros.

        Raises:
            AssertionError: If the last axis is larger than `decoder_query_dim`.
        """
        num_zeros_to_pad = self.decoder_query_dim - tensor.shape[-1]
        # The message must read an attribute that exists: the old `self.query_dim`
        # turned a failing assertion into an AttributeError.
        assert num_zeros_to_pad >= 0, f"{self.decoder_query_dim=}, {tensor.shape=}"
        if num_zeros_to_pad > 0:
            # Pad only the end of the last axis; this works for any tensor rank.
            tensor = F.pad(tensor, (0, num_zeros_to_pad))
        return tensor

    def _training_or_validation_step(
            self,
            batch: dict[BatchKey, torch.Tensor],
            batch_idx: int,
            tag: str
        ) -> dict[str, object]:
        """Shared implementation of the training and validation steps.

        Args:
            batch: The training or validation batch.  A dictionary.
            batch_idx: The index of the batch.
            tag: Either "train" or "validation".  Used as the prefix of the logged metric.

        Returns:
            A dict with the scalar `loss` and the `predicted_pv_power` tensor.
        """
        actual_pv_power = batch[BatchKey.pv]
        # Zero the target wherever the PV mask is False. The mask gets a new axis at
        # position 1 so that it broadcasts against `actual_pv_power`.
        actual_pv_power = torch.where(
            batch[BatchKey.pv_mask].unsqueeze(1),
            actual_pv_power,
            torch.tensor(0.0, dtype=actual_pv_power.dtype, device=actual_pv_power.device))

        # Drop only the trailing size-1 axis produced by `output_module`. A bare
        # `.squeeze()` would also drop any other axis that happens to have length 1.
        predicted_pv_power = self(batch).squeeze(dim=-1)

        # NOTE(review): the loss is averaged over every element, including masked-out
        # ones (whose target is zero). A masked mean was tried earlier and disabled.
        mse_loss = F.mse_loss(predicted_pv_power, actual_pv_power)

        self.log(f"{tag}/mse", mse_loss)

        return {
            'loss': mse_loss,
            'predicted_pv_power': predicted_pv_power,
            }

    def training_step(self, batch: dict[BatchKey, torch.Tensor], batch_idx: int) -> dict[str, object]:
        """Lightning hook: compute and log the training loss for one batch."""
        return self._training_or_validation_step(batch=batch, batch_idx=batch_idx, tag="train")

    def validation_step(self, batch: dict[BatchKey, torch.Tensor], batch_idx: int) -> dict[str, object]:
        """Lightning hook: compute and log the validation loss for one batch."""
        return self._training_or_validation_step(batch=batch, batch_idx=batch_idx, tag="validation")

    def configure_optimizers(self):
        """Lightning hook: Adam over all parameters with a fixed learning rate of 1e-4."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
        return optimizer

In [9]:
# Start from a freshly initialised model using the default hyperparameters.
model = Model()
# Alternative: load the weights from a previously saved checkpoint instead.
#model = Model.load_from_checkpoint(
#    "~/dev/ocf/power_perceiver/notebooks/2022-04-04_train_ML_model/model.ckpt")


In [10]:
# Smoke test: run one forward pass on the batch fetched earlier, before handing the model to the Trainer.
model_output = model(batch)

In [11]:
# Send metrics (and model checkpoints, via log_model) to the Weights & Biases project.
wandb_logger = WandbLogger(project="power_perceiver", entity="openclimatefix", log_model="all")

# Additionally track gradients, parameter histograms and the model topology.
wandb_logger.watch(model, log="all")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mopenclimatefix[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.14 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


In [12]:
from power_perceiver.analysis.plot_timeseries import LogTimeseriesPlots
from power_perceiver.analysis.plot_tsne import LogTSNEPlot

# Train on GPU index 3. max_epochs=-1 sets no epoch limit (presumably training runs
# until interrupted -- see the Lightning Trainer docs).
# The callbacks log time-series and t-SNE plots.
trainer = pl.Trainer(
    gpus=[3],
    max_epochs=-1,
    logger=wandb_logger,
    callbacks=[LogTimeseriesPlots(), LogTSNEPlot()],
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [13]:
# Start training, validating against the held-out loader.
trainer.fit(model=model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5]

  | Name                    | Type                  | Params
------------------------------------------------------------------
0 | hrvsatellite_processor  | HRVSatelliteProcessor | 0     
1 | decoder_query_generator | QueryGenerator        | 32.0 K
2 | perceiver_io            | PerceiverIO           | 1.1 M 
3 | output_module           | Sequential            | 1.4 K 
------------------------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.729     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|                                                             | 0/2 [00:01<?, ?it/s]



Epoch 0:   0%|                                                                               | 0/8800 [00:00<?, ?it/s]



Epoch 0:  95%|█████████████████████████████████████████  | 8400/8800 [03:18<00:09, 42.42it/s, loss=0.0228, v_num=jcp6]



Epoch 1: 100%|███████████████████████████████████████████| 8800/8800 [03:35<00:00, 40.89it/s, loss=0.0228, v_num=jcp6]



Epoch 1:  95%|█████████████████████████████████████████  | 8400/8800 [06:58<00:19, 20.08it/s, loss=0.0187, v_num=jcp6]



Epoch 2: 100%|███████████████████████████████████████████| 8800/8800 [07:15<00:00, 20.22it/s, loss=0.0187, v_num=jcp6]



Epoch 2:  95%|█████████████████████████████████████████  | 8400/8800 [10:43<00:30, 13.05it/s, loss=0.0163, v_num=jcp6]



Epoch 3: 100%|███████████████████████████████████████████| 8800/8800 [11:00<00:00, 13.32it/s, loss=0.0163, v_num=jcp6]



Epoch 3:  95%|█████████████████████████████████████████  | 8400/8800 [14:17<00:40,  9.80it/s, loss=0.0154, v_num=jcp6]



Epoch 4: 100%|███████████████████████████████████████████| 8800/8800 [14:34<00:00, 10.07it/s, loss=0.0154, v_num=jcp6]



Epoch 4:  10%|████▏                                     | 883/8800 [16:16<2:25:57,  1.11s/it, loss=0.0143, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch 4:  10%|████▎                                      | 884/8800 [16:16<2:25:47,  1.11s/it, loss=0.014, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 4:  11%|████▍                                     | 937/8800 [16:18<2:16:48,  1.04s/it, loss=0.0145, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch 4:  11%|████▍                                     | 938/8800 [16:18<2:16:39,  1.04s/it, loss=0.0145, v_num=jcp6]

Exception ignored in: 

Epoch 4:  11%|████▍                                     | 938/8800 [16:18<2:16:39,  1.04s/it, loss=0.0147, v_num=jcp6]

<function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch 4:  11%|████▍                                     | 940/8800 [16:18<2:16:19,  1.04s/it, loss=0.0149, v_num=jcp6]

Exception ignored in: Exception ignored in: 

Epoch 4:  11%|████▍                                     | 940/8800 [16:18<2:16:19,  1.04s/it, loss=0.0148, v_num=jcp6]

<function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280><function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
        self._shutdown_workers()self._shutdown_workers()

  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
        if w.is_alive():if w.is_alive():

  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
  File "/home/jac

Epoch 4:  21%|████████▊                                | 1889/8800 [16:41<1:01:03,  1.89it/s, loss=0.0138, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 4:  21%|████████▊                                | 1890/8800 [16:41<1:01:01,  1.89it/s, loss=0.0138, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch 4:  21%|████████▊                                | 1891/8800 [16:41<1:00:59,  1.89it/s, loss=0.0138, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    

Epoch 4:  22%|████████▊                                | 1892/8800 [16:41<1:00:57,  1.89it/s, loss=0.0138, v_num=jcp6]

self._shutdown_workers()

Epoch 4:  22%|████████▊                                | 1892/8800 [16:41<1:00:57,  1.89it/s, loss=0.0135, v_num=jcp6]


  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
Exception ignored in:     if w.is_alive():<function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>

  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    assert self._parent_pid == os.getpid(), 'can only test a child process'    
self._shutdown_workers()AssertionError
:   File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
can only test a child process    
if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._paren

Epoch 4:  22%|█████████▌                                 | 1959/8800 [16:43<58:23,  1.95it/s, loss=0.0137, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch 4:  95%|█████████████████████████████████████████  | 8400/8800 [19:12<00:54,  7.29it/s, loss=0.0147, v_num=jcp6]



Epoch 5: 100%|███████████████████████████████████████████| 8800/8800 [19:29<00:00,  7.52it/s, loss=0.0147, v_num=jcp6]



Epoch 5:  95%|█████████████████████████████████████████  | 8400/8800 [22:44<01:04,  6.16it/s, loss=0.0143, v_num=jcp6]



Epoch 6: 100%|███████████████████████████████████████████| 8800/8800 [23:01<00:00,  6.37it/s, loss=0.0143, v_num=jcp6]



Epoch 6:  95%|██████████████████████████████████████████  | 8400/8800 [26:24<01:15,  5.30it/s, loss=0.014, v_num=jcp6]



Epoch 7: 100%|████████████████████████████████████████████| 8800/8800 [26:41<00:00,  5.49it/s, loss=0.014, v_num=jcp6]



Epoch 7:  95%|█████████████████████████████████████████  | 8400/8800 [29:57<01:25,  4.67it/s, loss=0.0137, v_num=jcp6]



Epoch 8: 100%|███████████████████████████████████████████| 8800/8800 [30:15<00:00,  4.85it/s, loss=0.0137, v_num=jcp6]



Epoch 8:  95%|█████████████████████████████████████████  | 8400/8800 [33:29<01:35,  4.18it/s, loss=0.0135, v_num=jcp6]



Epoch 9: 100%|███████████████████████████████████████████| 8800/8800 [35:06<00:00,  4.18it/s, loss=0.0135, v_num=jcp6]



Epoch 9:  95%|█████████████████████████████████████████  | 8400/8800 [38:23<01:49,  3.65it/s, loss=0.0132, v_num=jcp6]



Epoch 10: 100%|██████████████████████████████████████████| 8800/8800 [38:39<00:00,  3.79it/s, loss=0.0132, v_num=jcp6]



Epoch 10:  95%|█████████████████████████████████████████  | 8400/8800 [41:55<01:59,  3.34it/s, loss=0.013, v_num=jcp6]



Epoch 11: 100%|███████████████████████████████████████████| 8800/8800 [42:12<00:00,  3.47it/s, loss=0.013, v_num=jcp6]



Epoch 11:  95%|████████████████████████████████████████  | 8400/8800 [45:27<02:09,  3.08it/s, loss=0.0128, v_num=jcp6]



Epoch 12: 100%|██████████████████████████████████████████| 8800/8800 [47:04<00:00,  3.12it/s, loss=0.0128, v_num=jcp6]



Epoch 12:  95%|████████████████████████████████████████  | 8400/8800 [50:31<02:24,  2.77it/s, loss=0.0125, v_num=jcp6]



Epoch 13: 100%|██████████████████████████████████████████| 8800/8800 [50:48<00:00,  2.89it/s, loss=0.0125, v_num=jcp6]



Epoch 13:  95%|████████████████████████████████████████  | 8400/8800 [54:03<02:34,  2.59it/s, loss=0.0123, v_num=jcp6]



Epoch 14: 100%|██████████████████████████████████████████| 8800/8800 [54:20<00:00,  2.70it/s, loss=0.0123, v_num=jcp6]



Epoch 14:  95%|████████████████████████████████████████  | 8400/8800 [58:10<02:46,  2.41it/s, loss=0.0121, v_num=jcp6]



Epoch 15: 100%|██████████████████████████████████████████| 8800/8800 [58:27<00:00,  2.51it/s, loss=0.0121, v_num=jcp6]



Epoch 15:  38%|███████████████▎                        | 3376/8800 [59:56<1:36:18,  1.07s/it, loss=0.0118, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 15:  65%|█████████████████████████▉              | 5693/8800 [1:00:57<33:16,  1.56it/s, loss=0.0103, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 15:  65%|██████████████████████████▏             | 5752/8800 [1:00:59<32:19,  1.57it/s, loss=0.0112, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
Exception ignored in:   File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
<function _MultiProcessi

Epoch 15:  95%|██████████████████████████████████████▏ | 8400/8800 [1:02:11<02:57,  2.25it/s, loss=0.0119, v_num=jcp6]



Epoch 16: 100%|████████████████████████████████████████| 8800/8800 [1:02:28<00:00,  2.35it/s, loss=0.0119, v_num=jcp6]



Epoch 16:  95%|██████████████████████████████████████▏ | 8400/8800 [1:05:53<03:08,  2.12it/s, loss=0.0117, v_num=jcp6]



Epoch 17: 100%|████████████████████████████████████████| 8800/8800 [1:06:10<00:00,  2.22it/s, loss=0.0117, v_num=jcp6]



Epoch 17:  95%|██████████████████████████████████████▏ | 8400/8800 [1:10:01<03:20,  2.00it/s, loss=0.0118, v_num=jcp6]



Epoch 18: 100%|████████████████████████████████████████| 8800/8800 [1:10:19<00:00,  2.09it/s, loss=0.0118, v_num=jcp6]



Epoch 18:  95%|██████████████████████████████████████▏ | 8400/8800 [1:13:58<03:31,  1.89it/s, loss=0.0116, v_num=jcp6]



Epoch 19: 100%|████████████████████████████████████████| 8800/8800 [1:14:16<00:00,  1.97it/s, loss=0.0116, v_num=jcp6]



Epoch 19:  53%|████████████████████▏                 | 4664/8800 [1:16:15<1:07:37,  1.02it/s, loss=0.0104, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 19:  54%|████████████████████▎                 | 4718/8800 [1:16:17<1:06:00,  1.03it/s, loss=0.0103, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 19:  54%|████████████████████▍                 | 4719/8800 [1:16:17<1:05:58,  1.03it/s, loss=0.0103, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child processException ignored in: 


Epoch 19:  54%|████████████████████▍                 | 4720/8800 [1:16:17<1:05:57,  1.03it/s, loss=0.0103, v_num=jcp6]

<function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>Exception ignored in: 


Epoch 19:  54%|████████████████████▍                 | 4720/8800 [1:16:17<1:05:57,  1.03it/s, loss=0.0103, v_num=jcp6]

<function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>Traceback (most recent call last):

  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()    
self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
Exception ignored in:   File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>    
if w.is_alive():    
Traceback (most recent call last):
if w.is_alive():  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocess

Epoch 19:  54%|████████████████████▍                 | 4721/8800 [1:16:17<1:05:55,  1.03it/s, loss=0.0103, v_num=jcp6]


Traceback (most recent call last):


Epoch 19:  54%|████████████████████▍                 | 4721/8800 [1:16:17<1:05:55,  1.03it/s, loss=0.0102, v_num=jcp6]

  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch 19:  54%|█████████████████████▍                  | 4722/8800 [1:16:17<1:05:53,  1.03it/s, loss=0.01, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 19:  91%|████████████████████████████████████▎   | 7995/8800 [1:17:50<07:50,  1.71it/s, loss=0.0104, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 19:  91%|████████████████████████████████████▎   | 7996/8800 [1:17:50<07:49,  1.71it/s, loss=0.0104, v_num=jcp6]


Traceback (most recent call last):


Epoch 19:  91%|████████████████████████████████████▎   | 7996/8800 [1:17:50<07:49,  1.71it/s, loss=0.0105, v_num=jcp6]

  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch 19:  91%|████████████████████████████████████▎   | 7998/8800 [1:17:50<07:48,  1.71it/s, loss=0.0104, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 19:  92%|████████████████████████████████████▌   | 8056/8800 [1:17:52<07:11,  1.72it/s, loss=0.0104, v_num=jcp6]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f99e7608280>
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/home/jac

Epoch 19:  95%|██████████████████████████████████████▏ | 8400/8800 [1:18:03<03:43,  1.79it/s, loss=0.0116, v_num=jcp6]



Epoch 19:  99%|███████████████████████████████████████▍| 8678/8800 [1:18:17<01:06,  1.85it/s, loss=0.0116, v_num=jcp6]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
Exception in thread Thread-55 (_pin_memory_loop):
Traceback (most recent call last):
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/threading.py", line 1009, in _bootstrap_inner
    self.run()
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/threading.py", line 946, in run
    self._target(*self._args, **self._kwargs)
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 28, in _pin_memory_loop
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
  File "/home/jack/miniconda3/envs/power_perceiver/lib/python3.10/site-packages/torch/multiprocessing/reductions.py", line 295, in rebuild_storage_fd
    fd = df.detach()
  File "/home/jack/miniconda3/envs/power