In this notebook we want to understand what the median pixel counts are for the EUV channels. The goal is to identify per-channel constants to be used for scaling.

In [1]:
import logging
import sys
import matplotlib.pyplot as plt
import numpy as np
import PIL.Image
from matplotlib.pyplot import imshow

import torch
from torch import nn, optim
from sdo.sdo_dataset import SDO_Dataset
from torch.utils.data import DataLoader

%matplotlib inline

In [2]:
# Send log records (including the sdo package's messages) to stdout using a
# "[timestamp] LEVEL:logger:message" layout, at DEBUG verbosity.
logformat = "[%(asctime)s] %(levelname)s:%(name)s:%(message)s"
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    datefmt="%Y-%m-%d %H:%M:%S",
    format=logformat,
)

In [3]:
# Image geometry: img_shape is original_ratio divided by the subsampling factor.
# NOTE(review): original_ratio presumably is the native image side length in
# pixels -- confirm against the sdo package.
original_ratio = 512
subsample = 1
img_shape = int(original_ratio / subsample)

# Channels to analyse, with one instrument entry per channel (all AIA here).
channels = ['0171', '0193', '0094']
instr = ['AIA'] * len(channels)

In [4]:
# CUDA initialisation: the notebook refuses to run without a GPU and pins the
# work to the GPU whose index is `cuda_device`.
torch.backends.cudnn.enabled = True
cuda_device = 2
if not torch.cuda.is_available():
    raise RuntimeError("CUDA not available! Unable to continue")
# Fail early, with an explicit message, when the hard-coded index does not
# exist on this host instead of erroring later when the device is first used.
if cuda_device >= torch.cuda.device_count():
    raise RuntimeError("CUDA device {} requested but only {} device(s) available".format(
        cuda_device, torch.cuda.device_count()))
device = torch.device("cuda:{}".format(cuda_device))
# torch.cuda.current_device() is the currently selected default device; it can
# differ from `device` (the recorded output shows cuda:2 vs. current device 0).
print("Using device {} for training, current device: {}, total devices: {}".format(
device, torch.cuda.current_device(), torch.cuda.device_count()))

Using device cuda:2 for training, current device: 0, total devices: 6


# Mean and mean-of-medians pixel values for the raw images

In [5]:
# Dataset of unscaled images (scaling=False, normalization=0) for the
# configured instruments/channels.
# NOTE(review): the exact meaning of mnt_step/day_step/h_step/min_step and of
# test_ratio is defined by SDO_Dataset -- confirm against the sdo package.
# NOTE(review): shuffle=True with no seed visible here; the images sampled
# below may differ between runs -- confirm.
train_data = SDO_Dataset(
    device=device,
    instr=instr,
    channels=channels,
    yr_range=[2011, 2018],
    mnt_step=1,
    day_step=1,
    h_step=25,
    min_step=61,
    subsample=subsample,
    test_ratio=0.3,
    normalization=0,
    scaling=False,
    shuffle=True,
)

[2019-07-19 17:26:34] INFO:sdo.sdo_dataset:Loading SDOML from "/gpfs/gpfs_gl4_16mb/b9p111/fdl_sw/SDOML"
[2019-07-19 17:26:34] INFO:sdo.sdo_dataset:Running on months "[1 2 3 4 5 6 7]"
[2019-07-19 17:26:34] INFO:sdo.sdo_dataset:Number of found timestamps = 1664
[2019-07-19 17:26:34] INFO:sdo.sdo_dataset:Number of discarded timestamps = 72
[2019-07-19 17:26:34] INFO:sdo.sdo_dataset:Number of SDO files = 4992


In [11]:
# Batch size of the loader; only the first batch is used for the statistics,
# so this is also the number of images sampled.
sample_size = 100
data_loader = DataLoader(
    dataset=train_data,
    batch_size=sample_size,
    shuffle=False,
)

In [12]:
# Per-channel statistics of the raw images, estimated on the first batch only.
#   mean[j]   : average over images of the per-image pixel mean of channel j
#   median[j] : average over images of the per-image pixel median of channel j
#               (a mean of medians, not the median of the pooled pixels)
n_channels = len(channels)
mean = np.zeros(n_channels)
median = np.zeros(n_channels)

first_batch = next(iter(data_loader), None)
if first_batch is None:
    # An empty loader would otherwise silently report all-zero statistics.
    raise RuntimeError("data_loader yielded no batches; cannot compute statistics")

# Batch layout is (image, channel, height, width).
images = first_batch.cpu().numpy()
# Use the real batch length: the batch can hold fewer than sample_size images
# when the dataset is smaller than the requested sample.
n_images = images.shape[0]
for image in images:
    for j in range(n_channels):
        mean[j] += np.mean(image[j])
        median[j] += np.median(image[j])
median = median / n_images
mean = mean / n_images

print("Median over sample", dict(zip(channels, median)))
print("Mean over sample", dict(zip(channels, mean)))

Median over sample {'0171': 333.38197509765627, '0193': 380.20519302368166, '0094': 1.9156283044815063}
Mean over sample {'0171': 455.10101501464845, '0193': 567.6971893310547, '0094': 3.0502363741397858}


# Mean, mean-of-medians and mean-of-maxima pixel values for the scaled images

The per-channel units used for scaling are the following:
AUNIT_BYCH = {'1600': 500.0, '1700': 7000.0, '0094': 10.0, '0131': 80.0, '0171': 2000.0,
               '0193': 3000.0, '0211': 1000.0, '0304': 500.0, '0335': 80.0}

In [13]:
# Same dataset configuration as the raw one, but with scaling=True so images
# come back scaled by the dataset.
# NOTE(review): the exact meaning of mnt_step/day_step/h_step/min_step and of
# test_ratio is defined by SDO_Dataset -- confirm against the sdo package.
# NOTE(review): shuffle=True with no seed visible here; the images sampled
# below may differ between runs -- confirm.
scaled_train_data = SDO_Dataset(
    device=device,
    instr=instr,
    channels=channels,
    yr_range=[2011, 2018],
    mnt_step=1,
    day_step=1,
    h_step=25,
    min_step=61,
    subsample=subsample,
    test_ratio=0.3,
    normalization=0,
    scaling=True,
    shuffle=True,
)

[2019-07-19 17:28:36] INFO:sdo.sdo_dataset:Loading SDOML from "/gpfs/gpfs_gl4_16mb/b9p111/fdl_sw/SDOML"
[2019-07-19 17:28:36] INFO:sdo.sdo_dataset:Running on months "[1 2 3 4 5 6 7]"
[2019-07-19 17:28:37] INFO:sdo.sdo_dataset:Number of found timestamps = 1664
[2019-07-19 17:28:37] INFO:sdo.sdo_dataset:Number of discarded timestamps = 72
[2019-07-19 17:28:37] INFO:sdo.sdo_dataset:Number of SDO files = 4992


In [19]:
# Batch size of the loader; only the first batch is used for the statistics,
# so this is also the number of scaled images sampled.
sample_size = 500
scaled_data_loader = DataLoader(
    dataset=scaled_train_data,
    batch_size=sample_size,
    shuffle=False,
)

In [20]:
# Per-channel statistics of the scaled images, estimated on the first batch only.
#   mean[j]        : average over images of the per-image pixel mean of channel j
#   mean_median[j] : average over images of the per-image pixel median
#   mean_max[j]    : average over images of the per-image pixel maximum
n_channels = len(channels)
mean = np.zeros(n_channels)
mean_median = np.zeros(n_channels)
mean_max = np.zeros(n_channels)

first_batch = next(iter(scaled_data_loader), None)
if first_batch is None:
    # An empty loader would otherwise silently report all-zero statistics.
    raise RuntimeError("scaled_data_loader yielded no batches; cannot compute statistics")

# Batch layout is (image, channel, height, width).
images = first_batch.cpu().numpy()
# Use the real batch length: the batch can hold fewer than sample_size images
# when the dataset is smaller than the requested sample.
n_images = images.shape[0]
for image in images:
    for j in range(n_channels):
        mean[j] += np.mean(image[j])
        mean_median[j] += np.median(image[j])
        mean_max[j] += np.max(image[j])
mean_median = mean_median / n_images
mean = mean / n_images
mean_max = mean_max / n_images

print("Mean Median over sample", dict(zip(channels, mean_median)))
print("Mean over sample", dict(zip(channels, mean)))
print("Mean Max over sample", dict(zip(channels, mean_max)))

Mean Median over sample {'0171': 0.16778671279549598, '0193': 0.13025649194419384, '0094': 0.19687237605452537}
Mean over sample {'0171': 0.23262306255102158, '0193': 0.19759163378179073, '0094': 0.32357423728704454}
Mean Max over sample {'0171': 6.849643320798874, '0193': 6.164027611970901, '0094': 56.6010144405365}
