In [1]:
from torch.utils.data import DataLoader
from tqdm import tqdm

import numpy as np
import pandas as pd
import torch

from data.satellite import SatelliteData, ALL_BANDS, RGB_BANDS, Normalization
from data.kth import KTH
from data.bair import RobotPush

import utils

In [2]:
# Training splits of the three datasets, all cut into 15-frame sequences.
eo_train_data = SatelliteData(
    data_root="../Arizona-processed4",
    seq_len=15,
    train=True,
    bands_to_keep=RGB_BANDS,
)
kth_train_data = KTH(
    data_root="data",
    seq_len=15,
    image_size=64,
    train=True,
)
bair_train_data = RobotPush(
    data_root="data/bair",
    seq_len=15,
    image_size=64,
    train=True,
)

Using: 2168 for training


In [14]:
# All-band satellite training set with long (60-frame) sequences.
# NOTE(review): the variable is called "unnormed" but Normalization.Z is
# requested -- presumably z-score normalisation; confirm and consider renaming.
eo_train_data_unnormed = SatelliteData(
    data_root="../Arizona-processed4",
    train=True,
    seq_len=60,
    normalization=Normalization.Z,
    bands_to_keep=ALL_BANDS,
)


Using: 2168 for training


In [15]:
# Minimum of the first training sample, reduced over axes 0, 2 and 3
# (one value per index of axis 1 -- presumably the band axis; TODO confirm).
eo_train_data_unnormed[0].numpy().min(axis=(0,2,3))

array([-0.61095955, -1.79794701, -1.94557768, -1.76195801, -1.98504522,
       -3.78593999, -3.60417963, -3.79372866, -3.73807109, -4.5316649 ,
       -0.64018989, -2.60262782, -2.04602812, -3.84381163])

In [16]:
# Maximum of the first training sample, reduced over axes 0, 2 and 3
# (one value per index of axis 1 -- presumably the band axis; TODO confirm).
eo_train_data_unnormed[0].numpy().max(axis=(0,2,3))

array([ 7.22329046, 12.62534067,  9.98013857,  6.91259465,  4.69064942,
        4.56465246,  4.03936434,  4.70392032,  4.04023199,  3.96229015,
        4.9344242 ,  3.80572036,  3.04844489,  2.92709548])

In [6]:
# Running per-band extrema over the whole training set.
# Seed the accumulators with -inf / +inf so that the first sample always wins.
# The previous seeds (0 for the maxima, 1 for the minima) silently produced a
# wrong answer for any band whose true maximum is below 0 or whose true minimum
# is above 1. With an empty dataset the accumulators simply stay at -inf / +inf.
dataset_maxs = np.full(len(ALL_BANDS), -np.inf)
dataset_mins = np.full(len(ALL_BANDS), np.inf)
for i in tqdm(range(len(eo_train_data_unnormed))):
    inst = eo_train_data_unnormed[i].numpy()
    # Reduce over every axis except axis 1, leaving one value per band.
    inst_maxs = inst.max(axis=(0, 2, 3))
    inst_mins = inst.min(axis=(0, 2, 3))
    # fmax/fmin ignore NaNs, matching the old `<` / `>` masks (comparisons
    # against NaN are False, so NaNs never replaced the running value).
    dataset_maxs = np.fmax(dataset_maxs, inst_maxs)
    dataset_mins = np.fmin(dataset_mins, inst_mins)

100%|██████████| 2168/2168 [04:44<00:00,  7.61it/s]


In [7]:
# Show the accumulated per-band extrema as plain lists.
dict(max=list(dataset_maxs), min=list(dataset_mins))

{'max': [2.8,
  1.9872,
  1.9873,
  2.2221,
  1.5505,
  1.8669,
  2.2471,
  2.1799,
  1.9167,
  2.8,
  0.4114,
  2.2048,
  2.2139,
  0.9995333644423705],
 'min': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  -0.8]}

In [5]:
# Compare the shape of the first sample of each dataset.
for label, dataset in (
    ("KTH: ", kth_train_data),
    ("EO: ", eo_train_data),
    ("RobotPush: ", bair_train_data),
):
    print(label, dataset[0].shape)

KTH:  torch.Size([15, 64, 64, 1])
EO:  torch.Size([15, 3, 64, 64])
RobotPush:  (15, 64, 64, 3)


In [11]:
# Tensor type handed to utils.normalize_data for every batch.
dtype = torch.FloatTensor


def get_training_batch(data, name, batch_size=50, num_workers=None):
    """Yield shuffled training batches from ``data`` forever, as NumPy arrays.

    Args:
        data: Dataset handed to ``DataLoader``.
        name: Dataset name. It is forwarded to ``utils.normalize_data`` and,
            when ``num_workers`` is not given, selects the worker count
            (``"satellite"`` -> 1, anything else -> 5).
        batch_size: Number of samples per batch (default 50, the value that
            used to be hard-coded).
        num_workers: Number of loader worker processes. ``None`` keeps the
            name-based default described above.

    Yields:
        ``np.ndarray`` built from the ``.numpy()`` value of every item that
        ``utils.normalize_data`` returns for one loader batch.

    Raises:
        ValueError: On the first ``next()`` if the loader cannot produce a
            single full batch (fewer than ``batch_size`` samples).
    """
    if num_workers is None:
        num_workers = 1 if name == "satellite" else 5
    train_loader = DataLoader(
        data,
        num_workers=num_workers,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )
    # With drop_last=True a dataset smaller than one batch yields nothing, and
    # the endless loop below would then spin forever without ever yielding.
    if len(train_loader) == 0:
        raise ValueError(
            "dataset %r has %d samples, fewer than one batch of %d"
            % (name, len(data), batch_size)
        )
    # NOTE(review): with several workers, a dataset that samples from NumPy's
    # global RNG can return the same samples from different workers unless each
    # worker is re-seeded (see the PyTorch notes on randomness in multi-process
    # data loading). Worth checking if consecutive batches come back identical.
    while True:
        for sequence in train_loader:
            torch_batch = utils.normalize_data(name, dtype, sequence)
            yield np.array([item.numpy() for item in torch_batch])
            
# One endless batch generator per dataset. The name string selects the loader
# worker count and is passed on to utils.normalize_data; nothing is loaded
# until next() is first called on a generator.
kth_batch_gen = get_training_batch(kth_train_data, "kth")
eo_batch_gen = get_training_batch(eo_train_data, "satellite")
eo_unnormed_batch_gen = get_training_batch(eo_train_data_unnormed, "satellite")
bair_batch_gen = get_training_batch(bair_train_data, "bair")

In [5]:
# Draw a batch from the RGB satellite generator and keep it for comparison.
s = next(eo_batch_gen)

In [6]:
# Draw another batch from the same generator.
s2 = next(eo_batch_gen)

In [8]:
# Draw another batch from the RGB satellite generator.
t1 = next(eo_batch_gen)

In [9]:
# Draw one more batch from the RGB satellite generator.
t2 = next(eo_batch_gen)

In [13]:
# True when the two batches differ in at least one element.
(s != s2).any()

True

In [15]:
# True when the two batches differ in at least one element; False means they
# are element-wise identical.
(s2 != t2).any()

False

In [11]:
# Draw two consecutive KTH batches; with shuffled loading they are expected to
# differ. The message names the dataset so a failure is self-explanatory.
kth_batch = next(kth_batch_gen)
kth_batch2 = next(kth_batch_gen)
assert (kth_batch != kth_batch2).any(), "two consecutive KTH batches are element-wise identical"

In [12]:
# Draw two consecutive BAIR batches; with shuffled loading they are expected to
# differ. The message names the dataset so a failure is self-explanatory.
bair_batch = next(bair_batch_gen)
bair_batch2 = next(bair_batch_gen)
assert (bair_batch != bair_batch2).any(), "two consecutive BAIR batches are element-wise identical"

AssertionError: 

In [15]:
# Shape of the BAIR batch array (presumably time, batch, channel, height,
# width -- TODO confirm against utils.normalize_data).
bair_batch.shape

(15, 50, 3, 64, 64)

In [12]:
# Draw two consecutive RGB satellite batches; with shuffled loading they are
# expected to differ. The message names the dataset so a failure is
# self-explanatory.
eo_batch = next(eo_batch_gen)
eo_batch2 = next(eo_batch_gen)
assert (eo_batch != eo_batch2).any(), "two consecutive satellite (RGB) batches are element-wise identical"

In [13]:
# Draw two consecutive batches from the all-band satellite generator; with
# shuffled loading they are expected to differ. The message names the dataset
# so a failure is self-explanatory.
eo_u_batch = next(eo_unnormed_batch_gen)
eo_u_batch2 = next(eo_unnormed_batch_gen)
assert (eo_u_batch != eo_u_batch2).any(), "two consecutive satellite (all-band) batches are element-wise identical"

In [14]:
def batch_stats(batch):
    """Summarise a 5-D batch array with one value per index of axis 2.

    Every statistic is reduced over axes 0, 1, 3 and 4, so each entry except
    "Shape" is a 1-D array whose length equals ``batch.shape[2]``
    (presumably the channel axis of a time x batch x channel x H x W array).

    Args:
        batch: Array exposing ``shape`` and ``max``/``min``/``mean``/``var``/
            ``std`` methods that accept a tuple ``axis`` (e.g. ``np.ndarray``).

    Returns:
        dict with the keys "Shape", "Max", "Min", "Mean", "Variance" and "Std",
        in that order.
    """
    reduce_over = (0, 1, 3, 4)
    summary = {"Shape": batch.shape}
    reducers = (
        ("Max", batch.max),
        ("Min", batch.min),
        ("Mean", batch.mean),
        ("Variance", batch.var),
        ("Std", batch.std),
    )
    for label, reducer in reducers:
        summary[label] = reducer(axis=reduce_over)
    return summary

In [15]:
# Per-channel statistics for one batch of each dataset; the second batches
# drawn alongside them are deliberately left out of the summary.
stats = {
    label: batch_stats(batch)
    for label, batch in (
        ("kth1", kth_batch),
        ("eo1", eo_batch),
        ("eo_u1", eo_u_batch),
    )
}

In [16]:
# Tabulate the statistics: one column per entry of `stats`, one row per statistic.
pd.DataFrame(stats)

Unnamed: 0,kth1,eo1,eo_u1
Shape,"(15, 50, 1, 64, 64)","(15, 50, 3, 64, 64)","(15, 50, 3, 64, 64)"
Max,[1.0],"[28.951277, 47.63986, 36.912502]","[1.907, 1.8688, 1.1752]"
Min,[0.011764706],"[-2.7390916, -4.0216584, -4.981337]","[0.0, 0.0, 0.0]"
Mean,[0.61469316],"[0.28564572, 0.2947104, 0.3144054]","[0.1820168, 0.15614001, 0.14855538]"
Variance,[0.043017842],"[2.134563, 2.158881, 2.204485]","[0.0077295825, 0.002825007, 0.0017347226]"
Std,[0.20740743],"[1.4610144, 1.4693131, 1.4847507]","[0.08791804, 0.053150795, 0.04165]"


In [33]:
# Per-channel mean of the batch (reduced over axes 0, 1, 3 and 4). keepdims
# leaves it with shape (1, 1, C, 1, 1), which broadcasts against the batch for
# any channel count and image size. The previous np.full((64,64,3), ...) form
# only worked for 3 channels and relied on square 64x64 images.
mean = eo_u_batch.mean(axis=(0, 1, 3, 4), keepdims=True)

In [37]:
# Per-channel standard deviation of the batch (reduced over axes 0, 1, 3 and
# 4). keepdims leaves it with shape (1, 1, C, 1, 1), which broadcasts against
# the batch for any channel count and image size. The previous
# np.full((64,64,3), ...) form only worked for 3 channels and relied on square
# 64x64 images.
std = eo_u_batch.std(axis=(0, 1, 3, 4), keepdims=True)

In [39]:
# Standardise the batch channel-wise: subtract the per-channel mean and divide
# by the per-channel std (both broadcast against the batch).
new_b = (eo_u_batch - mean)/std

In [41]:
# Overall mean of the standardised batch.
new_b.mean()

-4.412863e-07

In [42]:
# Overall standard deviation of the standardised batch.
new_b.std()

1.0000001

In [43]:
# Largest value in the standardised batch.
new_b.max()

32.22266