In [1]:
import numpy as np
import tensorflow as tf
import os

In [12]:
# Directory holding the TFRecord files to summarise. Change this to whatever
# directory has the needed tfrecords; the path is relative to the notebook.
TFRECORDS_DIR = '../data/nga_lsms_tfrecords/'

In [13]:
# Band names, used as feature keys when reading each TFRecord example.
BANDS = [
    'BLUE',
    'GREEN',
    'RED',
    'NIR',
    'SW_IR1',
    'SW_IR2',
    'TEMP',
    'VIIRS',
    'DELTA_TEMP',
]

In [14]:
def create_single_feature_set(filename):
    """Return the feature map of the first example stored in ``filename``.

    Opens ``filename`` as a single-file ``tf.data.TFRecordDataset`` and passes
    that dataset to ``parse_features``.

    Args:
        filename: Path of one TFRecord file.

    Returns:
        Whatever ``parse_features`` returns for the dataset.
    """
    dataset = tf.data.TFRecordDataset(filenames=[filename])
    return parse_features(record=dataset)

def parse_features(record):
    """Decode the first serialized example of a TFRecord dataset.

    Args:
        record: Iterable of serialized examples whose items expose
            ``.numpy()`` (here a ``tf.data.TFRecordDataset``).

    Returns:
        The ``features.feature`` mapping of the decoded ``tf.train.Example``.

    Raises:
        ValueError: If ``record`` yields no examples.
    """
    # Only the first example is read; anything after it is ignored.
    # A default is passed to next() so an empty dataset is reported clearly
    # instead of leaking a bare StopIteration to the caller.
    raw_example = next(iter(record), None)
    if raw_example is None:
        raise ValueError('TFRecord dataset contains no examples')

    example = tf.train.Example.FromString(raw_example.numpy())

    return example.features.feature

In [15]:
# Full paths of every non-hidden entry in TFRECORDS_DIR, ordered by file name.
tfrecords = [
    os.path.join(TFRECORDS_DIR, name)
    for name in sorted(os.listdir(TFRECORDS_DIR))
    if not name.startswith('.')
]
num_records = len(tfrecords)

def calculate_band_means(records=None, bands=None):
    """Compute the mean pixel value of each band across TFRecord files.

    One example is read from each file via ``create_single_feature_set``.
    The mean is pixel-weighted (total sum / total pixel count), which equals
    the mean of per-image means whenever all images have the same size.

    Args:
        records: Iterable of TFRecord file paths. Defaults to the
            module-level ``tfrecords`` list.
        bands: Iterable of band names to summarise. Defaults to the
            module-level ``BANDS`` list.

    Returns:
        dict mapping each band name to its mean as a Python ``float``.

    Raises:
        ValueError: If there are no records, or a record holds no values for
            one of the requested bands.
    """
    records = list(tfrecords if records is None else records)
    if not records:
        raise ValueError('No TFRecord files to compute band means from')
    bands = list(BANDS if bands is None else bands)

    pixel_sums = {band_name: 0.0 for band_name in bands}
    pixel_counts = {band_name: 0 for band_name in bands}

    for tf_r in records:
        feature_set = create_single_feature_set(filename=tf_r)

        for band_name in bands:
            # float64 keeps the running totals accurate over many records; the
            # 2-D image shape is irrelevant to a mean, so the data stays flat.
            band = np.asarray(feature_set[band_name].float_list.value, dtype=np.float64)
            if band.size == 0:
                raise ValueError(f'Band {band_name!r} has no values in {tf_r}')
            pixel_sums[band_name] += band.sum()
            pixel_counts[band_name] += band.size

    return {
        band_name: float(pixel_sums[band_name] / pixel_counts[band_name])
        for band_name in bands
    }

def calculate_band_stdevs(band_means, records=None, bands=None):
    """Compute the population standard deviation of each band across files.

    One example is read from each file via ``create_single_feature_set``.
    Squared deviations from the supplied means are summed over every pixel
    and divided by the total pixel count before taking the square root; for
    equally sized images this matches averaging per-image mean squared
    deviations.

    Args:
        band_means: dict mapping each band name to the mean to measure
            deviations from (e.g. the result of ``calculate_band_means``).
        records: Iterable of TFRecord file paths. Defaults to the
            module-level ``tfrecords`` list.
        bands: Iterable of band names to summarise. Defaults to the
            module-level ``BANDS`` list.

    Returns:
        dict mapping each band name to its standard deviation as a Python
        ``float``.

    Raises:
        ValueError: If there are no records, or a record holds no values for
            one of the requested bands.
        KeyError: If ``band_means`` lacks an entry for a requested band.
    """
    records = list(tfrecords if records is None else records)
    if not records:
        raise ValueError('No TFRecord files to compute band standard deviations from')
    bands = list(BANDS if bands is None else bands)

    squared_dev_sums = {band_name: 0.0 for band_name in bands}
    pixel_counts = {band_name: 0 for band_name in bands}

    for tf_r in records:
        feature_set = create_single_feature_set(filename=tf_r)

        for band_name in bands:
            # float64 avoids precision loss when squaring and summing
            # deviations of large-valued bands.
            band = np.asarray(feature_set[band_name].float_list.value, dtype=np.float64)
            if band.size == 0:
                raise ValueError(f'Band {band_name!r} has no values in {tf_r}')
            deviations = band - band_means[band_name]
            squared_dev_sums[band_name] += (deviations ** 2).sum()
            pixel_counts[band_name] += band.size

    return {
        band_name: float(np.sqrt(squared_dev_sums[band_name] / pixel_counts[band_name]))
        for band_name in bands
    }
    

In [16]:
# Per-band means; computed first because the standard deviations need them.
band_means = calculate_band_means()

In [17]:
# Per-band standard deviations, measured around the means in `band_means`.
band_stdevs = calculate_band_stdevs(band_means)

In [18]:
# Print both statistics dictionaries; the trailing '\n' leaves a blank line
# after each one.
print(f'Band Means: {band_means}\n')
print(f'Band Standard Deviations: {band_stdevs}\n')

Band Means: {'BLUE': 0.06394327203741834, 'GREEN': 0.11046460040106762, 'RED': 0.13331832385103914, 'NIR': 0.28515732947262695, 'SW_IR1': 0.2940725715657443, 'SW_IR2': 0.21185806173034388, 'TEMP': 309.76055477080405, 'VIIRS': 0.9838503891800041, 'DELTA_TEMP': 272.43254182862273}

Band Standard Deviations: {'BLUE': 0.02769227383368525, 'GREEN': 0.03875152334541102, 'RED': 0.0650641224982296, 'NIR': 0.06078692154635367, 'SW_IR1': 0.10890123369640009, 'SW_IR2': 0.10343445270835655, 'TEMP': 4.544446413905368, 'VIIRS': 7.623023631186187, 'DELTA_TEMP': 3348.9040754298007}

