# Analysis of AMSR2 data

This notebook allows the calculation of the mean and standard deviation of the AMSR2 data for each frequency band following each polarisation.
This data will allow the AMSR2 data tensor to be standardised during concatenation in the network.

## Import

In [1]:
import glob
import argparse
import datetime
import os
from os.path import basename, dirname, join
from operator import itemgetter
import random
import json
import numpy as np
import matplotlib.pyplot as plt
import statistics

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-i5kcojd2 because the default path (/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


## Path of data

In [2]:
input_dir_json = '/tf/data/hugo_sod/'
idir = '/tf/data/hugo_sod/output_preprocessed/'

## Processed

In [3]:
with open(f'{idir}processed_files.json') as fichier_json:
    all_nc = json.load(fichier_json)
npz_files=[]


for nc in all_nc :
    name = nc[:15]
    files = sorted(glob.glob(f'{idir}/{name}/*.npz'))
    npz_files += files
random.shuffle(npz_files)
print(len(npz_files))

888522


In [4]:
amsr2_var_names = ['btemp_6_9h',
                   'btemp_6_9v',
                   'btemp_7_3h',
                   'btemp_7_3v',
                   'btemp_10_7h',
                   'btemp_10_7v',
                   'btemp_18_7h',
                   'btemp_18_7v',
                   'btemp_23_8h',
                   'btemp_23_8v',
                   'btemp_36_5h',
                   'btemp_36_5v',
                   'btemp_89_0h',
                   'btemp_89_0v'
                  ]
dims_amsr2 = np.load(npz_files[0])[amsr2_var_names[0]].shape
dims_amsr2= (*dims_amsr2, len(amsr2_var_names))

In [5]:
z= []
for i in range(len(amsr2_var_names)):
    z.append([])
for ID in npz_files[:2000]:
    batch = {}
    batch.update(np.load(ID))
    for j, amsr2_name in enumerate(amsr2_var_names):
        z[j].append(list(batch.get(amsr2_name).flatten()))

In [6]:
dict_bandes={}
for i,bande in enumerate(z):
    data_bande=sum(bande,[])
    mean_ = statistics.mean(data_bande)
    stdev = np.std(data_bande)
    result = [mean_, stdev]
    dict_bandes.update( {amsr2_var_names[i] : result} )
    
print(dict_bandes)

{'btemp_6_9h': [149.23788, 56.49082], 'btemp_6_9v': [204.98549, 36.33368], 'btemp_7_3h': [150.37196, 56.59285], 'btemp_7_3v': [205.71017, 36.34493], 'btemp_10_7h': [155.304, 56.9012], 'btemp_10_7v': [210.9305, 34.505795], 'btemp_18_7h': [168.22734, 50.159405], 'btemp_18_7v': [221.29988, 26.990482], 'btemp_23_8h': [185.93535, 40.547512], 'btemp_23_8v': [229.78435, 20.768389], 'btemp_36_5h': [188.98741, 38.220444], 'btemp_36_5v': [232.11049, 16.431904], 'btemp_89_0h': [216.95062, 20.528406], 'btemp_89_0v': [244.92033, 12.187659]}
