In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import hpy as h5
from tqdm import tqdm
import warnings
import time
warnings.filterwarnings('ignore')

# Funciones para binnear

In [2]:
def initbin(numrows):
    basebin = np.zeros((numrows,))
    latbin = np.zeros((numrows,))
    numbin = np.zeros((numrows,))
    basebin[0] = 1
    for row in range(numrows):
        latbin[row] = ((row + 0.5)*180.0/numrows) - 90.0
        numbin[row] = int(2*numrows*np.cos(latbin[row]*np.pi/180.0) + 0.5)
        if(row > 0):
            basebin[row] = basebin[row - 1] + numbin[row - 1]
    totbins = basebin[numrows - 1] + numbin[numrows - 1] - 1  
    return totbins, basebin, numbin, latbin

In [3]:
def lat2row(lat, numrows):
    row = 0*lat
    row = (90 + lat)*numrows/180.0
    row = row.astype(int)
    row[row>=numrows] = numrows - 1
    return row

In [4]:
def constrain_lon(lon):
    lon[lon < -180] = lon[lon < -180] % 180
    lon[lon>180] = lon[lon>180] % -180
    # while(lon < -180):
    #     lon += 360
    # while(lon >  180):
    #     lon -= 360 
    return lon

In [5]:
def rowlon2bin(row, lon, numbin, basebin):
    lon = constrain_lon(lon)
    col = ((lon + 180.0)*numbin[row]/360.0)
    col = col.astype(int)
    mask = np.where(col >= numbin[row])
    col[mask] = numbin[row[mask]] - 1 
    # if(col >= numbin[row]):
    #     col = numbin[row] - 1

    return basebin[row] + col

In [6]:
def constrain_lat(lat):
    lat[lat>90] = 90
    lat[lat<-90] = -90
    # if(lat >  90):
    #     lat = 90
    # if(lat < -90): 
    #     lat = -90 
    return lat

In [7]:
def latlon2bin(numrow,lat,lon,numbin,basebin):
    lat = constrain_lat(lat)
    lon = constrain_lon(lon)

    row = lat2row(lat, numrow)
    binlon = rowlon2bin(row, lon, numbin, basebin)

    return binlon 

In [8]:
# Falta vectorizar
def bin2latlon (_bin, numrows, basebin, numbin, latbin):
    row = numrows - 1
    if(_bin < 1):
        _bin = 1

    while(_bin < basebin[row]):
        row-=1

    clat = latbin[row]
    clon = 360.0*(_bin - basebin[row] + 0.5)/numbin[row] - 180.0
    
    return clat,clon

def bin2bounds(_bin, numrows, basebin, numbin, latbin):
    row = numrows - 1
    if(_bin < 1):
        _bin = 1

    while(_bin < basebin[row]):
        row-=1

    north = latbin[row] + 90.0/numrows
    south = latbin[row] - 90.0/numrows
    lon = 360.0*(_bin - basebin[row] + 0.5)/numbin[row] - 180.0
    west = lon - 180.0/numbin[row]
    east = lon + 180.0/numbin[row]
    return north,south,west,east

# Binneado

In [9]:
# def get_obs_data(ds, product_list, row_idx):
#     return np.array([ds['geophysical_data'][product_list[i]][row_idx,:] for i in range(len(product_list))]).T    

In [10]:
# %%time
# Nota: get_bin_index se corresponde con la funcion latlon2bin y devuelve el numero del bin al que corresponde dada lat, lon
# NSCANS = 3173  # es el nummero de filas del granulo
# NPIXEL = 1468 # es el numero de columnas del granulo

# def get_stats(NSCANS, NPIXEL, LAT, LON, numrow, numbin, basebin, ds, products):
#     for L in tqdm(range(NSCANS)):
#         for I in range(NPIXEL):
#             #if OBS
#             t1 
#             IDX = int(latlon2bin(numrow,LAT[L,I],LON[L,I],numbin,basebin))
#             OBS = get_obs_data(ds, products, L)
#             for J in range(NVARS):
#                 XLOG = np.log(OBS[I,J])
#                 # print(SUMX[IDX,J], XLOG)
#                 SUMX[IDX,J] += XLOG
#                 SUMXX[IDX,J] += XLOG*XLOG
#             N[IDX] = N[IDX] + 1
#             NSEG[IDX] = 1
            
#     SUMX2 = np.zeros((int(totbins), NVARS))
#     SUMXX2 = np.zeros((int(totbins), NVARS))
#     for IDX in tqdm(range(int(totbins))):
#         if N[IDX] > 0:
#             W[IDX] = np.sqrt(N[IDX])
#             for J in range(NVARS):
#                 SUMX2[IDX, J] = SUMX[IDX, J] / W[IDX]
#                 SUMXX2[IDX, J] = SUMXX[IDX, J] / W[IDX]
    
#     return SUMX, SUMXX, SUMX2, SUMXX2, N, W, NSEG

In [24]:
def get_spatial_bin_vectorized(NVARS, lat, lon, numrow, numbin, basebin, ds, products):
    SUMX = np.zeros((int(totbins), NVARS))
    SUMXX = np.zeros((int(totbins), NVARS))
    SUMXw = np.zeros((int(totbins), NVARS))
    SUMXXw = np.zeros((int(totbins), NVARS))
    N =  np.zeros((int(totbins), ))
    W =  np.zeros((int(totbins), ))
    NSEG = np.zeros((int(totbins), ))
    TT = np.zeros((int(totbins), ))
    
    idx = latlon2bin(numrow, lat, lon, numbin, basebin)
    idx = idx.astype(int)
    prod_stack = np.stack(ds['geophysical_data'][products[i]][:] for i in range(len(products)))
    prod_rolled = np.rollaxis(np.rollaxis(prod_stack, 1,0), 2,1)
    XLOG = np.log(prod_rolled)
    XXLOG = XLOG * XLOG
    
    for i in range(NVARS):
        SUMX[idx, i] = XLOG[:,:,i]
        SUMXX[idx, i] = XXLOG[:,:,i]
    
    unique, count = np.unique(idx, return_counts=True)
    N[unique] = count
    W[unique] = np.sqrt(count)
    
    NSEG[idx] = 1 
    
    for i in range(NVARS):
        SUMXw[idx, i] = SUMX[idx, i]/W[idx]
        SUMXXw[idx, i] = SUMXX[idx, i]/W[idx]
    
    return SUMX, SUMXX, SUMXw, SUMXXw, N, W, NSEG    

# Binneando un L2 de MODIS

## Exploremos el netCDF de MODIS

In [12]:
file = 'l2modis/A2022160171000.L2_LAC_OC.nc'
l2modis = h5.File(file,'r')
l2modis.keys()

<KeysViewHDF5 ['bands_per_pixel', 'geophysical_data', 'navigation_data', 'number_of_bands', 'number_of_lines', 'number_of_reflective_bands', 'pixel_control_points', 'pixels_per_line', 'processing_control', 'scan_line_attributes', 'sensor_band_parameters']>

In [13]:
l2modis['navigation_data'].keys()

<KeysViewHDF5 ['longitude', 'latitude', 'cntl_pt_cols', 'cntl_pt_rows', 'tilt']>

In [14]:
modisprod = list(l2modis['geophysical_data'].keys())[:-1]
l2modis['geophysical_data'].keys()

<KeysViewHDF5 ['aot_869', 'angstrom', 'Rrs_412', 'Rrs_443', 'Rrs_469', 'Rrs_488', 'Rrs_531', 'Rrs_547', 'Rrs_555', 'Rrs_645', 'Rrs_667', 'Rrs_678', 'chlor_a', 'chl_ocx', 'Kd_490', 'pic', 'poc', 'ipar', 'nflh', 'par', 'l2_flags']>

In [15]:
l2modis['pixels_per_line'][:].size

1354

In [16]:
l2modis['navigation_data']['longitude'].shape

(2030, 1354)

In [17]:
numrow = 4320

LAT = l2modis['navigation_data']['latitude'][:]
LON = l2modis['navigation_data']['longitude'][:]

In [18]:
lat2row(LAT, numrow)

array([[  1265,   1264,   1264, ...,   1171,   1171,   1171],
       [  1265,   1265,   1264, ...,   1171,   1171,   1171],
       [  1265,   1265,   1265, ...,   1172,   1172,   1172],
       ...,
       [-21816, -21816, -21816, ..., -21816, -21816, -21816],
       [-21816, -21816, -21816, ..., -21816, -21816, -21816],
       [-21816, -21816, -21816, ..., -21816, -21816, -21816]])

In [19]:
(90 + LAT)*numrow/180.0

array([[  1265.1213,   1264.782 ,   1264.4459, ...,   1171.4861,
          1171.4471,   1171.4087],
       [  1265.5377,   1265.1971,   1264.8599, ...,   1171.9253,
          1171.888 ,   1171.8508],
       [  1265.9542,   1265.6125,   1265.2739, ...,   1172.3647,
          1172.3286,   1172.293 ],
       ...,
       [-21816.    , -21816.    , -21816.    , ..., -21816.    ,
        -21816.    , -21816.    ],
       [-21816.    , -21816.    , -21816.    , ..., -21816.    ,
        -21816.    , -21816.    ],
       [-21816.    , -21816.    , -21816.    , ..., -21816.    ,
        -21816.    , -21816.    ]], dtype=float32)

In [20]:
LAT

array([[ -37.28661 ,  -37.30075 ,  -37.314754, ...,  -41.18808 ,
         -41.1897  ,  -41.191303],
       [ -37.26926 ,  -37.28345 ,  -37.29751 , ...,  -41.169777,
         -41.171337,  -41.172882],
       [ -37.251907,  -37.266148,  -37.280254, ...,  -41.15147 ,
         -41.152977,  -41.15446 ],
       ...,
       [-999.      , -999.      , -999.      , ..., -999.      ,
        -999.      , -999.      ],
       [-999.      , -999.      , -999.      , ..., -999.      ,
        -999.      , -999.      ],
       [-999.      , -999.      , -999.      , ..., -999.      ,
        -999.      , -999.      ]], dtype=float32)

## Calculamos el binneado y los estadísticos

In [21]:
numrow = 4320
totbins, basebin, numbin, latbin = initbin(numrow)

In [25]:
%%time
LAT = l2modis['navigation_data']['latitude'][:]
LON = l2modis['navigation_data']['longitude'][:]
NVARS = len(modisprod)
SUMX, SUMXX, SUMX2, SUMXX2, N, W, NSEG = get_spatial_bin_vectorized(NVARS, LAT, LON, numrow, numbin, basebin, l2modis, modisprod)

CPU times: user 2.23 s, sys: 196 ms, total: 2.43 s
Wall time: 2.43 s


In [26]:
SUMX.shape

(23761676, 20)