# Binning the selected profiles

QC criteria are used to select profiles, and the selected profiles are then binned at standard depth levels.

- Bin the adjusted O2 profiles onto a 1 x 1 degree grid, month by month
- Calculate the mean, standard deviation, and sample count for each bin


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd
import os
import re
# raise the pandas display limits (rows, columns, line width)
for option,value in (('display.max_rows',500),
                     ('display.max_columns',500),
                     ('display.width',1000)):
    pd.set_option(option,value)

In [2]:
# time axis: calendar years, calendar months, and one stamp per month
Y=np.arange(2010,2020,1)
M=np.arange(1,13,1)
time=np.arange('2010-01','2020-01',dtype='datetime64[M]')

# grid spacing in degrees
res=1.0

# grid cells: west/south edges first, then the centres and the
# east/north edges are offsets of those same edges
xW=np.arange(-180,180,res)
yS=np.arange(-90,90,res)
xC=xW+res/2
yC=yS+res/2
xE=xW+res
yN=yS+res

# number of cells in each horizontal direction
Nx=xC.size
Ny=yC.size

# zero-filled arrays indexed as (month, depth level, lat, lon)
acc_shape=(12*Y.size,26,Ny,Nx)
mn=np.zeros(acc_shape) # stat mean
sd=np.zeros(acc_shape) # stdev
dd=np.zeros(acc_shape) # count

In [3]:
# read in the summary statistics, one table per float Type
summary_files=('float_Type1_summary_ver.Feb1.csv',
               'float_Type2_summary_ver.Feb1.csv',
               'float_Type3_summary_ver.Feb1.csv')
df1,df2,df3=(pd.read_csv(path) for path in summary_files)

In [4]:
# prepare helper functions

In [5]:
def bin_float(df,n):
    """Accumulate one float's adjusted O2 profiles into the global monthly grid.

    Opens the file named by ``df['File'][n]`` and, for every profile that has a
    valid time, longitude and latitude, adds its non-NaN O2 values to the
    module-level accumulators, all indexed as (month, depth level, lat, lon):

    * ``mn`` - running sum of O2
    * ``sd`` - running sum of O2 squared
    * ``dd`` - sample count

    Profiles dated outside the years listed in ``Y``, or located outside the
    grid edges (``xW``/``xE``, ``yS``/``yN``), are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Summary table with a 'File' column holding the data file paths.
    n : int
        Row label of the float to process.

    Returns
    -------
    xarray.Dataset
        The dataset that was opened (it is returned still open).

    Raises
    ------
    IndexError
        If a profile has to be binned but the file holds fewer depth levels
        than the accumulator arrays.
    """
    fn0=df['File'][n]
    # open the data file
    print('reading in '+fn0)
    ds=xr.open_dataset(fn0)
    # Pull the variables into NumPy once: indexing xarray objects one element
    # at a time inside the loops is very slow.  Casting to float64 makes sure
    # the squares are not formed in a narrower dtype if the file stores one.
    o2=ds['O2_FLT_ADJUSTED'].to_numpy().astype(np.float64) # profile, depth
    when=pd.to_datetime(ds['JULD'])
    lon=ds['LONGITUDE'].to_numpy()
    lat=ds['LATITUDE'].to_numpy()
    # number of depth levels is taken from the accumulators, not hard-coded
    nlev=mn.shape[1]
    for p in range(o2.shape[0]):
        x0=lon[p]
        y0=lat[p]
        if not (pd.notna(when[p]) and pd.notna(x0) and pd.notna(y0)):
            continue
        # locate the time slot and the grid cell; an empty match means the
        # profile lies outside the time axis or the grid and is skipped
        iyr=np.flatnonzero(Y==int(when[p].year))
        imo=np.flatnonzero(M==int(when[p].month))
        ix=np.flatnonzero((x0>=xW)&(x0<xE))
        iy=np.flatnonzero((y0>=yS)&(y0<yN))
        if min(iyr.size,imo.size,ix.size,iy.size)==0:
            continue
        if o2.shape[1]<nlev:
            raise IndexError('%s has %d depth levels, expected at least %d'
                             %(fn0,o2.shape[1],nlev))
        it=int(imo[0]+12*iyr[0])
        i=int(ix[0])
        j=int(iy[0])
        prof=o2[p,:nlev]
        ok=~np.isnan(prof)
        # Integer/slice indexing returns views, so the in-place updates below
        # write straight into the global accumulators for this (month, cell).
        cell_sum=mn[it,:,j,i]
        cell_sq=sd[it,:,j,i]
        cell_cnt=dd[it,:,j,i]
        cell_sum[ok]+=prof[ok]
        cell_sq[ok]+=prof[ok]**2
        cell_cnt[ok]+=1
    return ds

In [6]:
# bin every float listed in the Type 1 summary table
N1=df1.shape
n_type1=N1[0]
for n in range(n_type1):
    bin_float(df1,n)

reading in compare_woa_v3/1901134_26lev.nc
reading in compare_woa_v3/1901135_26lev.nc
reading in compare_woa_v3/1901152_26lev.nc
reading in compare_woa_v3/1901153_26lev.nc
reading in compare_woa_v3/1901154_26lev.nc
reading in compare_woa_v3/1901155_26lev.nc
reading in compare_woa_v3/1901156_26lev.nc
reading in compare_woa_v3/1901157_26lev.nc
reading in compare_woa_v3/1901158_26lev.nc
reading in compare_woa_v3/1901159_26lev.nc
reading in compare_woa_v3/1901338_26lev.nc
reading in compare_woa_v3/1901339_26lev.nc
reading in compare_woa_v3/1901379_26lev.nc
reading in compare_woa_v3/1901467_26lev.nc
reading in compare_woa_v3/1901468_26lev.nc
reading in compare_woa_v3/1902303_26lev.nc
reading in compare_woa_v3/1902304_26lev.nc
reading in compare_woa_v3/2900128_26lev.nc
reading in compare_woa_v3/2901551_26lev.nc
reading in compare_woa_v3/2901555_26lev.nc
reading in compare_woa_v3/2901559_26lev.nc
reading in compare_woa_v3/2901562_26lev.nc
reading in compare_woa_v3/2901563_26lev.nc
reading in 

reading in compare_woa_v3/6901461_26lev.nc
reading in compare_woa_v3/6901468_26lev.nc
reading in compare_woa_v3/6901486_26lev.nc
reading in compare_woa_v3/6901597_26lev.nc
reading in compare_woa_v3/6901604_26lev.nc
reading in compare_woa_v3/6901646_26lev.nc
reading in compare_woa_v3/6901756_26lev.nc
reading in compare_woa_v3/6902549_26lev.nc
reading in compare_woa_v3/6902686_26lev.nc
reading in compare_woa_v3/6902701_26lev.nc
reading in compare_woa_v3/6902736_26lev.nc
reading in compare_woa_v3/6902740_26lev.nc
reading in compare_woa_v3/6902742_26lev.nc
reading in compare_woa_v3/6902743_26lev.nc
reading in compare_woa_v3/6902805_26lev.nc
reading in compare_woa_v3/6902808_26lev.nc
reading in compare_woa_v3/6902810_26lev.nc
reading in compare_woa_v3/6902880_26lev.nc
reading in compare_woa_v3/6902906_26lev.nc
reading in compare_woa_v3/6902907_26lev.nc
reading in compare_woa_v3/6902908_26lev.nc
reading in compare_woa_v3/6902909_26lev.nc
reading in compare_woa_v3/6902972_26lev.nc
reading in 

In [None]:
# bin every float listed in the Type 2 summary table
N2=df2.shape
n_type2=N2[0]
for n in range(n_type2):
    bin_float(df2,n)

In [None]:
# bin the floats listed in the Type 3 summary table
N3=df3.shape
n_type3=N3[0]
for n in range(n_type3):
    # row 77 is left out for Type 3 only (the reason is not recorded here)
    if n==77:
        continue
    bin_float(df3,n)

In [None]:
# calculate mean
# mn holds the per-bin running sum at this point; dividing by the sample
# count turns it into the mean.  Bins without samples are 0/0 and come out
# as NaN, which is kept as the no-data value, so the expected floating-point
# warnings are silenced for this one operation only.
# NOTE: this overwrites the running sum - re-running the cell without
# re-accumulating would divide the mean by the count a second time.
with np.errstate(divide='ignore',invalid='ignore'):
    mn=mn/dd

In [None]:
# calculate standard deviation
# sd holds the per-bin running sum of squares and mn must already hold the
# mean, so the variance is E[x^2] - E[x]^2.  Round-off can push that
# difference slightly below zero when the spread in a bin is tiny; without
# the clamp sqrt would turn those bins into NaN even though they have data.
# np.maximum propagates NaN, so bins without samples (0/0) still end up NaN.
# NOTE: this overwrites the running sum of squares - do not re-run the cell
# without re-accumulating.
with np.errstate(divide='ignore',invalid='ignore'):
    sd=np.sqrt(np.maximum(sd/dd - mn**2,0.0))

In [None]:
# save the result
# standard depth levels used as the depth coordinate
zstd=np.array([0,10,20,30,50,75,100,125,150,200,250,300,400,
               500,600,700,800,900,1000,1100,1200,1300,1400,
               1500,1750,2000])

# dimensions and coordinates shared by the three output variables
grid_dims=['time','depth','lat','lon']
grid_coords={"lon":xC,"lat":yC, "depth":zstd,"time":time}

def _gridded(values,name,longname,units):
    """Wrap one gridded array as a named DataArray on the shared grid."""
    # NOTE(review): the CF convention spells this attribute `long_name`;
    # the existing `longname` key is kept so the output file is unchanged.
    return xr.DataArray(data=values,name=name,
                        dims=grid_dims,coords=grid_coords,
                        attrs=dict(longname=longname,units=units))

o_mn=_gridded(mn,'o_mn',"statistical mean of o2","micro-mol/kg")
o_sd=_gridded(sd,'o_sd',"standard deviation of o2","micro-mol/kg")
o_dd=_gridded(dd,'o_dd',"data count of o2","non-dimensional")

# combine into one dataset and write it out, replacing any existing file
ds_out=xr.merge([o_mn,o_sd,o_dd])
wn='o2_ARGO_Type123_26lev.nc'
ds_out.to_netcdf(wn,mode='w')