In [26]:
import numpy as np
import xarray as xr
import glob

# OCCIPUT identifiers that keep their leading "Q" (not rewritten below).
_KEEP_Q_PREFIX = frozenset([
    'Q6901162', 'Q4901722', 'Q4902132', 'Q4902258',
    'Q4902133', 'Q4902131', 'Q4902261', 'Q4902262',
    'Q6901160', 'Q6901161',
])

# OCCIPUT variables reduced to their first N_MEMBER entry
# (redundant information through the N_MEMBER dimension).
_MEMBER_INVARIANT_VARS = [
    'VARIABLES', 'ENTRIES', 'EXTRA', 'DEPTH_QC', 'DEPTH_QC_FLAGS',
    'JULD_REFERENCE', 'OBSERVATION_QC', 'OBSERVATION_QC_FLAGS',
    'POSITION_QC', 'POSITION_QC_FLAGS', 'JULD_QC', 'JULD_QC_FLAGS',
    'ORIGINAL_FILE_INDEX', 'LATITUDE', 'LONGITUDE', 'JULD',
    'STATION_IDENTIFIER', 'STATION_TYPE', 'DEPTH', 'POTM_OBS',
    'POTM_LEVEL_QC', 'PSAL_OBS', 'PSAL_LEVEL_QC', 'TEMP', 'POTM_QC',
]

# (variable name, _FillValue, long_name) written on the output dataset.
_OUTPUT_ATTRS = [
    ('JULD', 99999.0, 'Julian date of OCCIPUT observation'),
    ('JULD_EN4', 99999.0, 'Julian date of EN4 matching observation'),
    ('LATITUDE', 99999.0, 'Latitude of observation from OCCIPUT'),
    ('LATITUDE_EN4', 99999.0, 'Latitude of observation from EN4 matching profile'),
    ('LONGITUDE', 99999.0, 'Longitude of observation from OCCIPUT'),
    ('LONGITUDE_EN4', 99999.0, 'Longitude of observation from EN4 matching profile'),
    ('DEPTH', 99999.0, 'Depth of observation from OCCIPUT'),
    ('POTM_OBS', 99999.0, 'Potential temperature of observation from OCCIPUT'),
    ('POTM_Hx', 99999.0, 'Model interpolated potential temperature'),
    ('PSAL_OBS', 99999.0, 'Practical salinity of observation from OCCIPUT'),
    ('PSAL_Hx', 99999.0, 'Model interpolated practical salinity'),
    ('POTM_QC', 0, 'Quality for each level on potential temperature from OCCIPUT'),
    ('PSAL_QC', 0, 'Quality for each level on practical salinity from OCCIPUT'),
    ('TEMP_OBS', 99999.0, 'Temperature of observation from OCCIPUT'),
    ('DEPTH_EN4', 99999.0, 'Depth of observation from EN4 matching profile'),
    ('POTM_EN4', 99999.0, 'Potential temperature of observation from EN4 matching profile'),
    ('PSAL_EN4', 99999.0, 'Practical salinity of observation from EN4 matching profile'),
    ('POTM_EN4_QC', 0, 'Quality for each level on potential temperature from EN4 matching profile'),
    ('PSAL_EN4_QC', 0, 'Quality for each level on practical salinity from EN4 matching profile'),
    ('TEMP_EN4', 99999.0, 'Temperature of observation from EN4 matching profile'),
]


def _subselect(ds, type_var):
    """Keep the entries of type ' 831' located inside the lat/lon box.

    `type_var` names the variable of `ds` holding the type code
    ('WMO_INST_TYPE' for EN4 files, 'STATION_TYPE' for OCCIPUT files).
    """
    keep = ((ds[type_var].isin([' 831']))
            & (ds.LATITUDE.values > 20) & (ds.LATITUDE.values < 45)
            & (ds.LONGITUDE.values > -85) & (ds.LONGITUDE.values < -30))
    return ds.where(keep, drop=True)


def _strip_q_prefixes(OCC):
    """Drop the leading character of "Q" identifiers, in place, for all members.

    Identifiers listed in `_KEEP_Q_PREFIX` are left untouched. A trailing
    blank is appended so the rewritten identifier keeps its length.
    """
    first_member = OCC.STATION_IDENTIFIER[0, :].values
    to_strip = []
    for k, item in enumerate(first_member):
        if "Q" in item:
            ident = str(OCC.STATION_IDENTIFIER[0, k].values)
            if ident not in _KEEP_Q_PREFIX:
                to_strip.append((k, ident))
    if to_strip:
        # Item assignment needs in-memory data; load once, not per identifier.
        OCC.STATION_IDENTIFIER.load()
        for k, ident in to_strip:
            OCC.STATION_IDENTIFIER[:, k] = ident[1:] + ' '


def _build_en4_lookup(EN4):
    """Return the EN4 profiles as a dataset indexed by their JULD value."""
    per_level = ['JULD', 'N_LEVELS']
    return xr.Dataset({'WMO': (['JULD'], EN4.PLATFORM_NUMBER.values),
                       'LATITUDE': (['JULD'], EN4.LATITUDE.values),
                       'LONGITUDE': (['JULD'], EN4.LONGITUDE.values),
                       'TEMP': (per_level, EN4.TEMP.values),
                       'POTM': (per_level, EN4.POTM_CORRECTED.values),
                       'POTM_QC': (per_level, EN4.POTM_CORRECTED_QC.values),
                       'PSAL': (per_level, EN4.PSAL_CORRECTED.values),
                       'PSAL_QC': (per_level, EN4.PSAL_CORRECTED_QC.values),
                       'DEPTH': (per_level, EN4.DEPH_CORRECTED.values),
                       },
                      coords={'JULD': EN4.JULD.values})


def _init_output(OCC, n_levels_en4):
    """Return the output dataset: OCCIPUT fields plus blank EN4 slots.

    Plain NumPy arrays (`.values`) are passed rather than DataArray objects
    because the matching loop fills the EN4 slots by item assignment, which
    needs in-memory data.
    """
    n_obs = len(OCC.JULD)
    obs = ['N_OBS']
    obs_lev = ['N_OBS', 'N_LEVELS']
    mem_obs_lev = ['N_MEMBER', 'N_OBS', 'N_LEVELS']
    obs_lev_en4 = ['N_OBS', 'N_LEVELS_EN4']

    def blank_real():
        return np.full((n_obs, n_levels_en4), np.nan, dtype='float64')

    def blank_flag():
        # NaN cannot be stored in an integer array (the cast is undefined);
        # start from 0, which is also the _FillValue of the QC variables.
        return np.full((n_obs, n_levels_en4), 0, dtype='int8')

    fds = xr.Dataset({
        'STATION_IDENTIFIER': (obs, OCC.STATION_IDENTIFIER.values),
        'LATITUDE': (obs, OCC.LATITUDE.values),
        'LONGITUDE': (obs, OCC.LONGITUDE.values),
        'JULD': (obs, OCC.JULD.values),
        'DEPTH': (obs_lev, OCC.DEPTH.values),
        'POTM_OBS': (obs_lev, OCC.POTM_OBS.values),
        'POTM_QC': (obs_lev, OCC.POTM_LEVEL_QC.astype('int8').values),
        'POTM_Hx': (mem_obs_lev, OCC.POTM_Hx.values),
        'PSAL_OBS': (obs_lev, OCC.PSAL_OBS.values),
        'PSAL_QC': (obs_lev, OCC.PSAL_LEVEL_QC.astype('int8').values),
        'PSAL_Hx': (mem_obs_lev, OCC.PSAL_Hx.values),
        'TEMP_OBS': (obs_lev, OCC.TEMP.values),
        'JULD_EN4': (obs, (OCC.JULD * np.nan).values),
        'LATITUDE_EN4': (obs, (OCC.LATITUDE * np.nan).values),
        'LONGITUDE_EN4': (obs, (OCC.LONGITUDE * np.nan).values),
        'DEPTH_EN4': (obs_lev_en4, blank_real()),
        'POTM_EN4': (obs_lev_en4, blank_real()),
        'POTM_EN4_QC': (obs_lev_en4, blank_flag()),
        'PSAL_EN4': (obs_lev_en4, blank_real()),
        'PSAL_EN4_QC': (obs_lev_en4, blank_flag()),
        'TEMP_EN4': (obs_lev_en4, blank_real()),
        'MATCH_EN4': (obs, np.full(n_obs, 0, dtype='int8')),
    })

    for name, fill_value, long_name in _OUTPUT_ATTRS:
        fds[name].attrs['_FillValue'] = fill_value
        fds[name].attrs['long_name'] = long_name
    return fds


def _fill_en4_matches(fds, OCC, EN4sf, tolerance):
    """Copy into `fds` the EN4 profile matching each OCCIPUT observation.

    A match has the same identifier (EN4sf.WMO vs OCC.STATION_IDENTIFIER)
    and the nearest JULD within `tolerance`. MATCH_EN4 is set to 1 on a
    match and 0 otherwise; misses are reported on stdout.
    """
    for i in range(len(OCC.STATION_IDENTIFIER)):
        station = OCC.STATION_IDENTIFIER[i].values
        juld = OCC.JULD[i].values

        ac = EN4sf.where(EN4sf.WMO.isin([station]), drop=True)
        ac = ac.sortby(ac.JULD)
        if len(ac.JULD) == 0:  # no EN4 profile carries this identifier
            fds.MATCH_EN4[i] = 0
            print(' '.join(str(x) for x in (i, str(station) + ' not found')))
            continue

        bc = ac.sel(JULD=[juld], method='nearest')
        gap = abs(bc.JULD.values - juld)
        if gap <= tolerance:  # date match within tolerance
            fds.MATCH_EN4[i] = 1
            fds.JULD_EN4[i] = bc.JULD[0].values
            fds.LONGITUDE_EN4[i] = bc.LONGITUDE[0].values
            fds.LATITUDE_EN4[i] = bc.LATITUDE[0].values
            fds.TEMP_EN4[i, :] = bc.TEMP[0].values
            fds.DEPTH_EN4[i, :] = bc.DEPTH[0].values
            fds.PSAL_EN4[i, :] = bc.PSAL[0].values
            fds.PSAL_EN4_QC[i, :] = bc.PSAL_QC[0].values.astype('int8')
            fds.POTM_EN4[i, :] = bc.POTM[0].values
            fds.POTM_EN4_QC[i, :] = bc.POTM_QC[0].values.astype('int8')
        else:
            fds.MATCH_EN4[i] = 0
            print(' '.join(str(x) for x in (i, juld, bc.JULD.values, gap)))


def monthly_loop(year, month, tolerance=1.0):
    """Co-locate one month of OCCIPUT observations with EN4 profiles.

    Opens the EN4 profile file and the OCCIPUT feedback files of the given
    month, keeps the entries of type ' 831' inside the latitude/longitude
    box, looks up for every OCCIPUT observation the EN4 profile with the
    same identifier and the nearest date, and writes the combined dataset
    to a NetCDF file.

    Parameters
    ----------
    year : str
        Year as used in the input paths, e.g. '2014'.
    month : str
        Month as used in the input paths, e.g. '05'.
    tolerance : float, optional
        Largest accepted difference between the OCCIPUT and EN4 JULD
        values, in days.

    Raises
    ------
    IOError
        If no EN4 or no OCCIPUT file matches the month.
    """
    print('----------------')
    print(year+'-'+month)
    # PATH CONF
    ROOTEN = "/home5/pharos/REFERENCE_DATA/OCEAN_REP/EN4/DATA/G10/PROFILES/"
    ROOTOC = "/export/home1/MOUNTS/OCCIPUT/OBS/"
    # ROOTOC = "/export/home1/DATA/PIRATE/OBS/"  # for 2007
    en4_pattern = ROOTEN+year+'/EN.4.2.0.f.profiles.g10.'+year+month+'.nc'
    occ_pattern = ROOTOC+'OBS.y'+year+'m'+month+'/ORCA025.L75-OCCITENS.*_enact_fdbk.nc'
    pathen = glob.glob(en4_pattern)
    # Sorted so that N_MEMBER follows the file-name order (1:50).
    pathoc = sorted(glob.glob(occ_pattern))

    print("open files...")
    if not pathen:
        raise IOError('no EN4 file matches ' + en4_pattern)
    if not pathoc:
        raise IOError('no OCCIPUT file matches ' + occ_pattern)

    # The with-statement closes both datasets even when a step below fails.
    with xr.open_mfdataset(pathen, concat_dim='N_PROF', decode_times=False,
                           preprocess=lambda ds: _subselect(ds, 'WMO_INST_TYPE'),
                           mask_and_scale=True) as EN4, \
         xr.open_mfdataset(pathoc, concat_dim='N_MEMBER', decode_times=False,
                           preprocess=lambda ds: _subselect(ds, 'STATION_TYPE'),
                           mask_and_scale=True) as OCC:

        # CORRECT Q PREFIX FOR OCCIPUT ARGO WMO
        print("correct platform identifiers...")
        _strip_q_prefixes(OCC)

        for v in _MEMBER_INVARIANT_VARS:
            OCC[v] = OCC[v].isel(N_MEMBER=0)

        # LOOKUP-ARRAY
        EN4sf = _build_en4_lookup(EN4)

        # INIT FINAL DATASET
        print("init final dataset...")
        # Size the EN4 slots from the lookup table (second axis of TEMP is
        # N_LEVELS) instead of assuming a fixed number of levels.
        fds = _init_output(OCC, EN4sf.TEMP.shape[1])

        print("search en4 matches...")
        _fill_en4_matches(fds, OCC, EN4sf, tolerance)

        print("write nc file...")
        fds.to_netcdf('/export/home1/DATA/PIRATE/COLOC_EN4/'+year+'/OCCITENS-EDW-'+year+month+'.nc')

In [None]:
# Process every requested month, one call per (year, month) pair,
# years outermost so the calls run in chronological order.
for yy, mm in [(y, m) for y in ['2014'] for m in ['05', '06']]:
    monthly_loop(yy, mm)

----------------
2014-05
open files...
correct platform identifiers...
init final dataset...
search en4 matches...
write nc file...
----------------
2014-06
open files...
correct platform identifiers...
init final dataset...
search en4 matches...
