### Import required libraries 

In [2]:
from IPython.core.interactiveshell import InteractiveShell  #---- Output all jupyter lab inputs instead of the last one
from IPython.display import Markdown, display
InteractiveShell.ast_node_interactivity = "all"             
import glob #---------------------------------------------------- To read the files or folders in a system directory
from netCDF4 import Dataset #------------------------------------ To read nc , nc4 and hdf4 files
import numpy as np
import datetime
import h5py #---------------------------------------------------- To read hdf5 files
from scipy import spatial #-------------------------------------- To extract the values and indices of k nearest neighbors
import pandas as pd
from ast import literal_eval #----------------------------------- For literal evaluation of a string to extract python objects
from pyproj import Proj, transform #----------------------------- To interconvert different projections
import warnings #------------------------------------------------ To suppress warnings
from photutils.utils import ShepardIDWInterpolator as idw #------ To use Shepard's Inverse Distance Weighing Interpolation tool
import re #------------------------------------------------------ To replace characters in a string
warnings.simplefilter('ignore')
def printmd(string):
    """Show *string* in the notebook output area, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)



### Define Non-Iterative Functions and Variables

# Source projection: sinusoidal on a sphere of radius 6371007.181 m. It is the
# projection of the tile corner coordinates handed to extract_pixel_coordinates().
in_proj  = Proj('+proj=sinu +R=6371007.181 +nadgrids=@null +wktext')
# Target projection: EPSG:4326. extract_pixel_coordinates() unpacks the result of
# transform(in_proj, out_proj, ...) as (lon, lat).
# NOTE(review): the `init=` form is reported as deprecated by newer pyproj; replacing it
# with Proj('epsg:4326') may switch the output axis order to (lat, lon) - confirm before changing.
out_proj = Proj(init='epsg:4326')


def Times(x):
    """Extract the time part of a sounding ID as a colon-separated string.

    Everything after the first eight characters of ``str(x)`` is taken as the
    time and returned as ``HH:MM:SSSS`` (two, two, then all remaining characters).
    """
    digits = str(x)[8:]
    return ':'.join((digits[:2], digits[2:4], digits[4:]))

# Window boundaries written as HHMMSSxx integers (3000000 == 03:00:00.00); the last
# entry, 23595900, closes the final window of the day.
hours = [0, 3000000, 6000000, 9000000, 12000000, 15000000, 18000000, 21000000, 23595900]
def f(x):
    """Return the ``(lower, upper)`` window of ``hours`` that contains ``x``.

    ``x`` is a time of day encoded like the entries of ``hours`` (HHMMSSxx as an
    integer). Windows are half-open, ``lower <= x < upper``, so a time that falls
    exactly on a boundary belongs to the window that starts there. The last window
    also absorbs every remaining time of the day (``x`` up to, but excluding,
    24000000).

    Raises:
        ValueError: if ``x`` is negative or not below 24000000.
    """
    for lb, ub in zip(hours, hours[1:]):
        if lb <= x < ub:
            return lb, ub
    # Times in the last second of the day (>= 23:59:59.00) still belong to the final window.
    if hours[-1] <= x < 24000000:
        return hours[-2], hours[-1]
    raise ValueError('time {!r} is outside the range covered by `hours`'.format(x))

def format_time(t):
    """Return ``t`` without its last four characters (HH:MM:SSSSSSSS -> HH:MM:SSSS)."""
    return t[:-4]

def nn(latitude_list,longitude_list,target):
    """Return the index of the coordinate pair closest to ``target``.

    ``target`` is indexed as ``(longitude, latitude)``. Closeness is the sum of the
    absolute latitude and longitude differences, not a great-circle distance.
    """
    lon0, lat0 = target[0], target[1]
    distances = []
    for lat, lon in zip(latitude_list, longitude_list):
        distances.append(abs(lat - lat0) + abs(lon - lon0))
    return np.argmin(distances)

def tile_finder(Lat,Lon):
    """Return the tile numbers ``(H, V)`` of the first tile whose bounds contain a point.

    The lookup uses the module-level ``data`` table. Per row, column 0 holds the
    vertical tile number, column 1 the horizontal tile number, columns 2/3 the
    lower/upper longitude bound and columns 4/5 the lower/upper latitude bound.
    Bounds are inclusive; when several rows match, the first one wins.

    Args:
        Lat: latitude of the point.
        Lon: longitude of the point.

    Returns:
        ``(H, V)`` as two-character, zero-padded strings.

    Raises:
        IndexError: if no row of ``data`` contains the point.
    """
    inside = ((Lat >= data[:, 4]) & (Lat <= data[:, 5]) &
              (Lon >= data[:, 2]) & (Lon <= data[:, 3]))
    hits = np.flatnonzero(inside)
    if hits.size == 0:
        # Fail with a message that names the point instead of running off the table.
        raise IndexError('no tile in the bounds table contains lat={}, lon={}'.format(Lat, Lon))
    row = data[hits[0]]
    V = str(int(row[0])).zfill(2)
    H = str(int(row[1])).zfill(2)
    return H,V

def extract_pixel_coordinates(ULx,Uly,LRx,LRy,shape):
    """Return the longitude and latitude of every pixel centre of a tile.

    Args:
        ULx, Uly: x/y of the tile's upper-left corner, in ``in_proj`` units.
        LRx, LRy: x/y of the tile's lower-right corner, in ``in_proj`` units.
        shape: grid size as ``(rows, columns)``.

    Returns:
        ``(plon, plat)``: flattened coordinates (row by row) obtained by
        transforming the pixel centres from ``in_proj`` to ``out_proj``.
    """
    n_rows, n_cols = shape[0], shape[1]
    # linspace yields the leading pixel edges; shift by half a pixel to reach the centres
    # (towards LRx along x, downwards along y).
    x        = np.linspace(ULx, LRx, n_cols, endpoint=False) + abs((ULx-LRx)/(2*n_cols))
    # Use the `Uly` argument itself; the body previously read a differently spelled name (`ULy`).
    y        = np.linspace(Uly, LRy, n_rows, endpoint=False) - abs((Uly-LRy)/(2*n_rows))
    xx, yy   = np.meshgrid(x,y)
    plon, plat = transform(in_proj, out_proj, xx.flatten(), yy.flatten())
    return plon, plat

def temporal_interpolation(time1,val1,time2,val2,timeX):
    """Linearly interpolate between two timed values, at one-second resolution.

    Args:
        time1, time2: times of the two known values (anything ``pd.to_datetime`` parses,
            e.g. ``"HH:MM:SS"`` strings).
        val1, val2: values observed at ``time1`` and ``time2``.
        timeX: time at which the value is wanted; it must lie on a whole second
            between ``time1`` and ``time2``.

    Returns:
        The interpolated value at ``timeX``.

    Raises:
        KeyError: if ``timeX`` is not one of the one-second steps between the two times.
    """
    samples = pd.Series([val1, val2], index=pd.to_datetime([time1, time2]))
    # Lower-case 's' is the seconds alias; the upper-case 'S' spelling is deprecated in recent pandas.
    per_second = samples.resample('s').interpolate(method='linear')
    # Convert the label the same way as the index so the lookup is an exact timestamp match
    # and does not depend on how a time-only string would be parsed by .loc.
    return per_second.loc[pd.to_datetime(timeX)]

# glob returns paths in arbitrary order. Sorting makes the processing order reproducible and
# gives the folder lists a stable, name-ordered sequence for code that indexes neighbouring entries.
sif_file_list     = sorted(glob.glob('OCO2_sif/*.nc4'))    # All OCO2 SIF files
calipso_file_list = sorted(glob.glob('OCO2_calipso/*.h5')) # All OCO2-CALIPSO files
# Tile table read by tile_finder(): tile numbers and their longitude/latitude bounds.
data              = np.genfromtxt('sn_bound_10deg.txt', skip_header = 7, skip_footer = 3)

# One sub-folder per day; the folder name is compared with the SIF file's day of year.
par_folder_list   = sorted(glob.glob('MCD18A2/*'))
ref_folder_list   = sorted(glob.glob('MCD43A4/*'))

for sif_file in sif_file_list:
    # The fourth '_'-separated field of the file path is the acquisition date (yymmdd).
    sif_day         = datetime.datetime.strptime(sif_file.split('_')[3], '%y%m%d')
    sif_date        = sif_day.strftime("%Y-%m-%d")
    sif_julian_day  = sif_day.strftime("%j")
    sif             = Dataset(sif_file, mode='r')

    # Collect, from every CALIPSO match-up file of the same date, the sounding IDs whose
    # match-up distance is below 1 (the dataset name suggests kilometres).
    calipso_df_list = []
    for calipso_file in calipso_file_list:
        calipso_date = datetime.datetime.strptime(calipso_file.split('_')[5], '%y%m%d').strftime("%Y-%m-%d")
        if calipso_date != sif_date:
            continue

        # The context manager closes the HDF5 handle once the arrays are in memory.
        with h5py.File(calipso_file, mode='r') as calipso:
            calipso_ID    = calipso['OCO2_sounding_id'][:]
            calipso_dist  = calipso['matchup_distance_km'][:]
            calipso_index = calipso['matchup_Xindex'][:]

        calipso_rows = [calipso_ID[i][j] for i in range(0,len(calipso_ID))
                        for j in range(0,len(calipso_ID[0]))
                        if calipso_dist[i][j] < 1 ]
        calipso_df_list.append(pd.DataFrame(calipso_rows,columns = ['sounding_id']))

    # NOTE(review): pd.concat raises ValueError when no CALIPSO file matched this date - confirm
    # whether such days should be skipped instead.
    calipso_df = pd.concat(calipso_df_list, ignore_index = True).drop_duplicates()
    
    # Shortcuts to the three variable collections of the SIF file.
    cloud_vars = sif.groups['Cloud'].variables
    meteo_vars = sif.groups['Meteo'].variables
    root_vars  = sif.variables

    # Only soundings with a strictly positive 757 nm SIF value are kept. The mask is built
    # once from the raw values and applied to every variable so all arrays stay aligned.
    rSIF_757nm = root_vars['SIF_757nm'][:]
    positive   = rSIF_757nm > 0
    SIF_757nm  = rSIF_757nm[positive]

    # 'Cloud' group
    cloud_albedo                   = cloud_vars['albedo'                 ][:][positive]
    cloud_flag                     = cloud_vars['cloud_flag'             ][:][positive]
    cloud_co2_ratio                = cloud_vars['co2_ratio'              ][:][positive]
    cloud_delta_surface_pressure   = cloud_vars['delta_surface_pressure' ][:][positive]
    cloud_o2_ratio                 = cloud_vars['o2_ratio'               ][:][positive]

    # 'Meteo' group
    vapor_pressure_deficit         = meteo_vars['vapor_pressure_deficit' ][:][positive]
    temperature_2m                 = meteo_vars['2m_temperature'         ][:][positive]
    temperature_skin               = meteo_vars['skin_temperature'       ][:][positive]
    specific_humidity              = meteo_vars['specific_humidity'      ][:][positive]
    surface_pressure               = meteo_vars['surface_pressure'       ][:][positive]
    wind_speed                     = meteo_vars['wind_speed'             ][:][positive]

    # Root-level variables
    continuum_radiance_757nm       = root_vars['continuum_radiance_757nm'      ][:][positive]
    continuum_radiance_771nm       = root_vars['continuum_radiance_771nm'      ][:][positive]
    daily_correction_factor        = root_vars['daily_correction_factor'       ][:][positive]
    footprint                      = root_vars['footprint'                     ][:][positive]
    IGBP_index                     = root_vars['IGBP_index'                    ][:][positive]
    latitude                       = root_vars['latitude'                      ][:][positive]
    longitude                      = root_vars['longitude'                     ][:][positive]
    measurement_mode               = root_vars['measurement_mode'              ][:][positive]
    orbit_number                   = root_vars['orbit_number'                  ][:][positive]
    reduced_chi2_757nm             = root_vars['reduced_chi2_757nm'            ][:][positive]
    reduced_chi2_771nm             = root_vars['reduced_chi2_771nm'            ][:][positive]
    sensor_azimuth_angle           = root_vars['sensor_azimuth_angle'          ][:][positive]
    sensor_zenith_angle            = root_vars['sensor_zenith_angle'           ][:][positive]
    SIF_757nm_relative             = root_vars['SIF_757nm_relative'            ][:][positive]
    SIF_757nm_uncert               = root_vars['SIF_757nm_uncert'              ][:][positive]
    SIF_771nm                      = root_vars['SIF_771nm'                     ][:][positive]
    SIF_771nm_relative             = root_vars['SIF_771nm_relative'            ][:][positive]
    SIF_771nm_uncert               = root_vars['SIF_771nm_uncert'              ][:][positive]
    solar_azimuth_angle            = root_vars['solar_azimuth_angle'           ][:][positive]
    solar_zenith_angle             = root_vars['solar_zenith_angle'            ][:][positive]
    sounding_id                    = root_vars['sounding_id'                   ][:][positive]
    surface_altitude               = root_vars['surface_altitude'              ][:][positive]
    time                           = root_vars['time'                          ][:][positive]
    uncorrected_SIF_757nm          = root_vars['uncorrected_SIF_757nm'         ][:][positive]
    uncorrected_SIF_757nm_relative = root_vars['uncorrected_SIF_757nm_relative'][:][positive]
    uncorrected_SIF_771nm          = root_vars['uncorrected_SIF_771nm'         ][:][positive]
    uncorrected_SIF_771nm_relative = root_vars['uncorrected_SIF_771nm_relative'][:][positive]

    # Every array above is an in-memory copy, so the file handle can be released now
    # instead of staying open for the rest of the run.
    sif.close()
      
    # Build one tuple per retained sounding; the element order must match `column_labels` below.
    # NOTE(review): SIF_757nm[i] is listed twice (first element and again after
    # sensor_zenith_angle), so the resulting frame carries two columns named 'SIF_757nm'
    # - confirm whether the duplicate is intended.
    sif_rows  = [(SIF_757nm[i], cloud_albedo[i], cloud_flag[i], cloud_co2_ratio[i], cloud_delta_surface_pressure[i], cloud_o2_ratio[i], vapor_pressure_deficit[i],
                  temperature_2m[i], temperature_skin[i], specific_humidity[i], surface_pressure[i], wind_speed[i], continuum_radiance_757nm[i],
                  continuum_radiance_771nm[i],daily_correction_factor[i], footprint[i], IGBP_index[i], latitude[i], longitude[i], measurement_mode[i],
                  orbit_number[i], reduced_chi2_757nm[i],reduced_chi2_771nm[i], sensor_azimuth_angle[i], sensor_zenith_angle[i], SIF_757nm[i], SIF_757nm_relative[i],
                  SIF_757nm_uncert[i], SIF_771nm[i],SIF_771nm_relative[i], SIF_771nm_uncert[i], solar_azimuth_angle[i], solar_zenith_angle[i], sounding_id[i],
                  surface_altitude[i], time[i], uncorrected_SIF_757nm[i], uncorrected_SIF_757nm_relative[i], uncorrected_SIF_771nm[i], uncorrected_SIF_771nm_relative[i])
                  for i in range(0,len(sounding_id))]

    # Column names, in the same order as the tuple elements above.
    column_labels = ['SIF_757nm', 'cloud_albedo', 'cloud_flag', 'cloud_co2_ratio', 'cloud_delta_surface_pressure', 'cloud_o2_ratio', 'vapor_pressure_deficit',
                     'temperature_2m', 'temperature_skin', 'specific_humidity', 'surface_pressure', 'wind_speed', 'continuum_radiance_757nm',
                     'continuum_radiance_771nm','daily_correction_factor', 'footprint', 'IGBP_index', 'latitude', 'longitude', 'measurement_mode',
                     'orbit_number', 'reduced_chi2_757nm','reduced_chi2_771nm', 'sensor_azimuth_angle', 'sensor_zenith_angle', 'SIF_757nm',
                     'SIF_757nm_relative', 'SIF_757nm_uncert', 'SIF_771nm','SIF_771nm_relative', 'SIF_771nm_uncert', 'solar_azimuth_angle', 'solar_zenith_angle',
                     'sounding_id', 'surface_altitude', 'time','uncorrected_SIF_757nm', 'uncorrected_SIF_757nm_relative', 'uncorrected_SIF_771nm',
                     'uncorrected_SIF_771nm_relative']

    sif_df                         = pd.DataFrame(sif_rows,columns = column_labels)
    # NOTE(review): calipso_df only holds the 'sounding_id' key column and the join is a left
    # join, so every SIF row is kept and no column is added - if the goal is to keep only
    # soundings with a CALIPSO match-up, an inner join may be what was meant; confirm.
    calipso_sif_merger             = pd.merge(sif_df, calipso_df, on = ['sounding_id'], how = 'left')
    # First eight characters of the sounding ID reformatted as YYYY-MM-DD.
    calipso_sif_merger['Date']     = calipso_sif_merger['sounding_id'].map(lambda x: '-'.join([str(x)[:4],str(x)[4:6],str(x)[6:]])[:10])    
    # Remaining characters of the sounding ID reformatted by Times().
    calipso_sif_merger['SIF_Time'] = calipso_sif_merger['sounding_id'].map(lambda x: Times(x))
    # tile_finder() returns (H, V); it is evaluated once per row for each of the two columns.
    calipso_sif_merger['tile_h'  ] = calipso_sif_merger.apply(lambda x: tile_finder(x['latitude'], x['longitude'])[0], axis=1)
    calipso_sif_merger['tile_v'  ] = calipso_sif_merger.apply(lambda x: tile_finder(x['latitude'], x['longitude'])[1], axis=1)
    # Drop every row that has a missing value in any column, then save the day's table.
    calipso_sif_merger             = calipso_sif_merger.dropna(how='any')
    calipso_sif_merger.to_csv('df_sif_{}.csv'.format(sif_date), index=False)
    
    # Collapse the table to one row per (tile_h, tile_v); every other column becomes the
    # list of that tile's values, so the loops below can process one tile at a time.
    grp         = calipso_sif_merger.groupby(['tile_h', 'tile_v']).agg(lambda x: list(x))
    grp         = grp.reset_index()
    l_ungrouped = len(calipso_sif_merger)  # number of footprints
    l_grouped   = len(grp)                 # number of distinct tiles
    df          = grp.copy()
    
    # Progress banner for this day.
    print('------------------------------------------------------------------------------------------------------------------------------------------------')
    printmd('**For {}, there are {} sif footprints scattered over {} tiles.**'.format(sif_date, l_ungrouped, l_grouped))
    print('------------------------------------------------------------------------------------------------------------------------------------------------')  
    
    # For every tile of the day: read the tile's PAR layers, interpolate them spatially
    # (inverse-distance weighting over the 3 nearest pixels) at each footprint, then
    # interpolate in time between the two layers that bracket the footprint's time.
    print('PAR Processing Started.\n')
    shape              = (240,240)
    Each_Par_Tile_Data = []
    # Suffixes of the eight layers read from each PAR file ('GMT_<slot>_PAR').
    par_slots          = ('0000', '0300', '0600', '0900', '1200', '1500', '1800', '2100')

    for index,h_sif,v_sif,sif_lon,sif_lat,sif_time in zip(df.index,df['tile_h'],df['tile_v'],df['longitude'],df['latitude'],df['SIF_Time']):
        print('h{}v{}'.format(h_sif,v_sif), flush = True, sep=',', end=' ')

        for folder_number in range(len(par_folder_list)):
            # The folder name (second path component) is matched against the SIF day of year.
            par_julian_day = par_folder_list[folder_number].split('/')[1]
            if sif_julian_day != par_julian_day:
                continue

            for par_file in glob.glob(par_folder_list[folder_number]+'/*.hdf'):
                # Third '.'-separated field of the path is the tile tag, e.g. 'h02v07'.
                tile_tag = par_file.split('.')[2]
                if (tile_tag[1:3] != h_sif) or (tile_tag[4:6] != v_sif):
                    continue

                # Load everything needed from the file, then release the handle.
                with Dataset(par_file, mode='r') as par:
                    par_grids = {slot: par.variables['GMT_{}_PAR'.format(slot)][:].flatten() for slot in par_slots}
                    struct    = getattr(par, 'StructMetadata.0')

                # Tile corner coordinates are parsed out of the metadata text.
                struct1   = struct[struct.find('UpperLeftPointMtrs'): struct.find('LowerRightMtrs')][19:-3]
                struct2   = struct[struct.find('LowerRightMtrs')    : struct.find('Projection')    ][15:-3]
                ULx, ULy  = literal_eval(struct1)
                LRx, LRy  = literal_eval(struct2)
                par_lon,par_lat = extract_pixel_coordinates(ULx,ULy,LRx,LRy,shape)
                tree      = spatial.KDTree(list(zip(par_lon, par_lat)))

                for sub in range(len(sif_time)):
                    target       = (sif_lat[sub] , sif_lon[sub])
                    sif_time_sub = re.sub(':', '', sif_time[sub])
                    timeX        = pd.to_datetime(sif_time_sub.ljust(8, "0"), format="%H%M%S%f").strftime("%H:%M:%S")

                    # Bracketing window, looked up once per footprint.
                    lower_bound, upper_bound = f(int(sif_time_sub))
                    lower_slot        = str(lower_bound).zfill(8)[:4]
                    # The closing bound of the day has no layer in this file.
                    wraps_to_next_day = (upper_bound == 23595900)
                    upper_slot        = str(upper_bound).zfill(8)[:6 if wraps_to_next_day else 4]

                    # Indices of the 3 nearest pixels; the tree is keyed (lon, lat) while the
                    # interpolator is fed (lat, lon) pairs, matching `target`.
                    neigh5        = tree.query([(sif_lon[sub], sif_lat[sub])], k=3)[1][0]
                    coors_for_idw = [(par_lat[i], par_lon[i]) for i in neigh5]

                    # Only the two bracketing layers are interpolated spatially.
                    lower_bound_value = idw(coors_for_idw, [par_grids[lower_slot][i] for i in neigh5])(target)

                    if wraps_to_next_day:
                        # Decide on the window itself, not on the interpolated value: a genuine
                        # PAR value of 0 must not be mistaken for "layer missing".
                        # The 00:00 layer of the following folder closes the last window; the
                        # value stays 0 when that folder has no file for this tile.
                        # NOTE(review): assumes par_folder_list[folder_number+1] exists and is
                        # the following day - confirm the list ordering.
                        upper_bound_value = 0
                        for next_file in glob.glob(par_folder_list[folder_number+1]+'/*.hdf'):
                            next_tag = next_file.split('.')[2]
                            if (next_tag[1:3] == h_sif) and (next_tag[4:6] == v_sif):
                                with Dataset(next_file, mode='r') as next_par:
                                    next_grid = next_par.variables['GMT_0000_PAR'][:].flatten()
                                upper_bound_value = idw(coors_for_idw, [next_grid[i] for i in neigh5])(target)
                                break
                    else:
                        upper_bound_value = idw(coors_for_idw, [par_grids[upper_slot][i] for i in neigh5])(target)

                    time1a = pd.to_datetime(lower_slot.ljust(8, "0"), format="%H%M%S%f").strftime("%H:%M:%S")
                    time2a = pd.to_datetime(upper_slot.ljust(8, "0"), format="%H%M%S%f").strftime("%H:%M:%S")
                    valX   = temporal_interpolation(time1a,lower_bound_value,time2a,upper_bound_value,timeX)
                    Each_Par_Tile_Data.append((sif_lat[sub],sif_lon[sub],valX))

    DF = pd.DataFrame(np.array(Each_Par_Tile_Data), columns=['latitude','longitude', 'par'])
    DF.to_csv('df_par_{}.csv'.format(sif_date),index=False)
    print('\n')
    
    
    # For every tile of the day: read the two nadir reflectance bands and take, for each
    # footprint, the value of the nearest pixel.
    print('REF Processing Started.\n')
    shape2              = (2400,2400)
    Each_Ref_Tile_Data  = []
    for index,h_sif,v_sif,sif_lon,sif_lat,sif_time in zip(df.index,df['tile_h'],df['tile_v'],df['longitude'],df['latitude'],df['SIF_Time']):
        print('h{}v{}'.format(h_sif,v_sif), flush = True, sep=',', end=' ')

        for ref_folder in ref_folder_list:
            # The folder name (second path component) is matched against the SIF day of year.
            if sif_julian_day != ref_folder.split('/')[1]:
                continue

            for ref_file in glob.glob(ref_folder+'/*.hdf'):
                # Third '.'-separated field of the path is the tile tag, e.g. 'h02v07'.
                tile_tag = ref_file.split('.')[2]
                if (tile_tag[1:3] != h_sif) or (tile_tag[4:6] != v_sif):
                    continue

                # Load everything needed from the file, then release the handle.
                with Dataset(ref_file, mode='r') as ref:
                    nrb1   = ref.variables['Nadir_Reflectance_Band1'][:].flatten()
                    nrb2   = ref.variables['Nadir_Reflectance_Band2'][:].flatten()
                    struct = getattr(ref, 'StructMetadata.0')

                # Tile corner coordinates are parsed out of the metadata text.
                struct1    = struct[struct.find('UpperLeftPointMtrs'): struct.find('LowerRightMtrs')][19:-3]
                struct2    = struct[struct.find('LowerRightMtrs')    : struct.find('Projection')    ][15:-3]
                ULx, ULy   = literal_eval(struct1)
                LRx, LRy   = literal_eval(struct2)
                ref_lon,ref_lat  = extract_pixel_coordinates(ULx,ULy,LRx,LRy,shape2)
                tree       = spatial.KDTree(list(zip(ref_lon, ref_lat)))

                for sub in range(len(sif_time)):
                    # Index of the single nearest pixel (the tree is keyed (lon, lat)).
                    ind = tree.query([(sif_lon[sub],sif_lat[sub])], k=1)[1][0]
                    Each_Ref_Tile_Data.append((sif_lat[sub],sif_lon[sub],nrb1[ind],nrb2[ind]))


    DG = pd.DataFrame(np.array(Each_Ref_Tile_Data),columns=['latitude','longitude','nrb1','nrb2'])
    DG.to_csv('df_ref_{}.csv'.format(sif_date),index=False)

------------------------------------------------------------------------------------------------------------------------------------------------


**For 2018-05-01, there are 76530 sif footprints scattered over 103 tiles.**

------------------------------------------------------------------------------------------------------------------------------------------------
PAR Processing Started.

h02v07 h03v06 h07v06 h08v03 h08v05 h09v04 h10v02 h10v03 h10v05 h10v06 h10v08 h10v09 h10v10 h11v02 h11v03 h11v04 h11v11 h12v01 h12v02 h12v03 h12v04 h12v08 h12v09 h12v12 h12v13 h13v01 h13v02 h13v03 h13v04 h13v10 h13v11 h13v13 h14v01 h14v02 h15v01 h16v01 h16v02 h17v00 h17v01 h17v06 h17v07 h17v08 h18v00 h18v01 h18v02 h18v03 h18v04 h19v01 h19v02 h19v03 h19v05 h19v06 h19v07 h20v01 h20v02 h20v03 h20v04 h20v05 h20v08 h20v09 h20v10 h20v11 h20v12 h21v02 h21v03 h21v04 h21v06 h21v07 h22v02 h22v04 h22v05 h22v07 h22v08 h22v10 h23v02 h23v03 h23v04 h23v05 h23v06 h24v02 h24v03 h24v04 h24v05 h25v03 h25v04 h25v06 h26v04 h26v05 h27v06 h27v12 h28v05 h28v06 h28v11 h29v07 h29v08 h29v10 h29v11 h29v12 h30v09 h30v11 h31v11 h32v09 h34v08 

REF Processing Started.

h02v07 h03v06 h07v06 h08v03 h08v05 h09v04 h10v02 h10v03 h10v05 h10v06 h10v08 h10v0

**For 2018-05-02, there are 83236 sif footprints scattered over 108 tiles.**

------------------------------------------------------------------------------------------------------------------------------------------------
PAR Processing Started.

h07v05 h08v03 h08v04 h08v07 h09v02 h09v03 h09v05 h09v06 h09v07 h10v02 h10v04 h11v02 h11v03 h11v07 h11v08 h11v09 h12v02 h12v03 h12v04 h12v09 h12v10 h12v11 h13v02 h13v03 h14v01 h14v02 h14v09 h14v10 h15v01 h16v01 h16v02 h16v06 h16v07 h17v00 h17v01 h17v02 h17v03 h17v04 h18v00 h18v01 h18v02 h18v03 h18v04 h18v05 h18v06 h18v07 h18v08 h19v01 h19v02 h19v03 h19v04 h19v08 h19v09 h19v10 h19v11 h20v01 h20v02 h20v03 h20v04 h20v05 h20v06 h21v02 h21v03 h21v04 h21v05 h21v07 h21v08 h21v09 h21v10 h22v02 h22v03 h22v04 h22v05 h22v06 h23v02 h23v03 h23v05 h23v07 h24v03 h24v04 h24v05 h24v06 h25v03 h25v04 h25v05 h25v06 h25v07 h26v03 h26v04 h26v06 h26v12 h27v05 h27v06 h27v07 h27v11 h28v07 h28v08 h28v09 h28v11 h28v12 h28v13 h29v06 h29v12 h30v11 h31v08 h31v09 h31v10 h33v09 

REF Processing Started.

h07v05 h08v03 h08v04 h08v07 h09v02 h09v03 h09v0

**For 2018-05-03, there are 83471 sif footprints scattered over 105 tiles.**

------------------------------------------------------------------------------------------------------------------------------------------------
PAR Processing Started.

h03v10 h07v07 h08v05 h08v06 h08v07 h09v04 h10v02 h10v03 h10v05 h10v06 h10v08 h10v09 h10v10 h11v02 h11v03 h11v04 h11v10 h11v11 h12v01 h12v02 h12v03 h12v12 h13v01 h13v02 h13v03 h13v04 h13v09 h13v10 h13v11 h13v13 h14v01 h14v02 h14v03 h15v01 h15v02 h16v00 h16v01 h16v02 h17v00 h17v01 h17v04 h17v05 h17v06 h17v07 h17v08 h18v00 h18v01 h18v02 h18v03 h18v04 h19v01 h19v02 h19v03 h19v05 h19v06 h19v07 h20v01 h20v02 h20v03 h20v04 h20v07 h20v08 h20v09 h20v10 h20v11 h20v12 h21v02 h21v04 h21v05 h21v06 h22v02 h22v03 h22v04 h22v05 h22v06 h22v07 h22v08 h22v11 h23v02 h23v03 h23v04 h23v05 h23v06 h24v02 h24v03 h24v05 h25v03 h25v04 h25v05 h25v08 h26v04 h26v05 h26v06 h27v07 h27v08 h27v09 h27v12 h28v06 h29v07 h29v10 h29v11 h30v08 h30v09 h31v07 h32v09 

REF Processing Started.

h03v10 h07v07 h08v05 h08v06 h08v07 h09v04 h10v02 h10v03 h10v05 h10v0

In [None]:
### Merge SIF, PAR and Reflectance together and write data into NETCDF4 format

# The three lists are consumed pairwise with zip(), and glob returns paths in arbitrary
# order. Sorting gives each list a deterministic, name-ordered sequence.
# NOTE(review): pairing still assumes the three folders hold the same set of days - confirm.
sif_list = sorted(glob.glob('Processed_Sif/*.csv'))
par_list = sorted(glob.glob('Processed_Par/*.csv'))
ref_list = sorted(glob.glob('Processed_Ref/*.csv'))

for i_sif, i_par, i_ref in zip(sif_list, par_list, ref_list):
    di = pd.read_csv(i_sif)
    di = di.rename({'Unnamed: 0':'index'}, axis=1)
    di = di.set_index(['index'])

    dj       = pd.read_csv(i_par)
    dj       = dj.set_index(['index'])
    dj       = dj.sort_index()
    dj.index = dj.index.map(int)
    
    dk = pd.read_csv(i_ref)
    dk = dk.set_index(['index'])
    dk = dk.sort_index()
    dk.index = dk.index.map(int)
    
    dM1      = di .merge(dj, left_index=True, right_index=True, how='outer')
    dM2      = dM1.merge(dk, left_index=True, right_index=True, how='outer')

    df            = dM2.copy()
    L             = len(df)
    file          = Dataset('RESULT_FILES/sif_par_ref_{}.nc4'.format(i_sif[12:-4]), 'w', format='NETCDF4')
    group_sif     = file.createGroup("Group_SIF"    )
    group_par     = file.createGroup("Group_PAR"    )
    # Create the "Group_REF" group on the output file.
    # NOTE(review): `file` is opened above this section -- presumably a
    # netCDF4 Dataset opened for writing; confirm against the code above.
    group_ref     = file.createGroup("Group_REF"    )



    # Declare one dimension per output field. Every dimension is created on
    # `file` itself (not on a sub-group) and has length L.
    # NOTE(review): L is set above this section -- presumably the number of
    # rows in df; confirm.
    # The returned dimension objects (v*) are not referenced again in this
    # section; the createVariable calls further down name dimensions by string.
    vSIF_757nm                          = file.createDimension('SIF_757nm', L)
    vcloud_albedo                       = file.createDimension('cloud_albedo', L)
    vcloud_flag                         = file.createDimension('cloud_flag', L)
    vcloud_co2_ratio                    = file.createDimension('cloud_co2_ratio', L)
    vcloud_delta_surface_pressure       = file.createDimension('cloud_delta_surface_pressure', L)
    vcloud_o2_ratio                     = file.createDimension('cloud_o2_ratio', L)
    vvapor_pressure_deficit             = file.createDimension('vapor_pressure_deficit', L)
    vtemperature_2m                     = file.createDimension('temperature_2m', L)
    vtemperature_skin                   = file.createDimension('temperature_skin', L)
    vspecific_humidity                  = file.createDimension('specific_humidity', L)
    vsurface_pressure                   = file.createDimension('surface_pressure',L)
    vwind_speed                         = file.createDimension('wind_speed',L)
    vcontinuum_radiance_757nm           = file.createDimension('continuum_radiance_757nm',L)
    vcontinuum_radiance_771nm           = file.createDimension('continuum_radiance_771nm',L)
    vdaily_correction_factor            = file.createDimension('daily_correction_factor',L)
    vfootprint                          = file.createDimension('footprint',L)
    vIGBP_index                         = file.createDimension('IGBP_index',L)
    vlatitude                           = file.createDimension('latitude',L)
    vlongitude                          = file.createDimension('longitude',L)
    vmeasurement_mode                   = file.createDimension('measurement_mode',L)
    vorbit_number                       = file.createDimension('orbit_number',L)
    vreduced_chi2_757nm                 = file.createDimension('reduced_chi2_757nm',L)
    vreduced_chi2_771nm                 = file.createDimension('reduced_chi2_771nm',L)
    vsensor_azimuth_angle               = file.createDimension('sensor_azimuth_angle',L)
    vsensor_zenith_angle                = file.createDimension('sensor_zenith_angle',L)
    vSIF_757nm_relative                 = file.createDimension('SIF_757nm_relative',L)
    vSIF_757nm_uncert                   = file.createDimension('SIF_757nm_uncert',L)
    vSIF_771nm                          = file.createDimension('SIF_771nm',L)
    vSIF_771nm_relative                 = file.createDimension('SIF_771nm_relative',L)
    vSIF_771nm_uncert                   = file.createDimension('SIF_771nm_uncert',L)
    vsolar_azimuth_angle                = file.createDimension('solar_azimuth_angle',L)
    vsolar_zenith_angle                 = file.createDimension('solar_zenith_angle',L)
    vsounding_id                        = file.createDimension('sounding_id',L)
    vsurface_altitude                   = file.createDimension('surface_altitude',L)
    vtime                               = file.createDimension('time',L)
    vuncorrected_SIF_757nm              = file.createDimension('uncorrected_SIF_757nm',L)
    vuncorrected_SIF_757nm_relative     = file.createDimension('uncorrected_SIF_757nm_relative',L)
    vuncorrected_SIF_771nm              = file.createDimension('uncorrected_SIF_771nm',L)
    vuncorrected_SIF_771nm_relative     = file.createDimension('uncorrected_SIF_771nm_relative',L)
    vPAR                                = file.createDimension('PAR' ,L)
    vNRB1                               = file.createDimension('NRB1',L)
    vNRB2                               = file.createDimension('NRB2',L)



    # Create one float64 variable per field, each 1-D along the dimension that
    # shares its name.
    # Names given as a path ('/Group_SIF/...', '/Group_PAR/...',
    # '/Group_REF/...') place the variable inside that group. footprint,
    # latitude, longitude and time are given without a group prefix, so they
    # are created directly on `file`.
    # NOTE(review): every field is declared np.float64, including ones whose
    # names suggest identifiers or flags (sounding_id, cloud_flag,
    # measurement_mode, ...) -- confirm this is intended.
    vvSIF_757nm                         = file.createVariable('/Group_SIF/SIF_757nm',np.float64, ('SIF_757nm',))
    vvcloud_albedo                      = file.createVariable('/Group_SIF/cloud_albedo', np.float64, ('cloud_albedo',))
    vvcloud_flag                        = file.createVariable('/Group_SIF/cloud_flag', np.float64, ('cloud_flag',))
    vvcloud_co2_ratio                   = file.createVariable('/Group_SIF/cloud_co2_ratio', np.float64,('cloud_co2_ratio',))
    vvcloud_delta_surface_pressure      = file.createVariable('/Group_SIF/cloud_delta_surface_pressure', np.float64,('cloud_delta_surface_pressure',))
    vvcloud_o2_ratio                    = file.createVariable('/Group_SIF/cloud_o2_ratio', np.float64,('cloud_o2_ratio',))
    vvvapor_pressure_deficit            = file.createVariable('/Group_SIF/vapor_pressure_deficit', np.float64,('vapor_pressure_deficit',))
    vvtemperature_2m                    = file.createVariable('/Group_SIF/temperature_2m', np.float64,('temperature_2m',))
    vvtemperature_skin                  = file.createVariable('/Group_SIF/temperature_skin', np.float64,('temperature_skin',))
    vvspecific_humidity                 = file.createVariable('/Group_SIF/specific_humidity', np.float64,('specific_humidity',))
    vvsurface_pressure                  = file.createVariable('/Group_SIF/surface_pressure',np.float64,('surface_pressure',))
    vvwind_speed                        = file.createVariable('/Group_SIF/wind_speed',np.float64,('wind_speed',))
    vvcontinuum_radiance_757nm          = file.createVariable('/Group_SIF/continuum_radiance_757nm',np.float64,('continuum_radiance_757nm',))
    vvcontinuum_radiance_771nm          = file.createVariable('/Group_SIF/continuum_radiance_771nm',np.float64,('continuum_radiance_771nm',))
    vvdaily_correction_factor           = file.createVariable('/Group_SIF/daily_correction_factor',np.float64,('daily_correction_factor',))
    # No group prefix: created on `file` itself.
    vvfootprint                         = file.createVariable('footprint',np.float64,('footprint',))
    vvIGBP_index                        = file.createVariable('/Group_SIF/IGBP_index',np.float64,('IGBP_index',))
    # No group prefix: latitude and longitude are created on `file` itself.
    vvlatitude                          = file.createVariable('latitude',np.float64,('latitude',))
    vvlongitude                         = file.createVariable('longitude',np.float64,('longitude',))
    vvmeasurement_mode                  = file.createVariable('/Group_SIF/measurement_mode',np.float64,('measurement_mode',))
    vvorbit_number                      = file.createVariable('/Group_SIF/orbit_number',np.float64,('orbit_number',))
    vvreduced_chi2_757nm                = file.createVariable('/Group_SIF/reduced_chi2_757nm',np.float64,('reduced_chi2_757nm',))
    vvreduced_chi2_771nm                = file.createVariable('/Group_SIF/reduced_chi2_771nm',np.float64,('reduced_chi2_771nm',))
    vvsensor_azimuth_angle              = file.createVariable('/Group_SIF/sensor_azimuth_angle',np.float64,('sensor_azimuth_angle',))
    vvsensor_zenith_angle               = file.createVariable('/Group_SIF/sensor_zenith_angle',np.float64,('sensor_zenith_angle',))
    vvSIF_757nm_relative                = file.createVariable('/Group_SIF/SIF_757nm_relative',np.float64,('SIF_757nm_relative',))
    vvSIF_757nm_uncert                  = file.createVariable('/Group_SIF/SIF_757nm_uncert',np.float64,('SIF_757nm_uncert',))
    vvSIF_771nm                         = file.createVariable('/Group_SIF/SIF_771nm',np.float64,('SIF_771nm',))
    vvSIF_771nm_relative                = file.createVariable('/Group_SIF/SIF_771nm_relative',np.float64,('SIF_771nm_relative',))
    vvSIF_771nm_uncert                  = file.createVariable('/Group_SIF/SIF_771nm_uncert',np.float64,('SIF_771nm_uncert',))
    vvsolar_azimuth_angle               = file.createVariable('/Group_SIF/solar_azimuth_angle',np.float64,('solar_azimuth_angle',))
    vvsolar_zenith_angle                = file.createVariable('/Group_SIF/solar_zenith_angle',np.float64,('solar_zenith_angle',))
    vvsounding_id                       = file.createVariable('/Group_SIF/sounding_id',np.float64,('sounding_id',))
    vvsurface_altitude                  = file.createVariable('/Group_SIF/surface_altitude',np.float64,('surface_altitude',))
    # No group prefix: created on `file` itself.
    vvtime                              = file.createVariable('time',np.float64,('time',))
    vvuncorrected_SIF_757nm             = file.createVariable('/Group_SIF/uncorrected_SIF_757nm',np.float64,('uncorrected_SIF_757nm',))
    vvuncorrected_SIF_757nm_relative    = file.createVariable('/Group_SIF/uncorrected_SIF_757nm_relative',np.float64,('uncorrected_SIF_757nm_relative',))
    vvuncorrected_SIF_771nm             = file.createVariable('/Group_SIF/uncorrected_SIF_771nm',np.float64,('uncorrected_SIF_771nm',))
    vvuncorrected_SIF_771nm_relative    = file.createVariable('/Group_SIF/uncorrected_SIF_771nm_relative',np.float64,('uncorrected_SIF_771nm_relative',))
    # PAR goes into Group_PAR; NRB1 and NRB2 go into Group_REF.
    vvPAR                               = file.createVariable('/Group_PAR/PAR' ,np.float64,('PAR' ,))
    vvNRB1                              = file.createVariable('/Group_REF/NRB1',np.float64,('NRB1',))
    vvNRB2                              = file.createVariable('/Group_REF/NRB2',np.float64,('NRB2',))


    # Copy each df column into its variable; `[:]` assigns the whole array.
    # NOTE(review): df is built above this section -- presumably a pandas
    # DataFrame whose columns each have L entries; confirm.
    vvSIF_757nm[:]                      = np.array(df['SIF_757nm'])
    vvcloud_albedo[:]                   = np.array(df['cloud_albedo'])
    vvcloud_flag[:]                     = np.array(df['cloud_flag'])
    vvcloud_co2_ratio[:]                = np.array(df['cloud_co2_ratio'])
    vvcloud_delta_surface_pressure[:]   = np.array(df['cloud_delta_surface_pressure'])
    vvcloud_o2_ratio[:]                 = np.array(df['cloud_o2_ratio'])
    vvvapor_pressure_deficit[:]         = np.array(df['vapor_pressure_deficit'])
    vvtemperature_2m[:]                 = np.array(df['temperature_2m'])
    vvtemperature_skin[:]               = np.array(df['temperature_skin'])
    vvspecific_humidity[:]              = np.array(df['specific_humidity'])
    vvsurface_pressure[:]               = np.array(df['surface_pressure'])
    vvwind_speed[:]                     = np.array(df['wind_speed'])
    vvcontinuum_radiance_757nm[:]       = np.array(df['continuum_radiance_757nm'])
    vvcontinuum_radiance_771nm[:]       = np.array(df['continuum_radiance_771nm'])
    vvdaily_correction_factor[:]        = np.array(df['daily_correction_factor'])
    vvfootprint[:]                      = np.array(df['footprint'])
    vvIGBP_index[:]                     = np.array(df['IGBP_index'])
    vvlatitude[:]                       = np.array(df['latitude'])
    vvlongitude[:]                      = np.array(df['longitude'])
    vvmeasurement_mode[:]               = np.array(df['measurement_mode'])
    vvorbit_number[:]                   = np.array(df['orbit_number'])
    vvreduced_chi2_757nm[:]             = np.array(df['reduced_chi2_757nm'])
    vvreduced_chi2_771nm[:]             = np.array(df['reduced_chi2_771nm'])
    vvsensor_azimuth_angle[:]           = np.array(df['sensor_azimuth_angle'])
    vvsensor_zenith_angle[:]            = np.array(df['sensor_zenith_angle'])
    vvSIF_757nm_relative[:]             = np.array(df['SIF_757nm_relative'])
    vvSIF_757nm_uncert[:]               = np.array(df['SIF_757nm_uncert'])
    vvSIF_771nm[:]                      = np.array(df['SIF_771nm'])
    vvSIF_771nm_relative[:]             = np.array(df['SIF_771nm_relative'])
    vvSIF_771nm_uncert[:]               = np.array(df['SIF_771nm_uncert'])
    vvsolar_azimuth_angle[:]            = np.array(df['solar_azimuth_angle'])
    vvsolar_zenith_angle[:]             = np.array(df['solar_zenith_angle'])
    vvsounding_id[:]                    = np.array(df['sounding_id'])
    vvsurface_altitude[:]               = np.array(df['surface_altitude'])
    vvtime[:]                           = np.array(df['time'])
    vvuncorrected_SIF_757nm[:]          = np.array(df['uncorrected_SIF_757nm'])
    vvuncorrected_SIF_757nm_relative[:] = np.array(df['uncorrected_SIF_757nm_relative'])
    vvuncorrected_SIF_771nm[:]          = np.array(df['uncorrected_SIF_771nm'])
    vvuncorrected_SIF_771nm_relative[:] = np.array(df['uncorrected_SIF_771nm_relative'])
    # The last three columns are read with lowercase keys ('par', 'nrb1',
    # 'nrb2'), unlike the upper-case variable names PAR / NRB1 / NRB2.
    vvPAR [:]                           = np.array(df['par'])
    vvNRB1[:]                           = np.array(df['nrb1'])
    vvNRB2[:]                           = np.array(df['nrb2'])

    # Close the output file once every variable has been assigned.
    file.close()
    