# This notebook retrieves satellite-based data at the coordinates of each study site

In [None]:
## Load libraries
from netCDF4 import Dataset as ds
import numpy as np
from e582utils.data_read import download
import datetime
import dateutil as du 
import parser
import pytz
import dateutil.parser 
import pandas as pd

In [None]:
# Study sites keyed by long name; each entry holds the site's 'lat' and 'lon'.
# Add any further site and its coordinates here.
# NOTE(review): values look like decimal degrees with negative = west — confirm.
# NOTE(review): 'Lower_Keys_FL_USA' has the highest latitude and the easternmost longitude
# of the three Florida Keys entries — confirm the Keys names/coordinates are not swapped.
sites = {
    'Espíritu_Santo_BCS_MEX': {'lat': 24.428333, 'lon': -110.351111},
    'Loreto_Danzante_BCS_MEX': {'lat': 25.816014, 'lon': -111.262158},
    'Punta_Arenas_Ventana_BCS_MEX': {'lat': 24.043611, 'lon': -109.829167},
    'Marietas_Islands_MEX': {'lat': 20.701389, 'lon': -105.5675},
    'Isla_Gaviota_BCS_MEX': {'lat': 24.286992, 'lon': -110.338742},
    'El_Portugues_BCS_MEX': {'lat': 24.747378, 'lon': -110.678108},
    'Cabo_Pulmo_BCS_MEX': {'lat': 23.390555, 'lon': -109.416388},
    'Isla_Coronado_BCS_MEX': {'lat': 26.103709, 'lon': -111.283726},
    'Loreto_Isla_Carmen_BCS_MEX': {'lat': 26.047666, 'lon': -111.180986},
    'La_Nevera_Malpelo_COL': {'lat': 4.0025, 'lon': -81.611111},
    'Middle_Keys_FL_USA': {'lat': 24.82675, 'lon': -80.670567},
    'Upper_Keys_FL_USA': {'lat': 24.90765, 'lon': -80.800156},
    'Lower_Keys_FL_USA': {'lat': 25.180864, 'lon': -80.279053},
    'Galapagos_ECU': {'lat': 1.6755, 'lon': -91.9924},
}

In [None]:
# Three-letter site codes mapped to long site names.
# Every long name used here must be a key of the "sites" dictionary defined above.
sites_short = {
    'IES': 'Espíritu_Santo_BCS_MEX',
    'MAR': 'Marietas_Islands_MEX',
    'ELP': 'El_Portugues_BCS_MEX',
    'GAV': 'Isla_Gaviota_BCS_MEX',
    'PAV': 'Punta_Arenas_Ventana_BCS_MEX',
    'DAN': 'Loreto_Danzante_BCS_MEX',
    'CPU': 'Cabo_Pulmo_BCS_MEX',
    'COR': 'Isla_Coronado_BCS_MEX',
    'CAR': 'Loreto_Isla_Carmen_BCS_MEX',
    'MDK': 'Middle_Keys_FL_USA',
    'UPK': 'Upper_Keys_FL_USA',
    'MAL': 'La_Nevera_Malpelo_COL',
    'LWK': 'Lower_Keys_FL_USA',
    'GLP': 'Galapagos_ECU',
}

In [None]:
# Expand every short code into a record: {'long_name': ..., 'lat': ..., 'lon': ...}.
# The cell rewrites sites_short in place, so it must tolerate being re-run: on a second
# run the values are already records (dicts), and using one as a key into `sites`
# would raise "TypeError: unhashable type: 'dict'". Recover the long name from the
# record in that case so the cell is idempotent.
for short_name, entry in list(sites_short.items()):
    long_name = entry['long_name'] if isinstance(entry, dict) else entry
    # A long name missing from `sites` still raises KeyError, as before.
    sites_short[short_name] = {'long_name': long_name, **sites[long_name]}
sites_short

# Functions

In [None]:
# Midpoint date of the time span covered by one NetCDF file: the_date
import dateutil.parser 
def find_date(ncfilename):
    """Return the calendar day at the middle of a file's time coverage.

    Reads the global attributes ``time_coverage_start`` and ``time_coverage_end``
    from the NetCDF file, parses both with ``dateutil``, and takes the instant
    halfway between them.

    Parameters
    ----------
    ncfilename : path of the NetCDF file to open (read-only).

    Returns
    -------
    datetime.datetime
        Midnight of the midpoint's year/month/day, labelled with ``pytz.utc``.
        The day is taken from the midpoint exactly as parsed; no timezone
        conversion is applied before the UTC label is attached.
    """
    with ds(ncfilename, 'r') as nc_handle:
        coverage_start = getattr(nc_handle, 'time_coverage_start')
        coverage_end = getattr(nc_handle, 'time_coverage_end')
    first_instant = du.parser.parse(coverage_start)
    last_instant = du.parser.parse(coverage_end)
    midpoint = first_instant + (last_instant - first_instant) / 2.
    # Drop the time-of-day part and tag the result as UTC.
    return datetime.datetime(
        year=midpoint.year,
        month=midpoint.month,
        day=midpoint.day,
        tzinfo=pytz.utc,
    )

In [None]:
# Calendar fields of a date: day of year, week of year, month and year
def date_info(the_date):
    """Break a date into the calendar fields stored with every record.

    All four fields are read from the date's own calendar fields, so they are
    always consistent with each other. The day of year used to be obtained by
    subtracting a UTC-tagged "31 December of the previous year"; that raised
    TypeError for naive datetimes and could be off by one (even 0) for
    timezone-aware dates that are not in UTC. For UTC dates the result is
    unchanged.

    Parameters
    ----------
    the_date : datetime.datetime or datetime.date (naive or timezone-aware).

    Returns
    -------
    dict with keys
        ``days``  - day of the year, 1 for 1 January;
        ``week``  - week of the year as given by ``strftime('%W')`` (weeks start
                    on Monday; days before the first Monday are week 0);
        ``month`` - month number;
        ``year``  - year.
    """
    return dict(
        days=the_date.timetuple().tm_yday,
        week=int(the_date.strftime('%W')),
        month=the_date.month,
        year=the_date.year,
    )

In [None]:
# Read the 'par' grid, its lat/lon axes and the platform name from one NetCDF file
def par_lat_lon(infile): 
    """Load the ``par`` variable with fill cells replaced by NaN.

    Parameters
    ----------
    infile : path of the NetCDF file to open (read-only).

    Returns
    -------
    par : numpy array of the ``par`` variable, with fill cells set to NaN
          (cast to float first if the stored data is not floating point).
    lat, lon : arrays read from the ``lat`` and ``lon`` variables.
    sat : value of the file's global ``platform`` attribute.

    Notes
    -----
    Automatic masking is switched off so plain arrays are returned, while
    netCDF4's automatic scaling stays on. ``_FillValue`` is expressed in the
    variable's stored (unscaled) units, so fill cells are located on the
    unscaled data. Comparing ``_FillValue`` against the auto-scaled values
    never matches when the variable carries ``scale_factor``/``add_offset``.
    For variables without scaling the result is the same as before.
    """
    with ds(infile,'r') as ncfile:
        ncfile.set_auto_mask(False)
        par_var=ncfile.variables['par']
        par=par_var[...]
        lat=ncfile.variables['lat'][...]
        lon=ncfile.variables['lon'][...]
        sat=getattr(ncfile,'platform')
        # A variable without a declared fill value has nothing to blank out.
        fill_value=getattr(par_var,'_FillValue',None)
        if fill_value is not None:
            # Second read with scaling off: values exactly as stored in the file.
            par_var.set_auto_scale(False)
            hit = par_var[...] == fill_value
            # NaN cannot be stored in an integer array.
            if not np.issubdtype(par.dtype, np.floating):
                par = par.astype(float)
            par[hit] = np.nan 
    return par,lat,lon,sat

# Global loop

In [None]:
import glob, os
from os.path import expanduser
from pathlib import Path
home = Path(expanduser("~"))
# Path to the directory with .nc files.
# NOTE(review): when the right-hand path is absolute (e.g. has a drive letter on
# Windows) pathlib discards `home`; otherwise it is resolved below the home directory.
datapath = home / Path('E:/path...')
# '*.nc' (with the dot) so that only files with the .nc extension are picked up, not
# every name that merely ends in "nc"; sorted so the rows come out in a reproducible order.
all_files=sorted(datapath.glob('*.nc'))
data_list=[]  # one record (dict) per file and site
for infile in all_files:
    the_date=find_date(infile)  # e.g. datetime.datetime(2017, 3, 12, 0, 0, tzinfo=<UTC>)
    date_fields=date_info(the_date)  # identical for every site of this file
    par,lat,lon,sat=par_lat_lon(infile)  # grid values plus the lat/lon axes used to locate each site
    for site_name,site_dict in sites_short.items(): 
        site_lat,site_lon=site_dict['lat'],site_dict['lon']
        # Grid cell whose lat / lon is closest to the site coordinates.
        lat_idx = np.abs(lat-site_lat).argmin()
        lon_idx = np.abs(lon-site_lon).argmin()
        the_par=par[lat_idx,lon_idx]
        # Fresh dict per site so the records do not share state.
        date_dict=dict(date_fields,par=the_par,site_name=site_name,sat=sat)
        data_list.append(date_dict)

In [None]:
# One row per (file, site) record collected in data_list.
df=pd.DataFrame.from_records(data_list)
# Negative 'par' values are treated as invalid and replaced by NaN.
# Series.mask builds a new column instead of writing through df['par'].values,
# which is a read-only array when pandas Copy-on-Write is active.
hit=(df['par'] < 0).to_numpy()
df['par']=df['par'].mask(hit)
par_vals=df['par'].to_numpy()  # kept for any later cell that still refers to par_vals

## Save the results to the file "df_PAR.h5" for later work

In [None]:
# Checkpoint: write the dataframe df to 'df_PAR.h5' under the key 'df_PAR'.
# Mode 'w' starts a new file, replacing any existing one; 'table' is the storage format.
with pd.HDFStore('df_PAR.h5', mode='w') as store:
    store.put(key='df_PAR', value=df, format='table')