In [None]:
#Parameters
# Switches read by the cells below.

# Column of the .dat files that is regridded and written to netCDF.
VAR_NAME = 'Tmin_N'

# If True, the temporary directory `tmp_path` is deleted in the last cell.
CLEAN_UP_ZIP = False

# If True (and LOCAL is False), the zip archives may be extracted into `tmp_path`.
UNZIP = False

# If True, the daily fields are aggregated to monthly values and written to a
# single file; if False, the daily fields are written to one file per month.
MONTHLY = True

# If False, the zip archives are listed from `dpath_dat`; if True, that step is
# skipped and the .dat files are simply globbed from `tmp_path`.
LOCAL = True

`VAR_NAME` should be one of the columns available in the `.dat` files:

['Agent', 'Lat', 'Longt', 'Date', 'MSLP', 'PET', 'Rain', 'RH', 'SoilM',
       'ETmp', 'Rad', 'TMax', 'Tmin', 'VP', 'Wind', 'Rain_bc', 'Tmax_N',
       'Tmin_N']

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt

In [3]:
from datetime import datetime

In [4]:
from subprocess import call

In [5]:
import pathlib
import shutil

In [6]:
import numpy as np
import pandas as pd

In [7]:
import xarray as xr

In [8]:
xr.__version__

'0.15.0'

In [9]:
import pyresample
from pyresample import geometry

In [10]:
pyresample.__version__

'1.14.0'

In [11]:
HOME = pathlib.Path.home()

In [12]:
# directory that is searched for the *.zip archives when LOCAL is False
dpath_dat = HOME / 'drives' / 'well_groups' / 'CLIMATE' / 'vcsn_data'

In [13]:
dpath_dat

PosixPath('/home/nicolasf/drives/well_groups/CLIMATE/vcsn_data')

In [14]:
PWD = pathlib.Path.cwd()

In [15]:
# `tmp` folder next to the current working directory: the archives are
# extracted into it and the *.dat files are read from it
tmp_path = PWD.parent / 'tmp'

In [16]:
# Extract the zip archives into `tmp_path`; skipped entirely when LOCAL is True.
if not LOCAL:
    lfiles_zip = sorted(dpath_dat.glob("*.zip"))

    ### selects years
    # NOTE(review): the slice assumes exactly one archive per year, the first
    # one being 1960 -- confirm against the content of `dpath_dat`.
    lfiles_zip = lfiles_zip[1979 - 1960:2019 - 1960 + 1]

    # The archives are only extracted when `tmp_path` does not exist yet, so an
    # already populated directory is never extracted over.
    if UNZIP and not tmp_path.exists():
        tmp_path.mkdir(parents=True)
        for fname in lfiles_zip:
            # Argument list instead of a shell string: paths containing spaces
            # or shell metacharacters reach unzip unchanged.
            retcode = call(["unzip", str(fname), "-d", str(tmp_path)])
            if retcode != 0:
                print(f"WARNING: unzip returned exit code {retcode} for {fname}")

### list files 

In [17]:
dat_files = list(tmp_path.glob("*.dat"))

In [18]:
dat_files.sort()

In [19]:
dat_files[:10]

[PosixPath('/home/nicolasf/operational/VCSN/tmp/19790101_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790102_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790103_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790104_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790105_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790106_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790107_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790108_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790109_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/19790110_vcsn.dat')]

In [20]:
dat_files[-10:]

[PosixPath('/home/nicolasf/operational/VCSN/tmp/20191222_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191223_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191224_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191225_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191226_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191227_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191228_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191229_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191230_vcsn.dat'),
 PosixPath('/home/nicolasf/operational/VCSN/tmp/20191231_vcsn.dat')]

In [21]:
# number of daily files found
len(dat_files)

14975

In [22]:
len(dat_files)

14975

In [23]:
dat_files[0]

PosixPath('/home/nicolasf/operational/VCSN/tmp/19790101_vcsn.dat')

In [24]:
dat_files[-1]

PosixPath('/home/nicolasf/operational/VCSN/tmp/20191231_vcsn.dat')

### get the agents and define the swaths and grid, all taken from the LAST DAT file 

In [25]:
# Reference geometry: point coordinates and agent numbers are read from the
# last file of `dat_files`.
data = pd.read_csv(
    dat_files[-1],
    sep=',',
    na_values=['######', '####', '###'],
)

# coordinates of the individual points (one value per row of the file)
lon = data['Longt'].values
lat = data['Lat'].values

# axes of the regular target grid; np.unique already returns sorted values
lon4grid = np.unique(lon)
lat4grid = np.unique(lat)
xs, ys = np.meshgrid(lon4grid, lat4grid)

# source (scattered points) and target (regular grid) geometries
swath_def_in = geometry.SwathDefinition(lons=lon, lats=lat)
swath_def_out = geometry.GridDefinition(lons=xs, lats=ys)

# agent numbers placed on the target grid; cells without a point inside the
# radius of influence are filled with NaN
agents_in = data['Agent'].values.astype(np.float32)
agents = pyresample.kd_tree.resample_nearest(
    swath_def_in,
    agents_in,
    swath_def_out,
    radius_of_influence=1,
    fill_value=np.nan,
)

### Now process the data itself 

In [None]:
# Regrid VAR_NAME from every file of `dat_files` onto the target grid; `ld`
# collects one single-time-step Dataset per file, in the order of `dat_files`.
ld = []

# search radius handed to pyresample (same value as the former per-file call)
radius_of_influence = 10

# The source and target geometries are identical for every file, so the
# nearest-neighbour lookup is computed once here instead of once per file.
# NOTE(review): as with the former per-file resample_nearest call, this relies
# on every file listing its rows in the same order as the file that was used
# to build `swath_def_in`.
valid_input_index, valid_output_index, index_array, distance_array = \
    pyresample.kd_tree.get_neighbour_info(
        swath_def_in, swath_def_out, radius_of_influence, neighbours=1
    )

for fname in dat_files:

    data = pd.read_csv(fname, sep=',', na_values=['######', '####', '###'])

    # the time stamp is the first unique value of the 'Date' column (day/month/year)
    date = data.loc[:, 'Date'].unique()[0]
    date = datetime.strptime(date, "%d/%m/%Y")

    data_in = data.loc[:, VAR_NAME].values

    # nearest-neighbour sampling using the precomputed lookup; target cells
    # without a neighbour inside the radius are filled with NaN
    result = pyresample.kd_tree.get_sample_from_neighbour_info(
        'nn', swath_def_out.shape, data_in,
        valid_input_index, valid_output_index, index_array,
        fill_value=np.nan,
    )

    d = {}
    d['time'] = (('time'), np.array([date]))
    d['lat'] = (('lat'), lat4grid)
    d['lon'] = (('lon'), lon4grid)
    # add a leading time axis of length 1 so the datasets can be concatenated
    d[VAR_NAME] = (('time', 'lat', 'lon'), result[np.newaxis, ...])

    dset = xr.Dataset(d)

    ld.append(dset)

### concatenates along the time dimension 

In [None]:
dset = xr.concat(ld, dim='time')

In [None]:
dset

In [None]:
# Switch the 'TMax' spelling to 'Tmax', both for the data variable in the
# dataset and for VAR_NAME itself.
if VAR_NAME == 'TMax':
    VAR_NAME = 'Tmax'
    dset = dset.rename({'TMax': VAR_NAME})

In [None]:
dset

In [None]:
nc_path = PWD.parent / 'data' / 'NC'

In [None]:
# create the output directory and any missing parents; nothing happens when
# the directory already exists
nc_path.mkdir(parents=True, exist_ok=True)

In [None]:
date_start = dset.time[0].data

In [None]:
date_end = dset.time[-1].data

In [None]:
dset

In [None]:
# Aggregate the daily fields to monthly values when requested, then select the
# output directory matching the temporal resolution.
if MONTHLY:

    monthly = dset.resample({'time': '1M'})

    # NOTE(review): 'Rain' is assumed to be a rainfall amount like 'Rain_bc',
    # hence it is accumulated rather than averaged -- confirm.
    if VAR_NAME in ('Rain', 'Rain_bc'):

        # if rainfall: sum
        # min_count=1 keeps grid cells that are NaN on every day as NaN instead
        # of turning them into a monthly total of 0
        dset = monthly.sum(min_count=1)

    else:

        dset = monthly.mean() # else: mean

nc_path = PWD.parent / 'data' / 'NC' / ('MONTHLY' if MONTHLY else 'DAILY') / VAR_NAME.upper()

### set the field with the agent numbers

In [None]:
dset['agent'] = (('lat','lon'), agents)

### if the output path does not exist, create it 

In [None]:
# create the output directory and any missing parents; nothing happens when
# the directory already exists
nc_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Write the dataset to netCDF: one file covering the whole period when
# MONTHLY is True, otherwise one file of daily fields per calendar month.
if MONTHLY:

    fname_out = f"VCSN_gridded_{VAR_NAME}_{pd.to_datetime(date_start):%Y-%m}_{pd.to_datetime(date_end):%Y-%m}.nc"
    dset.to_netcdf(nc_path / fname_out)

else:

    # Calendar months actually present along the time axis. Unlike a
    # month-start ('MS') date range, this keeps a first month that does not
    # begin on day 1 and never selects a month without any data.
    months = pd.to_datetime(dset.time.values).to_period('M').unique()

    for month in months:

        label = month.strftime('%Y-%m')
        sub = dset.sel(time=label)
        sub.to_netcdf(nc_path / f"VCSN_gridded_daily_{VAR_NAME}_{label}.nc")
        sub.close()

In [None]:
dset.close()

### clean up: delete the temporary directory if `CLEAN_UP_ZIP` is set to True

In [None]:
tmp_path

In [None]:
# Delete the temporary directory and everything in it when requested; the
# existence check keeps a re-run of this cell from raising once it is gone.
if CLEAN_UP_ZIP and tmp_path.exists():
    shutil.rmtree(tmp_path)