In [None]:
from scipy import stats
from scipy.spatial.distance import cdist
from scipy.spatial import cKDTree as KDTree
import statsmodels.api as sm
import numpy as np
import pandas as pd
import gdal
import os
import matplotlib.pyplot as plt
#%matplotlib inline

In [None]:
def inRaster(file_raster):
    """
    Open a raster file read-only and load its first band.

    Parameters
    ----------
    file_raster : str
        Path handed to ``gdal.Open``.

    Returns
    -------
    raster
        The opened GDAL dataset (kept open so callers can query its metadata).
    array
        Contents of band 1 as returned by ``ReadAsArray``.
    extent
        The dataset's geotransform as returned by ``GetGeoTransform``.

    Raises
    ------
    IOError
        If GDAL could not open ``file_raster``.
    """
    raster = gdal.Open(file_raster, gdal.GA_ReadOnly)
    # Without gdal.UseExceptions(), a failed open returns None instead of raising;
    # fail here with the offending path rather than with an AttributeError below.
    if raster is None:
        raise IOError('GDAL could not open raster: %s' % (file_raster,))

    array = raster.GetRasterBand(1).ReadAsArray()
    extent = raster.GetGeoTransform()
    return raster, array, extent

In [None]:
def saveRaster(path, array, raster, datatype=6, formatraster="GTiff"):
    """
    Write an array to band 1 of a new raster that copies the grid of a template.

    The new file takes its column count, row count, band count, geotransform and
    projection from ``raster``.

    Parameters
    ----------
    path : str
        Destination file passed to the driver's ``Create``.
    array : ndarray
        Values written to band 1 with ``WriteArray``.
    raster : gdal.Dataset
        Template dataset supplying size, band count and georeferencing.
    datatype : int, optional
        GDAL data type code for the new raster. The default, 6, is
        ``GDT_Float32`` in GDAL's type enumeration.
    formatraster : str, optional
        Short name of the GDAL driver used to create the file.

    Raises
    ------
    ValueError
        If no GDAL driver with the name ``formatraster`` is available.
    IOError
        If the driver could not create ``path``.
    """
    driver = gdal.GetDriverByName(formatraster)
    if driver is None:
        raise ValueError('Unknown GDAL driver: %r' % (formatraster,))

    # Size and band count are copied from the template; the data type is the
    # caller's choice (previously it was silently forced to 6).
    outDataset = driver.Create(
        path, raster.RasterXSize, raster.RasterYSize, raster.RasterCount, datatype)
    if outDataset is None:
        raise IOError('GDAL could not create raster: %s' % (path,))

    # Copy the georeferencing of the template.
    outDataset.SetGeoTransform(raster.GetGeoTransform())
    outDataset.SetProjection(raster.GetProjection())

    # Write the values to band 1 of the new raster.
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(array)

    # Flush and drop the GDAL handles so the file is finalised on disk.
    outBand.FlushCache()
    outBand = None
    outDataset = None

In [None]:
def cartesian(arrays, out=None):
    """
    Generate a cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the cartesian product of.
    out : ndarray
        Array to place the cartesian product in.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing cartesian products
        formed of input arrays. When ``out`` is not supplied the result takes
        the dtype of the first input array.

    Examples
    --------
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])

    """

    arrays = [np.asarray(x) for x in arrays]
    dtype = arrays[0].dtype

    n = int(np.prod([x.size for x in arrays]))
    if out is None:
        out = np.zeros([n, len(arrays)], dtype=dtype)
    if n == 0:
        # At least one input is empty, so the product has no rows.
        return out

    # Number of rows that share one value of the first array. This must be an
    # integer: it is used as a repeat count and as a slice bound, and "/" would
    # produce a float under Python 3.
    m = n // arrays[0].size
    out[:, 0] = np.repeat(arrays[0], m)
    if arrays[1:]:
        # Fill the first block of the remaining columns recursively, then copy
        # that block once for every further value of the first array.
        cartesian(arrays[1:], out=out[0:m, 1:])
        for j in range(1, arrays[0].size):
            out[j*m:(j+1)*m, 1:] = out[0:m, 1:]
    return out

In [None]:
def _daily_station_values(data, column, stat):
    """Collapse hourly station records to one row per station per day."""
    if stat not in ('mean', 'max', 'min', 'sum'):
        raise ValueError("stat must be 'mean', 'max', 'min' or 'sum', got %r" % (stat,))

    subset = data[[column, 'X_RD', 'Y_RD']]
    # Group on the first index level (station) and on calendar days of the last
    # index level (timestamp).
    grouped = subset.groupby(
        [subset.index.get_level_values(0), pd.Grouper(freq='1D', level=-1)])

    if stat == 'sum':
        # min_count=1 keeps a day without any valid reading as NaN (a plain sum
        # would report 0), so it is dropped by the not-null filter of the caller.
        daily_values = grouped[column].sum(min_count=1)
    else:
        daily_values = getattr(grouped[column], stat)()

    # The statistic applies to the measured parameter only. The coordinates are
    # taken as-is: aggregating them as well would, for 'sum', multiply each
    # station position by the number of hours in the day.
    daily_coords = grouped[['X_RD', 'Y_RD']].first()
    return pd.concat([daily_values, daily_coords], axis=1)


def _cell_centre_coordinates(shape, extent):
    """Return an (rows*cols, 2) array of cell-centre x/y pairs, x varying slowest."""
    rows, cols = shape
    # Uses origin and pixel size only; the rotation terms of the geotransform
    # (extent[2], extent[4]) are not taken into account.
    x_coords = extent[0] + np.arange(cols) * extent[1] + extent[1] / 2
    y_coords = extent[3] + np.arange(rows) * extent[5] + extent[5] / 2
    xx, yy = np.meshgrid(x_coords, y_coords, indexing='ij')
    return np.column_stack([xx.ravel(), yy.ravel()])


def _idw_estimate(tree, values, xy_part, k):
    """Inverse-distance-squared estimate at each point of xy_part from its k nearest stations."""
    # NOTE(review): p=1 ranks and weights neighbours by Manhattan distance; kept
    # as in the original - confirm that Euclidean distance (p=2) was not intended.
    dist, idx = tree.query(xy_part, k=k, eps=0, p=1)
    # With k == 1 the query returns 1-D arrays; force (points, k).
    dist = dist.reshape(len(xy_part), -1)
    idx = idx.reshape(len(xy_part), -1)
    neighbours = values[idx]

    on_station = dist == 0
    with np.errstate(divide='ignore'):
        weights = (1 / dist) ** 2
    # A point that coincides with a station would get inf/inf = NaN; give the
    # coinciding station(s) all the weight instead.
    hit = on_station.any(axis=1)
    weights[hit] = on_station[hit]

    weights = weights / weights.sum(axis=1, keepdims=True)
    return np.einsum("ij,ij->i", weights, neighbours)


def IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units):
    r"""
    Interpolate daily station values onto a template grid with inverse distance
    weighting (up to 8 nearest stations, weight 1/distance**2) and write one
    raster per day.

    INPUT:
    data           = dataframe that contains the hourly station data, indexed by
                     (station, timestamp), with the columns 'X_RD' and 'Y_RD'
    columns        = parameter to use for interpolation ('T')
    conversion     = conversion factor to apply on the daily value (0.1)
    shift          = shift to add after conversion, e.g. degrees Celsius to K
                     (shift = 273.15)
    stat           = resample hour values to daily values, can be 'mean', 'max',
                     'min' or 'sum'
    baseDir        = directory to store the IDW output in; created if missing
                     (r'D:\Projects\Pr\3492.10\KNMI\InterpolatedData\Tmax//')
    templateRaster = path of the raster that is used as template for the
                     interpolation grid and for the georeferencing of the output
                     (r'D:\Projects\Pr\3492.10\DIDSLF_NL//RD_prj_base.tif')
    units          = units of parameter for a colorbar label (currently unused)

    OUTPUT:
    none; files named <columns><stat>_<YYYYMMDD>_<day of year>.tif are written
    to baseDir.

    RAISES:
    ValueError if stat is not one of the supported statistics.
    """
    # Daily value per station, scaled and shifted; station-days without data are dropped.
    data_day = _daily_station_values(data, columns, stat)
    data_day[columns] = data_day[columns] * conversion + shift
    data_day = data_day[data_day[columns].notnull()]

    # The template raster defines the grid to interpolate on.
    raster, array, extent = inRaster(templateRaster)
    rows, cols = array.shape
    xy = _cell_centre_coordinates(array.shape, extent)

    os.makedirs(baseDir, exist_ok=True)

    # Query the tree in blocks to limit the size of the intermediate arrays.
    block = cols * 10
    day_index = data_day.index.get_level_values(-1)

    for date_selected in day_index.unique():
        print ('date:',date_selected)

        # All stations that have a value on this day.
        day_df = data_day[day_index == date_selected]
        station_xy = np.column_stack([day_df['X_RD'].values, day_df['Y_RD'].values])
        station_values = day_df[columns].values.astype(float)
        tree = KDTree(station_xy, leafsize=11)

        # Asking for more neighbours than there are stations makes the query
        # return out-of-range indices, so cap k.
        k = min(8, len(day_df))

        newArray = np.empty(xy.shape[0])
        for fr in range(0, xy.shape[0], block):
            to = fr + block
            print('.', end="", flush=True)
            newArray[fr:to] = _idw_estimate(tree, station_values, xy[fr:to], k)

        # xy runs x-major, so the flat result is (cols, rows); transpose to raster layout.
        outArray = newArray.reshape(cols, rows).T

        # save the file
        file_out = columns+stat+'_'+date_selected.strftime('%Y%m%d')+'_'+str(date_selected.dayofyear).zfill(3)+'.tif'
        path_out = os.path.join(baseDir, file_out)
        saveRaster(path_out, outArray, raster)

    print('Function returned no errors')
    return None

In [None]:
# Load the station locations, store the station code as a zero-padded
# 3-character string and rename the coordinate columns to X_RD / Y_RD.
locations_file = r'D:\Projects\Pr\3492.10\KNMI//locations.csv'
locations = (
    pd.read_csv(locations_file)
    .assign(SCODE=lambda frame: frame['SCODE'].astype(str).str.zfill(3))
    .rename(columns={'X': 'X_RD', 'Y': 'Y_RD'})
)
locations.head()

In [None]:
# Load the hourly export: skip the first 81 lines of the file and strip the
# blanks that follow each separator.
data_file = r'D:\Projects\Pr\3492.10\KNMI\KNMI_20170130_hourly//KNMI_20170130_hourly.txt'
data = pd.read_csv(data_file, sep=',', skiprows=81, skipinitialspace=True)

# Hours are reported as 1 - 24 while datetime hours run 0 - 23: shift by one
# and store as a zero-padded 2-character string.
data['HH'] = (data['HH'] - 1).astype(str).str.zfill(2)

# Station id as a zero-padded 3-character string, matching locations['SCODE'].
data['STN'] = data['STN'].astype(str).str.zfill(3)

# Timestamp built from the date column and the hour string.
data['Datum'] = pd.to_datetime(data['YYYYMMDD'].map(str) + data['HH'], format='%Y%m%d%H')

# Attach the station coordinates (only stations present in both tables are
# kept) and index the result by station and timestamp.
data = (
    data.merge(locations, how='inner', left_on='STN', right_on='SCODE')
        .set_index(['STN', 'Datum'])
)
# display head(5)
data.head()

In [None]:
#data['P']*0.1

In [None]:
# Template raster file: passed to IDW_KNMIstations, which interpolates onto its
# grid and copies its georeferencing to every output raster.
# Alternative template, kept for reference:
#templateRaster = r'D:\Projects\Pr\3492.10\DIDSLF_NL//RD_prj_base.tif'
templateRaster = r'D:\Projects\Pr\3492.10\ALBEDO\high_resolution//base_v2_zeros.tif'

In [None]:
# APPLY THE INTERPOLATION FOR EACH PARAMETER

In [None]:
# Daily mean temperature in degrees Celsius.
# T = temperature (in 0.1 degrees Celsius) at 1.50 m height at the time of observation.
# conversion 0.1 turns tenths of a degree into degrees; no shift is applied.
columns        = 'T'
conversion     = 0.1
shift          = 0
stat           = 'mean'
units          = 'gemiddelde dag temperatuur (°C)'

# save in this directory (sub-folder named parameter + statistic, here 'Tmean')
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes',columns+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily minimum temperature in degrees Celsius.
# Tmin = minimum of T, the temperature (in 0.1 degrees Celsius) at 1.50 m height at the time of observation.
# conversion 0.1 turns tenths of a degree into degrees; no shift is applied.
columns        = 'T'
conversion     = 0.1
shift          = 0
stat           = 'min'
units          = 'minimum dag temperatuur (°C)'

# save in this directory (sub-folder named parameter + statistic, here 'Tmin')
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes',columns+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily maximum temperature in degrees Celsius.
# Tmax = maximum of T, the temperature (in 0.1 degrees Celsius) at 1.50 m height at the time of observation.
# conversion 0.1 turns tenths of a degree into degrees; no shift is applied.
columns        = 'T'
conversion     = 0.1
shift          = 0
stat           = 'max'
units          = 'maximum dag temperatuur (°C)'

# save in this directory (sub-folder named parameter + statistic, here 'Tmax')
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes',columns+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily mean temperature in kelvin.
# T = temperature (in 0.1 degrees Celsius) at 1.50 m height at the time of observation.
# conversion 0.1 turns tenths of a degree into degrees Celsius; shift 273.15 then gives kelvin.
columns        = 'T'
conversion     = 0.1
shift          = 273.15
stat           = 'mean'
units          = 'gemiddelde dag temperatuur (K)'

# save in this directory (sub-folder 'K' + statistic, here 'Kmean'; the file
# names written by IDW_KNMIstations still start with columns+stat)
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes','K'+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily minimum temperature in kelvin.
# T = temperature (in 0.1 degrees Celsius) at 1.50 m height at the time of observation.
# conversion 0.1 turns tenths of a degree into degrees Celsius; shift 273.15 then gives kelvin.
columns        = 'T'
conversion     = 0.1
shift          = 273.15
stat           = 'min'
units          = 'minimum dag temperatuur (K)'

# save in this directory (sub-folder 'K' + statistic, here 'Kmin'; the file
# names written by IDW_KNMIstations still start with columns+stat)
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes','K'+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily maximum temperature in kelvin.
# T = temperature (in 0.1 degrees Celsius) at 1.50 m height at the time of observation.
# conversion 0.1 turns tenths of a degree into degrees Celsius; shift 273.15 then gives kelvin.
columns        = 'T'
conversion     = 0.1
shift          = 273.15
stat           = 'max'
units          = 'maximum dag temperatuur (K)'

# save in this directory (sub-folder 'K' + statistic, here 'Kmax'; the file
# names written by IDW_KNMIstations still start with columns+stat)
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes','K'+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily mean air pressure.
# P = air pressure (in 0.1 hPa) reduced to sea level, at the time of observation.
# NOTE(review): 0.1 hPa * 0.1 yields hPa, while the label below says kPa (which
# would need a conversion of 0.01) - confirm which unit is intended downstream.
columns        = 'P'
conversion     = 0.1
shift          = 0
stat           = 'mean'
units          = 'luchtdruk (kPa) op hoogte z0'

# save in this directory (sub-folder named parameter + statistic, here 'Pmean')
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes',columns+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily mean cloud cover as a fraction.
# N = cloud cover (sky coverage in eighths) at the time of observation (9 = sky invisible).
# conversion 1/8 turns eighths into a fraction.
# NOTE(review): the code value 9 does not appear to be masked before averaging,
# so it would enter the daily mean as 9/8 - confirm whether it should be excluded.
columns        = 'N'
conversion     = 1/8.
shift          = 0
stat           = 'mean'
units          = 'bedekkingsgraad van de bewolking (-)'

# save in this directory (sub-folder named parameter + statistic, here 'Nmean')
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes',columns+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily mean relative humidity as a fraction.
# U = relative humidity (in percent) at 1.50 m height at the time of observation.
# conversion 0.01 turns percent into a fraction.
columns        = 'U'
conversion     = 0.01
shift          = 0
stat           = 'mean'
units          = 'relatieve vochtigheid (-)'

# save in this directory (sub-folder named parameter + statistic, here 'Umean')
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes',columns+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)

In [None]:
# Daily global radiation expressed as a mean flux in W/m2.
# Q = global radiation (in J/cm2) per hourly interval.
# The hourly values are summed per day (J/cm2 per day) and then converted:
# 1 J/cm2 = 1e4 J/m2 and a day has 86400 s, so J/cm2 per day -> W/m2 is a
# factor 1e4/86400 = 1/8.64 (8.64 is the factor for the opposite direction).
columns        = 'Q'
conversion     = 1/8.64
shift          = 0
stat           = 'sum'
units          = 'dagsom globale straling (W/m2)'
# returns Q, global radiation (in W/m2) per day

# save in this directory (sub-folder named parameter + statistic, here 'Qsum')
baseDir        = os.path.join(r'D:\Projects\Pr\3492.10\KNMI\InterpolatedDataHighRes',columns+stat)

# do the work: one interpolated raster per day is written to baseDir
IDW_KNMIstations(data,columns,conversion,shift,stat,baseDir,templateRaster,units)