# Superimpose data

The aim of this notebook is to indicate how to superimpose data from different sources (radar, satellite, weather models...).

There are 2 data categories:
* gridded data, i.e. data projected on a grid (radar, weather models...)
* point data, such as observations from ground stations

This notebook therefore provides functions to:
* superimpose 2 grids of different resolutions
* superimpose gridded data with point data

# I - Superimpose 2 grids of different resolutions

## I.1 - 2 GRIB files 
### Example : 3D ARPEGE data on isobaric levels (0.1°) to interpolate to 2D AROME data on the 2m level (0.025°)
#### Use xarray to open GRIB files and perform interpolation

In [None]:
import meteonet_toolbox.user_configuration
import xarray as xr
import datetime as dt
%matplotlib notebook
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import matplotlib.pylab as pl
from matplotlib import colors
import numpy as np
import pandas as pd
from scipy.interpolate import griddata

In [None]:
#### Cell containing the modifiable fields ####
# Parameter names can be found by exploring the metadata of each GRIB file.
zone = "SE"          # geographic zone (NW or SE)
arp_param = 't'      # ARPEGE: parameter name in the file
aro_param = 't2m'    # AROME: parameter name in the file
date = dt.datetime(2018, 5, 1, 0, 0)   # example day
time_step = 0        # index of the studied time step
level_step = 0       # index of the studied level step (from ground upwards)

# Both file names embed the zone and the run date formatted as YYYYmmddHHMMSS.
arp_fname = "../../data_samples/weather_models/arpege_3D_isobar_%s_%s.grib" % (
    zone,
    date.strftime('%Y%m%d%H%M%S'),
)
aro_fname = "../../data_samples/weather_models/arome_2m_%s_%s.grib" % (
    zone,
    date.strftime('%Y%m%d%H%M%S'),
)

In [None]:
#/!\ The plot options depend on the GRIB file structure (for example, 'isobaricInhPa =' means the file is a 3D GRIB file with isobaric levels)
def grib_interpolate_grib(ori_fname,tar_fname,time_step,level_step,ori_param,tar_param):
    """Interpolate one GRIB dataset onto the grid of another one and plot the result.

    ori_fname  : path of the GRIB file to interpolate (opened with the cfgrib engine)
    tar_fname  : path of the GRIB file providing the target grid
    time_step  : index along the 'step' dimension used for the plots
    level_step : index along the 'isobaricInhPa' dimension used for the source plots
    ori_param  : name of the variable plotted from the source and interpolated datasets
    tar_param  : name of the variable plotted from the target dataset

    Returns the tuple (data_to_interpolate, interpolated_data, target_data).
    The step/level/parameter arguments are only used to select what is plotted.
    """
    # Load both datasets (source first, then target)
    source = xr.open_dataset(ori_fname, engine='cfgrib')
    target = xr.open_dataset(tar_fname, engine='cfgrib')

    # 2 interpolation methods are mentioned for 2D arrays: 'linear' (used here)
    # and 'nearest' (nearest neighbors)
    interpolated = source.interp_like(target, method='linear')

    # 2x2 layout; only three of the four panels are filled
    figure = plt.figure(figsize=(9,9))
    layout = gridspec.GridSpec(2, 2, figure = figure)

    # Panel 1: data to interpolate
    pl.subplot(layout[0, 0])
    source_slice = source.isel(step=time_step, isobaricInhPa = level_step)
    source_slice[ori_param].plot(x="longitude",y="latitude")
    plt.title('original data')

    # Panel 2: interpolated data
    pl.subplot(layout[0, 1])
    interpolated_slice = interpolated.isel(step=time_step, isobaricInhPa = level_step)
    interpolated_slice[ori_param].plot(x="longitude",y="latitude")
    plt.title('interpolated data')

    # Panel 3: data already on the target grid (no level selection is applied here)
    pl.subplot(layout[1, 0])
    target_slice = target.isel(step=time_step)
    target_slice[tar_param].plot(x="longitude",y="latitude")
    plt.title('data on target grid')

    return source, interpolated, target

In [None]:
# Source (data to interpolate): ARPEGE; target grid: AROME
data_to_interpolate, interpolated_data, target_data = grib_interpolate_grib(
    ori_fname=arp_fname,
    tar_fname=aro_fname,
    time_step=time_step,
    level_step=level_step,
    ori_param=arp_param,
    tar_param=aro_param,
)

## I.2 - 1 GRIB file and 1 radar file
### Example : rainfall data (NPZ file, 0.01°) to interpolate to land-sea mask (GRIB file, 0.025°)

In [None]:
#### Cell containing the modifiable fields ####
year = 2016
month = 8
decade = 3
ind = 15          # index of the chosen 5 min of the decade
nan_value = -1    # value marking missing data (ex: rainfall here)

rain_param = 'rainfall'   # parameter name for rainfall
mask_param = 'lsm'        # parameter name for land-sea mask (cf. metadata in the mask GRIB file)

# File names are built from the zone and, for rainfall, from year / zero-padded month / decade.
rain_fname = "../../data_samples/radar/rainfall_%s_%s_%s.%s.npz" % (
    zone,
    str(year),
    "{:02d}".format(month),
    str(decade),
)
rain_coords_fname = "../../data_samples/radar/radar_coords_%s.npz" % (zone)
mask_fname = "../../data_samples/masks/%s_masks.grib" % (zone)

#### Step 1 : put NPZ data into an xarray (values, lat, lon)

In [None]:
def radar_to_grib(rain_fname,rain_coords_fname,ind):
    """Load one radar field from NPZ files and wrap it into an xarray Dataset.

    rain_fname        : path of the NPZ file holding the 'data' array
    rain_coords_fname : path of the NPZ file holding the 'lats' and 'lons' arrays
    ind               : index of the field to extract along the first axis of 'data'

    Returns (d_grib, lat, lon): a Dataset with a single 'rainfall' variable on
    ('latitude', 'longitude'), plus the 'lats' and 'lons' arrays as read from the file.
    """
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading
    # an untrusted file; only use it with files from a trusted source.
    # The archives are opened in 'with' blocks so the file handles are released
    # as soon as the arrays have been read.
    with np.load(rain_fname, allow_pickle=True) as d:
        data = d['data'][ind,:,:]

    with np.load(rain_coords_fname, allow_pickle=True) as coords:
        lat = coords['lats']
        lon = coords['lons']

    # The 1D axes are taken from the first column of lat and the first row of lon
    data = xr.DataArray(data,coords=[lat[:,0],lon[0,:]],dims=['latitude','longitude'])
    d_grib = data.to_dataset(name = 'rainfall')

    return d_grib,lat,lon

In [None]:
# Wrap the selected radar field into an xarray Dataset
d_grib, lat, lon = radar_to_grib(
    rain_fname=rain_fname,
    rain_coords_fname=rain_coords_fname,
    ind=ind,
)

In [None]:
np.unique(d_grib['rainfall'].values)

#### Step 2 : perform the interpolation
**/!\ If the 'missing data' value is not NaN (for example, it is -1 for rainfall), these values must first be converted to NaN in order to use the linear interpolation method; otherwise, use the nearest-neighbors method.**

In [None]:
#/!\ The plot options depend on the GRIB file structure
def radar_interpolate_grib(ori_data,tar_fname,ori_param,tar_param,nan_value):
    """Interpolate a gridded field onto the grid of a GRIB file and plot the result.

    ori_data  : dataset to interpolate (must contain the variable ori_param)
    tar_fname : path of the GRIB file providing the target grid (opened with cfgrib)
    ori_param : name of the variable to mask, interpolate and plot in ori_data
    tar_param : name of the variable plotted from the target dataset
    nan_value : value marking missing data in ori_data; replaced by NaN before interpolation

    Returns (data_to_interpolate, nan_data_to_interpolate, interpolated_data, target_data).
    """
    #data loading
    data_to_interpolate = ori_data
    # missing data (from value to 'nan'); the mask is built on ori_param rather
    # than on a hard-coded variable name
    nan_data_to_interpolate = data_to_interpolate.where(data_to_interpolate[ori_param]!=nan_value)
    target_data = xr.open_dataset(tar_fname, engine='cfgrib')

    # 2 interpolation methods are mentioned for 2D arrays: 'linear' (used here)
    # and 'nearest' (nearest neighbors)
    interpolated_data = nan_data_to_interpolate.interp_like(target_data,method='linear')

    #plots
    fig = plt.figure(figsize=(9,9))
    gs = gridspec.GridSpec(2, 2, figure = fig)

    #colorbar definition for rainfall: the last bound follows the data maximum
    #when it exceeds 65, otherwise it is fixed to 75
    data_max = np.max(data_to_interpolate[ori_param].values)
    borne_max = data_max if data_max > 65 else 65 + 10
    cmap = colors.ListedColormap(['silver','white', 'darkslateblue', 'mediumblue','dodgerblue', 'skyblue','olive','mediumseagreen'
                                  ,'cyan','lime','yellow','khaki','burlywood','orange','brown','pink','red','plum'])
    # NOTE(review): the first bound (-1) matches the rainfall missing-data value; adapt it if nan_value differs
    bounds = [-1,0,2,4,6,8,10,15,20,25,30,35,40,45,50,55,60,65,borne_max]
    norm = colors.BoundaryNorm(bounds, cmap.N)

    #original data, missing data still encoded with nan_value
    pl.subplot(gs[0, 0])
    data_to_interpolate[ori_param].plot(cmap=cmap, norm=norm)
    plt.title('original data - missing data : ' + str(nan_value))

    #original data after conversion of the missing data to nan
    pl.subplot(gs[0, 1])
    nan_data_to_interpolate[ori_param].plot(cmap=cmap, norm=norm)
    plt.title('original data with nan - missing data : nan')

    #interpolated data
    pl.subplot(gs[1, 0])
    interpolated_data[ori_param].plot(cmap=cmap, norm=norm)
    plt.title('interpolated data - missing data : nan')

    #destination data
    pl.subplot(gs[1, 1])
    target_data[tar_param].plot()
    plt.title('data on target grid')

    return data_to_interpolate,nan_data_to_interpolate,interpolated_data, target_data

In [None]:
# Source: radar rainfall dataset; target grid: land-sea mask GRIB file
(
    data_to_interpolate,
    nan_data_to_interpolate,
    interpolated_data,
    target_data,
) = radar_interpolate_grib(
    ori_data=d_grib,
    tar_fname=mask_fname,
    ori_param=rain_param,
    tar_param=mask_param,
    nan_value=nan_value,
)

# II - Superimpose data on a grid with punctual data

### Example 1 : 2D AROME data on the 2m level (GRIB file, 0.025°) to interpolate to ground station observations

In [None]:
#### Cell containing the modifiable fields ####
date = '2016-01-01T06:00:00'   # study date
obs_param = 't'                # observation parameter
grib_param = 't2m'             # AROME parameter

In [None]:
# Load the ground station observations and keep the rows matching the study date
study_date = pd.Timestamp(date)  #study date
fname = "../../data_samples/ground_stations/%s_20160101.csv"%zone
# The column at position 4 is parsed as dates. infer_datetime_format is no longer
# passed: it is deprecated since pandas 2.0, where it has no effect.
df = pd.read_csv(fname,parse_dates=[4])
d_sub = df[df['date'] == study_date]

Overview of observation data for a given date :

In [None]:
display(d_sub.head())

Load AROME data : parameter values, latitudes and longitudes

In [None]:
# Open the AROME GRIB file, then pull out the coordinate axes and the studied
# parameter as plain numpy arrays
aro = xr.open_dataset(aro_fname, engine='cfgrib')
grid_lat, grid_lon = aro['latitude'].values, aro['longitude'].values
grid_val = aro[grib_param].values

Perform the interpolation

In [None]:
def interpolate_grib_on_points(grid_lat,grid_lon,grid_val,data_obs,time_index=None):
    """Linearly interpolate gridded values onto observation points.

    grid_lat   : 1D array of grid latitudes
    grid_lon   : 1D array of grid longitudes
    grid_val   : array of grid values indexed as [time, lat, lon]
    data_obs   : table with 'lat' and 'lon' columns (one row per observation point)
    time_index : index of the time step to read in grid_val; when None, the
                 notebook-level variable `time_step` is used (historical behaviour)

    Returns (latlon_obs, grid_val_on_points): the (n_obs, 2) array of [lat, lon]
    observation coordinates and the values interpolated at these points
    (as returned by scipy's griddata with method='linear').
    """
    if time_index is None:
        # Backward compatibility: the function used to read this global directly
        time_index = time_step

    grid_lat = np.asarray(grid_lat)
    grid_lon = np.asarray(grid_lon)
    n_lat = grid_lat.shape[0]
    n_lon = grid_lon.shape[0]

    #grid data preprocessing: every (lat, lon) combination, latitude varying slowest,
    #with coordinates rounded to 3 decimals
    lat_mesh, lon_mesh = np.meshgrid(np.round(grid_lat, 3), np.round(grid_lon, 3), indexing='ij')
    grid_latlon = np.column_stack((lat_mesh.ravel(), lon_mesh.ravel()))
    #grid values flattened in the same (lat, lon) order as the coordinates
    grid_val2 = np.asarray(grid_val)[time_index, :n_lat, :n_lon].ravel()

    #obs data preprocessing: one [lat, lon] row per observation
    latlon_obs = np.column_stack((data_obs['lat'].values, data_obs['lon'].values))

    #interpolation
    grid_val_on_points = griddata(grid_latlon, grid_val2, latlon_obs, method='linear')
    return latlon_obs,grid_val_on_points

In [None]:
# Interpolate the AROME grid values onto the ground station positions
latlon_obs, grid_val_on_points = interpolate_grib_on_points(
    grid_lat=grid_lat,
    grid_lon=grid_lon,
    grid_val=grid_val,
    data_obs=d_sub,
)

latlon_obs : array of [lat, lon] pairs, one per observation point

In [None]:
latlon_obs[0:10]

grid_val_on_points : array of grid values interpolated on observation points 

In [None]:
grid_val_on_points[0:10]

### Example 2 : radar data (rainfall, NPZ file, 0.01°) to interpolate to ground station observations

In [None]:
#### Cell containing the modifiable fields ####
### obs ###
date = '2016-01-01T06:00:00'   # study date
obs_param = 't'                # observation parameter
npz_param = 'rainfall'         # npz parameter

### rainfall ###
year = 2016
month = 8
decade = 3
ind = 15          # index of the chosen 5 min of the decade
nan_value = -1    # value marking missing data (ex: rainfall here)

# File names are built from the zone and, for rainfall, from year / zero-padded month / decade.
rain_fname = "../../data_samples/radar/rainfall_%s_%s_%s.%s.npz" % (
    zone,
    str(year),
    "{:02d}".format(month),
    str(decade),
)
rain_coords_fname = "../../data_samples/radar/radar_coords_%s.npz" % (zone)

In [None]:
# Load the ground station observations and keep the rows matching the study date
study_date = pd.Timestamp(date)  #study date
fname = "../../data_samples/ground_stations/%s_20160101.csv"%zone
# The column at position 4 is parsed as dates. infer_datetime_format is no longer
# passed: it is deprecated since pandas 2.0, where it has no effect.
df = pd.read_csv(fname,parse_dates=[4])
d_sub = df[df['date'] == study_date]

Overview of observation data for a given date :

In [None]:
display(d_sub.head())

Load radar data : parameter values, latitudes and longitudes

In [None]:
# Radar values: keep only the field at index `ind` along the first axis
radar = np.load(rain_fname, allow_pickle=True)
data = radar['data'][ind, :, :]

# Radar coordinates: latitude and longitude arrays of the grid
coords = np.load(rain_coords_fname, allow_pickle=True)
lat, lon = coords['lats'], coords['lons']

#### Perform the interpolation
**/!\ If the 'missing data' value is not NaN (for example, it is -1 for rainfall), these values must first be converted to NaN in order to use the linear interpolation method; otherwise, use the nearest-neighbors method.**

In [None]:
def interpolate_radar_on_points(grid_lat,grid_lon,grid_val,data_obs,nan_value=-1):
    """Linearly interpolate a 2D radar field onto observation points.

    grid_lat  : 2D array of latitudes; the first column is used as the latitude axis
    grid_lon  : 2D array of longitudes; the first row is used as the longitude axis
    grid_val  : 2D array of grid values indexed as [lat, lon]
    data_obs  : table with 'lat' and 'lon' columns (one row per observation point)
    nan_value : value marking missing data in grid_val (default -1, the value
                previously hard-coded); it is replaced by NaN before interpolation

    Returns (latlon_obs, grid_val_on_points): the (n_obs, 2) array of [lat, lon]
    observation coordinates and the values interpolated at these points
    (as returned by scipy's griddata with method='linear').
    The input grid_val is not modified.
    """
    grid_lat = np.asarray(grid_lat)
    grid_lon = np.asarray(grid_lon)
    lat_axis = grid_lat[:, 0]
    lon_axis = grid_lon[0, :]

    #grid data preprocessing: every (lat, lon) combination, latitude varying slowest
    lat_mesh, lon_mesh = np.meshgrid(lat_axis, lon_axis, indexing='ij')
    grid_latlon = np.column_stack((lat_mesh.ravel(), lon_mesh.ravel()))

    #grid values flattened in the same order; astype makes a float copy so that
    #NaN can be stored without touching the caller's array
    grid_val2 = np.asarray(grid_val)[:lat_axis.size, :lon_axis.size].astype(np.float64).ravel()
    #replace 'missing data' values by nan
    grid_val2[grid_val2 == nan_value] = np.nan

    #obs data preprocessing: one [lat, lon] row per observation
    latlon_obs = np.column_stack((data_obs['lat'].values, data_obs['lon'].values))

    #interpolation
    grid_val_on_points = griddata(grid_latlon, grid_val2, latlon_obs, method='linear')
    return latlon_obs,grid_val_on_points

In [None]:
# Interpolate the radar field onto the ground station positions
latlon_obs, grid_val_on_points = interpolate_radar_on_points(
    grid_lat=lat,
    grid_lon=lon,
    grid_val=data,
    data_obs=d_sub,
)

latlon_obs : array of [lat, lon] pairs, one per observation point

In [None]:
latlon_obs

grid_val_on_points : array of grid values interpolated on observation points 

In [None]:
grid_val_on_points