# Setup

In [64]:
import geopandas as gpd
import os
import numpy as np
import pandas as pd
import scipy.io
from scipy.spatial import ConvexHull

In [65]:
def grid_gen(coord, ncells, axes = False):
    """
    Build the grid on which the values are interpolated.

    Parameters
    ----------
    coord : sequence of two (x, y) pairs
        coord[0] holds the start values and coord[1] the end values of
        the x and y axes.
    ncells : sequence of two ints
        Number of evenly spaced nodes along x (ncells[0]) and along y
        (ncells[1]); both end values are included.
    axes : bool, optional
        If True, return the two 1D axis vectors instead of the 2D grid.

    Returns
    -------
    tuple of numpy.ndarray
        The 1D axis vectors (x, y) when `axes` is True, otherwise the two
        2D coordinate matrices given by np.meshgrid, each with shape
        (ncells[1], ncells[0]).
    """
    start, end = coord[0], coord[1]

    # evenly spaced nodes between the start and end value of each axis
    x_axis = np.linspace(start[0], end[0], ncells[0])
    y_axis = np.linspace(start[1], end[1], ncells[1])

    if not axes:
        # expand the two axes into full 2D coordinate matrices
        grid_x, grid_y = np.meshgrid(x_axis, y_axis)
        return (grid_x, grid_y)

    return (x_axis, y_axis)

In [66]:
# current working directory of the kernel; input and output files are
# addressed relative to its 'data' sub-folder
cwd = os.getcwd()

# Load data

In [67]:
# read the data points, parse the 'date' column as datetime and order the rows by date
df = (
    pd.read_csv(os.path.join(cwd, 'data', 'st_kriging_data_points.csv'))
    .assign(date = lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
)

# Obtain IT_DATA.mat (spatial interpolation)

In [68]:
# define the interpolation extent and the cell size
coord = [
    484234.65167973784, # xmin
    5019178.630486382,  # ymin
    517495.35663193866, # xmax
    5055708.644651484   # ymax
]
cols = ['z', 'id_punto', 'x', 'y']
dim = 100
# number of grid nodes along x and along y: extent length divided by the cell size
# NOTE(review): grid_gen places nx / ny nodes with both extent ends included, so the
# resulting node spacing is only approximately `dim` - confirm this is intended
nx = round((coord[2] - coord[0]) / dim)
ny = round((coord[3] - coord[1]) / dim)
# coordinates of the grid points, spanning (xmin, ymin) to (xmax, ymax)
xp, yp = grid_gen(
    [tuple(coord[:2]), tuple(coord[2:])],
    [nx, ny]
)

In [69]:
# select only one time step
tool = df.loc[df.date == '2023-06-01', :].copy()
# unique (x, y) locations of that time step. Duplicates are dropped on the
# coordinate *pair*: de-duplicating x and y separately would mispair the
# coordinates (or give arrays of different length) as soon as two points
# share an x or a y value.
points = tool[['x', 'y']].drop_duplicates().to_numpy()
# convex hull of the locations; its vertices are exported as XC / YC
hull = ConvexHull(points)

In [70]:
# variables for the .mat (Matlab) file
# first test: reproduce the layout of IT_DATA.mat
# (values, point coordinates and hull vertices are stored as (n, 1) column vectors)
mat = dict(
    VAL = tool.value.values[:, np.newaxis],
    X = tool.x.values[:, np.newaxis],
    XC = hull.points[hull.vertices, 0][:, np.newaxis],
    Y = tool.y.values[:, np.newaxis],
    YC = hull.points[hull.vertices, 1][:, np.newaxis],
    Xp = xp, # coordinates of the prediction points
    Yp = yp
)

In [71]:
# save as .mat file inside the project's data folder
scipy.io.savemat(os.path.join(cwd, 'data', 'IT_DATA_test.mat'), mat)

# optional second copy in the assignment folder. This is a machine-specific
# absolute path, so it is written only when the folder exists; on any other
# machine the notebook keeps running instead of failing here.
assignment_dir = 'C:/Users/user/OneDrive - Politecnico di Milano/hydrogeo-modelling/Corsi-dottorato/advanced_geostatistics/assignment'
if os.path.isdir(assignment_dir):
    scipy.io.savemat(assignment_dir + '/IT_DATA_test.mat', mat)

# Obtain TESTD.mat (spatio-temporal kriging)

In [5]:
# define the interpolation extent and the cell size
# NOTE(review): same extent / grid definition as in the IT_DATA.mat section -
# consider defining it once and reusing it
coord = [
    484234.65167973784, # xmin
    5019178.630486382,  # ymin
    517495.35663193866, # xmax
    5055708.644651484   # ymax
]
cols = ['z', 'id_punto', 'x', 'y']
dim = 100
# number of grid nodes along x and along y: extent length divided by the cell size
nx = round((coord[2] - coord[0]) / dim)
ny = round((coord[3] - coord[1]) / dim)
# coordinates of the grid points, spanning (xmin, ymin) to (xmax, ymax)
xp, yp = grid_gen(
    [tuple(coord[:2]), tuple(coord[2:])],
    [nx, ny]
)
# extract the same time step used for IT_DATA.mat
tool = df[df['date'] == '2023-06-01'].copy()

In [52]:
# dates with more than 10 data points (non-null id_punto entries per date)
# NOTE(review): the filter is strict (> 10) while the previous wording of this
# comment said "at least 10" - confirm which threshold is intended
count_date = df.groupby('date').count()
d = count_date.loc[count_date['id_punto'] > 10].index

# keep only the rows belonging to those dates, with a fresh 0..n-1 index
df10 = df[df['date'].isin(d)].reset_index(drop = True)

In [60]:
# add timestep indicator: 1 for the month of the earliest date, +1 per calendar month
# the range is anchored on min / max so it does not depend on the row order of df10
first_month = df10['date'].min().normalize().replace(day = 1)
last_date = df10['date'].max()

# lookup table month start -> timestep, kept for inspection
t = pd.date_range(first_month, last_date, freq = 'MS')
i = range(1, len(t) + 1)
ti = pd.DataFrame({'t': t, 'i': i})

# vectorized month arithmetic instead of one table lookup per row: same result
# for month-start dates, and a date that does not fall exactly on a month start
# is assigned to its calendar month instead of raising an IndexError
months_elapsed = (
    (df10['date'].dt.year - first_month.year) * 12
    + (df10['date'].dt.month - first_month.month)
)
df10['t'] = (months_elapsed + 1).astype('int64')

In [61]:
# unique (x, y) locations over all retained dates. Duplicates are dropped on
# the coordinate *pair*: de-duplicating x and y separately would mispair the
# coordinates (or give arrays of different length) as soon as two points
# share an x or a y value.
points = df10[['x', 'y']].drop_duplicates().to_numpy()
# convex hull of the locations; its vertices are exported as XC / YC
hull = ConvexHull(points)

In [62]:
# second test: reproduce the layout of TESTD.mat
# VAL is a table with one row per observation (x, y, t, value); the remaining
# point / hull variables are stored as (n, 1) column vectors

mat = dict(
    VAL = df10[['x', 'y', 't', 'value']].values,
    X = df10.x.values[:, np.newaxis],
    XC = hull.points[hull.vertices, 0][:, np.newaxis],
    Y = df10.y.values[:, np.newaxis],
    YC = hull.points[hull.vertices, 1][:, np.newaxis],
    Xp = xp, # coordinates of the prediction points
    Yp = yp,
    X2 = tool.x.values[:, np.newaxis],
    Y2 = tool.y.values[:, np.newaxis]
)

In [63]:
# write the dictionary to a .mat file inside the project's data folder
scipy.io.savemat(
    file_name = os.path.join(cwd, 'data', 'TESTD_test.mat'),
    mdict = mat
)

# Checks

In [35]:
# dates with more than 10 data points (non-null id_punto entries per date)
count_date = df.sort_values('date').groupby('date').count()
d = count_date[count_date.id_punto > 10].index
d_df = pd.DataFrame(index = d).reset_index()
# generate a counter for each date, based on the month
counter = pd.date_range('2009-02-01', '2024-09-01', freq='MS')
counter_df = pd.DataFrame(counter, columns=['date_range'])
counter_df['counter'] = range(0, counter_df.shape[0])
# mark which months of the range are retained dates (`date` is NaT for the others)
counter_df = counter_df.merge(d_df, how='left', left_on='date_range', right_on='date')
# attach the counter to the retained rows. Only the lookup columns are merged:
# counter_df also carries its own `date` column, which would clash with the one
# in df and turn it into date_x / date_y in the result.
df_clean = (
    df.loc[df.date.isin(d), :]
    .merge(counter_df[['date_range', 'counter']], left_on = 'date', right_on = 'date_range')
    .drop(columns = 'date_range')
    .sort_values('counter')
)