In [1]:
import rioxarray
import geopandas
import pandas
from tqdm import tqdm

In [2]:
# Raster layers to sample at every measurement point:
# output column name -> path of the GeoTIFF holding that layer.
features = {
    "H_modelled": "data/raster/thickness_m.tif",
    "H_modelled_grad": "data/raster/thickness_m_grad.tif",
    "H_modelled_lapl": "data/raster/thickness_m_lapl.tif",
    "v_modelled": "data/raster/velocity.tif",
    "v_modelled_grad": "data/raster/velocity_grad.tif",
    "v_modelled_lapl": "data/raster/velocity_lapl.tif",
    "boundary_proximity": "data/raster/boundary_distance.tif",
    "elevation1": "data/raster/elevation1.tif",
    "elevation1_gradient": "data/raster/elevation1_grad.tif",
    "elevation1_laplacian": "data/raster/elevation1_lapl.tif",
    "elevation2": "data/raster/elevation2.tif",
    "elevation2_gradient": "data/raster/elevation2_grad.tif",
    "elevation2_laplacian": "data/raster/elevation2_lapl.tif",
}

# Point measurements; the columns `thickness` and `glacier` plus the point
# geometry are read below.
gpr = geopandas.read_file("data/vector/gpr_measurements.geojson")

# Coordinates of every measurement, extracted once and reused for each raster.
xs = gpr.geometry.x.tolist()
ys = gpr.geometry.y.tolist()

# Column name -> list of values, one entry per measurement. Feature columns
# are inserted first so the final frame keeps the order
# <features...>, glacier, H, x, y.
columns = {}

for feature_name, feature_path in tqdm(features.items()):
    # The `with` block closes the raster's file handle as soon as it has been
    # sampled instead of leaving it open until garbage collection.
    with rioxarray.open_rasterio(feature_path, masked=True) as feature:
        # Nearest-cell lookup per point. `.values[0]` takes the first entry of
        # the remaining axis (presumably the band axis; the rasters are
        # assumed to be single-band - TODO confirm).
        columns[feature_name] = [
            feature.sel(x=x, y=y, method="nearest").values[0]
            for x, y in zip(xs, ys)
        ]

# Per-measurement attributes taken directly from the vector file.
columns["glacier"] = gpr["glacier"].tolist()
columns["H"] = gpr["thickness"].tolist()
columns["x"] = xs
columns["y"] = ys

df = pandas.DataFrame(columns)

100%|██████████| 13/13 [00:32<00:00,  2.52s/it]


In [3]:
# Remove rows with any missing value first, then rows that are exact repeats.
df = df.dropna().drop_duplicates()

In [4]:
# Difference between the two sampled elevation columns (elevation2 minus elevation1).
df["elevation_diff"] = df["elevation2"].sub(df["elevation1"])

In [5]:
# Show the assembled table as the cell's output.
df

Unnamed: 0,H_modelled,H_modelled_grad,H_modelled_lapl,v_modelled,v_modelled_grad,v_modelled_lapl,boundary_proximity,elevation1,elevation1_gradient,elevation1_laplacian,elevation2,elevation2_gradient,elevation2_laplacian,glacier,H,x,y,elevation_diff
0,38.277653,4.356576,-0.562798,11.672775,0.192457,-0.289883,151.052185,517.379822,0.171137,-0.000388,514.024597,0.180128,0.003331,scottturnerbreen,21.336162,520487.116,8669111.792,-3.355225
1,36.014076,4.095859,0.412361,11.460643,0.127978,-0.136279,151.618088,520.799133,0.167109,0.002855,517.727966,0.170820,0.003537,scottturnerbreen,21.388340,520470.475,8669085.661,-3.071167
3,36.641193,3.933838,0.093573,11.651606,0.268867,0.008652,147.775909,527.319763,0.155607,-0.002797,524.813904,0.191493,-0.001446,scottturnerbreen,20.657822,520455.613,8669055.737,-2.505859
4,33.846889,4.252011,0.244328,12.175056,0.275052,0.006756,132.105942,529.596985,0.154682,0.001229,527.357422,0.172028,0.000727,scottturnerbreen,23.527357,520449.007,8669040.201,-2.239563
5,36.288673,4.260145,0.309946,12.629606,0.314354,-0.054508,122.649940,533.759094,0.179442,-0.000037,531.322937,0.219994,0.002934,scottturnerbreen,24.935737,520441.130,8669021.680,-2.436157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3166,106.063553,1.667703,0.005013,5.515220,0.349081,-0.000264,408.821991,358.020844,0.080284,-0.002220,355.400879,0.090083,-0.000179,jinnbreen,170.383162,557151.019,8676077.366,-2.619965
3167,106.063553,1.667703,0.005013,5.515220,0.349081,-0.000264,408.821991,358.020844,0.080284,-0.002220,355.400879,0.090083,-0.000179,jinnbreen,170.226825,557150.832,8676077.476,-2.619965
3168,106.063553,1.667703,0.005013,5.515220,0.349081,-0.000264,408.821991,358.020844,0.080284,-0.002220,355.400879,0.090083,-0.000179,jinnbreen,170.070489,557150.682,8676077.541,-2.619965
3169,106.063553,1.667703,0.005013,5.515220,0.349081,-0.000264,408.821991,358.020844,0.080284,-0.002220,355.400879,0.090083,-0.000179,jinnbreen,170.070489,557150.527,8676077.590,-2.619965


In [6]:
# Save the assembled table as CSV.
dataset_path = "data/dataset.csv"
df.to_csv(dataset_path)

In [None]:
"""
EXERCISE: Perform exploratory data analysis
"""