# Digital Elevation Model

## 1. Merge DEM from Individual Departments

Run this section to merge the DEM files of the individual departments into one large raster covering all of France.


In [1]:
import glob
import sys
from pyprojroot import here

sys.path.insert(0, "../../src")
from run_mp import *
from utilities import *

In [2]:
# Collect every ASCII-grid tile (*.asc) of the digital elevation model and
# report how many were found before showing the full list of paths.
dem_tile_pattern = "../../data/raw/dem_france/all_files/*.asc"
all_files = glob.glob(dem_tile_pattern)
print("Number of files: ", len(all_files))
display(all_files)

Number of files:  0


[]

In [6]:
# NOTE: This cell is intentionally disabled - every line is commented out, so
# running it does nothing. It is kept only as a reference for previewing a
# single raw tile as a table; `skiprows=6` skips the first six lines of the
# file (presumably the grid header - confirm before re-enabling).
# NOTE(review): `delim_whitespace=True` is deprecated in pandas >= 2.2; use
# `sep=r"\s+"` instead if this snippet is ever re-enabled.
# # See one file as example
# pd.read_csv(
#     "../../data/raw/dem_france/all_files/BDALTIV2_25M_FXX_0100_6800_MNT_LAMB93_IGN69.asc",
#     skiprows=6,
#     header=None,
#     delim_whitespace=True,
# )

# # data / raw / dem_france / all_files / BDALTIV2_25M_FXX_0075_6850_MNT_LAMB93_IGN69.asc.aux.xml

In [14]:
# Combine all files into one
# ! Cell takes ca. 1 min to run
#
# Produces two variables used by the next cell:
#   mosaic   - array returned by rasterio's merge(); its shape[1] / shape[2]
#              are used below as the raster height / width
#   out_meta - metadata dict for writing the mosaic as a GeoTIFF
import glob
from contextlib import ExitStack

import rasterio
from rasterio.merge import merge
from rasterio.crs import CRS
from tqdm import tqdm

# List all .asc files in the directory. Sorting makes the input order of the
# mosaic (and the tile used as metadata template) independent of the order in
# which the filesystem happens to list the files.
asc_files = sorted(glob.glob("../../data/raw/dem_france/all_files/*.asc"))
if not asc_files:
    # Fail early with a clear message instead of an obscure error from merge()
    raise FileNotFoundError(
        "No .asc tiles found in ../../data/raw/dem_france/all_files/ - "
        "check the working directory and that the raw DEM tiles are in place."
    )

# Open every tile through an ExitStack so that all dataset handles are closed
# again as soon as the mosaic is built, even if opening or merging fails.
with ExitStack() as stack:
    # List for the data
    src_files_to_mosaic = [
        stack.enter_context(rasterio.open(file)) for file in tqdm(asc_files)
    ]

    # Merge function returns a single mosaic array and the transformation info
    mosaic, out_trans = merge(src_files_to_mosaic)

    # Copy the metadata of the last opened tile as a template; read it while
    # the dataset is still open.
    out_meta = src_files_to_mosaic[-1].meta.copy()

# Update the metadata so it describes the merged raster instead of one tile
out_meta.update(
    {
        "driver": "GTiff",
        "height": mosaic.shape[1],
        "width": mosaic.shape[2],
        "transform": out_trans,
        "crs": CRS.from_epsg(2154).to_string(),
    }
)

100%|██████████| 1017/1017 [00:00<00:00, 1216.20it/s]


In [15]:
# Write the merged mosaic to a tif file in the project's raw-data folder.
# The output layout (driver, size, transform, crs) is taken from `out_meta`.
merged_dem_path = here(
    "data/raw/dem_france/original_dem25_altitude_put_together_in_python.tif"
)
with rasterio.open(merged_dem_path, "w", **out_meta) as dest:
    dest.write(mosaic)

***

## 2. Produce derivatives in QGIS

The derivatives (slope, aspect, etc.) are extracted from the DEM at different scales with a batch process that I wrote; see that batch process for details. Everything in this step is done within QGIS.

Original file: `"data/raw/dem_france/original_dem25_altitude_put_together_in_python.tif"`


***

## 3. Extract data from raster files

In [3]:
import glob
import sys
from pyprojroot import here

sys.path.insert(0, "../../src")
from run_mp import *
from utilities import *

In [4]:
# Get files
from pathlib import Path

# NOTE(review): machine-specific absolute path to an external drive; the cell
# can only run where this volume is mounted. Adjust the pattern if needed.
DERIVATIVES_PATTERN = "/Volumes/SAMSUNG 1TB/qgis/france_dem/derivatives/*1000*.tif"

files = glob.glob(DERIVATIVES_PATTERN)
if not files:
    # Without this guard the extraction would be started with zero rasters
    raise FileNotFoundError(
        f"No raster files match {DERIVATIVES_PATTERN!r} - is the drive mounted?"
    )

# File name without directory and without the .tif extension
filenames = [Path(f).stem for f in files]
df_files = pd.DataFrame({"filename": filenames, "path": files})

# Request as many groups as there are files (presumably one raster per group
# and therefore per worker task - verify against the helper's implementation)
list_files = split_df_into_list_of_group_or_ns(df_files, group_variable=len(files))

# Get buffered coordinates
buffer = gpd.read_file("../../data/final/nfi/700m_buffer_epsg2154.geojson")

# Run extraction
from utilities import extract_zonal_mean

# force_run=True presumably re-computes results even if they already exist in
# save_dir - TODO confirm in run_mp
run_mp(
    extract_zonal_mean,
    list_files,
    buffer=buffer,
    num_cores=10,
    force_run=True,
    save_dir="../../data/france_dem/zonal_statistics",
)

 - Splitting df into 7 random groups


100%|██████████| 7/7 [03:02<00:00, 26.13s/it] 


[None, None, None, None, None, None, None]

## 4. Clean raster extraction

In [6]:
import glob
import sys
from pyprojroot import here

sys.path.insert(0, "../../src")
from run_mp import *
from utilities import *

In [14]:
# Get all extracted files. Sorting makes the choice of the base file and the
# resulting column order independent of the filesystem's listing order.
files = sorted(glob.glob("../../data/france_dem/zonal_statistics/*.feather"))
if not files:
    # Fail with a clear message instead of an IndexError on files[0]
    raise FileNotFoundError(
        "No .feather files found in ../../data/france_dem/zonal_statistics/ - "
        "run the raster extraction in section 3 first."
    )

# Get structure of df: only the two key columns of the first file are needed
df_all = pd.read_feather(files[0], columns=["idp", "first_year"])

# Loop through all files and attach them by idp and first year.
# This is a left join, so only the (idp, first_year) pairs present in the first
# file are kept.
# NOTE(review): duplicated keys in any file would multiply rows - confirm that
# (idp, first_year) is unique per file.
for file in files:
    df = pd.read_feather(file)
    df_all = pd.merge(df_all, df, on=["idp", "first_year"], how="left")

df_all

Unnamed: 0,idp,first_year,dem1000_tri,dem1000_altitude,dem1000_roughness,dem1000_hillshade,dem1000_tpi,dem1000_aspect,dem1000_slope
0,500008,2010,48.536648,316.173309,50.200104,183.0,-9.577301,285.175964,0.735294
1,500013,2010,109.321777,89.760727,87.774704,176.0,-10.202721,82.635757,2.123172
2,500098,2010,76.003555,68.483444,73.520874,177.5,-2.411343,74.252144,1.759407
3,500103,2010,40.304092,281.961853,42.023193,181.0,5.978333,32.121216,0.651242
4,500137,2010,67.384216,361.576782,62.357437,184.5,12.710907,317.075562,1.239872
...,...,...,...,...,...,...,...,...,...
51408,1354883,2018,286.061066,471.350403,295.174744,184.5,-43.461639,204.082870,3.385994
51409,1354893,2018,46.035267,67.526711,41.796936,179.5,6.790529,66.753860,0.943742
51410,1354907,2018,81.772125,656.976135,84.973206,182.0,14.507690,245.087234,1.369366
51411,1354911,2018,32.641544,181.012665,33.208565,182.5,3.500320,341.238007,0.669274


In [15]:
# Aspect is an angle in degrees (0 to 360), which is awkward for ML because
# both ends of the scale describe the same direction. Each aspect column is
# therefore replaced by its sine and cosine.
# https://stats.stackexchange.com/questions/218407/encoding-aspects-for-sine-and-cosine
# NOTE(review): if the extracted value is an arithmetic mean of degrees over
# the buffer (the helper is called extract_zonal_mean), that mean ignores the
# 0/360 wrap-around - confirm upstream.
df_fixed = df_all.copy()

# All columns carrying aspect information
aspect_vars = [col for col in df_fixed.columns if "aspect" in col]
print(aspect_vars)
wrangled_vars = 0  # number of aspect columns that were converted to sin/cos

for col in aspect_vars:
    is_sd_column = "_sd" in col
    if not is_sd_column:
        # The part before the first underscore identifies the DEM resolution
        prefix = col.split("_")[0]

        angle_rad = np.deg2rad(df_fixed[col])
        df_fixed[f"{prefix}_aspect_sin"] = np.sin(angle_rad)
        df_fixed[f"{prefix}_aspect_cos"] = np.cos(angle_rad)
        wrangled_vars += 1

    # The original column is removed in both cases: "_sd" columns are dropped
    # without replacement, all others after being encoded above.
    df_fixed = df_fixed.drop(columns=[col])

# Report the shapes so the change in column count can be checked by eye
expected_change = wrangled_vars * 2 - len(aspect_vars)
print(f"Shape of df_fixed before aspect transformation:\t{df_all.shape}")
print(f"Shape of df_fixed after aspect transformation: \t{df_fixed.shape}")
print(f"Change in columns should be: {expected_change}")

['dem1000_aspect']
Shape of df_fixed before aspect transformation:	(51413, 9)
Shape of df_fixed after aspect transformation: 	(51413, 10)
Change in columns should be: 1


In [17]:
# Display the transformed frame to check the new *_aspect_sin / *_aspect_cos columns
df_fixed

Unnamed: 0,idp,first_year,dem1000_tri,dem1000_altitude,dem1000_roughness,dem1000_hillshade,dem1000_tpi,dem1000_slope,dem1000_aspect_sin,dem1000_aspect_cos
0,500008,2010,48.536648,316.173309,50.200104,183.0,-9.577301,0.735294,-0.965126,0.261784
1,500013,2010,109.321777,89.760727,87.774704,176.0,-10.202721,2.123172,0.991751,0.128177
2,500098,2010,76.003555,68.483444,73.520874,177.5,-2.411343,1.759407,0.962465,0.271404
3,500103,2010,40.304092,281.961853,42.023193,181.0,5.978333,0.651242,0.531712,0.846925
4,500137,2010,67.384216,361.576782,62.357437,184.5,12.710907,1.239872,-0.681033,0.732252
...,...,...,...,...,...,...,...,...,...,...
51408,1354883,2018,286.061066,471.350403,295.174744,184.5,-43.461639,3.385994,-0.408058,-0.912956
51409,1354893,2018,46.035267,67.526711,41.796936,179.5,6.790529,0.943742,0.918818,0.394682
51410,1354907,2018,81.772125,656.976135,84.973206,182.0,14.507690,1.369366,-0.906950,-0.421238
51411,1354911,2018,32.641544,181.012665,33.208565,182.5,3.500320,0.669274,-0.321638,0.946863


In [16]:
# Store the cleaned topography predictors as a feather file
topography_path = "../../data/final/predictor_datasets/topography.feather"
df_fixed.to_feather(topography_path)

***
