### Notebook to process Coastal Erosion Typology
from geopackage to parquet

In [1]:
# Load software
import os
import pathlib
import sys
import json
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
from dotenv import load_dotenv
import math
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import rioxarray as rio

# Import custom functionality
from coclicodata.drive_config import p_drive

# Define (local and) remote drives
coclico_data_dir = p_drive.joinpath("11207608-coclico", "FULLTRACK_DATA")


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Set path to geopackage
ds_dir = coclico_data_dir.joinpath('WP4','data','Erosion database')
ds_file = ds_dir.joinpath('CoasTER_database.gpkg')

# Load data
gpkg = gpd.read_file(ds_file) # takes a while! Order of 15 minutes

Cannot find header.dxf (GDAL_DATA is not defined)


In [3]:
gpkg

Unnamed: 0,source,country,covered,seg_id,seg_length,associated_floodplain,local_floodplain,remote_floodplain_1,remote_floodplain_2,onshore_structure,...,geomorphological_class,barrier,primary_sediment_type,secondary_sediment_type,historical_shoreline_change_regime,corine_code_18,corine_code_simplified,Notes,Local_floodplain_area_km2,geometry
0,EUDEM,CY,Y,CY000001,115,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,"MULTILINESTRING ((6407886.060 1602536.060, 640..."
1,EUDEM,CY,Y,CY000002,220,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,"MULTILINESTRING ((6407987.988 1602487.988, 640..."
2,EUDEM,CY,Y,CY000003,70,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,"MULTILINESTRING ((6408162.012 1602362.012, 640..."
3,EUDEM,CY,Y,CY000004,370,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,"MULTILINESTRING ((6408224.390 1602332.593, 640..."
4,EUDEM,CY,Y,CY000005,26,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,"MULTILINESTRING ((6408512.060 1602111.987, 640..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2092306,NGA,UK,N,UK200706,1,,,,,,...,Not classified,,,,,,,,,"MULTILINESTRING ((2935533.426 1591455.880, 293..."
2092307,NGA,UK,N,UK200707,1,,,,,,...,Not classified,,,,,,,,,"MULTILINESTRING ((2935532.825 1591455.521, 293..."
2092308,NGA,UK,N,UK200708,1,,,,,,...,Not classified,,,,,,,,,"MULTILINESTRING ((2935531.543 1591454.966, 293..."
2092309,NGA,UK,N,UK200709,1,,,,,,...,Not classified,,,,,,,,,"MULTILINESTRING ((2935532.192 1591455.218, 293..."


In [4]:
# Write data as parquet file
parquet_file = str(ds_file).replace('.gpkg','.parquet')
gpkg.to_parquet(parquet_file)

In [5]:
# Test new parquet file
test_parquet = pd.read_parquet(parquet_file)
test_parquet

Unnamed: 0,source,country,covered,seg_id,seg_length,associated_floodplain,local_floodplain,remote_floodplain_1,remote_floodplain_2,onshore_structure,...,geomorphological_class,barrier,primary_sediment_type,secondary_sediment_type,historical_shoreline_change_regime,corine_code_18,corine_code_simplified,Notes,Local_floodplain_area_km2,geometry
0,EUDEM,CY,Y,CY000001,115,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
1,EUDEM,CY,Y,CY000002,220,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
2,EUDEM,CY,Y,CY000003,70,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
3,EUDEM,CY,Y,CY000004,370,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
4,EUDEM,CY,Y,CY000005,26,N,0,0,0,N,...,Beach,,Sand/Gravel,,Ero,212,3,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2092306,NGA,UK,N,UK200706,1,,,,,,...,Not classified,,,,,,,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
2092307,NGA,UK,N,UK200707,1,,,,,,...,Not classified,,,,,,,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
2092308,NGA,UK,N,UK200708,1,,,,,,...,Not classified,,,,,,,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
2092309,NGA,UK,N,UK200709,1,,,,,,...,Not classified,,,,,,,,,b'\x01\x05\x00\x00\x00\x01\x00\x00\x00\x01\x02...
