In [27]:
import hvplot.pandas
import hvplot.xarray
import xarray as xr
import geopandas as gpd
import pandas as pd
import numpy as np
from pathlib import Path
import shapely

### WQMN

In [46]:
# Load every "Total Suspended Solids" CSV found under the scraper output folder,
# tag the rows as WQMN / near-surface, and turn them into a GeoDataFrame of points.
# Paths are sorted because rglob does not guarantee an order; sorting keeps the
# row order of the combined table the same on every run.
paths_data = sorted(Path('../mrc_webscrapper/outputs/csv/Total Suspended Solids').rglob('*.csv'))
if not paths_data:
    raise FileNotFoundError(
        'No CSV files found under ../mrc_webscrapper/outputs/csv/Total Suspended Solids'
    )

frames = []
for path in paths_data:
    # station_code is read as a string rather than letting pandas infer a numeric dtype.
    df_temp = pd.read_csv(path, dtype={'station_code': 'str'})
    df_temp['date'] = pd.to_datetime(df_temp['date'])
    # Median difference between the dates of consecutive rows in this file,
    # stored on every row of the file.
    # NOTE(review): this is only a sampling interval if rows are in chronological order — confirm.
    df_temp['med_frq'] = np.median(np.diff(df_temp.date))
    frames.append(df_temp)

# Concatenate once instead of growing the frame inside the loop.
df_data = pd.concat(frames)

df_data_wqmn = df_data.copy()
df_data_wqmn['source'] = 'WQMN'
df_data_wqmn['type'] = 'near-surface'

# Build the point geometries from the lon/lat columns in a single vectorized call.
# reset_index() keeps the old per-file row index as an 'index' column.
gdf_data_wqmn = gpd.GeoDataFrame(
    df_data_wqmn,
    geometry=gpd.points_from_xy(df_data_wqmn.lon, df_data_wqmn.lat),
    crs='epsg:4326',
).reset_index()

### Hydromet-Mekong & DSMP

In [47]:
# Load every "Sediment Concentration" CSV found under the scraper output folder,
# tag the rows as depth-integrated, and split them into DSMP and Hydromet
# GeoDataFrames based on the text of the `identifier` column.
# Paths are sorted because rglob does not guarantee an order; sorting keeps the
# row order of the combined table the same on every run.
paths_data = sorted(Path('../mrc_webscrapper/outputs/csv/Sediment Concentration').rglob('*.csv'))
if not paths_data:
    raise FileNotFoundError(
        'No CSV files found under ../mrc_webscrapper/outputs/csv/Sediment Concentration'
    )

frames = []
for path in paths_data:
    # station_code is read as a string rather than letting pandas infer a numeric dtype.
    df_temp = pd.read_csv(path, dtype={'station_code': 'str'})
    df_temp['date'] = pd.to_datetime(df_temp['date'])
    # Median difference between the dates of consecutive rows in this file,
    # stored on every row of the file.
    # NOTE(review): this is only a sampling interval if rows are in chronological order — confirm.
    df_temp['med_frq'] = np.median(np.diff(df_temp.date))
    frames.append(df_temp)

# Concatenate once instead of growing the frame inside the loop.
df_data = pd.concat(frames)
df_data['type'] = 'depth-integrated'

# Rows whose identifier contains the literal text 'Measured DSMP' go to DSMP,
# everything else to Hydromet. na=False keeps the mask strictly boolean when an
# identifier is missing; such rows fall into the Hydromet group.
is_dsmp = df_data.identifier.str.contains('Measured DSMP', regex=False, na=False)

df_data_dsmp = df_data.loc[is_dsmp].copy()
df_data_dsmp['source'] = 'DSMP'
# reset_index() keeps the old per-file row index as an 'index' column.
gdf_data_dsmp = gpd.GeoDataFrame(
    df_data_dsmp,
    geometry=gpd.points_from_xy(df_data_dsmp.lon, df_data_dsmp.lat),
    crs='epsg:4326',
).reset_index()

df_data_hydrom = df_data.loc[~is_dsmp].copy()
df_data_hydrom['source'] = 'Hydromet'
gdf_data_hydrom = gpd.GeoDataFrame(
    df_data_hydrom,
    geometry=gpd.points_from_xy(df_data_hydrom.lon, df_data_hydrom.lat),
    crs='epsg:4326',
).reset_index()

### Map

In [51]:
# Reduce each source to the first row per identifier, then draw the three
# sources as labelled, semi-transparent layers over CartoLight tiles and
# overlay them into a single map (the cell's displayed output).
per_id_dsmp = gdf_data_dsmp.groupby('identifier').first()
per_id_wqmn = gdf_data_wqmn.groupby('identifier').first()
per_id_hydrom = gdf_data_hydrom.groupby('identifier').first()

(
    per_id_dsmp.hvplot(geo=True, tiles='CartoLight', label='DSMP', alpha=0.5)
    * per_id_wqmn.hvplot(geo=True, tiles='CartoLight', label='WQMN', alpha=0.5)
    * per_id_hydrom.hvplot(geo=True, tiles='CartoLight', label='Hydromet', alpha=0.5)
)

In [52]:
# Stack the DSMP, WQMN and Hydromet tables (in that order) into one table
# and write it to input/insitu_data.csv.
gdf_data = pd.concat([gdf_data_dsmp, gdf_data_wqmn, gdf_data_hydrom])

# Create the output folder if it is missing so to_csv does not fail on a fresh checkout.
path_out = Path('input/insitu_data.csv')
path_out.parent.mkdir(parents=True, exist_ok=True)
gdf_data.to_csv(path_out)