## Data explorer

In [None]:
import os.path
import pathlib

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import shapely
import xarray as xr

# geojson files



In [None]:
# Folder holding the region shape files; the relative path is resolved
# against the current working directory.
path = os.path.abspath("../../resources/data/rc")
p = pathlib.Path(path)
geojson_files = p.glob("*.geojson")

# Read every GeoJSON file and tag its rows with the file they came from,
# so the source file can later be used as a colour key.
data_frames = [gpd.read_file(f).assign(src_file=f.name) for f in geojson_files]
gdf_shapes = pd.concat(data_frames)

In [None]:
from folium import Map

# total_bounds is ordered (minx, miny, maxx, maxy)
bounds = gdf_shapes.total_bounds
min_lon, min_lat, max_lon, max_lat = bounds

# folium takes [lat, lon]; the latitude is shifted by -10 degrees
# (presumably to frame the shapes better at this zoom level - TODO confirm).
center = [(min_lat + max_lat) / 2 - 10, (min_lon + max_lon) / 2]

# OpenStreetMap base layer, centred on the shapes, coarse zoom
aoi_map = Map(tiles="OpenStreetMap", location=center, zoom_start=4)
gdf_shapes.explore(m=aoi_map, column="src_file")

Socioeconomic Data and Applications Center (SEDAC) population density


In [None]:
# rioxarray provides `open_rasterio` and the `.rio` accessor used in later cells
import rioxarray

# Path to the "30_sec" population density GeoTIFF in the user's downloads folder.
# NOTE(review): `target` is reassigned in a later cell before it is read.
target = os.path.expanduser(
    "~/downloads/gpw-v4-population-density-rev11_2020_30_sec_tif/gpw_v4_population_density_rev11_2020_30_sec.tif"
)

In [None]:
def read_sedac_pop_density(
    path: os.PathLike, clip_shape: gpd.GeoSeries = None, crs=4326, plot=False
) -> gpd.GeoDataFrame:
    """Read a SEDAC population density raster into a point GeoDataFrame.

    The raster is opened in chunks with rioxarray, optionally clipped to
    ``clip_shape``, cells that are not strictly positive are masked, and the
    result is flattened so that every raster cell becomes a point at its
    x/y coordinate.

    Args:
        path (os.PathLike): location of the raster file to open.
        clip_shape (gpd.GeoSeries, optional): shapes used to clip the raster;
            only its ``geometry`` attribute is read. When None the raster is
            not clipped. Defaults to None.
        crs (int, optional): CRS assigned to the returned GeoDataFrame.
            Defaults to 4326.
        plot (bool, optional): plot the masked raster before it is converted.
            Defaults to False.

    Returns:
        gpd.GeoDataFrame: one row per raster cell with a ``pop_density``
            column (NaN where the cell was masked) and point geometry.
    """
    ds = rioxarray.open_rasterio(path, chunks=60, default_name="pop_density")
    ds = ds.rename("pop_density")

    # Clipping is optional: skip it entirely when no shape was supplied
    if clip_shape is not None:
        print("cliping")
        ds = ds.rio.clip(clip_shape.geometry)
        print("clipped")

    # Mask every cell whose value is not strictly positive
    ds = ds.where(ds > 0)

    if plot:
        ds.plot()
        plt.show()

    # Flatten the raster: the coordinates become regular columns
    df = ds.to_dataframe("pop_density").reset_index()

    # Convert the DataFrame to a GeoDataFrame
    return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs=crs)


# Read the "2pt5_min" raster and clip it to the loaded shapes
target = os.path.expanduser(
    "~/downloads/gpw-v4-population-density-rev11_2020_2pt5_min_tif/gpw_v4_population_density_rev11_2020_2pt5_min.tif"
)
pop_china = read_sedac_pop_density(target, gdf_shapes)

In [None]:
# Display the clipped population density points
pop_china

In [None]:
# Path to the "1_deg" population density GeoTIFF
target = os.path.expanduser(
    "~/downloads/gpw-v4-population-density-rev11_2020_1_deg_tif/gpw_v4_population_density_rev11_2020_1_deg.tif"
)

# Open the raster in chunks and name the variable in one chained expression
ds = rioxarray.open_rasterio(target, chunks=20, default_name="pop_density").rename(
    "pop_density"
)

# Keep only the raster cells covered by the loaded shapes
clipped_ds = ds.rio.clip(gdf_shapes.geometry)

In [None]:
from matplotlib.colors import LogNorm

# Logarithmic colour scale between 0.1 and 10000 for the clipped raster
norm = LogNorm(vmin=0.1, vmax=10000)
clipped_ds.plot(norm=norm)

In [None]:
# Logarithmic colour scale from 1 up to the largest density in the point data
norm = LogNorm(vmin=1, vmax=pop_china.pop_density.max())
pop_china.plot(column="pop_density", legend=True, norm=norm)

In [None]:
# Interactive map of a random 0.5 % sample of the points
pop_china.sample(frac=0.005).explore(column="pop_density", cmap="viridis")

# CFSR population

In [None]:
# CFSR grid file from a downloaded copy of the original PyPSA-China data
cfsr_or = xr.open_dataarray(
    "~/downloads/13987282/PyPSA-China-main/PyPSA-China-original/data/population/CFSR_grid.nc"
)

In [None]:
# CFSR grid file from this repository's resources (path relative to the
# current working directory), flattened into a table
path = os.path.abspath("../../resources/data/population/CFSR_grid.nc")
cfsr = xr.open_dataset(path)

pop_ww_df = cfsr.to_dataframe()
pop_ww_df

In [None]:
# Read the table stored under the "population_gridcell_map" key (read-only)
with pd.HDFStore(
    "~/downloads/13987282/PyPSA-China-main/PyPSA-China-original/data/population/population_gridcell_map.h5",
    mode="r",
) as store:
    pop_density = store["population_gridcell_map"]
pop_density

In [None]:
def convert_to_gdf_old(df):
    """Turn a frame with ``x``/``y`` data into a point GeoDataFrame (legacy).

    The frame passed in is modified: its index is reset in place and a
    ``coordinates`` column holding one point per row is added.
    """
    df.reset_index(inplace=True)
    xy_pairs = list(zip(df.x, df.y))
    df["coordinates"] = xy_pairs
    # Build one shapely point per (x, y) pair
    df["coordinates"] = df["coordinates"].map(shapely.Point)
    return gpd.GeoDataFrame(df, geometry="coordinates", crs=4326)


def convert_to_gdf(df, var_name="__xarray_dataarray_variable__"):
    """Build a point GeoDataFrame holding a single value column.

    The index of ``df`` is reset in place (the caller's frame is modified) so
    that ``x`` and ``y`` are available as columns. Only the ``var_name``
    column is carried over; the geometry is one point per row at (x, y) in
    EPSG:4326.
    """
    df.reset_index(inplace=True)
    values = df[var_name]
    point_geometry = gpd.points_from_xy(df.x, df.y)
    return gpd.GeoDataFrame(values, geometry=point_geometry, crs=4326)


# Convert the CFSR grid table to points and give the default xarray
# variable column a readable name
pop_ww = convert_to_gdf(pop_ww_df).rename(
    columns={"__xarray_dataarray_variable__": "pop_density"}
)

In [None]:
# Display the CFSR dataset
cfsr

In [None]:
# Display the CFSR population points
pop_ww

In [None]:
import folium

# Map restricted to the bounding box stored in `bounds`
# (indices 0..3 are used as min_lon, min_lat, max_lon, max_lat)
m = folium.Map(
    min_lat=bounds[1],
    min_lon=bounds[0],
    max_lat=bounds[3],
    max_lon=bounds[2],
    max_bounds=True,
    prefer_canvas=True,
)

In [None]:
# Display the bounding box of the loaded shapes
bounds

In [None]:
# Define the location to start the map at: the centre of the bounding box.
# `bounds` is indexed as (min_lon, min_lat, max_lon, max_lat); folium takes
# (lat, lon).
location = ((bounds[1] + bounds[3]) / 2, (bounds[0] + bounds[2]) / 2)

# Initializing the map
# NOTE(review): `map` shadows the builtin of the same name; the name is kept
# so that other cells referring to it keep working.
map = folium.Map(location=location)
map

In [None]:
# pop_ww.explore()

In [None]:
import os.path

import atlite

In [None]:
# Cutout file in the user's local PyPSA-China checkout
p = os.path.expanduser(
    "~/documents/Documents/PyPSA-China-main/resources/cutouts/China-2020.nc"
)
# Fail early with a clear message instead of handing a missing path to atlite
if not os.path.exists(p):
    raise FileNotFoundError(f"Cutout file not found: {p}")
cutout = atlite.Cutout(p)

In [None]:
# Select the cutout data at a single timestamp
snapshot = cutout.data.sel(time="2020-01-01T23:00:00")

In [None]:
# Draw the snapshot's temperature field and overlay the shape outlines
# (red edges, no fill) on the same axes
fig, ax = plt.subplots(1, 1)
snapshot.temperature.plot(ax=ax)
gdf_shapes.plot(ax=ax, edgecolor="red", facecolor="none")

# Explore the HDF5 population map

In [None]:
# Derived population/grid-cell map; resolved relative to the current user's
# home directory rather than one specific account.
prov_pop_path = os.path.expanduser(
    "~/documents/Documents/PyPSA-China-main/resources/derived_data/population/population_gridcell_map.h5"
)
with pd.HDFStore(prov_pop_path, mode="r") as store:
    # List the keys available in the store before reading the table
    print(store.keys())
    pop_province_new = store["population_gridcell_map"]
# Column-wise mean of the table
pop_province_new.mean()

In [None]:
# Strictly positive entries of the Anhui column in the derived map
anhui_new = pop_province_new["Anhui"]
anhui_new[anhui_new > 0]

In [None]:
# Population/grid-cell map from the original PyPSA-China data; resolved
# relative to the current user's home directory rather than one specific
# account.
prov_pop_path = os.path.expanduser(
    "~/documents/Documents/PyPSA-China-original/data/population/population_gridcell_map.h5"
)
with pd.HDFStore(prov_pop_path, mode="r") as store:
    # List the keys available in the store before reading the table
    print(store.keys())
    pop_province = store["population_gridcell_map"]

In [None]:
import scipy as sp
import scipy.sparse  # load the submodule explicitly; `import scipy` alone may not expose it

# Sparse matrix with one row per column of `pop_province` (the provinces)
pop_matrix = sp.sparse.csr_matrix(pop_province.T)

# Labels for the matrix rows. NOTE: this is the columns Index of
# `pop_province` itself, so naming it also names the frame's columns.
index = pop_province.columns
index.name = "provinces"

In [None]:
# Temperature aggregated from the cutout grid to the entries of `index`
# using `pop_matrix` (presumably population-weighted - TODO confirm)
temp = cutout.temperature(matrix=pop_matrix, index=index)
temp["time"] = temp["time"].values + pd.Timedelta(8, unit="h")  # +8 h: UTC -> UTC+8 (assumes cutout times are UTC - TODO confirm)

In [None]:
# Display the aggregated temperature
temp

In [None]:
# Rows that have at least one truthy (non-zero) entry in any column
pop_province.loc[pop_province.any(axis=1)]

In [None]:
# Strictly positive entries of the Anhui column
anhui = pop_province["Anhui"]
anhui[anhui > 0]