# Import Required Libraries
Import matplotlib and the pollution_extraction classes used in this notebook (reader, spatial extractor, exporter, and visualizer).

In [None]:
import matplotlib.pyplot as plt
from pollution_extraction.core import DataExporter
from pollution_extraction.core.data_reader import PollutionDataReader
from pollution_extraction.core.data_visualizer import DataVisualizer
from pollution_extraction.core.spatial_extractor import SpatialExtractor

# Initialize PollutionDataReader
Initialize PollutionDataReader with the NetCDF file path and pollution type.

In [None]:
# Hard-coded absolute path to the NetCDF input file; adjust when running outside this workspace.
file_path = "/workspaces/dss-pollution-extraction/PM2p5_downscaled_daily_lr_2006_01.nc"
# Open the file through the project reader.
# NOTE(review): "pm25" presumably selects PM2.5-specific handling — confirm against PollutionDataReader.
reader = PollutionDataReader(file_path, pollution_type="pm25")

# Inspect Dataset Information
Print the data variable's shape, the reader's basic dataset information, the time range, and the spatial bounds.

In [None]:
# Grab the main data variable from the reader and report its dimensions.
data = reader.data_variable
print("\nData variable shape:", data.shape)

# Dump every entry of the reader's basic-info mapping, one entry per line.
info = reader.get_basic_info()
print("\nBasic Info:")
for key, value in info.items():
    print(f"  {key}: {value}")

# Temporal and spatial extent as reported by the reader.
print("\nTime range:", reader.time_range)
print("Spatial bounds:", reader.spatial_bounds)

# Subset Data by Time
Select a subset of data for the first 7 days and print its shape.

In [None]:
# Positional (index-based) selection of the first seven entries along "time".
first_week = slice(0, 7)
subset = data.isel(time=first_week)
print("\nSubset shape (first 7 days):", subset.shape)

# Visualize First Time Slice
Plot the first time slice with xarray's plotting interface, falling back to matplotlib's `imshow` if that fails, using a fixed colour range (vmin=0, vmax=40).

In [None]:
# Take the first time step and floor negative values at zero before
# summarising and plotting it.
first_slice = data.isel(time=0).clip(min=0)
print("\nFirst slice stats:")
print("  min:", float(first_slice.min().values))
print("  max:", float(first_slice.max().values))
print("  mean:", float(first_slice.mean().values))
try:
    # Preferred path: xarray's plotting wrapper with a fixed 0-40 colour range.
    first_slice.plot.imshow(vmin=0, vmax=40, cmap="Reds", origin="upper")
    plt.title("First Time Slice (time=0)")
    plt.show()
except Exception as exc:
    # Best-effort fallback, kept deliberately broad, but the failure is now
    # reported instead of being swallowed silently.
    print(
        f"\n[plot] xarray plotting failed ({type(exc).__name__}: {exc}); "
        "falling back to plt.imshow."
    )
    # Drop anything the failed attempt may have drawn so the fallback starts
    # from an empty figure rather than layering on top of partial output.
    plt.clf()
    plt.imshow(first_slice.values, origin="upper", vmin=0, vmax=40, cmap="Reds")
    plt.title("First Time Slice (time=0) [imshow fallback]")
    plt.colorbar()
    plt.show()

# Compute Monthly Average
Calculate the monthly average (mean over time) and visualize it.

In [None]:
# The underlying dataset and the name of its pollution variable.
dataset = reader.dataset
var_name = reader.variable_info["var_name"]

# Average over the "time" dimension, then floor negative values at zero.
pollutant = dataset[var_name]
time_avg = pollutant.mean(dim="time").clip(min=0)
print("\nTime-averaged (monthly mean) shape:", time_avg.shape)

# Render the averaged field with a fixed 0-40 colour range.
mean_plot_options = {
    "vmin": 0,
    "vmax": 40,
    "cmap": "RdYlBu_r",
    "origin": "upper",
}
time_avg.plot.imshow(**mean_plot_options)
plt.title("Monthly Mean (Time-Averaged) PM2.5")
plt.show()

# Extract Spatial Point
Extract the value at the center of the spatial domain using SpatialExtractor.

In [None]:
spatial_ext = SpatialExtractor(dataset, var_name)

# Midpoint of the bounding box reported by the reader: min + half the extent.
bounds = reader.spatial_bounds
x_span = bounds["x_max"] - bounds["x_min"]
y_span = bounds["y_max"] - bounds["y_min"]
x_center = float(bounds["x_min"] + x_span / 2)
y_center = float(bounds["y_min"] + y_span / 2)

try:
    # Single-point query at the domain centre using nearest-neighbour lookup.
    center_points = [(x_center, y_center)]
    point_result = spatial_ext.extract_points(center_points, method="nearest")
    header = (
        f"\nExtracted value at domain center (x={x_center:.1f}, y={y_center:.1f}):\n"
    )
    print(header, point_result)
except KeyError as err:
    # Only KeyError is handled here; any other failure propagates.
    print(
        f"\n[SpatialExtractor] Extraction failed: {err}\n"
        "Check if the coordinates are within the valid range and match the dataset's CRS."
    )

# Export Data to GeoTIFF
Demonstrate exporting the time-averaged map to GeoTIFF using DataExporter.

In [None]:
exporter = DataExporter(dataset, var_name)

# Destination file for the exported raster (hard-coded workspace path).
geotiff_path = "/workspaces/dss-pollution-extraction/monthly_mean_pm25.tif"

# Select every time step and request the "mean" aggregation.
# NOTE(review): presumably this collapses the time axis into one band — confirm in DataExporter.to_geotiff.
export_options = {
    "time_index": slice(None),
    "aggregation_method": "mean",
}
exporter.to_geotiff(geotiff_path, **export_options)
print("\n[DataExporter] Example: exporter.to_geotiff() can export data.")

# Custom Visualization
Use DataVisualizer to create a custom plot for a specific time index.

In [None]:
visualizer = DataVisualizer(dataset, var_name, reader.pollution_type)

# Map of the first time index with the same fixed 0-40 colour range used above.
map_options = {
    "time_index": 0,
    "vmin": 0,
    "vmax": 40,
    "title": "PM2.5 Day 1 (Visualizer)",
}
fig = visualizer.plot_spatial_map(**map_options)
plt.show()

# Last step of the walkthrough: close the reader.
reader.close()