# Master notebook

## Imports

In [1]:
import xarray as xr
import glob
import datetime

import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import numpy as np

from google.cloud import storage
import gcsfs
import zarr

import xskillscore as xs
import pandas as pd

import os

import re
import skill_metrics as sm

import matplotlib.lines as mlines

## Pangu Processing

In [2]:
# Open the public WeatherBench2 Pangu forecast store anonymously (no GCP credentials needed).
PANGU_ZARR_URL = 'gs://weatherbench2/datasets/pangu_hres_init/2022_0012_0p25.zarr'

fs = gcsfs.GCSFileSystem(anon=True)
store = gcsfs.GCSMap(root=PANGU_ZARR_URL, gcs=fs, check=False)

# Opening the Zarr store through its consolidated metadata; the result is shown
# below as dask-backed (chunked) arrays.
pangu_import = xr.open_zarr(store, consolidated=True)

# Show the raw dataset (dimensions, chunking, available variables)
pangu_import

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.94 GiB 3.96 MiB Shape (730, 40, 721, 1440) (1, 1, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",730  1  1440  721  40,

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.94 GiB 3.96 MiB Shape (730, 40, 721, 1440) (1, 1, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",730  1  1440  721  40,

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.94 GiB 3.96 MiB Shape (730, 40, 721, 1440) (1, 1, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",730  1  1440  721  40,

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.94 GiB 3.96 MiB Shape (730, 40, 721, 1440) (1, 1, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",730  1  1440  721  40,

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.43 TiB 51.49 MiB Shape (730, 40, 13, 721, 1440) (1, 1, 13, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",40  730  1440  721  13,

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 112.94 GiB 3.96 MiB Shape (730, 40, 721, 1440) (1, 1, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",730  1  1440  721  40,

Unnamed: 0,Array,Chunk
Bytes,112.94 GiB,3.96 MiB
Shape,"(730, 40, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.43 TiB 51.49 MiB Shape (730, 40, 13, 721, 1440) (1, 1, 13, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",40  730  1440  721  13,

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.43 TiB 51.49 MiB Shape (730, 40, 13, 721, 1440) (1, 1, 13, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",40  730  1440  721  13,

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.43 TiB 51.49 MiB Shape (730, 40, 13, 721, 1440) (1, 1, 13, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",40  730  1440  721  13,

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.43 TiB 51.49 MiB Shape (730, 40, 13, 721, 1440) (1, 1, 13, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",40  730  1440  721  13,

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.43 TiB 51.49 MiB Shape (730, 40, 13, 721, 1440) (1, 1, 13, 721, 1440) Dask graph 29200 chunks in 2 graph layers Data type float32 numpy.ndarray",40  730  1440  721  13,

Unnamed: 0,Array,Chunk
Bytes,1.43 TiB,51.49 MiB
Shape,"(730, 40, 13, 721, 1440)","(1, 1, 13, 721, 1440)"
Dask graph,29200 chunks in 2 graph layers,29200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [3]:
# Surface fields kept for the comparison against SYNOP.
# The store also offers geopotential_at_surface, land_sea_mask, total_precipitation_6hr,
# toa_incident_solar_radiation (flagged "to fix") and the pressure-level fields
# (temperature, geopotential, u/v_component_of_wind, vertical_velocity,
# specific_humidity); none of those are used here.
pangu_surface_vars = [
    '2m_temperature',
    'mean_sea_level_pressure',
    '10m_v_component_of_wind',
    '10m_u_component_of_wind',
]

# Start from the raw import every time so the cell can be re-run safely,
# shorten the coordinate names and keep only the variables listed above.
pangu = pangu_import.rename({'latitude': 'lat', 'longitude': 'lon'})
pangu = pangu[pangu_surface_vars]

# Keep the 2022 initialisations and the first 16 lead-time steps.
# NOTE(review): "4 days ahead" assumes 6-hourly lead times — confirm against
# the prediction_timedelta values.
pangu = pangu.sel(time='2022').isel(prediction_timedelta=range(16))

# One forecast per day: keep only initialisations whose hour is 0
midnight_inits = pangu['time'].dt.hour == 0
pangu = pangu.sel(time=midnight_inits)

# Restrict to the dates covered by SYNOP (2022-01-12 onwards)
pangu = pangu.sel(time=slice('2022-01-12', '2022-12-31T00'))

# Unit conversions to match SYNOP: subtract 273.15 from temperature and divide
# pressure by 100 (presumably K -> degC and Pa -> hPa)
pangu['2m_temperature'] = pangu['2m_temperature'] - 273.15
pangu['mean_sea_level_pressure'] = pangu['mean_sea_level_pressure'] / 100

# Tag every variable with a "_pangu" suffix so it stays distinguishable after merging
pangu = pangu.rename_vars({name: name + '_pangu' for name in pangu_surface_vars})

# Show the subsetted dataset
pangu

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 21.91 GiB 3.96 MiB Shape (354, 16, 721, 1440) (1, 1, 721, 1440) Dask graph 5664 chunks in 6 graph layers Data type float32 numpy.ndarray",354  1  1440  721  16,

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 21.91 GiB 3.96 MiB Shape (354, 16, 721, 1440) (1, 1, 721, 1440) Dask graph 5664 chunks in 6 graph layers Data type float32 numpy.ndarray",354  1  1440  721  16,

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 5 graph layers,5664 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 21.91 GiB 3.96 MiB Shape (354, 16, 721, 1440) (1, 1, 721, 1440) Dask graph 5664 chunks in 5 graph layers Data type float32 numpy.ndarray",354  1  1440  721  16,

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 5 graph layers,5664 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 5 graph layers,5664 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 21.91 GiB 3.96 MiB Shape (354, 16, 721, 1440) (1, 1, 721, 1440) Dask graph 5664 chunks in 5 graph layers Data type float32 numpy.ndarray",354  1  1440  721  16,

Unnamed: 0,Array,Chunk
Bytes,21.91 GiB,3.96 MiB
Shape,"(354, 16, 721, 1440)","(1, 1, 721, 1440)"
Dask graph,5664 chunks in 5 graph layers,5664 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [13]:
# Preview the regional subset. The latitude bounds are given north-to-south
# (55.75 down to 50.5); the displayed result is a 22 x 22 grid.
region_lat = slice(55.75, 50.5)
region_lon = slice(2.25, 7.5)
pangu.sel(lat=region_lat, lon=region_lon)

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 7 graph layers,5664 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.46 MiB 1.89 kiB Shape (354, 16, 22, 22) (1, 1, 22, 22) Dask graph 5664 chunks in 7 graph layers Data type float32 numpy.ndarray",354  1  22  22  16,

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 7 graph layers,5664 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 7 graph layers,5664 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.46 MiB 1.89 kiB Shape (354, 16, 22, 22) (1, 1, 22, 22) Dask graph 5664 chunks in 7 graph layers Data type float32 numpy.ndarray",354  1  22  22  16,

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 7 graph layers,5664 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.46 MiB 1.89 kiB Shape (354, 16, 22, 22) (1, 1, 22, 22) Dask graph 5664 chunks in 6 graph layers Data type float32 numpy.ndarray",354  1  22  22  16,

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.46 MiB 1.89 kiB Shape (354, 16, 22, 22) (1, 1, 22, 22) Dask graph 5664 chunks in 6 graph layers Data type float32 numpy.ndarray",354  1  22  22  16,

Unnamed: 0,Array,Chunk
Bytes,10.46 MiB,1.89 kiB
Shape,"(354, 16, 22, 22)","(1, 1, 22, 22)"
Dask graph,5664 chunks in 6 graph layers,5664 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [15]:
# Cut out the regional box (lat 55.75..50.5, lon 2.25..7.5) and store it on disk
pangu_region = pangu.sel(lat=slice(55.75, 50.5), lon=slice(2.25, 7.5))
pangu_region.to_netcdf('pangu_2022.nc')

### Pangu loading and slicing

### Pangu interpolating to station locations

In [7]:
# Existing results file; its `station`, `lat` and `lon` variables supply the
# station locations used for the interpolation below.
results_path = 'results_2022_ws_gcshifted_meso_gc.nc'
results = xr.open_dataset(results_path)

In [29]:

# Interpolate the gridded Pangu forecasts to the station locations listed in
# `results`, derive 10 m wind speed, and write the result to
# 'pangu_stations_2022.nc'.
#
# The station-level product is kept in `pangu_interp`; the gridded `pangu`
# dataset is deliberately NOT overwritten. Rebinding `pangu` to the station data
# made this cell fail on a second run (no `lat`/`lon` dimensions left to
# interpolate over) and broke the regional-subset cells on re-run.

# Extract names, latitudes and longitudes for all stations
station_names = results['station'].values
latitudes = results['lat'].values
longitudes = results['lon'].values

# Combine them into a pandas DataFrame (reset_index keeps the positional
# 'index' column the original cell produced, without in-place mutation)
station_locations = pd.DataFrame(
    {'station': station_names, 'lat': latitudes, 'lon': longitudes}
).reset_index()

# Point-wise interpolation: lat and lon share the new "station" dimension, so
# the grid is sampled once per (lat, lon) pair instead of on their outer product.
pangu_interp = pangu.interp(
    lat=("station", station_locations['lat'].to_numpy()),
    lon=("station", station_locations['lon'].to_numpy()),
)

# Label the new dimension with the station identifiers from `results`
pangu_interp = pangu_interp.assign_coords(
    station=("station", station_locations['station'].values)
)

# Add a valid_time coordinate (initialisation time + lead time) for merging
pangu_interp = pangu_interp.assign_coords(
    valid_time=pangu_interp['time'] + pangu_interp['prediction_timedelta']
)

# 10 m wind speed from the interpolated u and v components
pangu_interp['10m_wind_speed_pangu'] = np.sqrt(
    pangu_interp['10m_u_component_of_wind_pangu'] ** 2
    + pangu_interp['10m_v_component_of_wind_pangu'] ** 2
)

# Persist the station-level dataset for the merge step
pangu_interp.to_netcdf('pangu_stations_2022.nc')


### PROPER PANGU MERGING

In [30]:
# Load both files fully into memory and close them again. xr.load_dataset does
# open + load + close in one step, whereas open_dataset(...).compute() leaves the
# file handles open, which can block later writes to the same path.
results = xr.load_dataset('results_2022_ws_gcshifted_meso_gc.nc')
pangu = xr.load_dataset('pangu_stations_2022.nc')

# Variables the Pangu station file is expected to contribute to `results`
variables_to_add = [
    '2m_temperature_pangu',
    'mean_sea_level_pressure_pangu',
    '10m_v_component_of_wind_pangu',
    '10m_u_component_of_wind_pangu',
    '10m_wind_speed_pangu',
    # 'total_precipitation_6hr_pangu'
]

# Fail early, with a readable message, if the station file is stale or incomplete
missing_vars = [name for name in variables_to_add if name not in pangu.data_vars]
assert not missing_vars, f"pangu_stations_2022.nc is missing: {missing_vars}"

# Merge the Pangu variables into the results dataset
results_merged = xr.merge([results, pangu])

results_merged

In [32]:
# No Pangu precipitation is merged in, so add an all-NaN placeholder with the same
# dimensions as the Pangu temperature field (time, station, prediction_timedelta).
reference_field = results_merged['2m_temperature_pangu']
results_merged['total_precipitation_6hr_pangu'] = np.nan * reference_field

results_merged

In [33]:
# Write the merged results + Pangu dataset to disk
merged_output_path = 'results_2022_ws_gcshifted_meso_gc_pangu.nc'
results_merged.to_netcdf(merged_output_path)