# Generate PC database
Here we compute the mean large-scale flow associated with each precipitation feature (PF) and project it onto the EOFs of vertical mass flux.

In [14]:
%load_ext autoreload
%autoreload 2
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from IPython.display import clear_output
from src.configs import *
from src.plotting import *
from src.regions import *
from src.pcs import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
# User inputs
# Choose region and size of grid over which to sample ERA5
# `region` is the region object whose `.name` selects the PF statistics, the
# EOFs, and the output CSV filename further down the notebook.
region = wmp_region()
# `grid_size` is intended as the size of the ERA5 sampling grid (presumably in
# degrees — confirm); it is embedded in the output CSV filename as `{grid_size}deg`.
grid_size = 2

In [27]:
# Load the PF statistics table for the selected region.
pf_ds = load_pf_stats(region.name)
# Keep only tropical PFs (|mean latitude| < 10). `where` blanks every
# non-matching row to NaN and `dropna` then removes those rows (along with any
# row that already contained a missing value).
tropical_mask = pf_ds['mean_latitude'].abs() < 10
pf_ds = pf_ds.where(tropical_mask).dropna()

In [28]:
# extract lat, lon, and time information
pf_lat = pf_ds.precipitation_centroid_latitude.values
pf_lon = pf_ds.precipitation_centroid_longitude.values
pf_time = pd.to_datetime(pf_ds.observation_time, format='%Y%m%d_%H:%M:%S')
pf_day = pf_time.dt.floor('D').values

In [29]:
# To compute PCs we need to project the mean mass flux onto the EOFs.
eofs = load_era5_eofs(region.name)
era5_w = xr.open_dataset(merged_era5_var_file('W'))['W']
# Truncate the ERA5 time coordinate to whole days.
era5_w.coords['time'] = era5_w['time'].dt.floor('D')
# Relabel longitudes by subtracting 180.
# NOTE(review): this is a pure relabel, not a wrap/re-sort; it is only correct
# if the merged file stores longitudes offset by +180 — confirm against the file.
era5_w.coords['lon'] = era5_w['lon'] - 180

In [30]:
def lat_lon_grid_slice(lat0, lon0, time0, radius_in_degs=5):
    """Build a selection dict for a lat/lon box centred on a point at one time.

    Parameters
    ----------
    lat0, lon0 : number
        Centre of the box.
    time0 : any
        Value stored unchanged under the ``'time'`` key.
    radius_in_degs : number, default 5
        Despite the name, this is the full width of the box: the slices extend
        ``radius_in_degs / 2`` on either side of the centre.

    Returns
    -------
    dict
        ``{'lat': slice, 'lon': slice, 'time': time0}``.
    """
    half_width = radius_in_degs / 2
    lat_window = slice(lat0 - half_width, lat0 + half_width)
    lon_window = slice(lon0 - half_width, lon0 + half_width)
    return {'lat': lat_window, 'lon': lon_window, 'time': time0}

# Build one ERA5 selection dict per PF. The box size is taken from the
# user-set `grid_size` (previously hard-coded to 5) so that the sampled region
# agrees with the `{grid_size}deg` tag written into the output filename.
grid_slices = [
    lat_lon_grid_slice(lat0, lon0, time0, radius_in_degs=grid_size)
    for (lat0, lon0, time0) in zip(pf_lat, pf_lon, pf_day)
]

In [31]:
# Project the box-mean profile of every PF onto the first two EOFs and store
# the resulting principal components alongside the PF statistics.
pc_database_dict = init_pc_database_dict()
# The EOF selections do not change between PFs, so take them once up front.
eof_mode1 = eofs.sel(mode=1)
eof_mode2 = eofs.sel(mode=2)
n_pfs = len(grid_slices)
for i, gs in enumerate(grid_slices):
    # wait=True defers clearing until the next line is printed, which avoids
    # flicker when the counter is redrawn for every PF.
    clear_output(wait=True)
    print(f'PF {i+1} of {n_pfs} ')
    obs_w = era5_w.sel(gs)
    # An empty box would silently yield NaN/zero PCs, so fail loudly instead.
    assert obs_w.lat.size > 0 and obs_w.lon.size > 0, f'empty ERA5 selection for PF {i}: {gs}'
    # Scale the box-mean W by -1/9.81 (presumably -1/g, turning W into a
    # vertical mass flux — confirm units).
    pf_mf = (-1/9.81)*obs_w.mean(('lat', 'lon'))
    pc_database_dict['pc1'].append((eof_mode1 * pf_mf).sum().item())
    pc_database_dict['pc2'].append((eof_mode2 * pf_mf).sum().item())
# The PCs were appended in the row order of `pf_ds`, whose index still carries
# the (gapped) labels left over from filtering. Give the PC table that same
# index so the index-based join pairs every PF with its own PCs instead of
# aligning against a fresh 0..n-1 range.
pc_database = pd.DataFrame(pc_database_dict, index=pf_ds.index)
pf_ds.join(pc_database).to_csv(f'{project_root_dir()}/data/{region.name}.tropical_pf_pc_data_{grid_size}deg.csv')

PF 62085 of 62085 
