In [31]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import ma
import xarray as xr
import geopandas as gpd
import pandas as pd
# requires cartopy to be installed
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader

import cartopy.crs as ccrs # for projection
import cartopy.feature as cfeature # for map features
from cartopy.util import add_cyclic_point
from matplotlib.axes import Axes
from cartopy.mpl.geoaxes import GeoAxes
#from matplotlib.colors import TwoSlopeNorm
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import xesmf as xe 
import sys
import os
import dask

In [32]:
# Model identifiers that get spliced into the MACA FWI file names below.
# The order here is the order in which per-model results are produced.
models = [
    "BNU-ESM",
    "CNRM-CM5",
    "CSIRO-Mk3-6-0",
    "CanESM2",
    "GFDL-ESM2G",
    "GFDL-ESM2M",
    "HadGEM2-CC365",
    "HadGEM2-ES365",
    "IPSL-CM5A-LR",
    "IPSL-CM5A-MR",
    "IPSL-CM5B-LR",
    "MIROC-ESM-CHEM",
    "MIROC-ESM",
    "MIROC5",
    "bcc-csm1-1-m",
    "MRI-CGCM3",
    "bcc-csm1-1",
    "inmcm4",
]


In [18]:
from dask_jobqueue import SLURMCluster

# Resources and scheduler settings handed to SLURMCluster.
slurm_cluster_spec = {
    "cores": 3,
    "processes": 3,
    "memory": "100GB",
    "walltime": "04:00:00",
    # Change the last 4 numbers here to something else between 7000-8000
    "scheduler_options": {'host': '172.22.179.3:7662'},
}
cluster = SLURMCluster(**slurm_cluster_spec)

# Scale the cluster to 18 (presumably one worker per entry in `models` -- confirm).
cluster.scale(18)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 38237 instead


ValueError: cannot get address of non-running Server

In [None]:
from dask.distributed import Client

# Attach this notebook session to the SLURM-backed cluster created above.
client = Client(address=cluster)

# Bare last expression so the notebook renders the client summary.
client

In [33]:
# Accumulator filled as a side effect of every _2090proj call: one entry per
# call, in call order. It is NOT tied to a particular region -- whatever boxes
# have been passed since it was last reset all end up in the same list.
_2090_array = []


def _2090proj(model, start_day, end_day, lon_max, lon_min, lat_max, lat_min):
    """Regional-mean FWI for the year 2090 from one model's 2086-2090 file.

    The daily ``FWI`` field is restricted to a day-of-year window, averaged per
    calendar year, reduced to the year 2090, cut to a longitude/latitude box
    and finally averaged over that box.

    Parameters
    ----------
    model : str
        Model identifier spliced into the input file name.
    start_day, end_day : int
        Inclusive day-of-year bounds; days outside the window are masked out
        before the yearly mean is taken.
    lon_max, lon_min : float
        Inclusive longitude bounds of the box. Longitudes above 180 are
        shifted by -360 before the comparison, so pass values on the
        -180..180 convention.
    lat_max, lat_min : float
        Inclusive latitude bounds of the box.

    Returns
    -------
    xarray.DataArray
        FWI averaged over ``lat`` and ``lon``; the ``year`` dimension is kept.

    Side effects
    ------------
    The result is also appended to the module-level ``_2090_array``. When the
    function is executed in another process (e.g. through ``dask.delayed`` on
    a distributed cluster) that append presumably lands in the worker's copy
    of the list, not the notebook's -- rely on the return value there.
    """
    filein = "/data/keeling/a/davidcl2/d/MACA/FWI_RHmin/rcp45/out/macav2metdata_fwi_" + model + "_r1i1p1_rcp45_2086_2090_CONUS_daily.nc"

    # The context manager releases the file handle even if a step below fails;
    # previously one dataset stayed open for every model processed.
    with xr.open_dataset(filein) as current_model:
        # Only FWI is used downstream, so filter that variable alone instead of
        # mapping the mask over every variable with the deprecated Dataset.apply.
        fwi = current_model["FWI"]
        day_of_year = fwi["time"].dt.dayofyear
        in_window = (day_of_year >= start_day) & (day_of_year <= end_day)
        day_ds = fwi.where(in_window)

        averaged_yearly = day_ds.groupby('time.year').mean(dim='time')
        # `year` comes from time.year and is an integer coordinate; select it
        # with integer bounds rather than relying on string/int comparison.
        current_model_2090 = averaged_yearly.sel(year=slice(2090, 2090))
        selected_ds = current_model_2090.dropna(dim='year', how='all')

        # Move longitudes from the 0..360 convention onto -180..180 and restore
        # ascending order so the box comparison below works with negative bounds.
        wrapped_lon = np.where(selected_ds["lon"] > 180, selected_ds["lon"] - 360, selected_ds["lon"])
        selected_ds = selected_ds.assign_coords(lon=wrapped_lon).sortby("lon")

        in_box = (
            (selected_ds.lon >= lon_min)
            & (selected_ds.lon <= lon_max)
            & (selected_ds.lat >= lat_min)
            & (selected_ds.lat <= lat_max)
        )
        annual_mean_boundaries = selected_ds.where(in_box, drop=True)
        # NOTE(review): this is a mean over lon of per-longitude lat means. It
        # equals the plain box mean only when every longitude column has the
        # same number of non-NaN cells; kept as-is so existing numbers hold.
        annual_mean_boundaries = annual_mean_boundaries.mean(dim=['lat'])
        annual_mean_boundaries = annual_mean_boundaries.mean(dim=['lon'])
        # Make sure the values are in memory before the file is closed.
        annual_mean_boundaries = annual_mean_boundaries.load()

    _2090_array.append(annual_mean_boundaries)
    return annual_mean_boundaries
    

In [6]:
# Day-of-year window and lon/lat box shared by every task in this cell.
projection_kwargs = dict(start_day=152, end_day=304, lon_max=-116.5, lon_min=-118.5, lat_max=36, lat_min=34)
lazy_2090proj = dask.delayed(_2090proj)

# Build one lazy task per entry in `models`; nothing is computed yet.
delayed = []
for model in models:
    out = lazy_2090proj(model=model, **projection_kwargs)
    delayed.append(out)


In [7]:
# Run every delayed task and collect the outputs as a tuple, one per task.
# No scheduler is passed here, so dask uses its default -- presumably the
# distributed Client if one has been created in this session; confirm.
results = dask.compute(*delayed)

In [34]:
# Start from an empty accumulator so that re-running this cell, or having run
# _2090proj for another box earlier in the session, cannot leave extra entries
# in the list that the mean/std cells reduce.
_2090_array.clear()

# Return values are ignored on purpose: each call appends its regional mean
# to _2090_array as a side effect.
for model in models:
    _2090proj(model=model, start_day=152, end_day=304, lon_max=-121, lon_min=-123, lat_max=40.5, lat_min=38.5)

In [35]:
# Mean over every entry currently held in the accumulator.
np.asarray(_2090_array).mean()

48.612316

In [36]:
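# SoCal, RCP4.5, 2090 -- NOTE(review): the loop above uses lat 38.5 to 40.5 / lon -123 to -121; confirm which region these statistics belong to.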

In [37]:
# Population standard deviation (ddof=0) over every entry in the accumulator.
np.asarray(_2090_array).std()

3.9526732