In [13]:
import xarray as xr
import geopandas as gpd
import xagg as xa
import numpy as np
import os
import xhistogram.xarray as xhist

In [2]:
# Open the gridded temperature file as an xarray Dataset
# (variable `tas`; daily data for 1980-1989 according to the filename).
ds_tas = xr.open_dataset(
    '~/Documents/tmp/climateestimate_xagg_tutorial/climate_data/tas_day_BEST_historical_station_19800101-19891231.nc'
)

In [3]:
# Open the gridded population file as an xarray Dataset; its `Population`
# variable is what gets passed as pixel weights further down.
ds_pop = xr.open_dataset(
    '~/Documents/tmp/climateestimate_xagg_tutorial/pcount/usap90ag.nc'
)

In [16]:
# Load county shapefiles using geopandas.
# The path is resolved relative to the current user's home directory (like the
# other data files in this notebook) instead of a hardcoded /Users/<name>/...
# prefix, so the notebook is not tied to one machine account.
gdf_counties = gpd.read_file(
    os.path.expanduser('~/Documents/tmp/climateestimate_xagg_tutorial/geo_data/UScounties.shp')
)

In [5]:
# Overlaps between the temperature grid pixels and the county polygons,
# with the gridded population passed as additional pixel weights.
# NOTE(review): the next cell assigns `wm` again without `weights`, so this
# weighted map is overwritten before it is used -- confirm which one is intended.
wm = xa.pixel_overlaps(
    ds_tas,
    gdf_counties,
    weights=ds_pop.Population,
    subset_bbox=False,
)

creating polygons for each pixel...
lat/lon bounds not found in dataset; they will be created.
calculating overlaps between pixels and output polygons...
success!


In [6]:
# Same pixel/county overlap calculation, this time without a `weights`
# argument. This rebinds `wm`, replacing the result of the previous cell.
wm = xa.pixel_overlaps(
    ds_tas,
    gdf_counties,
    subset_bbox=False,
)

creating polygons for each pixel...
lat/lon bounds not found in dataset; they will be created.
calculating overlaps between pixels and output polygons...
success!


In [7]:
# Aggregate the variables of ds_tas onto the county polygons using the
# overlap map currently bound to `wm`.
aggregated = xa.aggregate(
    ds_tas,
    wm,
)

aggregating land_mask...
aggregating tas...
all variables aggregated to polygons!


In [23]:
# For every large-ensemble model: count, per grid cell, how many days fall in
# each temperature bin (historical + future timeframes), aggregate those
# counts to counties with xagg and save one netCDF file per model. Models
# whose output file already exists are skipped.
models = ['CanESM2-LE','CESM1-CAM5-LE','CSIRO-Mk3-6-0-LE','EC-EARTH-LE','GFDL-CM3-LE','GFDL-ESM2M-LE','MPI-ESM-LE']
# Timeframe label -> (start date, end date), exactly as used in the filenames
subset_params = {'hist':('19800101','20091231'),
                 'midc':('20400101','20691231'),
                 'endc':('20700101','20991231')}
proc_data_dir = '/Volumes/KSssd/climate_data/'
# Bin edges: 10, 20, ..., 90 passed through (x + 459.67) * 5/9 (the
# Fahrenheit-to-Kelvin conversion), with open-ended first and last bins,
# i.e. 11 edges -> 10 bins (matching the `bin` coordinate 1..10 below).
# Presumably `tas` is stored in Kelvin -- TODO confirm against the input files.
bins = np.concatenate(([-np.inf],(np.arange(10,91,10)+459.67)*5/9,[np.inf]))
# Model -> {timeframe -> input filename}; only filled for models processed in this run
fns = dict()
for mod in models:
    fn_out = (proc_data_dir+'ERA-INTERIM/tbin_day_ERA-INTERIM_hist-rcp85_'+mod+
              'proj_'+'-'.join(subset_params.keys())+'_CUSA.nc')

    # Cache guard: never recompute a model whose output is already on disk
    if os.path.exists(fn_out):
        print(fn_out+' already exists!')
        continue

    fns[mod] = dict()

    # Historical data: bin over time and keep the grid coordinates as plain
    # arrays. The context manager closes the file as soon as we are done.
    fn_hist = (proc_data_dir+
               'ERA-INTERIM/tas_day_ERA-INTERIM_historical_'+
               mod+'proj_'+'-'.join(subset_params['hist'])+'_CUSA.nc')
    with xr.open_dataset(fn_hist) as hist_data:
        # assumes the histogram comes back ordered (lat, lon, bin) -- TODO confirm
        hist_counts = xhist.histogram(hist_data.tas,dim=['time'],bins=[bins]).values
        lon_vals = hist_data.lon.values
        lat_vals = hist_data.lat.values

    # The mid-century file is opened only to find out how many ensemble runs
    # there are; that dimension is called either 'record' or 'run'.
    fn_midc = (proc_data_dir+'ERA-INTERIM/tas_day_ERA-INTERIM_rcp85_'+mod+
               'proj_'+'-'.join(subset_params['midc'])+'_CUSA.nc')
    with xr.open_dataset(fn_midc) as midc_data:
        run_dim = 'record' if 'record' in midc_data.dims else 'run'
        n_runs = midc_data.sizes[run_dim]

    tas_binned = xr.Dataset({'hist':(['lat','lon','bin'],hist_counts)},
                            coords={'lon' : (['lon'],lon_vals),
                                    'lat' : (['lat'],lat_vals),
                                    'bin' : (['bin'],np.arange(1,11)),
                                    'run' : (['run'],np.arange(0,n_runs))})

    # Future timeframes: every key except 'hist' (compared by value with !=;
    # `is not` on a string literal only tests object identity)
    for timeframe in [k for k in subset_params if k != 'hist']:
        fns[mod][timeframe] = (proc_data_dir+'ERA-INTERIM/tas_day_ERA-INTERIM_rcp85_'+mod+
                               'proj_'+'-'.join(subset_params[timeframe])+'_CUSA.nc')

        with xr.open_dataset(fns[mod][timeframe]) as proj_data:
            # assumes the histogram comes back ordered (lat, lon, run, bin) -- TODO confirm
            proj_counts = xhist.histogram(proj_data.tas,dim=['time'],bins=[bins]).values

        tas_binned[timeframe] = xr.DataArray(data=proj_counts,
                                             dims=['lat','lon','run','bin'],
                                             coords=[tas_binned.lat,tas_binned.lon,tas_binned.run,tas_binned.bin])

    # Change all 0s (where every bin is 0) back to nan:
    # the per-pixel total over all bins is 0 only where there was no data.
    (off_map,_)=xr.broadcast(tas_binned.sum('bin'),tas_binned)
    # `hist` has no `run` dimension in tas_binned; presumably the broadcast
    # added one, so select a single run to drop it again -- TODO confirm
    off_map['hist'] = off_map.hist.isel(run=0)
    tas_binned = tas_binned.where(off_map>0)

    # Aggregate to county level using xagg. tas_binned carries no lat/lon
    # bounds, so xagg builds them from the lat/lon coordinates (see the
    # "lat/lon bounds not found" message it prints).
    weightmap = xa.pixel_overlaps(tas_binned,gdf_counties,subset_bbox=False)
    tbins_agg = xa.aggregate(tas_binned,weightmap)
    # Make dataset
    ds_tbins_agg = tbins_agg.to_dataset(loc_dim='county')

    # Save
    ds_tbins_agg.to_netcdf(fn_out)
    print(fn_out+' saved!')

/Volumes/KSssd/climate_data/ERA-INTERIM/tbin_day_ERA-INTERIM_hist-rcp85_CanESM2-LEproj_hist-midc-endc_CUSA.nc already exists!
/Volumes/KSssd/climate_data/ERA-INTERIM/tbin_day_ERA-INTERIM_hist-rcp85_CESM1-CAM5-LEproj_hist-midc-endc_CUSA.nc already exists!
/Volumes/KSssd/climate_data/ERA-INTERIM/tbin_day_ERA-INTERIM_hist-rcp85_CSIRO-Mk3-6-0-LEproj_hist-midc-endc_CUSA.nc already exists!
/Volumes/KSssd/climate_data/ERA-INTERIM/tbin_day_ERA-INTERIM_hist-rcp85_EC-EARTH-LEproj_hist-midc-endc_CUSA.nc already exists!
/Volumes/KSssd/climate_data/ERA-INTERIM/tbin_day_ERA-INTERIM_hist-rcp85_GFDL-CM3-LEproj_hist-midc-endc_CUSA.nc already exists!
creating polygons for each pixel...
lat/lon bounds not found in dataset; they will be created.
calculating overlaps between pixels and output polygons...
success!
aggregating hist...
aggregating midc...
aggregating endc...
all variables aggregated to polygons!
/Volumes/KSssd/climate_data/ERA-INTERIM/tbin_day_ERA-INTERIM_hist-rcp85_GFDL-ESM2M-LEproj_hist-mid

In [22]:
# NOTE(review): looks like a leftover debugging cell (its execution count is
# lower than that of the loop cell above it). In the cells visible here, no
# cell leaves a variable named `tmp` defined once it has finished running, so
# this presumably raises NameError on Restart & Run All -- consider removing it.
tmp