In [1]:
import os
import geopandas as gpd
from glob import glob
import numpy as np
import pandas as pd 
import rasterio 
from rasterio.merge import merge
from rasterio.plot import show
from rasterio.mask import mask
import matplotlib.pyplot as plt
import shutil
import sys
import xarray as xr
from osgeo import gdal, osr

# Project root; every data and script path in this notebook is built from it.
home = "/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling"

# Run the shared helper script inside this notebook's namespace. The file is
# read inside a `with` block so the handle is closed, and the source is
# compiled with its real path so tracebacks point at Functions.py instead of
# "<string>".
functions_path = os.path.join(home, "Scripts", "Functions.py")
with open(functions_path) as functions_file:
    exec(compile(functions_file.read(), functions_path, "exec"))

# python -m pip install "xarray[complete]"
# https://colab.research.google.com/drive/1B7gFBSr0eoZ5IbsA0lY8q3XL8n-3BOn4#scrollTo=1I_gUeqs5Sak

In [3]:
def climate_anomalies(VAR, agg):
    """Write standardized water-year and seasonal anomalies for one variable.

    A water year runs September through August and is labelled by the calendar
    year in which it ends (time steps with month >= 9 are assigned to
    ``year + 1``). For every water year from 1985 through 2021 the variable is
    reduced over time for the whole water year and for the SON, DJF, MAM, JJA
    and MAMJJA month groups. Each series of yearly values is then standardized
    per grid cell as ``(value - mean over years) / std over years``.

    Parameters
    ----------
    VAR : str
        Name of the data variable inside the NetCDF files; also the filename
        prefix used to find them (``<home>/Data/Climate/<VAR>*.nc``). The
        calendar year is parsed from characters ``[-13:-9]`` of each file
        name, e.g. ``tmin_1984subset.nc`` -> 1984.
    agg : {'mean', 'sum'}
        Reduction applied over the time dimension.

    Raises
    ------
    ValueError
        If ``agg`` is not 'mean' or 'sum'.
    FileNotFoundError
        If neither calendar-year file needed for a water year is found.

    Side effects
    ------------
    Writes ``<VAR>_{WY,SON,DJF,MAM,JJA,MAMJJA}.nc`` to
    ``<home>/Data/Climate/anomalies`` and the water-year mean over all years to
    ``<home>/Data/Climate/climatology/<VAR>_WY.nc``. Both directories are
    created if missing. Prints the loop index, water year and input files for
    each iteration.
    """
    import warnings

    # Fail fast: previously an unsupported `agg` ran the whole loop and only
    # then died with a NameError on the first undefined accumulator.
    if agg not in ("mean", "sum"):
        raise ValueError("agg must be 'mean' or 'sum', got " + repr(agg))

    # Output label -> months to keep; None keeps the entire water year.
    periods = {
        "WY": None,
        "SON": [9, 10, 11],
        "DJF": [12, 1, 2],
        "MAM": [3, 4, 5],
        "JJA": [6, 7, 8],
        "MAMJJA": [3, 4, 5, 6, 7, 8],
    }

    years = range(1985, 2022)  # water years 1985 through 2021 (range end is exclusive)

    # Index the input files by calendar year so each water year is paired with
    # the right files even if the directory holds extra or missing years
    # (pairing by list position silently shifts every later year).
    files = glob(os.path.join(home, "Data", "Climate", VAR + "*.nc"))
    file_by_year = {int(os.path.basename(x)[-13:-9]): x for x in sorted(files)}

    per_year = {label: [] for label in periods}
    opened = []  # lazily-read datasets; they must stay open until the outputs are written
    try:
        for K, year in enumerate(years):
            # Water year `year` spans Sep-Dec of `year - 1` and Jan-Aug of `year`.
            file = [file_by_year[y] for y in (year - 1, year) if y in file_by_year]
            if not file:
                raise FileNotFoundError(
                    "No " + VAR + " files found for water year " + str(year)
                )
            if len(file) < 2:
                warnings.warn(
                    "Only one calendar-year file found for " + VAR + " water year "
                    + str(year) + "; its aggregates cover a partial water year."
                )
            print(K)
            print(year)
            print(file)

            ds = xr.open_mfdataset(file)
            opened.append(ds)
            water_year = (ds.time.dt.month >= 9) + ds.time.dt.year
            ds.coords['water_year'] = water_year
            da = ds[VAR].sel(time=(ds.water_year == year))

            for label, months in periods.items():
                if months is None:
                    subset = da
                else:
                    subset = da.sel(time=np.isin(da.time.dt.month, months))
                # `agg` was validated above, so this resolves to .mean or .sum.
                per_year[label].append(getattr(subset, agg)('time'))

        anomalies_dir = os.path.join(home, "Data", "Climate", "anomalies")
        climatology_dir = os.path.join(home, "Data", "Climate", "climatology")
        os.makedirs(anomalies_dir, exist_ok=True)
        os.makedirs(climatology_dir, exist_ok=True)

        for label, parts in per_year.items():
            # Stack all years once instead of re-concatenating on every iteration.
            stacked = xr.concat(parts, dim='year')
            anomalies = (stacked - stacked.mean('year')) / stacked.std('year')
            anomalies.to_netcdf(path=os.path.join(anomalies_dir, VAR + "_" + label + ".nc"))
            if label == "WY":
                # The climatology is the across-year mean of the water-year aggregate.
                climatology = stacked.mean('year')
                climatology.to_netcdf(path=os.path.join(climatology_dir, VAR + "_WY.nc"))
    finally:
        # Release the NetCDF handles once everything has been computed and written.
        for handle in opened:
            handle.close()

In [4]:
# Each variable is paired with the reduction applied over time; the calls run
# in the listed order.
for var_name, reduction in (
    ("tmin", "mean"),
    ("tmax", "mean"),
    ("prcp", "sum"),
    ("vp", "mean"),
):
    climate_anomalies(var_name, reduction)

0
1985
['/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1984subset.nc', '/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1985subset.nc']
1
1986
['/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1985subset.nc', '/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1986subset.nc']
2
1987
['/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1986subset.nc', '/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1987subset.nc']
3
1988
['/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1987subset.nc', '/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1988subset.nc']
4
1989
['/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/tmin_1988subset.nc', '/Volu

In [5]:
# calculate dry day anomalies 
def count_dry_days(x, threshold=0.25):
    """Count the entries of ``x`` that are at or below ``threshold``.

    Parameters
    ----------
    x : array-like
        Values to test; the caller passes one grid cell's precipitation
        series along the time dimension.
    threshold : float, optional
        Largest value still counted as dry. Defaults to 0.25, the cut-off the
        notebook has always used (presumably in the precipitation units of
        the input files -- TODO confirm).

    Returns
    -------
    int
        Number of entries with ``value <= threshold``. NaN entries never
        satisfy the comparison, so they are not counted.
    """
    # count_nonzero on the boolean mask gives the count directly, without
    # building index arrays and a filtered copy just to take its length.
    return int(np.count_nonzero(np.asarray(x) <= threshold))
    
# Locate the precipitation files and order them by the calendar year parsed
# from characters [-13:-9] of the file name (e.g. "prcp_1984subset.nc" -> 1984).
files = glob(os.path.join(home, "Data", "Climate", "prcp*.nc"))
names = [os.path.basename(x) for x in files]
file_year = [int(x[-13:-9]) for x in names]
files = [x for _, x in sorted(zip(file_year, files))]
# Index by calendar year so each water year is paired with the right files
# even if a year is missing (pairing by list position would silently shift
# every later year).
file_by_year = dict(zip(sorted(file_year), files))
years = range(1985, 2022)  # water years 1985 through 2021 (range end is exclusive)

# Period label -> months to keep; None keeps the entire water year.
dry_periods = {
    "wy": None,
    "son": [9, 10, 11],
    "djf": [12, 1, 2],
    "mam": [3, 4, 5],
    "jja": [6, 7, 8],
    "mamjja": [3, 4, 5, 6, 7, 8],
}
dry_counts = {label: [] for label in dry_periods}

for K, year in enumerate(years):
    # Water year `year` spans Sep-Dec of `year - 1` and Jan-Aug of `year`.
    file = [file_by_year[y] for y in (year - 1, year) if y in file_by_year]
    if not file:
        raise FileNotFoundError("No prcp files found for water year " + str(year))
    # The data are read fully into memory, so the file handles can be closed
    # immediately instead of staying open for the rest of the session.
    with xr.open_mfdataset(file) as opened:
        ds = opened.load()
    water_year = (ds.time.dt.month >= 9) + ds.time.dt.year
    ds.coords['water_year'] = water_year
    ds = ds['prcp'].sel(time=(ds.water_year == year))

    for label, months in dry_periods.items():
        if months is None:
            subset = ds
        else:
            subset = ds.sel(time=np.isin(ds.time.dt.month, months))
        # Collapse the time dimension to a per-cell dry-day count.
        dry_counts[label].append(
            xr.apply_ufunc(count_dry_days, subset, input_core_dims=[["time"]], vectorize=True)
        )

# Stack the yearly counts once, rather than re-concatenating on every iteration.
ds_wy = xr.concat(dry_counts["wy"], dim='year')
ds_son = xr.concat(dry_counts["son"], dim='year')
ds_djf = xr.concat(dry_counts["djf"], dim='year')
ds_mam = xr.concat(dry_counts["mam"], dim='year')
ds_jja = xr.concat(dry_counts["jja"], dim='year')
ds_mamjja = xr.concat(dry_counts["mamjja"], dim='year')

# calculate anomalies and water year climatology
ds_wy_anomalies = (ds_wy - ds_wy.mean('year'))/ds_wy.std('year')
ds_son_anomalies = (ds_son - ds_son.mean('year'))/ds_son.std('year')
ds_djf_anomalies = (ds_djf - ds_djf.mean('year'))/ds_djf.std('year')
ds_mam_anomalies = (ds_mam - ds_mam.mean('year'))/ds_mam.std('year')
ds_jja_anomalies = (ds_jja - ds_jja.mean('year'))/ds_jja.std('year')
ds_mamjja_anomalies = (ds_mamjja - ds_mamjja.mean('year'))/ds_mamjja.std('year')
climatology = ds_wy.mean('year')

# save the anomalies and water year climatology to a .nc
# (create the output folders first so a fresh checkout does not fail here)
os.makedirs(os.path.join(home, "Data", "Climate", "anomalies"), exist_ok=True)
os.makedirs(os.path.join(home, "Data", "Climate", "climatology"), exist_ok=True)
ds_wy_anomalies.to_netcdf(path = os.path.join(home, "Data", "Climate", "anomalies", "drydays" + "_WY.nc"))
ds_son_anomalies.to_netcdf(path = os.path.join(home, "Data", "Climate", "anomalies", "drydays" + "_SON.nc"))
ds_djf_anomalies.to_netcdf(path = os.path.join(home, "Data", "Climate", "anomalies", "drydays" + "_DJF.nc"))
ds_mam_anomalies.to_netcdf(path = os.path.join(home, "Data", "Climate", "anomalies", "drydays" + "_MAM.nc"))
ds_jja_anomalies.to_netcdf(path = os.path.join(home, "Data", "Climate", "anomalies", "drydays" + "_JJA.nc"))
ds_mamjja_anomalies.to_netcdf(path = os.path.join(home, "Data", "Climate", "anomalies", "drydays" + "_MAMJJA.nc"))
climatology.to_netcdf(path = os.path.join(home, "Data", "Climate", "climatology", "drydays" + "_WY.nc"))

In [16]:
# convert nc files to tifs 
def nc_to_tif(nc, template, start_year=1985):
    """Write every 2-D slice of a NetCDF file to a single-band Float32 raster.

    The output rasters copy the size, geotransform, projection and driver of
    ``template``. Slices are written as-is; this function does not resample
    or check that the NetCDF grid matches the template grid (assumed to match
    -- TODO confirm against how the template was produced).

    Parameters
    ----------
    nc : str
        Path of the NetCDF file to convert. Its data variables are stacked
        with ``Dataset.to_array()``; when that yields a 4-D array only the
        first variable is used, and the leading axis of the remaining 3-D
        array is iterated.
    template : str
        Path of the raster whose grid definition the outputs copy.
    start_year : int, optional
        Year label given to the first slice when the file holds more than one
        slice; later slices count up by one. Defaults to 1985, the value the
        notebook has always used.

    Raises
    ------
    FileNotFoundError
        If GDAL cannot open ``template``.
    RuntimeError
        If GDAL cannot create an output raster.

    Side effects
    ------------
    Writes ``<parent dir>_<file stem>.tif`` (single slice) or
    ``<parent dir>_<file stem>_<year>.tif`` (one per slice) into
    ``<home>/Data/Climate/tifs``, creating that directory if needed.
    """
    # Grid definition copied from the template raster.
    ds = gdal.Open(template)
    if ds is None:
        raise FileNotFoundError("Could not open template raster: " + str(template))
    wkt = ds.GetProjection()
    trans = ds.GetGeoTransform()
    cols = ds.RasterXSize
    rows = ds.RasterYSize
    driver = ds.GetDriver()

    # The projection is identical for every slice, so build it once.
    srs = osr.SpatialReference()
    srs.ImportFromWkt(wkt)
    projection = srs.ExportToWkt()

    # Read the values into memory, then close the NetCDF handle.
    with xr.open_dataset(nc) as mync:
        myarray = mync.to_array().to_numpy()
    if len(myarray.shape) == 4:
        myarray = myarray[0, :, :, :]

    # Name outputs after the parent folder and file stem. os.path is used
    # instead of splitting on "/" so this also works with other separators.
    group = os.path.basename(os.path.dirname(nc))
    stem = os.path.splitext(os.path.basename(nc))[0]
    out_dir = os.path.join(home, "Data", "Climate", "tifs")
    os.makedirs(out_dir, exist_ok=True)

    single_slice = myarray.shape[0] == 1
    for offset in range(myarray.shape[0]):
        myslice = myarray[offset, :, :]

        if single_slice:
            outname = group + "_" + stem + ".tif"
        else:
            outname = group + "_" + stem + "_" + str(start_year + offset) + ".tif"
        outpath = os.path.join(out_dir, outname)

        outDs = driver.Create(outpath, cols, rows, 1, gdal.GDT_Float32)
        if outDs is None:
            # Without this check a failed Create surfaces as an AttributeError
            # on the next line.
            raise RuntimeError("GDAL could not create output raster: " + outpath)
        outBand = outDs.GetRasterBand(1)
        outBand.WriteArray(myslice)
        outDs.SetGeoTransform(trans)
        outDs.SetProjection(projection)

        # Drop the band before the dataset so nothing refers to a closed
        # dataset; dereferencing the dataset is what flushes it to disk.
        outBand = None
        outDs = None

    ds = None

In [17]:
# make a raster template 
# template = rxr.open_rasterio('/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/prcp_1984subset.nc')
# template['prcp'].rio.to_raster('/Volumes/GoogleDrive/My Drive/Chapter2_mechanisms_forest_water_cycling/Data/Climate/template.tif')
# Raster whose grid definition every output GeoTIFF copies. Built from `home`
# like every other path here, so moving the project needs one edit only.
template = os.path.join(home, "Data", "Climate", "template.tif")
# Sorted so the files are processed in a reproducible order.
climatology_nc = sorted(glob(os.path.join(home, "Data", "Climate", "climatology", "*.nc")))
anomalies_nc = sorted(glob(os.path.join(home, "Data", "Climate", "anomalies", "*.nc")))
all_nc = climatology_nc + anomalies_nc

for nc in all_nc:
    nc_to_tif(nc, template)

In [23]:
# make a shapefile of the permanent forest in the crs matching the climate data 
import rioxarray as rxr
import rasterio as rio

# Take the CRS from one of the tmin GeoTIFFs. The list is sorted so the same
# file is picked on every run, and an empty match is reported explicitly
# instead of as a bare IndexError.
tmin_tifs = sorted(glob(os.path.join(home, "Data", "Climate", "tifs", "*" + 'tmin'  + "*.tif")))
if not tmin_tifs:
    raise FileNotFoundError(
        "No tmin GeoTIFFs found in " + os.path.join(home, "Data", "Climate", "tifs")
    )
# Open inside a `with` block so the raster handle is closed once the CRS is read.
with rio.open(tmin_tifs[0]) as src:
    crs = src.crs

shp = gpd.read_file(os.path.join(home, "Data", "Catchments", "Headwater", "headwater_catchments_perm_forest.shp"))
shp = shp.to_crs(crs)
shp.to_file(os.path.join(home, "Data", "Catchments", "Headwater", "headwater_catchments_perm_forest_climate.shp"))

  pd.Int64Index,
