In [7]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [8]:
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import mpl_toolkits
import glob
import pandas as pd
import xarray as xr
import cartopy
from cartopy.util import add_cyclic_point
import cartopy.crs as ccrs
import scipy.stats
import scipy as sp
from scipy import signal
import os
from eofs.xarray import Eof
from eofs.multivariate.standard import MultivariateEof

In [9]:
# first load your data

dirn = '/global/scratch/medhap02/ISMData/'
filen = 'chirps-v2.0.monthly.nc'
datgpcp = xr.open_mfdataset(dirn+filen,combine='by_coords')
# precip units are mm/day:
prgpcp = datgpcp['precip']
prgpcp = prgpcp.rename({
    "latitude": "lat",
    "longitude": "lon"
})
print(prgpcp)
# select years of interest (dataset only goes thru march 2019):
prgpcp = prgpcp.sel(time=slice('1981-01-01','2018-12-31'))

<xarray.DataArray 'precip' (time: 473, lat: 2000, lon: 7200)>
dask.array<shape=(473, 2000, 7200), dtype=float32, chunksize=(473, 2000, 7200)>
Coordinates:
  * lat      (lat) float32 -49.975 -49.925 -49.875 ... 49.924995 49.975
  * lon      (lon) float32 -179.975 -179.925 -179.875 ... 179.92499 179.975
  * time     (time) datetime64[ns] 1981-01-01 1981-02-01 ... 2020-05-01
Attributes:
    units:               mm/month
    standard_name:       convective precipitation rate
    long_name:           Climate Hazards group InfraRed Precipitation with St...
    time_step:           month
    geostatial_lat_min:  -50.0
    geostatial_lat_max:  50.0
    geostatial_lon_min:  -180.0
    geostatial_lon_max:  180.0


In [10]:
# take summer mean:
def is_summer(month):
    return (month >= 6) & (month <= 9)
prgpcpSeas = prgpcp.sel(time=is_summer(prgpcp['time.month']))#.groupby('time.year').mean())

prgpcpSeas = prgpcpSeas.groupby('time.year',restore_coord_dims = True).mean("time")
print(prgpcpSeas.dims)

# rechunk data for efficiency:
prgpcpSeas.data = prgpcpSeas.data.rechunk(-1)

# eliminate any NaNs:
prgpcpSeas = prgpcpSeas.ffill('lon').bfill('lon').ffill('lat').bfill('lat')
#prgpcpSeas.load()
print(prgpcpSeas)

('year', 'lat', 'lon')
<xarray.DataArray 'precip' (year: 38, lat: 2000, lon: 7200)>
dask.array<shape=(38, 2000, 7200), dtype=float32, chunksize=(38, 2000, 7200)>
Coordinates:
  * lat      (lat) float32 -49.975 -49.925 -49.875 ... 49.924995 49.975
  * lon      (lon) float32 -179.975 -179.925 -179.875 ... 179.92499 179.975
  * year     (year) int64 1981 1982 1983 1984 1985 ... 2014 2015 2016 2017 2018


In [21]:
#cat /proc/sys/vm/overcommit_memory
#echo 1 > /proc/sys/vm/overcommit_memory

latlim1 = 5
latlim2 = 6#35
lonlim1 = 5#65
lonlim2 = 6#95

# take seasonal mean and select latitudes to use in analysis:
tmppr = prgpcpSeas.sel(lat=slice(latlim1,latlim2),lon=slice(lonlim1,lonlim2))
# subtract time mean from each data point (EOFs work with anomalies)
tmppr = tmppr - tmppr.mean(dim='year')

# need to mark dimension along which we want to take EOFs:
#tmppr = tmppr.assign_coords(year=tmppr.year)
tmppr.year.attrs['axis'] = 'T'

# weight variables by the area of the lat-lon grid cell (which is proportional to cosine of latitude):
coslat = np.cos(tmppr.lat*np.pi/180)
tmppr2, coslat2 = xr.broadcast(tmppr, coslat)
print(tmppr)
# find the EOFs
solver_prfull = Eof(tmppr,weights=coslat2,center=True)
pcs_prfull = solver_prfull.pcs()
eofs_prfull = solver_prfull.eofs()
varfrac_prfull = solver_prfull.varianceFraction()

#473, 2000, 7200

<xarray.DataArray 'precip' (year: 38, lat: 20, lon: 20)>
dask.array<shape=(38, 20, 20), dtype=float32, chunksize=(38, 20, 20)>
Coordinates:
  * lat      (lat) float32 5.0249977 5.074997 5.125 ... 5.875 5.924999 5.9749985
  * lon      (lon) float32 5.024994 5.074997 5.125 ... 5.875 5.925003 5.974991
  * year     (year) int64 1981 1982 1983 1984 1985 ... 2014 2015 2016 2017 2018


MemoryError: Unable to allocate array with shape (473, 2000, 7200) and data type float32

In [6]:
tmppr
#user interpolate to reduce resolution, .2

<xarray.DataArray 'precip' (year: 38, lat: 600, lon: 600)>
dask.array<shape=(38, 600, 600), dtype=float32, chunksize=(38, 600, 600)>
Coordinates:
  * lat      (lat) float32 5.0249977 5.074997 5.125 ... 34.875 34.924995 34.975
  * lon      (lon) float32 65.024994 65.075 65.125 ... 94.875 94.92499 94.975006
  * year     (year) int64 1981 1982 1983 1984 1985 ... 2014 2015 2016 2017 2018

In [None]:
#(5-35n, 65-95e)
latlim1 = 5
latlim2 = 35
lonlim1 = 65
lonlim2 = 95

# take seasonal mean and select latitudes to use in analysis:
tmppr = prgpcpSeas.sel(lat=slice(latlim1,latlim2), lon=slice(lonlim1,lonlim2))
# subtract time mean from each data point (EOFs work with anomalies)
tmppr = tmppr - tmppr.mean(dim='year')

# need to mark dimension along which we want to take EOFs:
#tmppr = tmppr.assign_coords(year=tmppr.year)
tmppr.year.attrs['axis'] = 'T'

# weight variables by the area of the lat-lon grid cell (which is proportional to cosine of latitude):
coslat = np.cos(tmppr.lat*np.pi/180)
coslat2 = xr.broadcast(tmppr, coslat)

# find the EOFs
solver_pr = Eof(tmppr,weights=coslat2,center=True)
pcs_pr = solver_pr.pcs()
eofs_pr = solver_pr.eofs()
varfrac_pr = solver_pr.varianceFraction()

# plot results

In [None]:
varfrac_prfull.plot(marker='*',label='pr')
print('sum of variance in first 4 pr EOFs = ', varfrac_prfull[0:4].sum().data)
plt.legend()
print('total anomaly variance pr =', solver_prfull.totalAnomalyVariance())

In [None]:
plt.figure(figsize=(18,8))
for i in np.arange(0,4):
    ax = plt.subplot(2,2,i+1,projection=ccrs.PlateCarree());
    eofs_prfull[i,:,:].plot.contourf(ax=ax,levels=12,extend='both',cmap=cm.BrBG)
    ax.coastlines()
    gl = ax.gridlines(draw_labels=False)
    ax.set_extent([-180,180,-90,90],crs=ccrs.PlateCarree())

In [None]:
pcs_prfull.sel(mode=0).plot(marker='.')
plt.grid()

In [None]:
varfrac_pr.plot(marker='*',label='pr')
print('sum of variance in first 4 pr EOFs = ', varfrac_pr[0:4].sum().data)
plt.legend()
print('total anomaly variance pr =', solver_pr.totalAnomalyVariance())

In [None]:
plt.figure(figsize=(18,8))
for i in np.arange(0,4):
    ax = plt.subplot(2,2,i+1,projection=ccrs.PlateCarree());
    eofs_pr[i,:,:].plot.contourf(ax=ax,levels=12,extend='both',cmap=cm.BrBG)
    ax.coastlines()
    gl = ax.gridlines(draw_labels=False)
    ax.set_extent([-180,180,-90,90],crs=ccrs.PlateCarree())

In [None]:
pcs_pr.sel(mode=0).plot(marker='.')
plt.grid()

In [None]:
 # Copyright (c) 2020 Medha Palavalli.