In [2]:
import xarray as xr
import src.xarray_utils as xu
import numpy as np
import xesmf

In [5]:
ds = xr.open_dataset('/home/ftei-dsw/data/weather-gan/datasets/temperature_daily_gfdl_historical.nc')
ds.temperature.min(), ds.temperature.max()

(<xarray.DataArray 'temperature' ()>
 array(187.76097107),
 <xarray.DataArray 'temperature' ()>
 array(319.63687134))

In [5]:
def preprocess_raw_data(variable_name: str,
                        rename: dict,
                        fname_in: str,
                        fname_out_daily: str,
                        fname_out_monthly: str, 
                        drop=None):

    inv_rename = {v: k for k, v in rename.items()}
    data = xu.load(fname_in,
                   rename=rename,
                   extract=inv_rename[variable_name],
                   drop=drop,
                   multi_files=True)
    
    longitudes = np.arange(0,360,2.5)
    latitudes = np.arange(-90,90+2.5,2.5)
    
    data_regridded = xu.regrid(data, latitudes, longitudes, periodic=True)
    data_regridded_shifted = xu.shift_longitudes(data_regridded)
    
    xu.write_dataset(data_regridded_shifted, fname_out_daily)
    
    data_regridded_shifted_monthly = data_regridded_shifted.resample(time="M").mean()
    
    xu.write_dataset(data_regridded_shifted_monthly, fname_out_monthly)

In [6]:
""" preprocess precipitation data """

fname_in = '/home/ftei-dsw/data/weather-gan/datasets/pr/v20190726/*.nc'
fname_out_daily = '/home/ftei-dsw/data/weather-gan/datasets/precipitation_daily_gfdl_historical.nc'
fname_out_monthly = '/home/ftei-dsw/data/weather-gan/datasets/precipitation_monthly_gfdl_historical.nc'

variable_name = 'precipitation'

rename = {
    'pr': 'precipitation',
    'lat': 'latitude',
    'lon': 'longitude',
         }

preprocess_raw_data(variable_name,
                    rename,
                    fname_in,
                    fname_out_daily,
                    fname_out_monthly)


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


writing to /home/ftei-dsw/data/weather-gan/datasets/precipitation_daily_gfdl_historical.nc
[########################################] | 100% Completed | 51.6s
writing to /home/ftei-dsw/data/weather-gan/datasets/precipitation_monthly_gfdl_historical.nc
[########################################] | 100% Completed | 52.6s


In [10]:
""" preprocess temperature data """

fname_in = '/home/ftei-dsw/data/weather-gan/datasets/tas/v20190726/*.nc'
fname_out_daily = '/home/ftei-dsw/data/weather-gan/datasets/temperature_daily_gfdl_historical.nc'
fname_out_monthly = '/home/ftei-dsw/data/weather-gan/datasets/temperature_monthly_gfdl_historical.nc'

variable_name = 'temperature'


rename = {
    'tas': 'temperature',
    'lat': 'latitude',
    'lon': 'longitude',
         }

preprocess_raw_data(variable_name,
                    rename,
                    fname_in,
                    fname_out_daily,
                    fname_out_monthly,
                    drop='height')

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


writing to /home/ftei-dsw/data/weather-gan/datasets/temperature_daily_gfdl_historical.nc
[########################################] | 100% Completed | 32.0s
writing to /home/ftei-dsw/data/weather-gan/datasets/temperature_monthly_gfdl_historical.nc
[########################################] | 100% Completed | 29.9s


In [7]:
fname_out_daily = '/home/ftei-dsw/data/weather-gan/datasets/temperature_daily_gfdl_historical.nc'
fname_out_monthly = '/home/ftei-dsw/data/weather-gan/datasets/temperature_monthly_gfdl_historical.nc'

daily = xr.open_dataset(fname_out_daily)
monthly = xr.open_dataset(fname_out_monthly)

In [9]:
daily, monthly

(<xarray.Dataset>
 Dimensions:      (time: 60225, latitude: 73, longitude: 144)
 Coordinates:
   * time         (time) object 1850-01-01 12:00:00 ... 2014-12-31 12:00:00
   * longitude    (longitude) float64 -180.0 -177.5 -175.0 ... 172.5 175.0 177.5
   * latitude     (latitude) float64 -90.0 -87.5 -85.0 -82.5 ... 85.0 87.5 90.0
 Data variables:
     temperature  (time, latitude, longitude) float32 ...
 Attributes:
     regrid_method:  bilinear,
 <xarray.Dataset>
 Dimensions:      (time: 1980, latitude: 73, longitude: 144)
 Coordinates:
   * time         (time) object 1850-01-31 00:00:00 ... 2014-12-31 00:00:00
   * longitude    (longitude) float64 -180.0 -177.5 -175.0 ... 172.5 175.0 177.5
   * latitude     (latitude) float64 -90.0 -87.5 -85.0 -82.5 ... 85.0 87.5 90.0
 Data variables:
     temperature  (time, latitude, longitude) float32 ...)

In [21]:
def random_day(monthly_times, index) -> str:
    month = str(monthly_times.isel(time=index)['time.month'].values) 
    year = str(monthly_times.isel(time=index)['time.year'].values)
    random_day = str(np.random.randint(1,30,size=1)[0])
    date_str = f'{year}-{month.zfill(2)}-{random_day.zfill(2)}'
    return date_str

def month_from_daily(daily_times, index) -> str:
    month = str(daily_times.isel(time=index)['time.month'].values) 
    year = str(daily_times.isel(time=index)['time.year'].values)
    date_str = f'{year}-{month.zfill(2)}'
    return date_str

date = random_month(daily.time, 6_000)


monthly.sel(time=date)

cftime.datetime(1858, 5, 1, 0, 0, 0, 0, calendar='standard', has_year_zero=False)

In [103]:
print(random_day)

6


In [58]:
daily.sel(time='1858-05')

In [25]:
import cftime

In [None]:
cftime