In [58]:
import os
import xarray as xr
from glob import glob
import numpy as np
import pandas as pd

#import netcdf4

In [44]:
models = [
          'ACCESS1-0', 'ACCESS1-3', 'CCSM4', 'CESM1-BGC', 'CESM1-CAM5', 'CMCC-CM', 'CMCC-CMS', 'CSIRO-Mk3-6-0', 
          'CanESM2', 'CNRM-CM5', 'EC-EARTH', 'FGOALS-g2', 'GFDL-CM3', 'GFDL-ESM2G', 'GFDL-ESM2M', 'GISS-E2-H',
          'GISS-E2-R', 'HadGEM2-AO', 'HadGEM2-CC', 'HadGEM2-ES', 'IPSL-CM5A-LR', 'IPSL-CM5A-MR', 'MIROC-ESM',
          'MIROC-ESM-CHEM', 'MIROC5', 'MPI-ESM-LR', 'MPI-ESM-MR', 'MRI-CGCM3', 'NorESM1-M', 'bcc-csm1-1',
          'bcc-csm1-1-m', 'inmcm4'
         ]

ext = 'nc'

In [45]:
data_dir = 'data/tasmax'
file_list = glob(os.path.join(data_dir, '*.nc'))
print(file_list)

['data/tasmax/tasmax_CSIRO-Mk3-6-0_2006.nc', 'data/tasmax/tasmax_FGOALS-g2_2007.nc', 'data/tasmax/tasmax_GISS-E2-R_2007.nc', 'data/tasmax/tasmax_ACCESS1-0_2006.nc', 'data/tasmax/tasmax_HadGEM2-AO_2008.nc', 'data/tasmax/tasmax_CMCC-CMS_2007.nc', 'data/tasmax/tasmax_ACCESS1-0_2008.nc', 'data/tasmax/tasmax_CanESM2_2008.nc', 'data/tasmax/tasmax_HadGEM2-CC_2008.nc', 'data/tasmax/tasmax_IPSL-CM5A-MR_2007.nc', 'data/tasmax/tasmax_inmcm4_2006.nc', 'data/tasmax/tasmax_EC-EARTH_2008.nc', 'data/tasmax/tasmax_MPI-ESM-MR_2008.nc', 'data/tasmax/tasmax_GFDL-ESM2G_2007.nc', 'data/tasmax/tasmax_CanESM2_2007.nc', 'data/tasmax/tasmax_CMCC-CM_2006.nc', 'data/tasmax/tasmax_ACCESS1-0_2007.nc', 'data/tasmax/tasmax_MRI-CGCM3_2007.nc', 'data/tasmax/tasmax_MPI-ESM-MR_2007.nc', 'data/tasmax/tasmax_inmcm4_2007.nc', 'data/tasmax/tasmax_MRI-CGCM3_2006.nc', 'data/tasmax/tasmax_CCSM4_2007.nc', 'data/tasmax/tasmax_MRI-CGCM3_2008.nc', 'data/tasmax/tasmax_MPI-ESM-LR_2008.nc', 'data/tasmax/tasmax_CMCC-CM_2008.nc', 'data/

In [47]:
def calc_daily_avg():
    df_list = []
    #for file in file_list[:2]:
    for file in file_list:
        ds = xr.open_dataset(file)
        tasmax = ds['tasmax']
        data = tasmax.sel(lat=38.59375, lon=(360-121.46875), method='nearest')
        df = pd.DataFrame(data.to_pandas())
        model = file.split('_')[1]
        df['model'] = model
        df['year'] = model
        ds.close()
        df_list.append(df) 
    return pd.concat(df_list)

In [48]:
daily_df = calc_daily_avg()

In [49]:
daily_df['year'] = daily_df.index.year
daily_df['month'] = daily_df.index.month
daily_df['wateryear'] = np.where(daily_df['month'] > 9, daily_df['year'] + 1, daily_df['year'])
daily_df

Unnamed: 0_level_0,0,model,year,month,wateryear
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006-01-01 12:00:00,285.333008,CSIRO-Mk3-6-0,2006,1,2006
2006-01-02 12:00:00,279.371338,CSIRO-Mk3-6-0,2006,1,2006
2006-01-03 12:00:00,281.004456,CSIRO-Mk3-6-0,2006,1,2006
2006-01-04 12:00:00,281.116974,CSIRO-Mk3-6-0,2006,1,2006
2006-01-05 12:00:00,280.612762,CSIRO-Mk3-6-0,2006,1,2006
2006-01-06 12:00:00,279.423981,CSIRO-Mk3-6-0,2006,1,2006
2006-01-07 12:00:00,277.866180,CSIRO-Mk3-6-0,2006,1,2006
2006-01-08 12:00:00,284.577942,CSIRO-Mk3-6-0,2006,1,2006
2006-01-09 12:00:00,283.748352,CSIRO-Mk3-6-0,2006,1,2006
2006-01-10 12:00:00,282.931976,CSIRO-Mk3-6-0,2006,1,2006


In [50]:
calendar_year = daily_df.groupby(['model', 'year'])[0].mean()
calendar_year

model           year
ACCESS1-0       2006    298.407898
                2007    298.003754
                2008    298.575348
ACCESS1-3       2006    297.063568
                2007    297.719910
                2008    297.048340
CCSM4           2006    297.452179
                2007    297.840363
                2008    298.055573
CESM1-BGC       2006    297.817749
                2007    297.679718
                2008    298.143799
CESM1-CAM5      2006    297.290619
                2007    297.972290
                2008    297.147278
CMCC-CM         2006    296.831451
                2007    296.548523
                2008    295.602234
CMCC-CMS        2006    297.202698
                2007    298.586212
                2008    298.263580
CNRM-CM5        2006    297.332214
                2007    298.116241
                2008    298.264252
CSIRO-Mk3-6-0   2006    297.712738
                2007    298.335052
                2008    297.697662
CanESM2         2006    297.114868

In [57]:
print('Calendar year min from daily data')
print(calendar_year.groupby('year').min())
print('Calendar year max from daily data')
print(calendar_year.groupby('year').max())

Calendar year min from daily data
year
2006    296.052460
2007    295.870850
2008    295.602234
Name: 0, dtype: float32
Calendar year max from daily data
year
2006    299.512024
2007    298.839325
2008    298.575348
Name: 0, dtype: float32


In [52]:
water_year = daily_df.groupby(['model', 'wateryear'])[0].mean()
water_year

model         wateryear
ACCESS1-0     2006         300.371246
              2007         298.006683
              2008         298.586212
              2009         292.526978
ACCESS1-3     2006         298.211853
              2007         298.009979
              2008         297.197693
              2009         291.911224
CCSM4         2006         299.268341
              2007         297.519501
              2008         298.413727
              2009         291.911102
CESM1-BGC     2006         300.110291
              2007         296.953766
              2008         298.442902
              2009         292.705109
CESM1-CAM5    2006         298.988007
              2007         297.849518
              2008         297.154510
              2009         292.712067
CMCC-CM       2006         298.371368
              2007         295.917328
              2008         296.310883
              2009         291.946930
CMCC-CMS      2006         298.790466
              2007        

In [56]:
print('NOTE: Ignore values for first and last year in the list below')
print('Water year min from daily data')
print(water_year.groupby('wateryear').min())
print('Water year max from daily data')
print(water_year.groupby('wateryear').max())

NOTE: Ignore values for first and last year in the list below
Water year min from daily data
wateryear
2006    297.839142
2007    295.917328
2008    295.770325
2009    289.920990
Name: 0, dtype: float32
Water year max from daily data
wateryear
2006    301.511963
2007    299.092346
2008    298.877563
2009    294.120300
Name: 0, dtype: float32
