In [1]:
import os
import xarray as xr
import pandas as pd
import re

In [3]:
# Get all the files in the directory 
all_files = os.listdir('/g/data/eg3/mf3225/CMIP_TS/CMIP6/historical')

In [4]:
# Choose the ensmeble run 
ensemble = 'r1i1p1f1'

In [5]:
# Loop through all files and check to see if ensemble is in them 
model_files = []
for file in all_files:
    if ensemble in file:
        model_files.append(file)

In [6]:
# Get the paths 
paths = [f'/g/data/eg3/mf3225/CMIP_TS/CMIP6/historical/{f}' for f in model_files]

In [14]:
# Add model names instead of number as model dimension 
model_names = []
for file in model_files:
    model_name = re.search('ts_(.+?)_r1i1p1f1', file).group(1)
    model_names.append(model_name)

In [16]:
# Make into a dictionary 
models = {model_names[i]: paths[i] for i in range(len(model_names))}

In [18]:
# Read in all the models into one data set 
names = []
ds = []

for name, path in models.items():
    try:
        d = xr.open_mfdataset(path, combine='by_coords', chunks={'time':-1, 'lat':110, 'lon':110})
        if len(d['time'])==1980:
            del d['time']
            del d['time_bnds']
            time_month = pd.date_range(start='1850-01',end = '2015-01', freq ='M')
            d.coords['time'] = time_month
            ds.append(d)
            names.append(name)
        else:
            print(f'Model {name} has weird time')
    except OSError:
        # No files read, move on to the next
        continue 

multi_model = xr.concat(ds, dim='model', coords='minimal')
multi_model.coords['model'] = names

Model FGOALS-g3 has weird time


In [19]:
multi_model

Unnamed: 0,Array,Chunk
Bytes,11.26 GiB,91.39 MiB
Shape,"(53, 1980, 120, 240)","(1, 1980, 110, 110)"
Count,1007 Tasks,318 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.26 GiB 91.39 MiB Shape (53, 1980, 120, 240) (1, 1980, 110, 110) Count 1007 Tasks 318 Chunks Type float32 numpy.ndarray",53  1  240  120  1980,

Unnamed: 0,Array,Chunk
Bytes,11.26 GiB,91.39 MiB
Shape,"(53, 1980, 120, 240)","(1, 1980, 110, 110)"
Count,1007 Tasks,318 Chunks
Type,float32,numpy.ndarray


### Testing

In [19]:
ds = []
c=0
for path in paths:
    try:
        d = xr.open_mfdataset(path, combine='by_coords', chunks={'time':-1, 'lat':110, 'lon':110})
        if len(d['time'])==1980:
            del d['time']
            del d['time_bnds']
            time_month = pd.date_range(start='1850-01', end = '2015-01', freq ='M')
            d.coords['time'] = time_month
            ds.append(d)
        else:
            print(f'File {path} has weird time')
            c=c+1
    except OSError:
        # No files read, move on to the next
        continue 

models = [*range(1,(len(paths)-c+1))]    
multi_model = xr.concat(ds, dim='model', coords='minimal')
multi_model.coords['model'] = models

File /g/data/eg3/mf3225/CMIP_TS/CMIP6/historical/ts_E3SM-1-0_r1i1p1f1_1850_1999_r240x120.nc has weird time
File /g/data/eg3/mf3225/CMIP_TS/CMIP6/historical/ts_NorCPM1_r1i1p1f1_1850_2029_r240x120.nc has weird time
File /g/data/eg3/mf3225/CMIP_TS/CMIP6/historical/ts_FGOALS-g3_r1i1p1f1_1850_2016_r240x120.nc has weird time


In [20]:
multi_model

Unnamed: 0,Array,Chunk
Bytes,11.26 GiB,91.39 MiB
Shape,"(53, 1980, 120, 240)","(1, 1980, 110, 110)"
Count,1007 Tasks,318 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.26 GiB 91.39 MiB Shape (53, 1980, 120, 240) (1, 1980, 110, 110) Count 1007 Tasks 318 Chunks Type float32 numpy.ndarray",53  1  240  120  1980,

Unnamed: 0,Array,Chunk
Bytes,11.26 GiB,91.39 MiB
Shape,"(53, 1980, 120, 240)","(1, 1980, 110, 110)"
Count,1007 Tasks,318 Chunks
Type,float32,numpy.ndarray


In [2]:
def read_in_cmip_models(directory, ensemble, var, start_date, end_date):
    # Get all the files in the directory 
    all_files = os.listdir(f'{directory}')
    # Loop through all files and check to see if ensemble is in them 
    model_files = []
    for file in all_files:
        if ensemble in file:
            model_files.append(file)
    # Get the paths 
    paths = [f'/g/data/eg3/mf3225/CMIP_TS/CMIP6/historical/{f}' for f in model_files]
    # Add model names instead of number as model dimension 
    model_names = []
    for file in model_files:
        model_name = re.search(f'{var}_(.+?)_{ensemble}', file).group(1)
        model_names.append(model_name)
    # Make into a dictionary 
    models = {model_names[i]: paths[i] for i in range(len(model_names))}
    # Read in all the models into one data set 
    names = []
    ds = []

    for name, path in models.items():
        try:
            d = xr.open_mfdataset(path, combine='by_coords', chunks={'time':-1, 'lat':110, 'lon':110})
            if len(d['time'])==1980:
                del d['time']
                del d['time_bnds']
                time_month = pd.date_range(start=f'{start_date}',end = f'{end_date}', freq ='M')
                d.coords['time'] = time_month
                ds.append(d)
                names.append(name)
            else:
                print(f'Model {name} has weird time')
        except OSError:
            # No files read, move on to the next
            continue 

    multi_model = xr.concat(ds, dim='model', coords='minimal')
    multi_model.coords['model'] = names
    return multi_model

In [3]:
multi_model = read_in_cmip_models('/g/data/eg3/mf3225/CMIP_TS/CMIP6/historical', 'r1i1p1f1', 'ts', '1850-01', '2015-01')

Model FGOALS-g3 has weird time


In [4]:
multi_model

Unnamed: 0,Array,Chunk
Bytes,11.26 GiB,91.39 MiB
Shape,"(53, 1980, 120, 240)","(1, 1980, 110, 110)"
Count,1007 Tasks,318 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.26 GiB 91.39 MiB Shape (53, 1980, 120, 240) (1, 1980, 110, 110) Count 1007 Tasks 318 Chunks Type float32 numpy.ndarray",53  1  240  120  1980,

Unnamed: 0,Array,Chunk
Bytes,11.26 GiB,91.39 MiB
Shape,"(53, 1980, 120, 240)","(1, 1980, 110, 110)"
Count,1007 Tasks,318 Chunks
Type,float32,numpy.ndarray
