# Exploring what models we have and running preprocess_local.py

In [None]:
import os, sys 
import numpy as np
import xarray as xr
import warnings
import matplotlib.pyplot as plt

sys.path.insert(1, '../')
#from load_utils import get_cmip6_catalogue,find_overlap_models,rename_dimensions,rename_lat_lon

In [None]:
import preprocess_utils as pputils
import analysis_utils as autils 

### List areacell files that are on glade

In [None]:
# Ask the preprocessing helpers which cell-area files are available locally.
areacello_files, areacella_files, areacella_models = pputils.find_cellarea_files_local()

# The slice offsets equal the lengths of these prefixes (14 and 13), so the
# file names presumably start with them -- confirm against the real names.
ocean_prefix_len = len('areacello_Ofx_')
atmos_prefix_len = len('areacella_fx_')
# Number of trailing characters dropped from every file name before splitting.
suffix_len = 15

# Keep the text between prefix and suffix up to the first underscore
# (presumably the model name).
areacello_models_local = [
    name[ocean_prefix_len:-suffix_len].split('_')[0] for name in areacello_files
]
areacella_models_local = [
    name[atmos_prefix_len:-suffix_len].split('_')[0] for name in areacella_files
]

# Cell output: names extracted from the areacello files.
areacello_models_local

### List all models that are downloaded in given directory for all experiments and variables: 

In [None]:
# Root directory that is scanned for downloaded data.
p = '/glade/scratch/mmsmith/CMIP6/cmip6_downloader/'
experiments = ['historical', 'piControl', 'ssp370']
# Variable name -> table prefix that follows the variable name in file names.
variables = {
    'siconc': 'SImon_',
    'tas': 'Amon_',
    'sithick': 'SImon_',
    'areacella': 'fx_',
    'areacello': 'Ofx_',
}

directories = pputils.get_directories(p)
# Drop the entries that should not be scanned for models.
# NOTE(review): assumes `directories` is a list; list.remove raises
# ValueError when the entry is missing.
for unwanted in ('.git', 'test'):
    directories.remove(unwanted)

# Mapping that is later indexed by '<variable>_mon_<experiment>' keys.
models = pputils.get_models(directories, variables, experiments, p)

### List models available for each experiment (available for all three variables: siconc, tas, sithick)

In [None]:
def _models_with_all_variables(models, experiment, variable_names=('siconc', 'tas', 'sithick')):
    """Return the set of models listed for every variable of one experiment.

    Parameters
    ----------
    models : mapping
        Indexed by '<variable>_mon_<experiment>'; each value is an iterable
        of model names.
    experiment : str
        Experiment name used to build the keys, e.g. 'piControl'.
    variable_names : sequence of str
        Variables a model must be listed under to be kept.

    Returns
    -------
    set
        Model names that appear under all requested variables.
    """
    # Convert every entry to a set *before* intersecting: the `&` operator
    # is not defined between a list and a set, so intersecting the raw
    # dictionary value only works if it already happens to be a set.
    per_variable = [set(models[name + '_mon_' + experiment]) for name in variable_names]
    return set.intersection(*per_variable)


# models_picontrol stays a list and the other two stay sets, as before.
models_picontrol = list(_models_with_all_variables(models, 'piControl'))
models_historical = _models_with_all_variables(models, 'historical')
models_ssp370 = _models_with_all_variables(models, 'ssp370')

# Cell output: number of models per experiment.
len(models_picontrol), len(models_historical), len(models_ssp370)

### Collect list of models and their grids: 

In [None]:
var = ['siconc', 'sithick']
exp = 'ssp370'
# Model name -> two-character code sliced out of the first matching file name.
model_grids = {}

for v in var:
    # Everything stored in the '<variable>_mon_<experiment>/' directory.
    files = os.listdir(p + v + '_mon_' + exp + '/')

    # NOTE(review): the model names come from the piControl list although
    # exp is 'ssp370' -- confirm this is intended.
    for m in models_picontrol:
        filename = v + '_' + variables[v] + m + '_' + exp + '_r1i1p1f1_'

        # Only the first file whose name contains the pattern is used.
        first_match = next((name for name in files if filename in name), None)
        if first_match is not None:
            # Characters -19..-18 of the name; presumably the grid label
            # (e.g. 'gn') -- confirm against the file naming scheme.
            # A later variable overwrites the entry stored by an earlier one.
            model_grids[m] = str(first_match[-19:-17])

In [None]:
# Cell output: the model -> grid code mapping filled in by the loop above.
model_grids

### Here are lists of models without areacell files: 

In [None]:
# Models for which an areacello file was found locally.
local_areacello = set(areacello_models_local)

# For each experiment: models that have no local areacello file.
no_cellareao_picontrol = set(models_picontrol).difference(local_areacello)
no_cellareao_historical = set(models_historical).difference(local_areacello)
no_cellareao_ssp370 = set(models_ssp370).difference(local_areacello)

# Cell output: every model missing an areacello file in at least one experiment.
no_cellareao_ssp370.union(no_cellareao_picontrol, no_cellareao_historical)

### This is the list of models we want to preprocess: 

In [None]:
# Cell output: the ssp370 model collection computed earlier in the notebook.
models_ssp370

### Mistakes and things to investigate: 

##### Historical simulations: 
* 'ACCESS-CM2' - is giving an HDF error for 'siconc' processing

##### piControl simulations: 
* datetime issue: 'EC-Earth3','MPI-ESM1-2-LR','EC-Earth3-Veg','EC-Earth3-LR','MPI-ESM-1-2-HAM', 'MPI-ESM1-2-HR'
* HDF errors: 'ACCESS-ESM1-5','ACCESS-CM2'

In [None]:
# Historical models still outstanding (presumably not yet processed -- confirm).
models_historical_left = [
    'TaiESM1',
    'AWI-ESM-1-1-LR',
    'KIOST-ESM',
    'NESM3',
]

# piControl models that failed; the kind of error is not recorded here.
models_picontrol_error = [
    'NESM3',
    'KIOST-ESM',
    'AWI-ESM-1-1-LR',
]

# piControl models with a datetime issue: the time object switches from a
# timestamp to a Proleptic Gregorian datetime object halfway through.
models_picontrol_error_datetime = [
    'EC-Earth3',
    'MPI-ESM1-2-LR',
    'EC-Earth3-Veg',
    'EC-Earth3-LR',
    'MPI-ESM-1-2-HAM',
    'MPI-ESM1-2-HR',
]

# piControl models that raise an HDF error.
models_picontrol_error_HDF = [
    'ACCESS-ESM1-5',
    'ACCESS-CM2',
]

# ssp370 models that raise an HDF error.
models_ssp370_error_HDF = [
    'ACCESS-CM2',
    'MPI-ESM1-2-HR',
]

# ssp370 models that failed; the kind of error is not recorded here.
models_ssp370_error = [
    'TaiESM1',
]
# 'MPI-ESM1-2-HR' - permission denied

# Running the preprocessing: 

In [None]:
# Run the preprocess_local script from this notebook via the IPython %run magic.
%run preprocess_local