# Documentation

This notebook is a set of auxiliary utilities for downloading, organizing, and compiling CMIP6 data.

# Imports

In [2]:
import glob
import os
import subprocess
import sys

import xarray as xr

# Move per-period subfiles into each experiment's `subfiles/` folder

In [54]:
models = [
    'ACCESS-CM2',
    'ACCESS-ESM1-5',
    'BCC-CSM2-MR',
    'CanESM5',
    'CNRM-CM6-1',
    'HadGEM3-GC31-LL',
    'IPSL-CM6A-LR',
    'MIROC6',
    'MRI-ESM2-0',
    'NorESM2-LM'
]
experiments = ['hist-GHG', 'hist-nat', 'historical', 'ssp245']
variables = ['zg']


def move_subfiles(models, experiments, variables, root="/home/disk/tc/pangulo/CMIP6"):
    """Move the per-period files of each variable into a ``subfiles/`` folder.

    For every ``{root}/{model}/{exp}/`` directory, files named
    ``{variable}_Amon_{model}_{exp}_*.nc`` are moved into
    ``{root}/{model}/{exp}/subfiles/`` (created on demand).

    Parameters
    ----------
    models, experiments, variables : iterable of str
        Names used to build both the directory and the file pattern.
    root : str
        Top-level data directory containing one folder per model.

    Returns
    -------
    list of (str, str)
        ``(source, destination)`` path pairs for every file that was moved.
    """
    moved = []
    for model in models:
        print(f"{model} \n=============================================================")
        for exp in experiments:
            # Absolute paths are used instead of os.chdir so the kernel's
            # working directory is not changed as a side effect, and a missing
            # directory is reported as "no subfiles" rather than raising.
            exp_dir = os.path.join(root, model, exp)
            for variable in variables:
                pattern = os.path.join(exp_dir, f"{variable}_Amon_{model}_{exp}_*.nc")
                files = sorted(glob.glob(pattern))

                if not files:
                    print(f"{model}/{exp}/ has no subfiles for {variable}")
                    continue

                # `mv file subfiles/` fails when the folder is missing, so make
                # sure it exists before moving anything.
                subfiles_dir = os.path.join(exp_dir, "subfiles")
                os.makedirs(subfiles_dir, exist_ok=True)

                for file in files:
                    file_name = os.path.basename(file)
                    destination = os.path.join(subfiles_dir, file_name)
                    # Source and destination are on the same filesystem, so a
                    # rename is enough (and, like mv, overwrites an old copy).
                    os.replace(file, destination)
                    print(f"mv {file_name} subfiles/")
                    moved.append((file, destination))

        print("=============================================================")
    print("all files moved")
    return moved


moved_files = move_subfiles(models, experiments, variables)

ACCESS-CM2 
ACCESS-CM2/hist-GHG/ has no subfiles for zg
ACCESS-CM2/hist-nat/ has no subfiles for zg
ACCESS-CM2/historical/ has no subfiles for zg
ACCESS-CM2/ssp245/ has no subfiles for zg
ACCESS-ESM1-5 
ACCESS-ESM1-5/hist-GHG/ has no subfiles for zg
ACCESS-ESM1-5/hist-nat/ has no subfiles for zg
ACCESS-ESM1-5/historical/ has no subfiles for zg
ACCESS-ESM1-5/ssp245/ has no subfiles for zg
BCC-CSM2-MR 
BCC-CSM2-MR/hist-GHG/ has no subfiles for zg
BCC-CSM2-MR/hist-nat/ has no subfiles for zg
BCC-CSM2-MR/historical/ has no subfiles for zg
BCC-CSM2-MR/ssp245/ has no subfiles for zg
CanESM5 
CanESM5/hist-GHG/ has no subfiles for zg
CanESM5/hist-nat/ has no subfiles for zg
CanESM5/historical/ has no subfiles for zg
CanESM5/ssp245/ has no subfiles for zg
CNRM-CM6-1 
CNRM-CM6-1/hist-GHG/ has no subfiles for zg
CNRM-CM6-1/hist-nat/ has no subfiles for zg
CNRM-CM6-1/historical/ has no subfiles for zg
CNRM-CM6-1/ssp245/ has no subfiles for zg
HadGEM3-GC31-LL 
HadGEM3-GC31-LL/hist-GHG/ has no subfi

# Compile subfiles

In [53]:
models = [
    'ACCESS-CM2',
    'ACCESS-ESM1-5',
    'BCC-CSM2-MR',
    'CanESM5',
    'CNRM-CM6-1',
    'HadGEM3-GC31-LL',
    'IPSL-CM6A-LR',
    'MIROC6',
    'MRI-ESM2-0',
    'NorESM2-LM'
]
experiments = ['hist-GHG', 'hist-nat', 'historical', 'ssp245']
# variables = ['rlut', 'tasmax', 'tos']
variables = ['zg']
# variants = ['r1i1p1f1']


def group_subfiles(file_names):
    """Group subfile names by everything except their time period.

    Names are expected to have seven "_"-separated fields:
    ``{variable}_{freq}_{model}_{exp}_{variant}_{grid}_{start}-{end}.nc``.
    Names with a different number of fields are left out of the result.

    Returns a dict mapping the shared six-field prefix to the sorted list of
    names carrying that prefix.
    """
    groups = {}
    for name in sorted(file_names):
        fields = os.path.basename(name).split("_")
        if len(fields) != 7:
            continue
        groups.setdefault("_".join(fields[:6]), []).append(name)
    return groups


def compiled_file_name(subfiles):
    """Build the merged file name for one sorted group of subfiles.

    The start date is taken from the first name and the end date from the last
    one, giving ``compiled_{prefix}_{start}-{end}.nc``.
    """
    first_stem = os.path.splitext(os.path.basename(subfiles[0]))[0]
    last_stem = os.path.splitext(os.path.basename(subfiles[-1]))[0]
    prefix, first_period = first_stem.rsplit("_", 1)
    last_period = last_stem.rsplit("_", 1)[1]
    starting_date = first_period.split("-")[0]
    ending_date = last_period.split("-")[-1]
    return f"compiled_{prefix}_{starting_date}-{ending_date}.nc"


def compile_subfiles(models, experiments, variables, root="/home/disk/tc/pangulo/CMIP6"):
    """Merge the per-period files of each variable with ``cdo mergetime``.

    For every ``{root}/{model}/{exp}/`` directory and every variable:

    * if a ``compiled_{variable}_*.nc`` file already exists, nothing is done;
    * otherwise the ``{variable}_*.nc`` files are grouped by frequency,
      variant and grid (so different variants are never merged together) and
      each group is merged into its own ``compiled_...`` file.

    Returns the list of compiled file paths that were created successfully.
    """
    created = []
    for model in models:
        print(f"{model} \n=============================================================")
        for exp in experiments:
            # Absolute paths + `cwd=` replace os.chdir, so the kernel's working
            # directory is left alone and a missing directory does not raise.
            file_directory = os.path.join(root, model, exp)

            for variable in variables:
                # The "_" after the variable keeps e.g. 'tas' from matching
                # 'tasmax' files.
                if glob.glob(os.path.join(file_directory, f"compiled_{variable}_*.nc")):
                    print(f"{model} {exp} {variable} already compiled")
                    continue

                found = glob.glob(os.path.join(file_directory, f"{variable}_*.nc"))
                names = [os.path.basename(path) for path in found]
                groups = group_subfiles(names)

                grouped = {name for group in groups.values() for name in group}
                for name in sorted(set(names) - grouped):
                    print(f"Skipping {name}: unexpected file name layout")

                if not groups:
                    print(f"No file for {model} {exp} {variable}")
                    continue

                # Create a folder called subfiles if it doesn't exist
                os.makedirs(os.path.join(file_directory, "subfiles"), exist_ok=True)

                for subfiles in groups.values():
                    compiled_file = compiled_file_name(subfiles)
                    compiled_path = os.path.join(file_directory, compiled_file)
                    if os.path.exists(compiled_path):
                        continue

                    # An explicit argument list avoids shell globbing, which
                    # would also pick up variants sharing a prefix
                    # (r1i1p1f1 vs r1i1p1f10).
                    merge_command = ["cdo", "mergetime", *subfiles, compiled_file]
                    print(" ".join(merge_command))
                    try:
                        result = subprocess.run(merge_command, cwd=file_directory)
                    except OSError as error:
                        print(f"Could not run cdo for {compiled_file}: {error}")
                        continue

                    if result.returncode != 0:
                        # A failed merge may leave a partial output file behind,
                        # which a later run would treat as already compiled.
                        print(f"cdo mergetime failed (exit {result.returncode}) for {compiled_file}")
                        continue

                    created.append(compiled_path)

        print("=============================================================")
    print('done')
    return created


compiled_files = compile_subfiles(models, experiments, variables)

ACCESS-CM2 
ACCESS-CM2 hist-GHG zg already compiled
ACCESS-CM2 hist-nat zg already compiled
ACCESS-CM2 historical zg already compiled
ACCESS-CM2 ssp245 zg already compiled
ACCESS-ESM1-5 
ACCESS-ESM1-5 hist-GHG zg already compiled
ACCESS-ESM1-5 hist-nat zg already compiled
ACCESS-ESM1-5 historical zg already compiled
ACCESS-ESM1-5 ssp245 zg already compiled
BCC-CSM2-MR 
BCC-CSM2-MR hist-GHG zg already compiled
BCC-CSM2-MR hist-nat zg already compiled
BCC-CSM2-MR historical zg already compiled
BCC-CSM2-MR ssp245 zg already compiled
CanESM5 
CanESM5 hist-GHG zg already compiled
CanESM5 hist-nat zg already compiled
CanESM5 historical zg already compiled
CanESM5 ssp245 zg already compiled
CNRM-CM6-1 
CNRM-CM6-1 hist-GHG zg already compiled
CNRM-CM6-1 hist-nat zg already compiled
CNRM-CM6-1 historical zg already compiled
CNRM-CM6-1 ssp245 zg already compiled
HadGEM3-GC31-LL 
HadGEM3-GC31-LL hist-GHG zg already compiled
HadGEM3-GC31-LL hist-nat zg already compiled
HadGEM3-GC31-LL historical z

# Show .sh files

In [52]:
# Models and experiments whose download folders are inspected below.
models = [
    'ACCESS-CM2', 'ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CanESM5', 'CNRM-CM6-1',
    'HadGEM3-GC31-LL', 'IPSL-CM6A-LR', 'MIROC6', 'MRI-ESM2-0', 'NorESM2-LM',
]
experiments = ['hist-GHG', 'hist-nat', 'historical', 'ssp245']
# Alternative selection used before: ['rlut', 'tasmax', 'tos'] (variants: ['r1i1p1f1'])
variables = ['zg']

# Print the name of every *.sh file sitting in each model/experiment folder,
# or a notice when a folder holds none.
for model in models:
    print(f"{model} \n=============================================================")
    for exp in experiments:
        os.chdir(f"/home/disk/tc/pangulo/CMIP6/{model}/{exp}/")
        sh_files = glob.glob("*.sh")

        if sh_files:
            for file in sh_files:
                print(f"{model}/{exp}: {file}")
        else:
            print(f"No sh files for {model}/{exp}")

    print("=============================================================")
print("done")

ACCESS-CM2 
No sh files for ACCESS-CM2/hist-GHG
ACCESS-CM2/hist-nat: wget_script_2023-9-14_14-56-47.sh
ACCESS-CM2/historical: wget_script_2023-9-14_15-7-46.sh
No sh files for ACCESS-CM2/ssp245
ACCESS-ESM1-5 
ACCESS-ESM1-5/hist-GHG: CMIP6.DAMIP.CSIRO.ACCESS-ESM1-5.hist-GHG.r1i1p1f1.Amon.rlut.gn.sh
ACCESS-ESM1-5/hist-nat: CMIP6.DAMIP.CSIRO.ACCESS-ESM1-5.hist-nat.r1i1p1f1.Amon.rlut.gn.sh
ACCESS-ESM1-5/hist-nat: wget_script_2023-9-19_14-9-6.sh
ACCESS-ESM1-5/historical: wget_script_2023-9-14_14-58-1.sh
ACCESS-ESM1-5/historical: CMIP6.CMIP.CSIRO.ACCESS-ESM1-5.historical.r1i1p1f1.Amon.rlut.gn.sh
ACCESS-ESM1-5/ssp245: CMIP6.ScenarioMIP.CSIRO.ACCESS-ESM1-5.ssp245.r1i1p1f1.Amon.rlut.gn.sh
BCC-CSM2-MR 
BCC-CSM2-MR/hist-GHG: CMIP6.DAMIP.BCC.BCC-CSM2-MR.hist-GHG.r1i1p1f1.day.tasmax.gn.sh
BCC-CSM2-MR/hist-GHG: CMIP6.DAMIP.BCC.BCC-CSM2-MR.hist-GHG.r1i1p1f1.Amon.rlut.gn.sh
BCC-CSM2-MR/hist-GHG: CMIP6.DAMIP.BCC.BCC-CSM2-MR.hist-GHG.r1i1p1f1.Omon.tos.gn.sh
BCC-CSM2-MR/hist-nat: CMIP6.DAMIP.BCC.BCC-CSM2-

# Extract the 200 hPa pressure level from geopotential height data

In [26]:
models_list = [
    'ACCESS-CM2',
    'ACCESS-ESM1-5',
    'BCC-CSM2-MR',
    'CanESM5',
    'CNRM-CM6-1',
    'HadGEM3-GC31-LL',
    'IPSL-CM6A-LR',
    'MIROC6',
    'MRI-ESM2-0',
    'NorESM2-LM'
]
experiments_list = ['hist-GHG', 'hist-nat', 'historical', 'ssp245']
variables = ['zg']


def zg200_file_name(compiled_name):
    """Return the output file name for a compiled zg file.

    The first "zg" in the base name is replaced by "zg200", e.g.
    ``compiled_zg_Amon_...nc`` becomes ``compiled_zg200_Amon_...nc``.
    """
    # maxsplit=1 keeps the unpacking valid even if "zg" appears again later in
    # the name.
    header, closer = os.path.basename(compiled_name).split("zg", 1)
    return f"{header}zg200{closer}"


def extract_zg200(models, experiments, root="/home/disk/tc/pangulo/CMIP6"):
    """Write a single-level copy of every compiled zg file.

    For each ``{root}/{model}/{exp}/compiled_zg_*.nc`` file, the level nearest
    to 20000 on the ``plev`` coordinate is selected and saved next to the input
    as ``compiled_zg200_*.nc``. Folders without a compiled file are reported
    and skipped.

    Returns the list of output paths that were written.
    """
    # NOTE(review): assumes `plev` is expressed in Pa, so 20000 is 200 hPa.
    # The previous code took positional index 9 instead; confirm both agree for
    # every model before relying on the outputs.
    level_pa = 20000.0

    written = []
    for model in models:
        for exp in experiments:
            exp_dir = os.path.join(root, model, exp)
            compiled = sorted(glob.glob(os.path.join(exp_dir, "compiled_zg_*.nc")))

            if not compiled:
                print(f"No compiled zg file for {model}/{exp}")
                continue

            for file in compiled:
                output = os.path.join(exp_dir, zg200_file_name(file))
                # The context manager closes the input file once the level has
                # been written, instead of leaving one open handle per file.
                with xr.open_dataset(file) as zg_data:
                    # tolerance makes a file without a level close to 20000
                    # fail loudly rather than silently saving another level.
                    zg200_data = zg_data.sel(plev=level_pa, method="nearest", tolerance=1.0)
                    zg200_data.to_netcdf(output)
                written.append(output)
    return written


zg200_files = extract_zg200(models_list, experiments_list)
        
        