In [2]:
import os

In [3]:
os.getcwd()

'/Users/one/Documents/Palau/2_ocean/2.2_ocean-temperature/2.2.1_mean-sea-surface-temperature'

In [4]:
import xarray as xr
import numpy as np
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from datetime import datetime as dt






In [5]:
def find_nth(string, substring, n):
   if (n == 1):
       return string.find(substring)
   else:
       return string.find(substring, find_nth(string, substring, n - 1) + 1)
    
def find_memberid(run):
    return run[find_nth(run,"_",4)+1:find_nth(run,"_",5)]

def find_sourceid(run):
    return run[find_nth(run,"_",2)+1:find_nth(run,"_",3)]

def find_timerange(run):
    return run[find_nth(run,"_",6)+1:-3]

def find_variableid(run):
    return run[:find_nth(run,"_",1)]

def find_tableid(run):
    return run[find_nth(run,"_",1)+1:find_nth(run,"_",2)]

def find_experimentid(run):
    return run[find_nth(run,"_",3)+1:find_nth(run,"_",4)]

In [6]:
pd.options.mode.chained_assignment = None  # default='warn'

In [18]:
def get_lat_name(df):
    for lat_name in ['lat', 'latitude']:
        if lat_name in df.columns.values.tolist():
            return lat_name
    raise RuntimeError("Couldn't find a latitude coordinate")
    
def get_lon_name(df):
    for lon_name in ['lon','long','longitude']:
        if lon_name in df.columns.values.tolist():
            return lon_name
    raise RuntimeError("Couldn't find a latitude coordinate")




#currently slices by year
def compute_mean(df,var):
    df_list = []
    for year in df['year'].unique().tolist():
        sub = global_mean(df[df['year'] == year],var)
        sub['year'] = year
        df_list.append(sub.set_index([get_lat_name(df), get_lon_name(df),'year']))
    result = pd.concat(df_list)
    return result

df_eez = gpd.read_file('palauEEZ.geojson')
eez = df_eez['geometry'].values[0]

def palau_eez(df):
    in_palau = []
    longlat = df[['longitude','latitude']].values.tolist()
    for x in longlat:
        point = Point(x[0],x[1])
        if eez.contains(point) or point.within(eez):
            in_palau.append((x[0],x[1]))
    return df[df[['longitude', 'latitude']].apply(tuple, axis=1).isin(in_palau)]


In [8]:
def filter_palau(df):
    df = df.reset_index()
    filtered = df[(df[get_lat_name(df)]>= 1.25)&
                                (df[get_lat_name(df)]<=12.50)&(df[get_lon_name(df)]<=139.00)&
                                (df[get_lon_name(df)]>=128)]
    return filtered

In [9]:
def convert_to_dt(x):

    try:
        return dt.strptime(str(x), '%Y-%m-%d %H:%M:%S')
    except ValueError:
        return dt.strptime(str(x), '%Y-%m-%d %H:%M:%S.%f')
    

In [10]:
def convert_year(time):
    return [x.year for x in time]

In [11]:
def global_mean(df,var):
    lat = df[get_lat_name(df)]
    weight = np.cos(np.deg2rad(lat))
    weight /= weight.mean()
#     return weight
#     other_dims = set(ds.dims) - {'time'}
#     df["weight"] = weight
    df[var] = df[var] * weight

    return df.groupby([get_lat_name(df), get_lon_name(df)], as_index = False)[var].mean()

In [19]:
def run_pickles(model,runs,var):
    print("Creating pickles")

    if len(runs)>0:
        master = pd.concat(runs)
        master_reset = master.reset_index()
        model_run_name= "_" +str(model[0])+ "_" + str(model[1])+ "_" + str(model[2])+ "_" + str(model[3])+ "_" + str(model[4])+ "_"
        master.to_pickle("./master/master" + model_run_name +".pkl")
        print("Created: " + "master" + model_run_name +".pkl")
            
        try:
            master_reset['year'] = master_reset['time'].dt.year
        except AttributeError:
            try:
                master_reset['year'] = master_reset.time.apply(convert_to_dt).dt.year
            except AttributeError:
                master_reset['year'] = convert_year(master_reset['time'].values.tolist())
        
        
        latlon_master= compute_mean(master_reset,var)
        latlon_master.to_pickle("./latlon/latlon" + model_run_name +".pkl")
        print("Created: " + "latlon" + model_run_name +".pkl")
    
        averaged = latlon_master.reset_index().groupby(["year"], as_index = False)[var].mean()
        averaged.to_pickle("./average/average" + model_run_name +".pkl")
        print("Created: " + "average" + model_run_name +".pkl")

In [13]:
os.getcwd()

'/Users/one/Documents/Palau/2_ocean/2.2_ocean-temperature/2.2.1_mean-sea-surface-temperature'

In [14]:
#make three folders
if not os.path.exists(os.getcwd() + "/master"):
    os.makedirs(os.getcwd() + "/master")
    
if not os.path.exists(os.getcwd() + "/latlon"):
    os.makedirs(os.getcwd() + "/latlon")
    
if not os.path.exists(os.getcwd() + "/average"):
    os.makedirs(os.getcwd() + "/average")

In [15]:
cmip_vars = ['zos','thetao','ph','no3','po4','phyc']

models = ["historical","ssp119","ssp126","ssp245","ssp370","ssp585"]


for var in cmip_vars:
    print(var)
    var_runs = {}
    
    for model in models:
        var_runs[model] = []
    
    for x in os.listdir(os.getcwd() + "/wgetCMIP6_sample/"+var +"/" ):
        
        if not x.startswith('.') and os.path.isdir(os.getcwd() + "/wgetCMIP6_sample/"+var+"/" + x):
            
            for y in os.listdir(os.getcwd() + "/wgetCMIP6_sample/"+var+"/" + x):
                if not y.startswith('.'):
                    if y[:len(var)] == var:
                        for model in models:
                            if model in y:
                                var_runs[model].append(y)
    print(var_runs)
    for model in models:
        print(model)
        if len(var_runs[model]) <= 0:
            continue
        run_members = {}
        for run in var_runs[model]:
            key = (
                find_variableid(run),
                find_tableid(run),
                find_sourceid(run),
                find_experimentid(run),
                find_memberid(run),
            )
            if key in run_members:
                run_members[key].append(find_timerange(run))
            else:
                run_members[key] = [find_timerange(run)]
        if model == "historical":
            startComplete = []
            endComplete = []
            for run in run_members:
                for trange in run_members[run]:
                    if trange[:4] == "1850":
                        startComplete.append(run)
                    if trange[7:-2] == "2014":
                        endComplete.append(run)
            complete_runs = [x for x in startComplete if x in endComplete]
            for x in complete_runs:
                run_members[x].sort()
    
            scanned_runs = {run: [] for run in complete_runs}
            trimmed_runs = {run: [] for run in complete_runs}
            run_members = complete_runs
        else:
            scanned_runs = {run: [] for run in run_members}
            trimmed_runs = {run: [] for run in run_members}
            for run in run_members:
                run_members[run].sort()
    
        contains_palau = {m: [] for m in models}
        no_palau = {m: [] for m in models}
        palau_runs = {m: [] for m in models}
    
        folder_root = os.getcwd() + "/wgetCMIP6_sample/"+var+"/"
    
        for run in run_members:
            if "_".join(run) in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]:
                print(run)
                print("already scanned")
                continue
            print(run)
            processed_runs = []
            if run in no_palau:
                break
            
            # Collect all matching files from subdirectories
            files = []
            for root, dirs, filenames in os.walk(folder_root):
                files.extend(
                    [
                        os.path.join(root, x)
                        for x in filenames
                        if "_".join(run) in x
                    ]
                )
            files.sort()
            try:
                for file in files:
                    if file not in scanned_runs[run] and run not in no_palau[model]:
                        print(file)
                        ds = xr.open_dataset(file)
                        if var not in ds.keys():
                            no_palau[model].append(run)
                            break
                        df = ds[var].to_dataframe()
        
                        filtered = filter_palau(df)
                        del df
        
                        if len(filtered[run[0]]) > 0:
                            print("Run was a success")
                            processed_runs.append(filtered)
                        else:
                            no_palau[model].append(run)
                            break
                        scanned_runs[run].append(file)
            except ValueError as e:
                print("ValueError", e)
                continue
            
    
            if len(processed_runs) > 0:
                run_pickles(run, processed_runs,var)


zos
{'historical': [], 'ssp119': [], 'ssp126': [], 'ssp245': [], 'ssp370': [], 'ssp585': []}
historical
ssp119
ssp126
ssp245
ssp370
ssp585
thetao
{'historical': [], 'ssp119': [], 'ssp126': [], 'ssp245': [], 'ssp370': [], 'ssp585': []}
historical
ssp119
ssp126
ssp245
ssp370
ssp585
ph
{'historical': [], 'ssp119': [], 'ssp126': [], 'ssp245': [], 'ssp370': [], 'ssp585': []}
historical
ssp119
ssp126
ssp245
ssp370
ssp585
no3
{'historical': ['no3_Omon_CanESM5_historical_r10i1p2f1_gn_192101-193012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_194101-195012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_187101-188012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_195101-196012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_197101-198012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_193101-194012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_200101-201012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_185001-186012.nc', 'no3_Omon_CanESM5_historical_r10i1p2f1_gn_198101-199012.nc', 'no3_Omon_Ca

TypeError: run_pickles() missing 1 required positional argument: 'var'

In [20]:
run_pickles(run, processed_runs,var)

Creating pickles
Created: master_no3_Omon_CanESM5_historical_r10i1p2f1_.pkl
Created: latlon_no3_Omon_CanESM5_historical_r10i1p2f1_.pkl
Created: average_no3_Omon_CanESM5_historical_r10i1p2f1_.pkl


In [None]:
run_members = {}
for run in pr_runs["historical"]:
    if (find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run)) in run_members:
        run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))].append(find_timerange(run))
    else:
        run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))] = [find_timerange(run)]

In [16]:
#this should show only run members for historical, but it only has everything that isnt a run member
run_members

{('pr', 'Amon', 'E3SM-1-0', 'historical', 'r2i1p1f1'): ['185001-187412',
  '200001-201412',
  '187501-189912',
  '192501-194912',
  '197501-199912',
  '195001-197412',
  '190001-192412'],
 ('pr', 'Amon', 'E3SM-1-0', 'historical', 'r3i1p1f1'): ['190001-192412',
  '195001-197412',
  '197501-199912',
  '192501-194912',
  '187501-189912',
  '200001-201412',
  '185001-187412'],
 ('pr', 'Amon', 'SAM0-UNICON', 'historical', 'r1i1p1f1'): ['189001-189912',
  '198001-198912',
  '197001-197912',
  '186001-186912',
  '191001-191912',
  '190001-190912',
  '187001-187912',
  '196001-196912',
  '199001-199912',
  '188001-188912',
  '201001-201412',
  '194001-194912',
  '192001-192912',
  '185001-185912',
  '193001-193912',
  '195001-195912',
  '200001-200912'],
 ('pr', 'Amon', 'CESM2-WACCM-FV2', 'historical', 'r1i1p1f1'): ['195001-199912',
  '190001-194912',
  '185001-189912',
  '200001-201412'],
 ('pr', 'Amon', 'E3SM-1-0', 'historical', 'r5i1p1f1'): ['192501-194912',
  '187501-189912',
  '200001-201

In [17]:
startComplete = []
endComplete = []
for run in run_members:
    for trange in run_members[run]:
        if trange[:4] == "1850":
            startComplete.append(run)
        if trange[7:-2] == "2014":
            endComplete.append(run)
complete_runs = []
for x in startComplete:
    if x in endComplete:
        complete_runs.append(x)
for x in complete_runs:
    run_members[x].sort()

In [18]:
scanned_runs = {}

trimmed_runs = {}
for run in complete_runs:
    scanned_runs[run] = []
    trimmed_runs[run] = []
    
contains_palau = {}

no_palau = {}

palau_runs = {}

for model in models:
    contains_palau[model] = []
    palau_runs[model] = []
    no_palau[model] = []

In [20]:
# def run_pickles(model,runs):
#     print("Creating pickles")

#     if len(runs)>0:
#         master = pd.concat(runs)
#         master_reset = master.reset_index()
#         model_run_name= "_" +str(model[0])+ "_" + str(model[1])+ "_" + str(model[2])+ "_" + str(model[3])+ "_" + str(model[4])+ "_"
#         master.to_pickle("./master/master" + model_run_name +".pkl")
#         print("Created: " + "master" + model_run_name +".pkl")
            
#         try:
#             master_reset['year'] = master_reset['time'].dt.year
#         except AttributeError:
#             try:
#                 master_reset['year'] = master_reset.time.apply(convert_to_dt).dt.year
#             except AttributeError:
#                 master_reset['year'] = convert_year(master_reset['time'].values.tolist())
        
        
#         latlon_master= compute_mean(master_reset)
#         latlon_master.to_pickle("./latlon/latlon" + model_run_name +".pkl")
#         print("Created: " + "latlon" + model_run_name +".pkl")
    
#         averaged = latlon_master.reset_index().groupby(["year"], as_index = False)["pr"].mean()
#         averaged.to_pickle("./average/average" + model_run_name +".pkl")
#         print("Created: " + "average" + model_run_name +".pkl")

In [21]:
models

['historical', 'ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp585']

In [22]:
run_members = {}
for run in pr_runs['ssp119']:
    if (find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run)) in run_members:
        run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))].append(find_timerange(run))
    else:
        run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))] = [find_timerange(run)]

In [23]:
run_members = {}
for model in models:
    for run in pr_runs[model]:
        if (find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run)) in run_members:
            run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))].append(find_timerange(run))
        else:
            run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))] = [find_timerange(run)]

In [24]:
run_members

{('pr', 'Amon', 'E3SM-1-0', 'historical', 'r2i1p1f1'): ['185001-187412',
  '200001-201412',
  '187501-189912',
  '192501-194912',
  '197501-199912',
  '195001-197412',
  '190001-192412'],
 ('pr', 'Amon', 'E3SM-1-0', 'historical', 'r3i1p1f1'): ['190001-192412',
  '195001-197412',
  '197501-199912',
  '192501-194912',
  '187501-189912',
  '200001-201412',
  '185001-187412'],
 ('pr', 'Amon', 'SAM0-UNICON', 'historical', 'r1i1p1f1'): ['189001-189912',
  '198001-198912',
  '197001-197912',
  '186001-186912',
  '191001-191912',
  '190001-190912',
  '187001-187912',
  '196001-196912',
  '199001-199912',
  '188001-188912',
  '201001-201412',
  '194001-194912',
  '192001-192912',
  '185001-185912',
  '193001-193912',
  '195001-195912',
  '200001-200912'],
 ('pr', 'Amon', 'CESM2-WACCM-FV2', 'historical', 'r1i1p1f1'): ['195001-199912',
  '190001-194912',
  '185001-189912',
  '200001-201412'],
 ('pr', 'Amon', 'E3SM-1-0', 'historical', 'r5i1p1f1'): ['192501-194912',
  '187501-189912',
  '200001-201

In [25]:
for key in run_members:
    run_members[key] = sorted(run_members[key])

In [26]:
run_members

{('pr', 'Amon', 'E3SM-1-0', 'historical', 'r2i1p1f1'): ['185001-187412',
  '187501-189912',
  '190001-192412',
  '192501-194912',
  '195001-197412',
  '197501-199912',
  '200001-201412'],
 ('pr', 'Amon', 'E3SM-1-0', 'historical', 'r3i1p1f1'): ['185001-187412',
  '187501-189912',
  '190001-192412',
  '192501-194912',
  '195001-197412',
  '197501-199912',
  '200001-201412'],
 ('pr', 'Amon', 'SAM0-UNICON', 'historical', 'r1i1p1f1'): ['185001-185912',
  '186001-186912',
  '187001-187912',
  '188001-188912',
  '189001-189912',
  '190001-190912',
  '191001-191912',
  '192001-192912',
  '193001-193912',
  '194001-194912',
  '195001-195912',
  '196001-196912',
  '197001-197912',
  '198001-198912',
  '199001-199912',
  '200001-200912',
  '201001-201412'],
 ('pr', 'Amon', 'CESM2-WACCM-FV2', 'historical', 'r1i1p1f1'): ['185001-189912',
  '190001-194912',
  '195001-199912',
  '200001-201412'],
 ('pr', 'Amon', 'E3SM-1-0', 'historical', 'r5i1p1f1'): ['185001-187412',
  '187501-189912',
  '190001-192

In [27]:
model

'ssp585'

In [28]:
import os
import xarray as xr
seen = []
for model in models:
    print(model)
#     if model == "ssp119" or 
    if len(pr_runs[model]) <= 0 or any(model in x[8:-5] for x in os.listdir(os.getcwd() + "/average")):
        continue
        
    seen.append(model)

    run_members = {}
    for run in pr_runs[model]:
        key = (
            find_variableid(run),
            find_tableid(run),
            find_sourceid(run),
            find_experimentid(run),
            find_memberid(run),
        )
        if key in run_members:
            run_members[key].append(find_timerange(run))
        else:
            run_members[key] = [find_timerange(run)]
    if model == "historical":
        startComplete = []
        endComplete = []
        for run in run_members:
            for trange in run_members[run]:
                if trange[:4] == "1850":
                    startComplete.append(run)
                if trange[7:-2] == "2014":
                    endComplete.append(run)
        complete_runs = [x for x in startComplete if x in endComplete]
        for x in complete_runs:
            run_members[x].sort()

        scanned_runs = {run: [] for run in complete_runs}
        trimmed_runs = {run: [] for run in complete_runs}
        run_members = complete_runs
    else:
        scanned_runs = {run: [] for run in run_members}
        trimmed_runs = {run: [] for run in run_members}
        for run in run_members:
            run_members[run].sort()

    contains_palau = {m: [] for m in models}
    no_palau = {m: [] for m in models}
    palau_runs = {m: [] for m in models}

    folder_root = os.getcwd() + "/wgetCMIP6_sample/pr/"

    for run in run_members:
        if "_".join(run) in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]:
            print(run)
            print("already scanned")
            continue
        print(run)
        processed_runs = []
        if run in no_palau:
            break
        
        # Collect all matching files from subdirectories
        files = []
        for root, dirs, filenames in os.walk(folder_root):
            files.extend(
                [
                    os.path.join(root, x)
                    for x in filenames
                    if "_".join(run) in x
                ]
            )
        files.sort()
        
        for file in files:
            if file not in scanned_runs[run] and run not in no_palau[model]:
                print(file)
                ds = xr.open_dataset(file)
                if "pr" not in ds.keys():
                    no_palau[model].append(run)
                    break
                df = ds.pr.to_dataframe()

                filtered = filter_palau(df)

                if len(filtered[run[0]]) > 0:
                    print("Run was a success")
                    processed_runs.append(filtered)
                else:
                    no_palau[model].append(run)
                    break
                scanned_runs[run].append(file)

        if len(processed_runs) > 0:
            run_pickles(run, processed_runs)


historical
('pr', 'Amon', 'E3SM-1-0', 'historical', 'r2i1p1f1')
/Users/one/Documents/Palau/2_ocean/2.2_ocean-temperature/2.2.1_mean-sea-surface-temperature/wgetCMIP6_sample/pr/historical/pr_Amon_E3SM-1-0_historical_r2i1p1f1_gr_185001-187412.nc


ValueError: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy', 'zarr']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html

In [32]:
import os
import xarray as xr

for model in models:
    print(model)
    if len(pr_runs[model]) <= 0:
        continue
    run_members = {}
    for run in pr_runs[model]:
        key = (
            find_variableid(run),
            find_tableid(run),
            find_sourceid(run),
            find_experimentid(run),
            find_memberid(run),
        )
        if key in run_members:
            run_members[key].append(find_timerange(run))
        else:
            run_members[key] = [find_timerange(run)]
    if model == "historical":
        startComplete = []
        endComplete = []
        for run in run_members:
            for trange in run_members[run]:
                if trange[:4] == "1850":
                    startComplete.append(run)
                if trange[7:-2] == "2014":
                    endComplete.append(run)
        complete_runs = [x for x in startComplete if x in endComplete]
        for x in complete_runs:
            run_members[x].sort()

        scanned_runs = {run: [] for run in complete_runs}
        trimmed_runs = {run: [] for run in complete_runs}
        run_members = complete_runs
    else:
        scanned_runs = {run: [] for run in run_members}
        trimmed_runs = {run: [] for run in run_members}
        for run in run_members:
            run_members[run].sort()

    contains_palau = {m: [] for m in models}
    no_palau = {m: [] for m in models}
    palau_runs = {m: [] for m in models}

    folder_root = os.getcwd() + "/wgetCMIP6_sample/pr/"

    for run in run_members:
        if "_".join(run) in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]:
            print(run)
            print("already scanned")
            continue
        print(run)
        processed_runs = []
        if run in no_palau:
            break
        
        # Collect all matching files from subdirectories
        files = []
        for root, dirs, filenames in os.walk(folder_root):
            files.extend(
                [
                    os.path.join(root, x)
                    for x in filenames
                    if "_".join(run) in x
                ]
            )
        files.sort()
        try:
            for file in files:
                if file not in scanned_runs[run] and run not in no_palau[model]:
                    print(file)
                    ds = xr.open_dataset(file)
                    if "pr" not in ds.keys():
                        no_palau[model].append(run)
                        break
                    df = ds.pr.to_dataframe()
    
                    filtered = filter_palau(df)
    
                    if len(filtered[run[0]]) > 0:
                        print("Run was a success")
                        processed_runs.append(filtered)
                    else:
                        no_palau[model].append(run)
                        break
                    scanned_runs[run].append(file)
        except ValueError as e:
            print("ValueError", e)
            continue
        

        if len(processed_runs) > 0:
            run_pickles(run, processed_runs)


historical
('pr', 'Amon', 'E3SM-1-0', 'historical', 'r2i1p1f1')
/Users/one/Documents/Palau/2_ocean/2.2_ocean-temperature/2.2.1_mean-sea-surface-temperature/wgetCMIP6_sample/pr/historical/pr_Amon_E3SM-1-0_historical_r2i1p1f1_gr_185001-187412.nc
ValueError did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy', 'zarr']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html
('pr', 'Amon', 'E3SM-1-0', 'historical', 'r3i1p1f1')
/Users/one/Documents/Palau/2_ocean/2.2_ocean-temperature/2.2.1_mean-sea-surface-temperature/wgetCMIP6_sample/pr/historical/pr_Amon_E3SM-1-0_historical_r3i1p1f1_gr_185001-187412.nc
Run was a success
/Users/one/Documents/Palau/2_ocean/2.2_ocean-temperature/2.2.1_mean-sea-surface-temperature/wgetCMIP6_sample/pr/historical/pr_Amon_

In [38]:
xr.open_dataset(file)

ValueError: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy', 'zarr']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html

In [32]:
"ssp119" in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]

False

In [34]:
any("ssp145" in x[8:-5] for x in os.listdir(os.getcwd() + "/average"))

False

In [27]:
sfs = df.reset_index()

In [30]:
sfs

Unnamed: 0,time,lat,lon,tas
0,2045-01-01 00:00:00,-90.0,0.00,250.063583
1,2045-01-01 00:00:00,-90.0,1.25,250.063187
2,2045-01-01 00:00:00,-90.0,2.50,250.063156
3,2045-01-01 00:00:00,-90.0,3.75,250.063644
4,2045-01-01 00:00:00,-90.0,5.00,250.063629
...,...,...,...,...
201830395,2054-12-31 00:00:00,90.0,353.75,249.514847
201830396,2054-12-31 00:00:00,90.0,355.00,249.513016
201830397,2054-12-31 00:00:00,90.0,356.25,249.511368
201830398,2054-12-31 00:00:00,90.0,357.50,249.509872


In [29]:
sfs['time'].unique()

array([cftime.DatetimeNoLeap(2045, 1, 1, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2045, 1, 2, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2045, 1, 3, 0, 0, 0, 0, has_year_zero=True),
       ...,
       cftime.DatetimeNoLeap(2054, 12, 29, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2054, 12, 30, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2054, 12, 31, 0, 0, 0, 0, has_year_zero=True)],
      dtype=object)

In [59]:
# for model in models:
#     print(model)
#     if model == "historical" or len(tos_runs[model]) <= 0:
#         continue
#     run_members = {}
#     for run in tos_runs[model]:
#         if (find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run)) in run_members:
#             run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))].append(find_timerange(run))
#         else:
#             run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))] = [find_timerange(run)]
#     if model == "historical":
#         startComplete = []
#         endComplete = []
#         for run in run_members:
#             for trange in run_members[run]:
#                 if trange[:4] == "1850":
#                     startComplete.append(run)
#                 if trange[7:-2] == "2014":
#                     endComplete.append(run)
#         complete_runs = []
#         for x in startComplete:
#             if x in endComplete:
#                 complete_runs.append(x)
#         for x in complete_runs:
#             run_members[x].sort()
    
#         scanned_runs = {}

#         trimmed_runs = {}

#         for run in complete_runs:
#             scanned_runs[run] = []
#             trimmed_runs[run] = []
#         run_members = complete_runs
#     else:
#         scanned_runs = {}

#         trimmed_runs = {}

#         for run in run_members:
#             scanned_runs[run] = []
#             trimmed_runs[run] = []
#             run_members[run].sort()

#     contains_palau = {}

#     no_palau = {}

#     palau_runs = {}

#     for m in models:
#         contains_palau[m] = []
#         palau_runs[m] = []
#         no_palau[m] = []
    
    
#     folder = os.listdir(os.getcwd() + "/wgetCMIP6/tas/" )
# #     '/Users/one/Documents/CMIP6D/' + "historical.ssp119.ssp126.ssp245.ssp370.ssp585/"


    
#     for run in run_members:
#         if "_".join(run) in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]:
#             print(run)
#             print("already scanned")
#             continue
#         print(run)
#         processed_runs = []
#         if run in no_palau:
#             break
#         files = [x for x in os.listdir(folder) if "_".join(run) in x]
#         files.sort()
#         for file in files:
#             if file not in scanned_runs[run] and run not in no_palau[model]:
#                 print(file)
#                 ds = xr.open_dataset(folder+file)
#                 if 'tos' not in ds.keys():
#                     no_palau[model].append(run)
#                     break
#                 df = ds.tos.to_dataframe()

#                 filtered = filter_palau(df)

#                 if len(filtered[run[0]]) > 0:
#                     print("Run was a sucess")
#                     processed_runs.append(filtered)
#                 else:
#                     no_palau[model].append(run)
#                     break
#                 scanned_runs[run].append(file)

#         if len(processed_runs)> 0:
#             run_pickles(run,processed_runs)

historical
ssp119
ssp126
('tos', 'Omon', 'HadGEM3-GC31-MM', 'ssp126', 'r1i1p1f3')
tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_gn_201501-202912.nc
Run was a sucess
tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_gn_203001-204912.nc
Run was a sucess
tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_gn_205001-206912.nc
Run was a sucess
tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_gn_207001-208912.nc
Run was a sucess
tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_gn_209001-210012.nc
Run was a sucess
Creating pickles
Created: master_tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_.pkl
Created: latlon_tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_.pkl
Created: average_tos_Omon_HadGEM3-GC31-MM_ssp126_r1i1p1f3_.pkl
ssp245
ssp370
ssp585
('tos', 'Omon', 'HadGEM3-GC31-MM', 'ssp585', 'r2i1p1f3')
tos_Omon_HadGEM3-GC31-MM_ssp585_r2i1p1f3_gn_201501-202912.nc
Run was a sucess
tos_Omon_HadGEM3-GC31-MM_ssp585_r2i1p1f3_gn_203001-204912.nc
Run was a sucess
tos_Omon_HadGEM3-GC31-MM_ssp585_r2i1p1f3_gn_205001-206912.nc
Run was a sucess
tos_Omon_Had

In [164]:
# models = ["ssp434","ssp460","ssp534-over"]

# tos_runs = {}

# for model in models:
#     tos_runs[model] = []

# for x in os.listdir('/Users/one/Documents/CMIP6D/' + "ssp434,ssp460,ssp534-over"):
#     if x[:3] == "tos":
#         for model in models:
#             if model in x:
#                 tos_runs[model].append(x)

In [150]:
# for model in models:
#     print(model)
#     if len(tos_runs[model]) <= 0:
#         continue
#     run_members = {}
#     for run in tos_runs[model]:
#         if (find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run)) in run_members:
#             run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))].append(find_timerange(run))
#         else:
#             run_members[(find_variableid(run),find_tableid(run),find_sourceid(run),find_experimentid(run),find_memberid(run))] = [find_timerange(run)]

#     scanned_runs = {}

#     trimmed_runs = {}

#     for run in run_members:
#         scanned_runs[run] = []
#         trimmed_runs[run] = []
#         run_members[run].sort()

#     contains_palau = {}

#     no_palau = {}

#     palau_runs = {}

#     for m in models:
#         contains_palau[m] = []
#         palau_runs[m] = []
#         no_palau[m] = []
    
    
#     folder = '/Users/one/Documents/CMIP6D/' + "ssp434,ssp460,ssp534-over/"


    
#     for run in run_members:
#         if "_".join(run) in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]:
#             print(run)
#             print("already scanned")
#             continue
#         print(run)
#         processed_runs = []
#         if run in no_palau:
#             break
#         files = [x for x in os.listdir(folder) if "_".join(run) in x]
#         files.sort()
#         for file in files:
#             if file not in scanned_runs[run] and run not in no_palau[model]:
#                 print(file)
#                 ds = xr.open_dataset(folder+file)
#                 if 'tos' not in ds.keys():
#                     no_palau[model].append(run)
#                     break
#                 df = ds.tos.to_dataframe()

#                 filtered = filter_palau(df)

#                 if len(filtered[run[0]]) > 0:
#                     print("Run was a sucess")
#                     processed_runs.append(filtered)
#                 else:
#                     no_palau[model].append(run)
#                     break
#                 scanned_runs[run].append(file)

#         if len(processed_runs)> 0:
#             run_pickles(run,processed_runs)

ssp434
('tos', 'Omon', 'UKESM1-0-LL', 'ssp434', 'r4i1p1f2')
already scanned
('tos', 'Omon', 'CanESM5', 'ssp434', 'r4i1p1f1')
already scanned
('tos', 'Omon', 'MIROC6', 'ssp434', 'r1i1p1f1')
already scanned
('tos', 'Omon', 'UKESM1-0-LL', 'ssp434', 'r2i1p1f2')
already scanned
('tos', 'Omon', 'CanESM5', 'ssp434', 'r3i1p1f1')
already scanned
('tos', 'Omon', 'UKESM1-0-LL', 'ssp434', 'r3i1p1f2')
already scanned
('tos', 'Omon', 'MRI-ESM2-0', 'ssp434', 'r1i1p1f1')
already scanned
('tos', 'Omon', 'UKESM1-0-LL', 'ssp434', 'r8i1p1f2')
already scanned
('tos', 'Omon', 'CanESM5', 'ssp434', 'r1i1p1f1')
already scanned
('tos', 'Omon', 'UKESM1-0-LL', 'ssp434', 'r1i1p1f2')
already scanned
('tos', 'Omon', 'FGOALS-g3', 'ssp434', 'r1i1p1f1')
already scanned
('tos', 'Omon', 'CanESM5', 'ssp434', 'r2i1p1f1')
already scanned
('tos', 'Omon', 'CanESM5', 'ssp434', 'r5i1p1f1')
already scanned
ssp460
('tos', 'Omon', 'CanESM5', 'ssp460', 'r3i1p1f1')
already scanned
('tos', 'Omon', 'MIROC6', 'ssp460', 'r1i1p1f1')
alre

  new_vars[k] = decode_cf_variable(


Run was a sucess
tos_Omon_CESM2-WACCM_ssp534-over_r1i1p1f1_gn_210101-215012.nc


  new_vars[k] = decode_cf_variable(


Run was a sucess
tos_Omon_CESM2-WACCM_ssp534-over_r1i1p1f1_gn_215101-220012.nc


  new_vars[k] = decode_cf_variable(


Run was a sucess
tos_Omon_CESM2-WACCM_ssp534-over_r1i1p1f1_gn_220101-225012.nc


  new_vars[k] = decode_cf_variable(


Run was a sucess
tos_Omon_CESM2-WACCM_ssp534-over_r1i1p1f1_gn_225101-229912.nc


  new_vars[k] = decode_cf_variable(


Run was a sucess
Creating pickles
Created: master_tos_Omon_CESM2-WACCM_ssp534-over_r1i1p1f1_.pkl
Created: latlon_tos_Omon_CESM2-WACCM_ssp534-over_r1i1p1f1_.pkl
Created: average_tos_Omon_CESM2-WACCM_ssp534-over_r1i1p1f1_.pkl
('tos', 'Omon', 'UKESM1-0-LL', 'ssp534-over', 'r2i1p1f2')
tos_Omon_UKESM1-0-LL_ssp534-over_r2i1p1f2_gn_204001-204912.nc
Run was a sucess
tos_Omon_UKESM1-0-LL_ssp534-over_r2i1p1f2_gn_205001-210012.nc
Run was a sucess
Creating pickles
Created: master_tos_Omon_UKESM1-0-LL_ssp534-over_r2i1p1f2_.pkl
Created: latlon_tos_Omon_UKESM1-0-LL_ssp534-over_r2i1p1f2_.pkl
Created: average_tos_Omon_UKESM1-0-LL_ssp534-over_r2i1p1f2_.pkl
('tos', 'Omon', 'MIROC-ES2L', 'ssp534-over', 'r1i1p1f2')
tos_Omon_MIROC-ES2L_ssp534-over_r1i1p1f2_gn_201501-210012.nc
Run was a sucess
tos_Omon_MIROC-ES2L_ssp534-over_r1i1p1f2_gn_210101-230012.nc


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(array[self.key], dtype=None)


Run was a sucess
Creating pickles
Created: master_tos_Omon_MIROC-ES2L_ssp534-over_r1i1p1f2_.pkl
Created: latlon_tos_Omon_MIROC-ES2L_ssp534-over_r1i1p1f2_.pkl
Created: average_tos_Omon_MIROC-ES2L_ssp534-over_r1i1p1f2_.pkl
('tos', 'Omon', 'CanESM5', 'ssp534-over', 'r1i1p1f1')
tos_Omon_CanESM5_ssp534-over_r1i1p1f1_gn_204001-210012.nc
Run was a sucess
tos_Omon_CanESM5_ssp534-over_r1i1p1f1_gn_210101-230012.nc
Run was a sucess
Creating pickles
Created: master_tos_Omon_CanESM5_ssp534-over_r1i1p1f1_.pkl
Created: latlon_tos_Omon_CanESM5_ssp534-over_r1i1p1f1_.pkl
Created: average_tos_Omon_CanESM5_ssp534-over_r1i1p1f1_.pkl
('tos', 'Omon', 'MRI-ESM2-0', 'ssp534-over', 'r1i1p1f1')
tos_Omon_MRI-ESM2-0_ssp534-over_r1i1p1f1_gn_204001-210012.nc
Run was a sucess
tos_Omon_MRI-ESM2-0_ssp534-over_r1i1p1f1_gn_210101-230012.nc


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(array[self.key], dtype=None)


Run was a sucess
tos_Omon_MRI-ESM2-0_ssp534-over_r1i1p1f1_gr_204001-210012.nc
Run was a sucess
tos_Omon_MRI-ESM2-0_ssp534-over_r1i1p1f1_gr_210101-230012.nc


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(array[self.key], dtype=None)


Run was a sucess
Creating pickles
Created: master_tos_Omon_MRI-ESM2-0_ssp534-over_r1i1p1f1_.pkl
Created: latlon_tos_Omon_MRI-ESM2-0_ssp534-over_r1i1p1f1_.pkl
Created: average_tos_Omon_MRI-ESM2-0_ssp534-over_r1i1p1f1_.pkl
('tos', 'Omon', 'CMCC-ESM2', 'ssp534-over', 'r1i1p1f1')
tos_Omon_CMCC-ESM2_ssp534-over_r1i1p1f1_gn_201501-210012.nc
Run was a sucess
Creating pickles
Created: master_tos_Omon_CMCC-ESM2_ssp534-over_r1i1p1f1_.pkl
Created: latlon_tos_Omon_CMCC-ESM2_ssp534-over_r1i1p1f1_.pkl
Created: average_tos_Omon_CMCC-ESM2_ssp534-over_r1i1p1f1_.pkl
('tos', 'Omon', 'FGOALS-g3', 'ssp534-over', 'r1i1p1f1')
tos_Omon_FGOALS-g3_ssp534-over_r1i1p1f1_gn_201501-206412.nc
Run was a sucess
tos_Omon_FGOALS-g3_ssp534-over_r1i1p1f1_gn_206501-210012.nc
Run was a sucess
Creating pickles
Created: master_tos_Omon_FGOALS-g3_ssp534-over_r1i1p1f1_.pkl
Created: latlon_tos_Omon_FGOALS-g3_ssp534-over_r1i1p1f1_.pkl
Created: average_tos_Omon_FGOALS-g3_ssp534-over_r1i1p1f1_.pkl
('tos', 'Omon', 'CanESM5', 'ssp534

  new_vars[k] = decode_cf_variable(


Run was a sucess
Creating pickles
Created: master_tos_Omon_CESM2-WACCM_ssp534-over_r2i1p1f1_.pkl
Created: latlon_tos_Omon_CESM2-WACCM_ssp534-over_r2i1p1f1_.pkl
Created: average_tos_Omon_CESM2-WACCM_ssp534-over_r2i1p1f1_.pkl


In [25]:
# folder = '/Users/one/Documents/CMIP6D/' + "historical.ssp119.ssp126.ssp245.ssp370.ssp585/"


# for run in complete_runs:
#     if "_".join(run) in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]:
#         print(run)
#         print("already scanned")
#         continue
#     print(run)
#     processed_runs = []
#     if run in no_palau:
#         break
#     files = [x for x in os.listdir(folder) if "_".join(run) in x]
#     files.sort()
#     for file in files:
#         if file not in scanned_runs[run] and run not in no_palau["historical"]:
#             print(file)
#             ds = xr.open_dataset(folder+file)
#             if 'tos' not in ds.keys():
#                 no_palau["historical"].append(run)
#                 break
#             df = ds.tos.to_dataframe()

#             filtered = filter_palau(df)

#             if len(filtered[run[0]]) > 0:
#                 print("Run was a sucess")
#                 processed_runs.append(filtered)
#             else:
#                 no_palau["historical"].append(run)
#                 break
#             scanned_runs[run].append(file)

#     if len(processed_runs)> 0:
        run_pickles(run,processed_runs)

('tos', 'Omon', 'GFDL-ESM4', 'historical', 'r3i1p1f1')
('tos', 'Omon', 'MPI-ESM1-2-HR', 'historical', 'r8i1p1f1')
already scanned
('tos', 'Omon', 'MPI-ESM1-2-HR', 'historical', 'r2i1p1f1')
already scanned
('tos', 'Omon', 'AWI-CM-1-1-MR', 'historical', 'r2i1p1f1')
already scanned
('tos', 'Omon', 'AWI-CM-1-1-MR', 'historical', 'r5i1p1f1')
already scanned
('tos', 'Omon', 'MPI-ESM1-2-HR', 'historical', 'r7i1p1f1')
already scanned
('tos', 'Omon', 'ICON-ESM-LR', 'historical', 'r3i1p1f1')
already scanned
('tos', 'Omon', 'MPI-ESM1-2-HR', 'historical', 'r5i1p1f1')
already scanned
('tos', 'Omon', 'AWI-ESM-1-1-LR', 'historical', 'r1i1p1f1')
already scanned
('tos', 'Omon', 'MPI-ESM1-2-HR', 'historical', 'r10i1p1f1')
already scanned
('tos', 'Omon', 'MPI-ESM1-2-HR', 'historical', 'r1i1p1f1')
already scanned
('tos', 'Omon', 'ICON-ESM-LR', 'historical', 'r2i1p1f1')
already scanned
('tos', 'Omon', 'MPI-ESM1-2-HR', 'historical', 'r4i1p1f1')
already scanned
('tos', 'Omon', 'AWI-CM-1-1-MR', 'historical', 

In [29]:
import os
import xarray as xr

folder_root = os.getcwd() + "/wgetCMIP6/tas/"

for run in complete_runs:
    if "_".join(run) in [(x[8:-5]) for x in os.listdir(os.getcwd() + "/average")]:
        print(run)
        print("already scanned")
        continue
    print(run)
    processed_runs = []
    if run in no_palau:
        break

    # Collect all matching files from subdirectories
    files = []
    for root, dirs, filenames in os.walk(folder_root):
        files.extend(
            [
                os.path.join(root, x)
                for x in filenames
                if "_".join(run) in x
            ]
        )
    files.sort()

    for file in files:
        if file not in scanned_runs[run] and run not in no_palau["historical"]:
            print(file)
            ds = xr.open_dataset(file)
            if 'tos' not in ds.keys():
                no_palau["historical"].append(run)
                break
            df = ds.tos.to_dataframe()

            filtered = filter_palau(df)

            if len(filtered[run[0]]) > 0:
                print("Run was a success")
                processed_runs.append(filtered)
            else:
                no_palau["historical"].append(run)
                break
            scanned_runs[run].append(file)

    if len(processed_runs) > 0:
        run_pickles(run, processed_runs)


In [31]:
complete_runs

[]

In [24]:
from datetime import datetime as dt

filtered = filter_palau(df)
master_reset = filtered

In [25]:
def convert_to_dt(x):
    return dt.strptime(str(x), '%Y-%m-%d %H:%M:%S')

filtered['year'] = filtered.time.apply(convert_to_dt).dt.year
filtered['date'] = filtered.time.apply(convert_to_dt)

In [26]:
folder = os.getcwd() + '/latlon/'

sources = {}

experiment_dict = {}
for file in os.listdir(folder):
    filename = file[file.find('_')+1:]
    source = find_sourceid(filename)
    
    experiment = find_experimentid(filename)
    variable = find_variableid(filename)
    if source in sources:
        sources[source].append((variable,experiment,file))
        
        if experiment not in experiment_dict[source]:
            experiment_dict[source].append(experiment)
    else:
        sources[source] = [(variable,experiment,file)]
        
        experiment_dict[source] = [experiment]

In [28]:
experiment_dict

{}

In [27]:
latlon_folder = os.getcwd() + '/gridded/'
if not os.path.exists(latlon_folder):
    os.makedirs(latlon_folder)

toslatlon_folder = latlon_folder + '/tos/'
if not os.path.exists(toslatlon_folder):
    os.makedirs(toslatlon_folder)

for source in ['CanESM5','HadGEM3-GC31-MM']:
    for experiment in experiment_dict[source]:
        file = [x for x in sources[source] if x[0]== 'tos' and x[1] == experiment][0][2]
        experiment_folder = toslatlon_folder + "/" + file[7:-5] + "/"
        if not os.path.exists(experiment_folder):
            os.makedirs(experiment_folder)
        df = pd.read_pickle(folder + file)
        df = df.reset_index()
        df = df[['year','latitude','longitude','tos']]
        print(df)
        
        year_range = str(min(df['year'].tolist())) + "-" + str(max(df['year'].tolist()))
        
        csv_dir = experiment_folder + file[7:-5] + ".csv"
        
        with open(experiment_folder + year_range, 'w') as fp:
            pass
        
        print(csv_dir)
        df.to_csv(csv_dir,index=False)

KeyError: 'CanESM5'

In [176]:
df.reset_index()

Unnamed: 0,latitude,longitude,year,tos
0,1.333194,128.5,2040,30.994082
1,1.333194,129.5,2040,31.163177
2,1.333194,130.5,2040,30.952095
3,1.333194,131.5,2040,30.999712
4,1.333194,132.5,2040,30.962298
...,...,...,...,...
16099,12.214899,134.5,2100,30.113791
16100,12.214899,135.5,2100,30.111844
16101,12.214899,136.5,2100,30.110485
16102,12.214899,137.5,2100,30.140363


In [22]:
for model in trimmed_runs:
    print(model)
    df_list = []
    for i, run in enumerate(trimmed_runs[model]):
        if 'tos' in run.keys():
            df_list.append(palau_eez(run.tos.to_dataframe()))
    if len(df_list) > 0:   
        master = pd.concat(df_list)
        master_reset = master.reset_index()
        master.to_pickle("./master/master" + str(x[0]) + str(x[1]) +".pkl")
        print("Created: " + "master" + str(x[0]) + str(x[1]) +".pkl")
        
        master_reset['year'] = master_reset['time'].dt.year
        
        latlon_master = compute_mean(master_reset)
        latlon_master.to_pickle.to_pickle("./latlon/latlon" + str(x[0]) + str(x[1]) +".pkl")
        print("Created: " + "latlon" + str(x[0]) + str(x[1]) +".pkl")

        averaged = latlon_master.reset_index().groupby(["year"], as_index = False)["tos"].mean()
        averaged.to_pickle.to_pickle("./average/average" + str(x[0]) + str(x[1]) +".pkl")
        print("Created: " + "average" + str(x[0]) + str(x[1]) +".pkl")
    

('GFDL-ESM4', 'r3i1p1f1')
('MPI-ESM1-2-HR', 'r8i1p1f1')


OSError: Cannot save file into a non-existent directory: 'cropped_master'