In [None]:
import numpy as np
import pandas as pd
import os
import qgrid
import gcsfs
import xarray as xr

from request import requests, set_request_id, get_ncfiles, concatenate
from search import search, esgf_search_sites
from identify import identify
from response import response, dict_to_dfcat, get_details

In [None]:
# Anonymous, read-only Google Cloud Storage filesystem handle. The main loop
# further down uses it to look for stores in the cloud and to re-open them.
fs = gcsfs.GCSFileSystem(token='anon', access='read_only')

In [None]:
# Initialize csv/cmip6-master.csv from the public cloud catalog (one-time setup; uncomment to run)
#url_cloud = 'https://storage.googleapis.com/cmip6/cmip6.csv'
#df = pd.read_csv(url_cloud)
#df.to_csv('csv/cmip6-master.csv',index=False)

In [None]:
# Run configuration: which ESGF node to query and how to filter the results.

# esgf_search_sites() returns a mapping that is indexed by site key here;
# the 'llnl' entry is the node handed to search() later on.
dtype = esgf_search_sites()
ESGF_site = dtype['llnl']

# Sites passed to get_ncfiles() in the main loop as ones to skip.
skip_sites = [
    'dist.nmlab.snu.ac.kr',
    'esg.lasg.ac.cn',
    'esgf-data2.diasjp.net',
]

# Table ids passed to identify() as the "single member" tables.
# A wider selection is kept here for reference:
#single_member_tables = ['Omon', 'CF3hr','3hr','E3hr', '6hrLev', 'day', '6hrPlev', '6hrPlevPt', 'fx', 'Ofx']
single_member_tables = [
    'fx',
    'Ofx',
]

In [None]:
# Fetch the pending requests and the identifier assigned to this batch.
df_request_new = requests()
request_id = set_request_id()

# File names derived from the request id (a CSV and an exceptions text file).
c_file = f'csv/cmip6_{request_id}.csv'
x_file = f'csv/exceptions_{request_id}.txt'

# Show the new requests as the cell output.
df_request_new

In [None]:
# Search ESGF for the availability of requested data.
# ESGF_site is the search node chosen in the configuration cell; the result is
# kept in df_ESGF and later handed to identify().
df_ESGF = search(ESGF_site,df_request_new)
# Display the length of the search result as a quick sanity check.
len(df_ESGF)

In [None]:
#qgrid.show_grid(df_ESGF)

In [None]:
# Load the local master CSV. It is passed to identify() to work out what is
# still needed and is later concatenated with the newly collected entries.
df_master = pd.read_csv('csv/cmip6-master.csv')
#qgrid.show_grid(df_master)

In [None]:
# Work out what still has to be processed. The result must expose a `zstore`
# column: the main loop groups the needed files into stores with it.
# NOTE(review): presumably identify() keeps requested datasets that ESGF has
# but df_master does not - confirm against identify.py.
df_needed = identify(df_master, df_request_new, df_ESGF, single_member_tables)
#qgrid.show_grid(df_needed)

In [None]:
# Stop a "Run All" here when there is nothing to process, and say why instead
# of failing with an empty AssertionError.
assert len(df_needed) >= 1, 'df_needed is empty: no new data has to be processed for this request'

In [None]:
# BIG LOOP  (zarr by zarr - can do in parallel)
#
# For every store listed in df_needed.zstore:
#   1. collect its netcdf files with get_ncfiles(), skipping skip_sites,
#   2. combine them with concatenate(),
#   3. require a consolidated local copy under 'ztemp' (marked by .zmetadata),
#   4. check whether the store is already in the cloud,
#   5. re-open the cloud store and, only if that works, record the entry
#      returned by get_details() in zdict (keyed by the loop index).
#
# NOTE: this cell is currently a dry run. Writing the local store
# (ds.to_zarr), uploading it (gsutil) and removing the netcdf files are all
# commented out, so only stores that already exist under 'ztemp' get past
# step 3 and the upload command is printed but not executed.

print('number of files needed',len(df_needed))
print('number of stores to be created',df_needed.zstore.nunique())

new_zarrs = df_needed.zstore.unique()

zdict = {}
for item, zarr in enumerate(new_zarrs):

    # does it exist in ztemp already?
    zbdir = 'ztemp' + zarr
    local_zmetadata = zbdir + '/.zmetadata'
    if os.path.isfile(local_zmetadata):
        print(item,'already exists:',zbdir)
        #continue

    gfiles = get_ncfiles(zarr, df_needed, skip_sites)
    if len(gfiles) == 0:
        print(item,'no files available')
        continue

    # concatenate in time with mfdataset
    # (ddict is returned by concatenate() but not used in this cell)
    status, ds, ddict = concatenate(zarr, gfiles)
    if status == 'failure':
        print(item,'oops, no dice')
        continue

    #ds.to_zarr(zbdir, consolidated=True, mode='w')

    # Without a consolidated local store there is nothing to upload or verify.
    # Report the skip so it does not go unnoticed in the output.
    if not os.path.isfile(local_zmetadata):
        print(item,'no consolidated local store found, skipping:',zbdir)
        continue

    gsurl, vlist = get_details(ds, zbdir, zarr)

    # remove netcdf files
    #for gfile in gfiles:
    #   os.system('rm -f '+ gfile)
    print(item,'successfully saved as ',zbdir)

    # upload to cloud
    # Use exists() rather than ls(): listing a path that is not there raises
    # FileNotFoundError, which would abort the loop exactly when the store
    # still needs to be uploaded.
    if fs.exists(gsurl + '/.zmetadata'):
        print(item,'store already in cloud')
    else:
        command = '/usr/bin/gsutil -m cp -r '+ zbdir + ' ' + gsurl
        print(command)
        #os.system(command)
    print(gsurl)

    # Verify that the cloud store can be opened before cataloguing it.
    # Catch Exception (not a bare except) so Ctrl-C / kernel interrupt still
    # stops the loop, and report the cause of the failure.
    try:
        ds = xr.open_zarr(fs.get_mapper(gsurl), consolidated=True)
    except Exception as err:
        print(item,'store did not get saved properly:',repr(err))
    else:
        zdict[item] = vlist
    

In [None]:
# Turn the entries collected in zdict by the main loop (one per store that
# could be re-opened from the cloud) into dz, which is concatenated with
# df_master afterwards.
# NOTE(review): zdict can be empty if no store was verified - confirm that
# dict_to_dfcat() handles an empty dict.
dz = dict_to_dfcat(zdict)

In [None]:
# Append the newly collected entries to the master table (row-wise concat).
# NOTE(review): index labels of both frames are kept as they are (no
# ignore_index), so labels may repeat - confirm response() does not rely on a
# unique index.
df_master_new = pd.concat([df_master, dz])

In [None]:
# Final step: hand the original requests and the updated master table to
# response().
# NOTE(review): what response() does with them is not visible in this
# notebook - see response.py.
response(df_request_new,df_master_new)