# Completely delete bad zarr stores from Google Cloud (GC) storage and local storage

### for each zstore:

    GC storage:
    # 1. delete old version in GC
    # 2. delete entry in ncsv/GC_files_{activity_id}-{institution_id}.csv

    local storage:
    # 3. find and delete old local copy(ies)
    # 4. find and delete entry(ies) in shelf-new/h*.csv
    # 5. remove from concatenated shelf catalog, shelf-new/local.csv

    update noQC catalog:
    # 6. delete store from local noQC catalog

### After all zstores have been processed:

    - copy the updated local noQC catalog to Google Cloud storage

In [None]:
import pandas as pd
import os
import qgrid

# local
from utilities import remove_from_GC, remove_from_local, remove_from_catalogs, search_df

In [None]:
# Facets identifying the zarr stores to remove.  They are passed to search_df
# as keyword filters; note that some are single strings and others are lists.
tables = 'Amon'
exps = ['historical']
variables = [
    'clt', 'evspsbl', 'hfls', 'hfss', 'hus', 'huss', 'prsn', 'prw',
    'psl', 'rlds', 'rlus', 'rlut', 'rlutcs', 'rsds', 'rsdt', 'rsus',
    'rsut', 'rsutcs', 'ta', 'tas', 'tasmax', 'tasmin', 'tauu', 'tauv',
    'tos', 'ts', 'ua', 'va', 'zg',
]
sources = 'EC-Earth3'
mems = 'r24i1p1f1'

# Read the published noQC catalog and report its total number of rows.
d2 = pd.read_csv('https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores-noQC.csv')
print(len(d2))

# Narrow the catalog down to the facets above.
# NOTE(review): presumably search_df returns the matching subset of d2 --
# confirm against utilities.search_df.
facets = dict(
    variable_id=variables,
    experiment_id=exps,
    table_id=tables,
    source_id=sources,
    member_id=mems,
)
dGC = search_df(d2, **facets)

# Number of zstore entries selected (shown as the cell output).
len(dGC.zstore.values)

In [None]:
# Remove the selected zarr stores one at a time.
# Recommended workflow: run once with execute = False as a dry run, then
# switch to execute = True to actually perform the removal.
#execute = False
execute = True

zstores = dGC.zstore.values

# NOTE(review): the first two selected stores are skipped here -- presumably
# they were already handled in an earlier run; confirm before re-running.
first_index = 2

# Removal steps applied to every store, in this order:
# cloud copy, local copy, then catalog entries.
removers = (remove_from_GC, remove_from_local, remove_from_catalogs)

for zstore in zstores[first_index:]:
    for remove in removers:
        remove(zstore, execute=execute)
    

In [None]:
# Reload the local noQC catalog and show how many zstore entries it lists.
local_catalog_path = 'csv/pangeo-cmip6-noQC.csv'
da = pd.read_csv(local_catalog_path)
len(da['zstore'].values)

In [None]:
# Once every store has been processed, overwrite the noQC catalog on GCS
# with the locally modified copy.

gsutil = '/usr/bin/gsutil -m'
local_catalog = 'csv/pangeo-cmip6-noQC.csv'
remote_catalog = 'gs://cmip6/cmip6-zarr-consolidated-stores-noQC.csv'

# Assemble the shell command and run it; a non-zero status from os.system
# is reported as a failed upload.
ret = os.system(' '.join([gsutil, 'cp', local_catalog, remote_catalog]))
if ret:
    print('noQC upload not working')
    