In [1]:
# dependencies
import json
import os
from itertools import filterfalse

# zoltpy related dependencies
import zoltpy.util as zutil
from zoltpy.connection import ZoltarConnection
from zoltpy.quantile_io import json_io_dict_from_quantile_csv_file
from zoltpy.cdc_io import YYYY_MM_DD_DATE_FORMAT
from zoltpy.covid19 import COVID_TARGETS, covid19_row_validator, validate_quantile_csv_file, COVID_ADDL_REQ_COLS

In [2]:
# show the directory the relative paths below are resolved against
current_dir = os.getcwd()
print(current_dir)

/Users/dhuang/workspace/reichlab/covid19-forecast-hub/code/notebooks


In [3]:
# relative location of the JSON file backing the hash database
HASH_DB_PATH = '../zoltar_scripts/validated_file_db.json'

# read the file contents and parse them into `hash_db`
with open(HASH_DB_PATH, 'r') as hash_db_file:
    hash_db = json.loads(hash_db_file.read())

# number of entries currently in the database
print(len(hash_db))

1763


In [4]:
# mapping from metadata field name (keyword) to the matching Zoltar param name (value)
MD_FIELDS_ZOLTAR_PARAMS = dict(
    team_name='team_name',
    model_name='name',
    model_abbr='abbreviation',
    model_contributors='contributors',
    website_url='home_url',
    license='license',
    team_model_designation='notes',
    methods='description',
    repo_url='aux_data_url',
    citation='citation',
    methods_long='methods',
)

In [5]:
# read the Zoltar credentials from the environment (None when a variable is unset)
Z_USERNAME = os.environ.get('Z_USERNAME')
Z_PASSWORD = os.environ.get('Z_PASSWORD')

# both values must be present and non-empty to count as having credentials
is_have_zoltar_credentials = bool(Z_USERNAME and Z_PASSWORD)

credentials_answer = "yes" if is_have_zoltar_credentials else "no"
print(f'Do we have Zoltar credentials? {credentials_answer}')

Do we have Zoltar credentials? yes


In [21]:
# get connection to Zoltar
# Authenticate with the Z_USERNAME / Z_PASSWORD values read from the
# environment earlier in this notebook; credentials must never be written
# into the notebook itself.
conn = ZoltarConnection()
try:
    conn.authenticate(Z_USERNAME, Z_PASSWORD)
except RuntimeError:
    print('Connection to Zoltar not established; please try again')
else:
    # report success only when authenticate() did not raise
    print('Connection established with Zoltar')

Connection established with Zoltar


In [22]:
# locate the 'COVID-19 Forecasts' project among the projects visible to this connection
covid_projects = [candidate for candidate in conn.projects if candidate.name == 'COVID-19 Forecasts']
project_obj = covid_projects[0]

# for every model in the project: abbreviation -> {forecast source: forecast object}
model_to_csvs_dict = {
    model.abbreviation: dict((forecast.source, forecast) for forecast in model.forecasts)
    for model in project_obj.models
}

In [23]:
# forecast directories path
FORECAST_DIRS_PATH = '../../data-processed/'

# get forecast directories: keep only the entries that are directories
# (a real list, so it can be iterated more than once)
list_model_directories = [
    entry for entry in os.listdir(FORECAST_DIRS_PATH)
    if os.path.isdir(os.path.join(FORECAST_DIRS_PATH, entry))
]

# For every model directory, delete from Zoltar each forecast csv that is
# uploaded there but has no entry in the hash database.
for forecast_dir in list_model_directories:
    print(f'current model: {forecast_dir}')

    # Forecasts Zoltar holds for this model, keyed by source file name.
    # A local directory with no matching Zoltar model has nothing to delete,
    # so fall back to an empty mapping instead of raising KeyError.
    zoltar_forecasts = model_to_csvs_dict.get(forecast_dir, {})

    # only files with a .csv extension are forecasts
    list_forecast_csvs = [
        file_name
        for file_name in os.listdir(os.path.join(FORECAST_DIRS_PATH, forecast_dir))
        if file_name.endswith('.csv')
    ]

    # iterate through all csvs in forecast directory
    for forecast_csv in list_forecast_csvs:
        is_forecast_hash_not_in_table = forecast_csv not in hash_db
        is_forecast_on_zoltar = forecast_csv in zoltar_forecasts

        if is_forecast_hash_not_in_table and is_forecast_on_zoltar:
            print(f'\tdeleting {forecast_csv} because its hash is not recorded but it is uploaded to Zoltar...')
            # the value returned by delete() is not needed here
            zoltar_forecasts[forecast_csv].delete()
            print(f'\tdelete job for {forecast_csv} enqueued')


current model: UCSB-ACTS
current model: OliverWyman-Navigator
current model: JHUAPL-Bucky
	deleting 2020-11-09-JHUAPL-Bucky.csv because its hash is not recorded but it is uploaded to Zoltar...
	delete job for 2020-11-09-JHUAPL-Bucky.csv enqueued
current model: DDS-NBDS
	deleting 2020-11-09-DDS-NBDS.csv because its hash is not recorded but it is uploaded to Zoltar...
	delete job for 2020-11-09-DDS-NBDS.csv enqueued
current model: BPagano-RtDriven
current model: UVA-Ensemble
	deleting 2020-11-09-UVA-Ensemble.csv because its hash is not recorded but it is uploaded to Zoltar...
	delete job for 2020-11-09-UVA-Ensemble.csv enqueued
current model: CU-nochange
	deleting 2020-11-08-CU-nochange.csv because its hash is not recorded but it is uploaded to Zoltar...
	delete job for 2020-11-08-CU-nochange.csv enqueued
	deleting 2020-11-05-CU-nochange.csv because its hash is not recorded but it is uploaded to Zoltar...
	delete job for 2020-11-05-CU-nochange.csv enqueued
current model: RobertWalraven-E