In [2]:
import os, glob, sys
import configparser
import datetime
import numpy as np
import uuid
from concurrent.futures import ThreadPoolExecutor

In [3]:
class ConfigReader:
    def __init__(self):
        self.config_values = {}

    def read_config_file(self):
        
        # Navigate to the parent directory
        parent_dir = os.getcwd()

        # Specify the path to the config file in the parent directory
        #config_path = os.path.join(parent_dir, 'config.ini')

        # options for jupyter-lab
        # Get the current notebook's directory
        parent_dir = os.path.dirname(os.path.abspath(''))
        #parent_dir = os.path.dirname(notebook_directory)
        
        # Specify the path to the config file in the parent directory
        config_path = os.path.join(parent_dir, 'config.ini')
        print(config_path)

        # Read the configuration file
        config = configparser.ConfigParser()
        config.read(config_path)

        # Get options in the 'analysis' section and store in the dictionary
        for option, value in config.items('analysis'):
            self.config_values[option] = value

    def check_201_prev_days(self, start_date, num_prev_days=201):
        missing_dates = [
            date for date in (start_date - datetime.timedelta(days=i) for i in range(num_prev_days))
            if not all(
                len(glob.glob(os.path.join(self.config_values['analysis_processed_dir'], date.strftime("%Y%m%d"), "*.nc"))) == 2
            )
        ]
        print(missing_dates)
        sys.exit()
        '''
        # Process missing dates using ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=5) as executor:  # Set the number of workers as needed
            futures = {executor.submit(self.retrieve_analysis_data, missing_date): missing_date for missing_date in missing_dates}

        # Check if tasks are completed
        completed_tasks = [future for future in futures if future.done()]
        print(f"Completed tasks: {len(completed_tasks)} out of {len(missing_dates)}")
        '''
        for date in missing_dates:
            self.retrieve_analysis_data(date)

    def retrieve_analysis_data(self, date):
        print('Retrieving data for date:', date)
        for varname in ['olr', 'u850', 'u200', 'v850']:
            self.retrieve_var_data(date, varname)
        print('Data retrieval complete.')

    def retrieve_var_data(self, date, varname):
        query_files_dir = self.config_values['analysis_queryfiles']
        moosedir = os.path.join(self.config_values['analysis_moose_dir'], f'{str(date.year)}.pp')
        fc_times = np.arange(0, 168, 12)
        hr_list = ['00', '12']
        
        for hr in hr_list:
            remote_data_dir = os.path.join(self.config_values['analysis_raw_dir'], varname, 
                                           str(date.year), f'{date.month:02d}', f'{date.day:02d}')
            if not os.path.exists(remote_data_dir):
                os.makedirs(remote_data_dir)
            for fc in fc_times:
                self.retrieve_fc_data(date, varname, hr, fc, remote_data_dir, moosedir)

    def retrieve_fc_data(self, date, varname, hr, fc, remote_data_dir, moosedir):
        fct = f'{fc:03d}' if fc != 0 else '003'
        filemoose = f'prods_op_gl-mn_{date.strftime("%Y%m%d")}_{hr}_{fct}.pp'
        if date >= datetime.datetime(2018, 9, 25):
            filemoose = f'prods_op_gl-mn_{date.strftime("%Y%m%d")}_{hr}_{fct}.pp'
        outfile = f'qg{hr}T{fct}.pp'
        file_moose = os.path.join(moosedir, filemoose)
        local_query_file1 = os.path.join(self.config_values['analysis_dummy_queryfiles_dir'], 
                                         f'localquery_{varname}_{uuid.uuid1()}')
        self.create_query_file(varname, local_query_file1, filemoose, fct)

        #if not self.check_retrieval_complete(outfile, remote_data_dir):
        self.retrieve_missing_data(local_query_file1, moosedir, outfile, remote_data_dir)

    def create_query_file(self, varname, local_query_file1, filemoose, fct):
        query_files_dir = self.config_values['analysis_queryfiles']
        replacements = {'fctime': fct, 'filemoose': filemoose}
        query_file = os.path.join(query_files_dir, varname)
        with open(query_file) as query_infile, open(local_query_file1, 'w') as query_outfile:
            for line in query_infile:
                for src, target in replacements.items():
                    line = line.replace(src, target)
                query_outfile.write(line)

    def check_retrieval_complete(self, outfile, remote_data_dir):
        outfile_path = os.path.join(remote_data_dir, outfile)
        return os.path.exists(outfile_path) and os.path.getsize(outfile_path) > 0

    def retrieve_missing_data(self, local_query_file1, moosedir, outfile, remote_data_dir):
        command = f'/opt/moose-client-wrapper/bin/moo select {local_query_file1} {moosedir} {os.path.join(remote_data_dir, outfile)}'
        os.system(command)
        print(command)

        # Consider using subprocess to capture output/error instead of os.system()
        # subprocess.run(command, shell=True)
        # You can also handle the specific exceptions to avoid a sudden exit (sys.exit())


In [4]:
# Usage:
reader = ConfigReader()
reader.read_config_file()
values = reader.check_201_prev_days(datetime.datetime(2023, 11, 5), num_prev_days=2)
#print(values)

/net/home/h03/hadpx/MJO/Monitoring_new/config.ini


TypeError: 'bool' object is not iterable