In [None]:
!pip install schedule --quiet

In [None]:
import schedule
import time
import subprocess
import logging

# Set up logging for the scheduled jobs.
# force=True makes basicConfig() replace any handlers already attached to the
# root logger. Without it, basicConfig() does nothing when the root logger is
# already configured (commonly the case inside a notebook kernel), and the
# log file below would never be written.
logging.basicConfig(
    filename='/content/drive/My Drive/ETL_Pipeline/logs/etl_scheduler.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Function to run the ETL pipeline
def run_etl_pipeline():
    """Execute the ETL notebook via ``jupyter nbconvert`` and log the outcome.

    The start, success, or failure of the run is written through the root
    logger. Failures are logged and swallowed rather than raised, so a bad
    run does not propagate into whatever invoked this job.

    The child process's stdout/stderr are captured instead of being inherited;
    on a non-zero exit the captured stderr is written to the log so the
    underlying notebook error is recorded, not just the exit status.

    Returns:
        None
    """
    # Log the start time of the ETL process
    logging.info("ETL process started.")

    try:
        # Running the ETL pipeline by executing the notebook
        subprocess.run(
            [
                "jupyter", "nbconvert", "--to", "notebook", "--execute",
                "/content/drive/My Drive/ETL_Pipeline/etl_pipeline.ipynb",
            ],
            check=True,
            capture_output=True,
            text=True,
            # Never let an undecodable byte in the tool's output turn a
            # successful run into a reported failure.
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        # nbconvert ran but exited non-zero: record the status and its stderr
        logging.error("Error in running ETL pipeline: %s", e)
        if e.stderr:
            logging.error("nbconvert stderr:\n%s", e.stderr.strip())
    except Exception as e:
        # Anything else (e.g. the jupyter executable is missing): keep the
        # original message and attach the traceback for diagnosis
        logging.exception("Error in running ETL pipeline: %s", e)
    else:
        # Log success
        logging.info("ETL process completed successfully.")

# Function to run the load_to_db pipeline
def run_load_to_db():
    """Execute the load_to_db notebook via ``jupyter nbconvert`` and log the outcome.

    The start, success, or failure of the run is written through the root
    logger. Failures are logged and swallowed rather than raised, so a bad
    run does not propagate into whatever invoked this job.

    The child process's stdout/stderr are captured instead of being inherited;
    on a non-zero exit the captured stderr is written to the log so the
    underlying notebook error is recorded, not just the exit status.

    Returns:
        None
    """
    # Log the start time of the load to DB process
    logging.info("Load to DB process started.")

    try:
        # Running the load_to_db pipeline by executing the notebook
        subprocess.run(
            [
                "jupyter", "nbconvert", "--to", "notebook", "--execute",
                "/content/drive/My Drive/ETL_Pipeline/load_to_db.ipynb",
            ],
            check=True,
            capture_output=True,
            text=True,
            # Never let an undecodable byte in the tool's output turn a
            # successful run into a reported failure.
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        # nbconvert ran but exited non-zero: record the status and its stderr
        logging.error("Error in running load_to_db pipeline: %s", e)
        if e.stderr:
            logging.error("nbconvert stderr:\n%s", e.stderr.strip())
    except Exception as e:
        # Anything else (e.g. the jupyter executable is missing): keep the
        # original message and attach the traceback for diagnosis
        logging.exception("Error in running load_to_db pipeline: %s", e)
    else:
        # Log success
        logging.info("Load to DB process completed successfully.")

# Register the daily jobs: the ETL pipeline at midnight, then the
# load_to_db pipeline at 1:00 AM.
for run_at, job in (
    ("00:00", run_etl_pipeline),
    ("01:00", run_load_to_db),
):
    schedule.every().day.at(run_at).do(job)

# Scheduler loop: never exits. Each pass fires whatever jobs have come due,
# then sleeps for one minute before checking the schedule again.
while True:
    schedule.run_pending()
    time.sleep(60)
