In [None]:

Ways to configure DAG scheduling in Airflow
https://airflow.apache.org/docs/apache-airflow/stable/authoring-and-scheduling/cron.html

schedule_interval
Scheduling intervals using presets
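Airflow provides a set of predefined intervals (presets) for common schedules. Each preset is a shortcut for a specific cron expression. Note: since Airflow 2.4 the `schedule` argument supersedes `schedule_interval`, which is deprecated; the values shown below are accepted by both.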


### Given the example:
# schedule_interval=None: the DAG is never scheduled automatically and runs
# only when it is triggered manually or by an external trigger.
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=None,
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
@once: Schedule the DAG once and only once.
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval="@once",
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
@hourly: Run the DAG once every hour, on the hour (cron: 0 * * * *).
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval="@hourly",
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
@daily: Run the DAG once every day at midnight (00:00) (cron: 0 0 * * *).
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval="@daily",
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
@weekly: Run the DAG once a week on Sunday at midnight (00:00) (cron: 0 0 * * 0).
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval="@weekly",
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
@monthly: Run the DAG once a month on the first day of the month at midnight (00:00)
(cron: 0 0 1 * *).
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval="@monthly",
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
@yearly or @annually: Run the DAG once a year on January 1st at midnight (00:00)
(cron: 0 0 1 1 *).
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval="@yearly",
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here
Scheduling intervals using timedelta
You can use Python's timedelta to specify intervals in terms of hours, days, or other time units. This allows for flexible and programmatic scheduling.


"""
Run every hour (intervals are counted from start_date, not aligned to the top of the hour):
"""
from datetime import timedelta

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=timedelta(hours=1),
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
Run every day (similar to @daily, but each 24-hour interval is counted from
start_date instead of being pinned to midnight):
"""
from datetime import timedelta

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
Run every week (similar to @weekly, but every 7 days counted from start_date
rather than on Sunday at midnight):
"""
from datetime import timedelta

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=timedelta(weeks=1),
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
Run every month (approximated as 30 days; unlike @monthly, runs drift relative
to calendar months because the interval is a fixed 30 days from start_date):
"""
from datetime import timedelta

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=timedelta(days=30),
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
Run every 15 minutes (custom interval, counted from start_date):
"""
from datetime import timedelta

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=timedelta(minutes=15),
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here
Scheduling intervals using CRON
Cron expressions offer the most control and precision for scheduling. A cron expression is composed of five space-separated fields, in this order:

Minute (0-59)

Hour (0-23)

Day of the month (1-31)

Month (1-12)

Day of the week (0-6, where 0 is Sunday and 6 is Saturday)


"""
Every day at 9 AM:

'0 9 * * *' -> minute 0, hour 9, any day of the month, any month, any day of the week.
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval='0 9 * * *',
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
Every Monday at 6 PM:

'0 18 * * 1' -> minute 0, hour 18, any day of the month, any month, day of the week 1 (Monday).
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval='0 18 * * 1',
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
Every 15 minutes:

'*/15 * * * *' -> every minute divisible by 15 (00, 15, 30, 45) of every hour.
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval='*/15 * * * *',
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
First day of every month at midnight:

'0 0 1 * *' -> minute 0, hour 0, day of the month 1, any month, any day of the week.
"""
with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval='0 0 1 * *',
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here
start_date
The start_date defines when the DAG should start executing. It tells Airflow when to begin scheduling the DAG, and it must be supplied either on the DAG itself (as in the examples below) or through default_args.

Start Date using datetime

"""
Fixed start date:

Specify an exact start timestamp using the datetime class.
"""
# datetime (not timedelta) is the name this snippet actually uses.
from datetime import datetime

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=None,
    start_date=datetime(2024, 9, 12, 10, 0), # Start on September 12, 2024, at 10:00 AM
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here
Start Date using days_ago

"""
Relative start date using days_ago:

The days_ago(n) function is a helper function that sets the start date relative to the current time.
It's a flexible way to avoid hardcoding the date.

NOTE: days_ago is deprecated in recent Airflow 2 releases, and a fixed (static)
start_date is generally recommended - verify against the Airflow version in use.
"""
from airflow.utils.dates import days_ago

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=None,
    start_date=days_ago(3),  # start date is 3 days before the current date
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here
end_date
The end_date defines when the DAG should stop being scheduled. Once the end_date is reached, Airflow will no longer schedule new runs for that DAG, even if the schedule_interval suggests more runs.

End Date using datetime

"""
Fixed end date:

Specify a hard end date using the datetime module.
"""
# datetime (not timedelta) is the name this snippet actually uses.
from datetime import datetime

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=None,
    start_date=datetime(2024, 9, 12, 10, 0), # Start on September 12, 2024, at 10:00 AM
    end_date=datetime(2024, 12, 31), # Stop scheduling the DAG after December 31, 2024
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here

"""
End date with a specific time:

The end_date may include a time of day, not only a calendar date. No new runs
are scheduled after that exact timestamp.
"""
# datetime (not timedelta) is the name this snippet actually uses.
from datetime import datetime

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=None,
    start_date=datetime(2024, 9, 12, 10, 0), # Start on September 12, 2024, at 10:00 AM
    end_date=datetime(2024, 9, 30, 18, 0), # End the DAG at 6:00 PM on September 30, 2024
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here
Never ending the pipeline using end_date=None

"""
No end date:

If you don't set an end_date (or explicitly pass end_date=None), the DAG will
continue to run indefinitely according to the schedule_interval.
"""
# datetime (not timedelta) is the name this snippet actually uses.
from datetime import datetime

with DAG(
    dag_id='SOME_DAG_ID',
    default_args=default_args,
    schedule_interval=None,
    start_date=datetime(2024, 9, 12, 10, 0), # Start on September 12, 2024, at 10:00 AM
    end_date=None, # No end date: the DAG never stops being scheduled
    description="SOME_DAG_DESC"
) as dag:
    pass  # placeholder body: define the DAG's tasks here