# Homework Assignment 2

In [5]:
from datetime import date, datetime, timedelta
from pathlib import Path
import random
import os

### Date Logic

In [8]:
# Generator that yields every date in the given range, including both the start and end date
def daterange(start: date, end: date):
    """Yield each day from ``start`` to ``end``, both ends inclusive.

    Args:
        start: First day of the range.
        end: Last day of the range.

    Yields:
        ``start``, ``start`` + 1 day, ... up to the last value that is not
        after ``end``. Nothing is yielded when ``end`` lies before ``start``.
    """
    # Every value is derived from an offset rather than by incrementing a
    # cursor: the cursor had to step one day past ``end`` before the loop
    # could stop, which raised OverflowError when ``end`` was ``date.max``.
    day_count = (end - start).days + 1
    for offset in range(day_count):
        yield start + timedelta(days=offset)

### Status Check

In [11]:
# Root folder for the per-day output folders; it is created if it does not exist yet
BASE_DIR = Path("processed_data")
os.makedirs(BASE_DIR, exist_ok=True)

# Tells whether a given date already counts as processed
def is_day_processed(day: date) -> bool:
    """Return True when an entry named after ``day`` exists below ``BASE_DIR``.

    The entry name is the ISO form of the date (``day.isoformat()``). Only its
    existence is checked; the contents are not inspected.
    """
    day_folder = BASE_DIR.joinpath(day.isoformat())
    return day_folder.exists()

### Mock-Processing

In [14]:
# Stand-in for the real pipeline: pretends to download, process and archive one day's files
def mock_process_day(day, fail_probability=0.2):
    """Simulate processing of a single day and mark it as done on disk.

    Args:
        day: The date to process; its ISO form becomes the folder name.
        fail_probability: Chance in [0, 1] that the simulated download fails.

    Raises:
        RuntimeError: When the simulated download fails. Nothing is written
            to disk in that case.
    """
    print(f"→ Processing {day}")

    # Draw once to decide whether this run hits the simulated download error
    download_failed = random.random() < fail_probability
    if download_failed:
        raise RuntimeError("Mock download failed")

    # Create the day's folder and drop the completion marker into it
    target_dir = BASE_DIR / day.isoformat()
    target_dir.mkdir(parents=True, exist_ok=True)
    marker_file = target_dir / "done.txt"
    marker_file.write_text(f"Processed on {datetime.now()}\n")

    print(f"✓ {day} successfully processed")

### Find oldest missing day

In [17]:
# Look up the earliest day in a date range that has not been processed yet
def find_oldest_missing_day(start, end) -> date | None:
    """Return the first unprocessed day between ``start`` and ``end`` (inclusive).

    Days are visited in the order produced by ``daterange`` and tested with
    ``is_day_processed``. Returns None when every day in the range is processed.
    """
    unprocessed_days = (
        day for day in daterange(start, end) if not is_day_processed(day)
    )
    # The generator is lazy, so the scan stops at the first hit
    return next(unprocessed_days, None)

### Find oldest processed day

In [20]:
# Find the oldest fully processed day in the directory
def find_oldest_processed_day(processed_dir, date_format):
    """Return the earliest date whose folder contains a completion marker.

    Args:
        processed_dir: Directory holding one sub-folder per day.
        date_format: ``strptime`` format that the sub-folder names follow,
            e.g. ``"%Y-%m-%d"``.

    Returns:
        The oldest ``date`` among the sub-folders that carry a marker file,
        or None when no sub-folder qualifies.
    """
    # The marker is written as "done.txt"; the capitalised spelling that was
    # checked before is still accepted. Looking only for "Done.txt" never
    # matched on case-sensitive filesystems, so no day was ever recognised.
    marker_names = ("done.txt", "Done.txt")

    processed_dates = []
    for entry in Path(processed_dir).iterdir():
        if not entry.is_dir():
            continue  # Skip files, just looking for folders

        try:
            # Extract the date from the folder name
            folder_date = datetime.strptime(entry.name, date_format).date()
        except ValueError:
            # Ignore folders whose name does not match the given date format
            continue

        # Only folders that contain the marker count as fully processed
        if any((entry / name).is_file() for name in marker_names):
            processed_dates.append(folder_date)

    return min(processed_dates, default=None)

### Central Workflow

In [23]:
# Central control flow that handles the daily data
def run_daily_workflow(
    start_date = None,
    end_date = None,
    target_date = None,
    max_attempts_per_day = 5,
    # every argument is optional so the workflow also runs without arguments
):
    """Process one explicit day, or every missing day of a date range.

    Args:
        start_date: First day of the range. Defaults to the oldest processed
            day found in ``BASE_DIR``; when nothing has been processed yet,
            the range starts at ``end_date``.
        end_date: Last day of the range. Defaults to today.
        target_date: When given, only this day is processed (once) and the
            range arguments are ignored.
        max_attempts_per_day: How many failed attempts a day may have before
            it is skipped. Pass None to retry without limit.

    Errors raised while processing a day are printed, never propagated.
    """
    # A single target date needs neither range nor directory scan, so it is
    # handled before the defaults are resolved.
    if target_date is not None:
        print(f"Start workflow for individual target date: {target_date}")
        try:
            mock_process_day(target_date)
        except Exception as e:
            print(f"✗ Error on {target_date}: {e}")
        return

    # If the function is called without end_date, use today
    if end_date is None:
        end_date = date.today()

    # If the function is called without start_date, find a suitable start date
    if start_date is None:
        start_date = find_oldest_processed_day(BASE_DIR, "%Y-%m-%d")
        if start_date is None:
            # Nothing has been processed yet; without this fallback the range
            # iteration would fail on a None start date.
            start_date = end_date

    print("Start workflow for all missing days")

    skipped_days = []
    search_from = start_date
    failed_attempts = 0  # failures of the day that is currently being retried

    while True:
        missing_day = find_oldest_missing_day(search_from, end_date)
        if missing_day is None:
            break

        try:
            mock_process_day(missing_day)
        except Exception as e:
            failed_attempts += 1
            print(f"✗ Error on {missing_day}: {e}")

            out_of_attempts = (
                max_attempts_per_day is not None
                and failed_attempts >= max_attempts_per_day
            )
            if not out_of_attempts:
                # The same day stays the oldest missing one, so the next
                # loop iteration picks it up again.
                print(f"→ Error logged, {missing_day} will be retried")
                continue

            # Give up on this day so that a persistent failure cannot keep
            # the loop running forever.
            print(
                f"→ Giving up on {missing_day} after {failed_attempts} attempts, "
                "next day will be processed"
            )
            skipped_days.append(missing_day)
            failed_attempts = 0
            if missing_day == end_date:
                break  # no later day left in the range
            search_from = missing_day + timedelta(days=1)
        else:
            failed_attempts = 0

    if skipped_days:
        skipped_text = ", ".join(day.isoformat() for day in skipped_days)
        print(f"✗ Workflow finished with unprocessed days: {skipped_text}")
    else:
        print("✓ All days are processed")

## Testing

In [25]:
# date range given, no target date: every missing day in the range is processed

run_daily_workflow(    
    start_date=date(2024, 12, 1),
    end_date=date(2024, 12, 17),
)

Start workflow for all missing days
→ Processing 2024-12-01
✓ 2024-12-01 successfully processed
→ Processing 2024-12-02
✗ Error on 2024-12-02: Mock download failed
→ Error logged, next day will be processed
→ Processing 2024-12-02
✓ 2024-12-02 successfully processed
→ Processing 2024-12-03
✗ Error on 2024-12-03: Mock download failed
→ Error logged, next day will be processed
→ Processing 2024-12-03
✓ 2024-12-03 successfully processed
→ Processing 2024-12-04
✗ Error on 2024-12-04: Mock download failed
→ Error logged, next day will be processed
→ Processing 2024-12-04
✓ 2024-12-04 successfully processed
→ Processing 2024-12-05
✓ 2024-12-05 successfully processed
→ Processing 2024-12-06
✓ 2024-12-06 successfully processed
→ Processing 2024-12-07
✓ 2024-12-07 successfully processed
→ Processing 2024-12-08
✓ 2024-12-08 successfully processed
→ Processing 2024-12-09
✓ 2024-12-09 successfully processed
→ Processing 2024-12-10
✓ 2024-12-10 successfully processed
→ Processing 2024-12-11
✓ 2024-

In [27]:
# all arguments given: the target date takes precedence, so only that day is processed

run_daily_workflow(    
    start_date=date(2024, 12, 1),
    end_date=date(2024, 12, 17),
    target_date=date(2024, 12, 3),
)

Start workflow for individual target date: 2024-12-03
→ Processing 2024-12-03
✓ 2024-12-03 successfully processed


In [29]:
# without any arguments: the range runs from the oldest processed day up to today

run_daily_workflow()

Start workflow for all missing days
→ Processing 2024-12-18
✓ 2024-12-18 successfully processed
→ Processing 2024-12-19
✓ 2024-12-19 successfully processed
→ Processing 2024-12-20
✓ 2024-12-20 successfully processed
→ Processing 2024-12-21
✓ 2024-12-21 successfully processed
→ Processing 2024-12-22
✓ 2024-12-22 successfully processed
→ Processing 2024-12-23
✓ 2024-12-23 successfully processed
→ Processing 2024-12-24
✗ Error on 2024-12-24: Mock download failed
→ Error logged, next day will be processed
→ Processing 2024-12-24
✓ 2024-12-24 successfully processed
→ Processing 2024-12-25
✓ 2024-12-25 successfully processed
→ Processing 2024-12-26
✓ 2024-12-26 successfully processed
→ Processing 2024-12-27
✓ 2024-12-27 successfully processed
→ Processing 2024-12-28
✓ 2024-12-28 successfully processed
→ Processing 2024-12-29
✓ 2024-12-29 successfully processed
→ Processing 2024-12-30
✓ 2024-12-30 successfully processed
→ Processing 2024-12-31
✓ 2024-12-31 successfully processed
→ Processing 

In [31]:
# Only a target date given: just that single day is processed

run_daily_workflow(
    target_date=date(2024, 12, 3),
)

Start workflow for individual target date: 2024-12-03
→ Processing 2024-12-03
✓ 2024-12-03 successfully processed


In [33]:
# Status check: show for each day of the range whether it has been processed

for day in daterange(date(2024, 12, 1), date(2024, 12, 5)):
    status = "OK" if is_day_processed(day) else "FEHLT"
    print(day, "→", status)

2024-12-01 → OK
2024-12-02 → OK
2024-12-03 → OK
2024-12-04 → OK
2024-12-05 → OK
