**Import Dependencies**  
   - Use `re` for regex-based header detection.  
   - Use `pandas` to load and manipulate the sheet.  
   - Use `datetime` to parse time strings.  
   - Use `Path` to build file paths dynamically.

In [None]:
import pandas as pd
from datetime import datetime
from itertools import chain
from pathlib import Path
import re

 **Define Constants & File Path**  
   - Map DUoS colours (`green`, `amber`, `red`) to integer codes.  
   - Locate `ch2.xlsx` under `data/` without hard-coding absolute paths.

In [None]:
# Resolve the workbook location relative to the current working directory,
# so no absolute path is hard-coded.
base_path = Path().resolve()
ch2_path = base_path.joinpath("data", "ch2.xlsx")

# Load the sheet without a header row and with every cell as text; the
# header row is located later by searching the cell contents.
raw = pd.read_excel(
    ch2_path, sheet_name="Annex 1 LV and HV charges", header=None, dtype=str
)

# Integer code used for each DUoS colour band in the output table.
DUOS_CATEGORY = dict(green=0, amber=1, red=2)

**Helper: `parse_ranges`**  
   - Split multi-line cells like  
     ```
     00:00 - 07:00
     11:00 - 14:00
     ```  
     into a list of `(start_str, end_str)` tuples.  
     
   - Normalize any `24:00` end times to `00:00` to allow standard parsing.

In [None]:
def parse_ranges(text):
    """Split a multi-line cell of "HH:MM - HH:MM" ranges into (start, end) tuples.

    Each non-blank line of ``text`` must hold exactly one range. The separator
    may be a hyphen, an en dash or an em dash, with optional surrounding
    whitespace. An end time of ``24:00`` is rewritten to ``00:00`` so that the
    value can later be parsed with a plain ``%H:%M`` format.

    Args:
        text: Cell content; may be a string, ``None`` or NaN.

    Returns:
        A list of ``(start, end)`` string tuples in the order they appear.
        Missing or whitespace-only input yields an empty list.

    Raises:
        ValueError: If a non-blank line is not of the form "start - end".
    """
    if pd.isna(text) or not str(text).strip():
        return []
    out = []
    for part in str(text).splitlines():
        part = part.strip()
        if not part:
            # Cells sometimes contain stray blank lines; they carry no range.
            continue
        # Accept "-", en dash and em dash, since spreadsheets often
        # auto-replace hyphens with typographic dashes.
        pieces = [t.strip() for t in re.split(r"[-\u2013\u2014]", part)]
        if len(pieces) != 2 or not all(pieces):
            raise ValueError(f"Cannot parse time range {part!r}")
        s, e = pieces
        if e == '24:00':
            e = '00:00'
        out.append((s, e))
    return out


**Helper: `find_anchor_row`**  
   - Scan each row to find where any cell matches “Time periods” (case-insensitive).  
   - Return that row’s index for subsequent slicing.

In [None]:
def find_anchor_row(df, anchor="Time periods"):
    """Return the index label of the first row containing ``anchor``.

    Every cell is compared by its string form; the match is a
    case-insensitive search for the literal ``anchor`` text anywhere
    in the cell.

    Raises:
        ValueError: If no row of ``df`` contains the anchor text.
    """
    matcher = re.compile(re.escape(anchor), flags=re.IGNORECASE)
    for row_label, row in df.iterrows():
        # Stop at the first row in which any cell mentions the anchor.
        if any(matcher.search(str(cell)) for cell in row):
            return row_label
    raise ValueError(f"Anchor '{anchor}' not found in sheet.")

**Helper: `detect_block_columns`**  
   - On the anchor row, detect which columns correspond to "Period", "Red", "Amber", and "Green" by case-insensitive substring matching on each cell's text.  
   - This guards against extra blank columns or different capitalizations.

In [None]:
def detect_block_columns(header_row, anchor_label="Time periods"):
    """Map the period and colour-band headers of ``header_row`` to column labels.

    Only string cells are considered. A cell matches when it contains the
    search text, ignoring case; the first matching cell in row order wins.

    Returns:
        A dict with the keys ``'period'``, ``'red'``, ``'amber'`` and
        ``'green'``, each holding the label of the matching column.

    Raises:
        ValueError: If the period column or any colour column is missing.
    """
    not_found = object()  # sentinel: a column label may itself be 0 or None
    text_cells = [
        (label, value)
        for label, value in header_row.items()
        if isinstance(value, str)
    ]

    period_col = next(
        (label for label, value in text_cells
         if anchor_label.lower() in value.lower()),
        not_found,
    )
    if period_col is not_found:
        raise ValueError(f"Could not find period column matching '{anchor_label}'")
    found = {'period': period_col}

    # Colours are resolved in a fixed order so the first missing one is reported.
    for colour in ('red', 'amber', 'green'):
        colour_col = next(
            (label for label, value in text_cells if colour in value.lower()),
            not_found,
        )
        if colour_col is not_found:
            raise ValueError(f"Could not find '{colour}' column")
        found[colour] = colour_col
    return found

**Helper: `build_records`**

- Given lists of weekday segments for red/amber/green and a single weekend green segment, interleave them in this specific order:  
  1. Weekday red, amber, green  
  2. Weekend green  
  3. Any remaining weekday segments  
- Convert the time‐string tuples into `datetime.time` objects and return a tidy DataFrame.
- **Note:** Initially I tried to reshape by exploding the lines by color, but that didn’t reproduce the exact order shown in the HTML example. After a bit of thinking, I realized that this custom interleaving was needed to match the required output.

In [None]:
def build_records(red_wd, amber_wd, green_wd, weekend):
    """Interleave weekday and weekend segments into a tidy DataFrame.

    Rows are emitted round by round: round ``i`` contributes the ``i``-th
    weekday red, amber and green segment (whichever exist), and the first
    round is followed by the weekend segment.

    Args:
        red_wd: Weekday red ``(start, end)`` tuples of "HH:MM" strings.
        amber_wd: Weekday amber tuples, same format.
        green_wd: Weekday green tuples, same format.
        weekend: Weekend green tuples; only the first entry is used.

    Returns:
        A DataFrame with the columns ``weekdays`` (bool), ``colour``
        (code looked up in ``DUOS_CATEGORY``), ``start_time`` and
        ``end_time`` (``datetime.time``). The columns are present even
        when there are no rows.

    Raises:
        ValueError: If a time string does not match ``%H:%M``.
    """
    def as_record(segment, colour, is_weekday):
        # Build one output row from an (start, end) pair of "HH:MM" strings.
        start, end = segment
        return {
            'weekdays':   is_weekday,
            'colour':     DUOS_CATEGORY[colour],
            'start_time': datetime.strptime(start, "%H:%M").time(),
            'end_time':   datetime.strptime(end, "%H:%M").time(),
        }

    weekday_bands = (('red', red_wd), ('amber', amber_wd), ('green', green_wd))
    rounds = max(len(segments) for _, segments in weekday_bands)

    records = []
    # Always run at least one round so a weekend segment is still emitted
    # when every weekday list is empty.
    for i in range(max(rounds, 1)):
        for colour, segments in weekday_bands:
            if i < len(segments):
                records.append(as_record(segments[i], colour, True))
        if i == 0 and weekend:
            records.append(as_record(weekend[0], 'green', False))

    return pd.DataFrame(
        records,
        columns=['weekdays', 'colour', 'start_time', 'end_time'],
    )

**Main Execution Flow**  
   1. Find the “Time periods” anchor row.  
   2. Detect the four relevant columns.  
   3. Slice out the two rows below (weekday vs weekend).  
   4. Parse each band’s time ranges.  
   5. Build and return the final DataFrame of eight rows.  

In [None]:
# Main flow

# Locate the header row that carries the "Time periods" label and work out
# which columns hold the period label and the three colour bands.
hdr = find_anchor_row(raw, anchor="Time periods")
header = raw.iloc[hdr]
cols = detect_block_columns(header, anchor_label="Time periods")

# Take the two rows directly below the header (weekdays first, then weekend),
# restricted to the four detected columns.
block = raw.iloc[
    hdr + 1:hdr + 3,
    [cols[key] for key in ('period', 'red', 'amber', 'green')],
].copy()
block.columns = ['period_label', 'red', 'amber', 'green']
block['weekdays'] = block['period_label'].str.contains('Monday', case=False, na=False)

# Parse the time ranges: all three bands for the weekday row, and only the
# green band for the weekend row.
wd = block.iloc[0]
we = block.iloc[1]
red_wd, amber_wd, green_wd = (
    parse_ranges(wd[band]) for band in ('red', 'amber', 'green')
)
weekend = parse_ranges(we['green'])

# Assemble the final table and display it as the cell output.
df = build_records(red_wd, amber_wd, green_wd, weekend)

df

Unnamed: 0,weekdays,colour,start_time,end_time
0,True,2,11:00:00,14:00:00
1,True,1,07:00:00,11:00:00
2,True,0,00:00:00,07:00:00
3,False,0,00:00:00,00:00:00
4,True,2,16:00:00,19:00:00
5,True,1,14:00:00,16:00:00
6,True,0,23:00:00,00:00:00
7,True,1,19:00:00,23:00:00
