"""
Exploration of HORARIS.csv

This notebook explores the HORARIS.csv file, which contains details about the 
different time schedules applicable to surface parking spots in Barcelona.
"""


In [1]:

import pandas as pd
import os


In [2]:

# --- Configuration ---
DATA_DIR = "../data/raw" # Adjust if your notebook structure is different
HORARIS_FILE = os.path.join(DATA_DIR, "HORARIS.csv")


In [3]:
# --- Load Data ---
print(f"Loading data from: {HORARIS_FILE}")
try:
    df_horaris = pd.read_csv(HORARIS_FILE)
    print("Data loaded successfully.")
except FileNotFoundError:
    print(f"Error: File not found at {HORARIS_FILE}")
    print("Please ensure the data ingestion script has run successfully.")
    # Exit or handle error appropriately in a real script
    df_horaris = pd.DataFrame() # Create empty df to avoid subsequent errors

Loading data from: ../data/raw/HORARIS.csv
Data loaded successfully.


In [4]:
# --- Initial Inspection ---
if not df_horaris.empty:
    print("\n--- First 5 Rows ---")
    print(df_horaris.head())

    print("\n--- DataFrame Info ---")
    df_horaris.info()

    print("\n--- Descriptive Statistics ---")
    # Include 'all' to get stats for non-numeric columns too
    print(df_horaris.describe(include='all'))

    print("\n--- Missing Values per Column ---")
    print(df_horaris.isnull().sum())

    print("\n--- Unique Values in Key Columns ---")
    # Check ID_HORARI as it's the primary key we expect
    if 'ID_HORARI' in df_horaris.columns:
        print(f"Unique ID_HORARI values: {df_horaris['ID_HORARI'].nunique()}")
        print(f"Total rows: {len(df_horaris)}")
        # If the number of unique IDs matches the total rows, ID_HORARI is likely a unique identifier
        if df_horaris['ID_HORARI'].nunique() == len(df_horaris):
             print("ID_HORARI appears to be a unique identifier for each schedule.")
        else:
             print("WARN: ID_HORARI might have duplicate values.")
             # You might want to investigate duplicates here
             # print(df_horaris[df_horaris.duplicated(subset=['ID_HORARI'], keep=False)])
    else:
        print("Column 'ID_HORARI' not found.")

    # Check unique values in description columns to understand variety
    if 'DESCRIPCIO' in df_horaris.columns:
        print(f"Unique DESCRIPCIO values: {df_horaris['DESCRIPCIO'].nunique()}")
    if 'DESC_CURTA' in df_horaris.columns:
        print(f"Unique DESC_CURTA values: {df_horaris['DESC_CURTA'].nunique()}")

    # Add more exploration steps below as needed

else:
    print("\nDataFrame is empty, cannot perform inspection.")


# --- TODO: Further Analysis ---
# - Analyze the structure of the DESCRIPCIO field. Can it be parsed?
# - Check data types - should any columns be numeric or categorical?
# - Look for patterns or common schedule types.
# - Visualize schedule distributions if meaningful.

print("\n--- End of Initial Exploration ---")


--- First 5 Rows ---
   ID_HORARI CODI_HORARI                                         DESCRIPCIO  \
0          1          H1    de Dl a Ds de 09:00 a 14:00 i de 16:00  a 20:00   
1          2          H2    de Dl a Dv de 09:00 a 14:00 i de 16:00  a 20:00   
2          3          H3  de Dl a Dv de 09:00 a 14:00 i de 16:00  a 20:0...   
3          6          H6                        de Dl a Dv de 09:00 a 14:00   
4          7          H7    de Dl a Dv de 09:30 a 14:00 i de 16:30  a 20:00   

                        DESC_CURTA  INCLUS_FESTIUS  PARQUING_SOLS_DINS_HORARI  
0          de Dl a Ds 9-14 i 16-20               0                          0  
1          de Dl a Dv 9-14 i 16-20               0                          0  
2  de Dl a Dv 9-14 i 16-20 Ds 9-14               0                          0  
3                  de Dl a Dv 9-14               0                          0  
4    de Dl a Dv 9:30-14 i 16:30-20               0                          0  

--- DataFrame Info ---

In [5]:
# --- Further Analysis ---

print("\n--- Converting Flag Columns ---")
# Convert flag columns to boolean for better semantic meaning
try:
    df_horaris['INCLUS_FESTIUS'] = df_horaris['INCLUS_FESTIUS'].astype(bool)
    df_horaris['PARQUING_SOLS_DINS_HORARI'] = df_horaris['PARQUING_SOLS_DINS_HORARI'].astype(bool)
    print("Converted 'INCLUS_FESTIUS' and 'PARQUING_SOLS_DINS_HORARI' to boolean.")
    # Display value counts for the boolean flags
    print("\nValue counts for INCLUS_FESTIUS (True means applies on holidays):")
    print(df_horaris['INCLUS_FESTIUS'].value_counts())
    print("\nValue counts for PARQUING_SOLS_DINS_HORARI (True means parking ONLY allowed during schedule):")
    print(df_horaris['PARQUING_SOLS_DINS_HORARI'].value_counts())
except KeyError as e:
    print(f"Error converting flags: Column {e} not found.")

print("\n--- Examining High ID_HORARI Values ---")
# Let's check the row(s) with ID_HORARI > 1000 (arbitrary threshold based on stats)
high_id_threshold = 1000 
df_high_ids = df_horaris[df_horaris['ID_HORARI'] > high_id_threshold]
if not df_high_ids.empty:
    print(f"Found {len(df_high_ids)} rows with ID_HORARI > {high_id_threshold}:")
    print(df_high_ids)
else:
    print(f"No rows found with ID_HORARI > {high_id_threshold}.")
# Based on the initial describe, there seems to be one very large ID
print("\nSpecifically checking the maximum ID_HORARI:")
print(df_horaris[df_horaris['ID_HORARI'] == df_horaris['ID_HORARI'].max()])


print("\n--- Checking for Duplicate DESCRIPCIO ---")
# Find descriptions that appear more than once
duplicated_descriptions = df_horaris[df_horaris.duplicated(subset=['DESCRIPCIO'], keep=False)]
if not duplicated_descriptions.empty:
    print(f"Found {duplicated_descriptions['DESCRIPCIO'].nunique()} descriptions shared by multiple ID_HORARI values.")
    # Sort by description to group them together for easier comparison
    print(duplicated_descriptions.sort_values(by='DESCRIPCIO'))
else:
    print("No duplicate descriptions found across different ID_HORARI.")


print("\n--- Sample of Unique DESCRIPCIO values ---")
# Display the first 20 unique descriptions to understand the format
unique_desc = df_horaris['DESCRIPCIO'].unique()
print(f"Total unique descriptions: {len(unique_desc)}")
print("First 20 unique descriptions:")
for i, desc in enumerate(unique_desc[:20]):
    print(f"{i+1}. {desc}")

# --- TODO: Next Steps ---
# - Develop a strategy to parse the DESCRIPCIO strings into structured data 
#   (e.g., days of week, start time 1, end time 1, start time 2, end time 2)
# - This might involve regular expressions or complex string splitting.
# - Validate the parsed data against DESC_CURTA and the flag columns.

print("\n--- End of Further Analysis ---")


--- Converting Flag Columns ---
Converted 'INCLUS_FESTIUS' and 'PARQUING_SOLS_DINS_HORARI' to boolean.

Value counts for INCLUS_FESTIUS (True means applies on holidays):
INCLUS_FESTIUS
False    89
True     16
Name: count, dtype: int64

Value counts for PARQUING_SOLS_DINS_HORARI (True means parking ONLY allowed during schedule):
PARQUING_SOLS_DINS_HORARI
False    71
True     34
Name: count, dtype: int64

--- Examining High ID_HORARI Values ---
Found 3 rows with ID_HORARI > 1000:
     ID_HORARI CODI_HORARI                    DESCRIPCIO  \
102       9999         FUT  de Dl a Dv  de 09:30 a 13:00   
103   99999888         H99  de Dl a Dv  de 14:00 a 20:00   
104   99999999         Z99  de Dl a Dv  de 08:00 a 14:00   

                       DESC_CURTA  INCLUS_FESTIUS  PARQUING_SOLS_DINS_HORARI  
102  de Dl a Dv  de 09:30 a 13:00           False                      False  
103          de Dl a Dv  de 14-20           False                       True  
104           de Dl a Dv  de 8-14     

In [7]:
# --- Revised Schedule Description Parsing ---

import re

print("\n--- Revised Parsing Schedule Descriptions ---")

# Regex to find day specifiers like 'de Dl a Dv', 'Ds', 'Diumenges i festius' etc.
# More comprehensive pattern
day_pattern = re.compile(
    r"""
    (?:^|\,\s*|;\s*|\.\s*)  # Start of string, or after comma/semicolon/period + optional space
    (                       # Start capturing group 1 (full day spec)
      (?:de\s+)?            # Optional 'de ' prefix
      (                     # Start capturing group 2 (the actual days part)
        (?:Dl\s+a\s+Dg|Dl\s+a\s+Ds|Dl\s+a\s+Dv|   # Day ranges
         Dl|Dt|Dc|Dj|Dv|Ds|Dg|                  # Single days
         Dilluns|Dimarts|Dimecres|Dijous|Divendres|Dissabte|Diumenges| # Full day names
         Diumenges\s+i\s+festius|                 # Sundays and holidays
         festius)                               # Holidays alone
         (?:[\s\-]*(?:inclòs|excepte)[\s\-]*(?:festius|Fútbol))? # Optional modifiers
      )
    )
    """, 
    re.VERBOSE | re.IGNORECASE
)

# Regex to find time patterns (HH:MM a HH:MM)
time_pattern = re.compile(r'(\d{1,2}:\d{2})\s+a\s+(\d{1,2}:\d{2})')

def parse_schedule_description_revised(desc):
    """
    Revised attempt to parse the schedule description string.
    Returns a list of dictionaries, each representing a rule part.
    """
    parsed_rules = []
    
    # Normalize: remove extra spaces, handle 'i de' for easier regex
    desc = re.sub(r'\s{2,}', ' ', desc).strip()
    desc = desc.replace(' i de ', ', i ') # Facilitates finding second time slot

    # Find all day specifications
    day_matches = list(day_pattern.finditer(desc))
    
    # Find all time specifications
    time_matches = list(time_pattern.finditer(desc))

    current_rule = {}
    time_idx = 0

    # Iterate through day matches to structure rules
    for i, day_match in enumerate(day_matches):
        days_str = day_match.group(2).strip()
        rule_part_start = day_match.start()
        # Determine the end of this day rule's text (start of next day rule, or end of string)
        rule_part_end = day_matches[i+1].start() if i + 1 < len(day_matches) else len(desc)
        
        # Extract the substring relevant to this day rule
        rule_text = desc[rule_part_start:rule_part_end]

        # Find times *within* this rule's text substring
        times_in_rule = list(time_pattern.finditer(rule_text))
        
        time1 = times_in_rule[0].groups() if len(times_in_rule) > 0 else None
        time2 = times_in_rule[1].groups() if len(times_in_rule) > 1 else None
        
        # Fix for TypeError: Initialize days_str correctly
        current_days_str = days_str if days_str else 'Unknown' # Ensure it's a string

        # Check for holiday inclusion in the text (handle potential NoneType)
        if 'inclòs festius' in rule_text.lower():
             current_days_str += ' (Festius incl.)'
        
        parsed_rules.append({
            'days_str': current_days_str,
            'time1': time1,
            'time2': time2,
            'raw_part': rule_text.strip() 
        })

    # If no day parts were found, try a simpler global time extraction
    if not day_matches and time_matches:
         time1 = time_matches[0].groups() if len(time_matches) > 0 else None
         time2 = time_matches[1].groups() if len(time_matches) > 1 else None
         # Attempt to infer days context (very basic)
         days_str = 'Unknown (No day pattern matched)'
         if 'inclòs festius' in desc.lower():
              days_str += ' (Festius incl.)'

         parsed_rules.append({
            'days_str': days_str,
            'time1': time1,
            'time2': time2,
            'raw_part': desc
         })
         
    # Fallback if nothing meaningful found
    if not parsed_rules and desc:
         return [{'days_str': 'Fallback - Check Manually', 'time1': None, 'time2': None, 'raw_part': desc}]

    return parsed_rules if parsed_rules else None # Return None if truly empty


# Apply the revised parsing function
def safe_parse_revised(desc):
    try:
        return parse_schedule_description_revised(desc)
    except Exception as e:
        # Log the error with description for debugging
        print(f"ERROR parsing '{desc}': {type(e).__name__} - {e}")
        return None 

if 'DESCRIPCIO' in df_horaris.columns:
    df_horaris['parsed_schedule_revised'] = df_horaris['DESCRIPCIO'].apply(safe_parse_revised)

    print("\n--- Sample of Revised Parsed Schedules ---")
    # Use the same sample indices if you want to compare, or get a new sample
    if 'sample_indices' not in locals(): # Generate sample indices if they don't exist
         sample_indices = df_horaris.sample(min(15, len(df_horaris))).index
         
    for index in sample_indices:
        original_desc = df_horaris.loc[index, 'DESCRIPCIO']
        # Access the potentially new column, handle if it wasn't created
        parsed_result = df_horaris.get('parsed_schedule_revised', {}).get(index, "Parsing Skipped/Failed") 
        print(f"\nOriginal: {original_desc}")
        print(f"Revised Parsed: {parsed_result}")

    # Check parsing summary again
    failed_parses = df_horaris['parsed_schedule_revised'].isnull().sum()
    fallback_parses = sum(1 for p in df_horaris['parsed_schedule_revised'].dropna() if isinstance(p, list) and len(p) == 1 and p[0]['days_str'].startswith('Fallback'))
    # Re-calculate successful parses
    successful_parses = 0
    for p in df_horaris['parsed_schedule_revised'].dropna():
         if isinstance(p, list) and len(p) > 0 and not p[0]['days_str'].startswith('Fallback'):
              successful_parses += 1
              
    print(f"\nRevised Parsing Summary:")
    print(f"- Failed to parse (errors or returned None): {failed_parses}")
    print(f"- Used Fallback: {fallback_parses}")
    print(f"- Successfully parsed (at least partially): {successful_parses} / {len(df_horaris)}")

else:
    print("Column 'DESCRIPCIO' not found, skipping parsing.")
    
print("\n--- End of Revised Schedule Parsing Attempt ---")


--- Revised Parsing Schedule Descriptions ---

--- Sample of Revised Parsed Schedules ---

Original: de Dl a Dv  de 08:00 a 20:00 exclusiu resident de 0 a 24
Revised Parsed: [{'days_str': 'Dl a Dv', 'time1': ('08:00', '20:00'), 'time2': None, 'raw_part': 'de Dl a Dv de 08:00 a 20:00 exclusiu resident de 0 a 24'}]

Original: de Dl a Ds  de 08:00 a 11:00
Revised Parsed: [{'days_str': 'Dl a Ds', 'time1': ('08:00', '11:00'), 'time2': None, 'raw_part': 'de Dl a Ds de 08:00 a 11:00'}]

Original: de Dl a Dv de 06:00 a 20:00
Revised Parsed: [{'days_str': 'Dl a Dv', 'time1': ('06:00', '20:00'), 'time2': None, 'raw_part': 'de Dl a Dv de 06:00 a 20:00'}]

Original: de Dl a Dv de 09:00 a 20:00, Ds de 09:00 a 14:00
Revised Parsed: [{'days_str': 'Dl a Dv', 'time1': ('09:00', '20:00'), 'time2': None, 'raw_part': 'de Dl a Dv de 09:00 a 20:00'}, {'days_str': 'Ds', 'time1': ('09:00', '14:00'), 'time2': None, 'raw_part': ', Ds de 09:00 a 14:00'}]

Original: de Dl a Dv de 07:00 a 20:00, Ds de 07:00 a 14:

In [8]:
# --- Convert Parsed Schedules to Structured Format ---

import datetime

print("\n--- Converting Parsed Data to Structured Format ---")

# Mapping from abbreviations/ranges to day numbers (Monday=0, Sunday=6)
day_mapping = {
    'Dl': {0}, 'Dt': {1}, 'Dc': {2}, 'Dj': {3}, 'Dv': {4}, 'Ds': {5}, 'Dg': {6},
    'Dilluns': {0}, 'Dimarts': {1}, 'Dimecres': {2}, 'Dijous': {3}, 'Divendres': {4}, 'Dissabte': {5}, 'Diumenges': {6},
    'Dl a Dv': set(range(5)), # Mon-Fri
    'Dl a Ds': set(range(6)), # Mon-Sat
    'Dl a Dg': set(range(7)), # Mon-Sun
    'Diumenges i festius': {6}, # Treat as Sunday for now, handle holidays separately
    'festius': set() # Represents holidays, handle via flag
}

def structure_parsed_rules(parsed_list, original_includes_festius_flag):
    """
    Converts the list of parsed rule dictionaries into a list of structured tuples.
    Each tuple: (set_of_days:int, start1:time, end1:time, start2:time|None, end2:time|None, applies_holidays:bool)
    """
    if not isinstance(parsed_list, list):
        return None # Handle cases where parsing failed or returned None

    structured_rules = []
    for rule_part in parsed_list:
        if not isinstance(rule_part, dict): continue # Skip if format is unexpected

        days_str = rule_part.get('days_str', '').strip()
        time1_tuple = rule_part.get('time1')
        time2_tuple = rule_part.get('time2')

        # Initialize defaults
        days_set = set()
        applies_holidays = original_includes_festius_flag # Start with the original flag

        # --- Map Days ---
        # Clean up the string before mapping
        cleaned_days_str = days_str.split('(')[0].strip() # Remove "(Festius incl.)" part for lookup
        
        # Prioritize direct map lookup
        if cleaned_days_str in day_mapping:
            days_set = day_mapping[cleaned_days_str]
        else:
            # Fallback: Check if it's a single day abbrev/name not caught? Unlikely with current regex
            pass # Add more complex logic if needed

        # --- Determine Holiday Applicability ---
        # Use the flag AND check if the text specifically mentioned festius/holidays
        if 'festius' in days_str.lower():
            applies_holidays = True
        # If the rule text was JUST 'festius', the days_set will be empty, rely only on flag
        if cleaned_days_str.lower() == 'festius':
             days_set = set() # Explicitly represents 'only holidays' if needed? Or rely on flag? Let's use flag.

        # --- Convert Times ---
        def time_from_str(time_str):
             if not time_str: return None
             try:
                 # Handle potential 24:00 -> wrap to 23:59:59 for time objects or handle specially
                 if time_str == "24:00": 
                     # Option 1: Use near-midnight
                     # return datetime.time(23, 59, 59) 
                     # Option 2: Return a special marker, e.g. "24:00" string
                     return "24:00" 
                 # Option 3: Use timedelta if comparing durations
                 hour, minute = map(int, time_str.split(':'))
                 return datetime.time(hour, minute)
             except ValueError:
                 print(f"Warning: Could not parse time string '{time_str}'")
                 return None

        start1, end1 = None, None
        if time1_tuple:
            start1 = time_from_str(time1_tuple[0])
            end1 = time_from_str(time1_tuple[1])
            
        start2, end2 = None, None
        if time2_tuple:
            start2 = time_from_str(time2_tuple[0])
            end2 = time_from_str(time2_tuple[1])

        # Add the structured rule if it has days or times
        if days_set or start1:
            structured_rules.append(
                (days_set, start1, end1, start2, end2, applies_holidays)
            )

    return structured_rules if structured_rules else None


# --- Apply the structuring function ---
# We need to pass both the parsed list and the original flag
if 'parsed_schedule_revised' in df_horaris.columns and 'INCLUS_FESTIUS' in df_horaris.columns:
    
    structured_results = []
    for index, row in df_horaris.iterrows():
        parsed_list = row['parsed_schedule_revised']
        original_flag = row['INCLUS_FESTIUS']
        structured_rule = structure_parsed_rules(parsed_list, original_flag)
        structured_results.append(structured_rule)
        
    df_horaris['structured_schedule'] = structured_results

    print("\n--- Sample of Structured Schedules ---")
    # Display original, revised parsed, and final structured for comparison
    if 'sample_indices' not in locals(): # Regenerate if needed
         sample_indices = df_horaris.sample(min(15, len(df_horaris))).index
         
    for index in sample_indices:
        print(f"\nID_HORARI: {df_horaris.loc[index, 'ID_HORARI']}")
        print(f"Original:  {df_horaris.loc[index, 'DESCRIPCIO']}")
        # print(f"Parsed:    {df_horaris.loc[index, 'parsed_schedule_revised']}") # Optional: uncomment to see intermediate
        print(f"Structured:{df_horaris.loc[index, 'structured_schedule']}")

    # Check how many failed structuring (resulted in None)
    failed_structuring = df_horaris['structured_schedule'].isnull().sum()
    print(f"\nStructuring Summary:")
    print(f"- Failed to structure (returned None): {failed_structuring}")
    print(f"- Successfully structured: {len(df_horaris) - failed_structuring} / {len(df_horaris)}")

else:
    print("Required columns ('parsed_schedule_revised', 'INCLUS_FESTIUS') not found. Skipping structuring.")


# --- TODO: Final Representation & Usage ---
# - The 'structured_schedule' column now holds a list of tuples for each ID_HORARI.
# - Create a function `is_regulated(target_datetime, schedule_id)` that uses this 
#   structured data (and potentially holiday info) to return True/False.
# - Consider how to handle the PARQUING_SOLS_DINS_HORARI flag in that function.
# - Need a reliable source for Barcelona public holidays for the `is_regulated` function.

print("\n--- End of Schedule Structuring ---")


--- Converting Parsed Data to Structured Format ---

--- Sample of Structured Schedules ---

ID_HORARI: 95
Original:  de Dl a Dv  de 08:00 a 20:00 exclusiu resident de 0 a 24
Structured:[({0, 1, 2, 3, 4}, datetime.time(8, 0), datetime.time(20, 0), None, None, False)]

ID_HORARI: 74
Original:  de Dl a Ds  de 08:00 a 11:00
Structured:[({0, 1, 2, 3, 4, 5}, datetime.time(8, 0), datetime.time(11, 0), None, None, False)]

ID_HORARI: 36
Original:  de Dl a Dv de 06:00 a 20:00
Structured:[({0, 1, 2, 3, 4}, datetime.time(6, 0), datetime.time(20, 0), None, None, False)]

ID_HORARI: 83
Original:  de Dl a Dv de 09:00 a 20:00, Ds de 09:00 a 14:00
Structured:[({0, 1, 2, 3, 4}, datetime.time(9, 0), datetime.time(20, 0), None, None, False), ({5}, datetime.time(9, 0), datetime.time(14, 0), None, None, False)]

ID_HORARI: 33
Original:  de Dl a Dv de 07:00 a 20:00, Ds de 07:00 a 14:00
Structured:[({0, 1, 2, 3, 4}, datetime.time(7, 0), datetime.time(20, 0), None, None, False), ({5}, datetime.time(7, 0), d

In [14]:
# --- Create Schedule Checking Function ---

import datetime
import holidays # Needs installation: pip install holidays

print("\n--- Defining Schedule Checking Function ---")

# Initialize Spain/Catalonia holidays
# Use 'ES' for Spain-wide, 'CT' for Catalonia specific region
# You might need subdivision codes for more accuracy, check 'holidays' library docs
# Example: Barcelona province 'ES-B' or Catalonia 'ES-CT'
try:
    # Use Catalonia region for specific holidays
    bcn_holidays = holidays.country_holidays('ES', subdiv='CT') 
    print("Initialized Catalonia public holidays.")
except KeyError:
    print("Warning: Catalonia subdivision 'CT' not found in 'holidays' library version.")
    print("Falling back to Spain ('ES') national holidays.")
    bcn_holidays = holidays.country_holidays('ES')

# Store the structured schedules in a dictionary for faster lookup by ID_HORARI
schedule_dict = df_horaris.set_index('ID_HORARI')['structured_schedule'].to_dict()
# Also store the 'Parking Only' flag for reference
parking_only_dict = df_horaris.set_index('ID_HORARI')['PARQUING_SOLS_DINS_HORARI'].to_dict()

def is_parking_regulated(check_dt, schedule_id):
    """
    Checks if parking is regulated for a given schedule_id at a specific datetime.

    Args:
        check_dt (datetime.datetime): The date and time to check.
        schedule_id (int): The ID_HORARI to check against.

    Returns:
        str: "Regulated", "Unregulated", "Parking Prohibited", "Unknown Schedule"
             Returns "Parking Prohibited" if the PARQUING_SOLS_DINS_HORARI flag is True
             and the time falls outside the specified schedule hours.
    """

    rules = schedule_dict.get(schedule_id)
    if rules is None:
        if schedule_id in df_horaris['ID_HORARI'].values:
             print(f"Warning: Schedule ID {schedule_id} found but has no structured rules (structuring failed?).")
        return "Unknown Schedule"

    target_dow = check_dt.weekday() # Monday is 0, Sunday is 6
    target_time = check_dt.time()
    target_date = check_dt.date()

    is_holiday = target_date in bcn_holidays

    parking_is_regulated = False # Assume unregulated unless a rule matches

    for rule in rules:
        days_set, start1, end1, start2, end2, applies_holidays_flag = rule

        # Check holiday applicability first
        if is_holiday and not applies_holidays_flag:
            continue # This rule doesn't apply on holidays

        # If it IS a holiday and the rule DOES apply, treat it like a matching day
        # Otherwise, check if the target day of the week matches the rule's days
        if not (is_holiday and applies_holidays_flag) and target_dow not in days_set:
             continue # This rule doesn't apply on this day of the week

        # Now check the time
        time_match = False

        # --- Handle time comparisons, including "24:00" ---
        def check_time_in_range(start_time, end_time):
             if start_time is None or end_time is None: return False

             # Default assignments (will be overwritten below if conditions match)
             start_time_obj = start_time 
             end_time_obj = end_time
             is_overnight = False

             if isinstance(end_time, str) and end_time == "24:00":
                 # Rule spans up to midnight
                 end_time_obj = datetime.time(23, 59, 59, 999999) # Inclusive midnight
                 if isinstance(start_time, str) and start_time == "00:00": # Spans full day
                      return True
                 else: # Handles cases like 08:00 to 24:00
                      start_time_obj = start_time # <<< FIX: Assign start_time_obj here >>>
             elif isinstance(start_time, str) and start_time == "00:00":
                 start_time_obj = datetime.time(0, 0)
                 end_time_obj = end_time # Already a time object
             elif isinstance(start_time, datetime.time) and isinstance(end_time, datetime.time) and end_time < start_time:
                 # Overnight range (e.g., 22:00 - 06:00) - less common in this data?
                 is_overnight = True
                 # start_time_obj and end_time_obj already assigned correctly at start
             # else: # Normal case - start_time_obj and end_time_obj already assigned correctly at start
                 # pass 

             # Perform comparison - ensure both are time objects now unless it was full day
             if not isinstance(start_time_obj, datetime.time) or not isinstance(end_time_obj, datetime.time):
                  # This case should ideally not be reached if parsing/logic is correct
                  # Could happen if start was "00:00" and end was "24:00" (handled above)
                  # Or if parsing returned unexpected types
                  print(f"Warning: Unexpected time types for comparison: {start_time_obj}, {end_time_obj}")
                  return False 
                  
             if is_overnight:
                 # Check if target time is >= start OR <= end
                 return target_time >= start_time_obj or target_time < end_time_obj
             else:
                 # Normal check: target time is >= start AND < end
                 return target_time >= start_time_obj and target_time < end_time_obj


        # Check first time slot
        if start1 is not None and check_time_in_range(start1, end1):
            time_match = True

        # Check second time slot if the first didn't match
        if not time_match and start2 is not None and check_time_in_range(start2, end2):
            time_match = True

        # If day and time match, parking is regulated by this rule
        if time_match:
            parking_is_regulated = True
            break # Found a matching rule, no need to check others for this schedule_id

    # Final check: incorporate the PARQUING_SOLS_DINS_HORARI flag
    parking_only_flag = parking_only_dict.get(schedule_id, False) # Default to False if ID missing

    if parking_is_regulated:
        return "Regulated"
    elif parking_only_flag: # If it wasn't regulated, but parking is ONLY allowed during schedule...
        return "Parking Prohibited"
    else: # Wasn't regulated, and parking isn't restricted outside hours
        return "Unregulated"


# --- Example Usage ---
print("\n--- Example Schedule Checks ---")

# Get a few schedule IDs to test, including ones with different flags/rules
test_ids = df_horaris.sample(3)['ID_HORARI'].tolist()
if 3 not in test_ids: test_ids.append(3) # Ensure ID 3 (split schedule) is tested
if 72 not in test_ids: test_ids.append(72) # Ensure ID 72 (includes holidays) is tested
# Find an ID where PARQUING_SOLS_DINS_HORARI is True
# Handle potential empty DataFrame if no True values exist
parking_only_true_rows = df_horaris[df_horaris['PARQUING_SOLS_DINS_HORARI']]
if not parking_only_true_rows.empty:
    parking_only_true_id = parking_only_true_rows.iloc[0]['ID_HORARI']
    if parking_only_true_id not in test_ids: test_ids.append(parking_only_true_id)
else:
    print("Warning: No schedules found with PARQUING_SOLS_DINS_HORARI=True for testing.")


print(f"Testing with Schedule IDs: {test_ids}")

# Example datetimes
dt_monday_10am = datetime.datetime(2024, 5, 13, 10, 0) # Monday 10:00
dt_monday_3pm = datetime.datetime(2024, 5, 13, 15, 0) # Monday 15:00 (afternoon break for some)
dt_saturday_10am = datetime.datetime(2024, 5, 18, 10, 0) # Saturday 10:00
dt_saturday_5pm = datetime.datetime(2024, 5, 18, 17, 0) # Saturday 17:00
dt_sunday_10am = datetime.datetime(2024, 5, 19, 10, 0) # Sunday 10:00
dt_holiday = datetime.datetime(2024, 8, 15, 11, 0) # Assumption Day (Holiday) 11:00
dt_night = datetime.datetime(2024, 5, 13, 23, 0) # Monday 23:00

test_dts = [dt_monday_10am, dt_monday_3pm, dt_saturday_10am, dt_saturday_5pm, dt_sunday_10am, dt_holiday, dt_night]

for dt in test_dts:
    dt_str = dt.strftime('%Y-%m-%d %H:%M (%A)')
    is_hol = dt.date() in bcn_holidays # CORRECTED LINE
    print(f"\nChecking for: {dt_str}" + (" (Holiday)" if is_hol else ""))
    for sid in test_ids:
        status = is_parking_regulated(dt, sid)
        print(f"  ID {sid:<8}: {status}")

print("\n--- End of Schedule Checking Function Definition and Examples ---")



--- Defining Schedule Checking Function ---
Initialized Catalonia public holidays.

--- Example Schedule Checks ---
Testing with Schedule IDs: [62, 92, 109, 3, 72, np.int64(27)]

Checking for: 2024-05-13 10:00 (Monday)
  ID 62      : Regulated
  ID 92      : Regulated
  ID 109     : Regulated
  ID 3       : Regulated
  ID 72      : Regulated
  ID 27      : Regulated

Checking for: 2024-05-13 15:00 (Monday)
  ID 62      : Regulated
  ID 92      : Regulated
  ID 109     : Regulated
  ID 3       : Unregulated
  ID 72      : Regulated
  ID 27      : Regulated

Checking for: 2024-05-18 10:00 (Saturday)
  ID 62      : Parking Prohibited
  ID 92      : Parking Prohibited
  ID 109     : Parking Prohibited
  ID 3       : Regulated
  ID 72      : Regulated
  ID 27      : Regulated

Checking for: 2024-05-18 17:00 (Saturday)
  ID 62      : Parking Prohibited
  ID 92      : Parking Prohibited
  ID 109     : Parking Prohibited
  ID 3       : Unregulated
  ID 72      : Regulated
  ID 27      : Parki