In [1]:
import networkx as nx
import csv
import time
from collections import defaultdict
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
class Service:
    """
    Plain record describing one timetable service.

    Every constructor argument is stored on the instance under the same
    name. Two values are normalised on the way in:

    - ``service_id`` is always stored as ``str``.
    - ``service_time`` is stored as ``int``; any falsy value (``None``,
      ``0``, ``""``) becomes ``0``.

    The remaining fields are kept exactly as given.
    """

    def __init__(
        self,
        service_id, 
        rake_num,
        start_station,
        start_time,
        end_station,
        end_time,
        direction,
        service_time=0,
        same_jurisdiction=None,
        step_back_rake=None,
        step_back_location=None,
        merged_rake_num1=None,
        merged_rake_num2=None
    ):
        # Identity
        self.service_id = str(service_id)
        self.rake_num = rake_num
        self.direction = direction

        # Where and when the service starts and ends
        self.start_station = start_station
        self.end_station = end_station
        self.start_time = start_time
        self.end_time = end_time

        # Duration: falsy input collapses to 0, everything else goes through int()
        self.service_time = int(service_time or 0)

        # Optional timetable columns, stored untouched
        self.same_jurisdiction = same_jurisdiction
        self.step_back_rake = step_back_rake
        self.step_back_location = step_back_location
        self.merged_rake_num1 = merged_rake_num1
        self.merged_rake_num2 = merged_rake_num2


def load_services(csv_file):
    """
    Read the timetable CSV and turn every row into a ``Service``.

    Columns read by name: "Service", "Rake Num", "Start Station",
    "Start Time", "End Station", "End Time", "Direction", and, when present,
    "service time", "Same Jurisdiction", "Step Back Rake",
    "Step Back Location", "mergedRakeNum1", "mergedRakeNum2".

    Station names go through ``get_base_station_name`` and times through
    ``parse_time_to_minutes``. A missing or empty "service time" becomes 0;
    a missing optional column becomes ``None``.

    Returns:
        list of Service objects, in file order.
    """
    timetable = pd.read_csv(csv_file)

    def optional(record, column):
        # Value of an optional column, or None when the CSV has no such column.
        return record[column] if column in record else None

    loaded = []
    for _, record in timetable.iterrows():
        loaded.append(
            Service(
                service_id=record.get("Service"),
                rake_num=record.get("Rake Num"),
                start_station=get_base_station_name(record.get("Start Station")),
                start_time=parse_time_to_minutes(record.get("Start Time")),
                end_station=get_base_station_name(record.get("End Station")),
                end_time=parse_time_to_minutes(record.get("End Time")),
                direction=record.get("Direction"),
                service_time=(
                    int(record["service time"])
                    if "service time" in record and pd.notna(record["service time"])
                    else 0
                ),
                same_jurisdiction=optional(record, "Same Jurisdiction"),
                step_back_rake=optional(record, "Step Back Rake"),
                step_back_location=optional(record, "Step Back Location"),
                merged_rake_num1=optional(record, "mergedRakeNum1"),
                merged_rake_num2=optional(record, "mergedRakeNum2"),
            )
        )
    return loaded


#----------------------------------# Adding Dummy Nodes
# The dummy source ("S") and sink ("T") carry no timetable data at all:
# every field except service_id is None and service_time is 0.
_DUMMY_SERVICE_FIELDS = dict(
    rake_num=None,
    start_station=None,
    start_time=None,
    end_station=None,
    end_time=None,
    direction=None,
    service_time=0,
    same_jurisdiction=None,
    step_back_rake=None,
    step_back_location=None,
    merged_rake_num1=None,
    merged_rake_num2=None,
)

# Dummy start node
start_service = Service(service_id="S", **_DUMMY_SERVICE_FIELDS)

# Dummy end node
end_service = Service(service_id="T", **_DUMMY_SERVICE_FIELDS)


## Input Parameters

In [3]:
# === Configuration ===
# All durations and clock times below are expressed in minutes.
#TIMETABLE_FILE = "C:/Users/srupe/Downloads/crew/mainLoop_aadesh.csv"

# --- Rake connection rules ---
MIN_RAKE_GAP_MINUTES = 30                        # minimum gap required when changing to a different rake
ALLOWED_RAKE_CHANGE_STATIONS = {"KKDA", "PVGW"}  # stations where a rake change is allowed
MAX_CONNECTION_GAP_MINUTES = 120                 # maximum allowed gap between consecutive services

# --- Break rules ---
BREAK_STATIONS = {"KKDA", "PVGW"}   # breaks are counted only at these stations
CUMULATIVE_BREAKS_DURATION = 120    # upper bound on the sum of all breaks in one duty
SHORT_BREAK = 30                    # short break duration
LONG_BREAK = 50                     # long break duration

# --- Duty and driving limits ---
DUTY_TIME_LIMIT = 445               # normal duty duration
MORN_EVEN_DUTY_TIME_LIMIT = 405     # duty duration for morning and evening duties
MORNING_SHIFT_CUTOFF = 360          # 360 -> 06:00
EVENING_SHIFT_CUTOFF = 1410         # 1410 -> 23:30
CONTINUOUS_DRIVE_LIMIT = 180        # continuous driving without a break of at least SHORT_BREAK minutes
DRIVING_TIME_LIMIT = 360            # actual driving time; gaps shorter than SHORT_BREAK count as driving


# === Jurisdiction Buckets ===
# Maps a jurisdiction group id to the set of station names that belong to it.
jurisdiction_dict = {
    1: {
        'MKPR', 'MKPR UP', 'MKPR DN', 'MKPR DN SDG', 'MKPR DN PF',
        'SAKP', 'SAKP 3RD', 'SAKP 3RD PF',
        'DDSC', 'DDSC DN', 'DDSC DN PF', 'DDSC DN SDG',
        'DDSC SDG', 'DDSC SDG STABLE (DAY)',
        'PVGW', 'PVGW UP', 'PVGW DN',
        'MKPD',
    },
    2: {
        'MUPR', 'MUPR DN', 'MUPR DN PF', 'MUPR DN SDG', 'MUPR DN SDG STABLE (DAY)',
        'MUPR 3RD SDG', 'MUPR 3RD SDG STABLE',
        'MUPR 4TH', 'MUPR 4TH PF', 'MUPR 4TH SDG', 'MUPR 4TH SDG STABLE (DAY)',
        'SVVR', 'SVVR DN', 'SVVR DN PF',
        'KKDA', 'KKDA DN', 'KKDA UP',
        'IPE', 'IPE 3RD', 'IPE 3RD PF',
        'VND', 'VND (M)',
        'MVPO', 'MVPO DN', 'MVPO DN PF',
        'NZM', 'NIZM',
    },
}

In [4]:
# === Utility Functions ===
def parse_time_to_minutes(t: str):
    """
    Converts a time string 'HH:MM' or 'HH:MM:SS' into minutes since midnight.
    Supports hours >= 24 (e.g., 24:05 → 1445 minutes, 25:07 → 1507 minutes).
    A seconds component, if present, is ignored.

    Args:
        t: time text; surrounding whitespace is stripped.

    Returns:
        int minutes since midnight, or None when ``t`` cannot be parsed
        (not a string, no ':' separator, or non-numeric hour/minute parts).
    """
    try:
        # Split HH:MM or HH:MM:SS; only the first two parts are used
        parts = t.strip().split(":")
        hours = int(parts[0])
        minutes = int(parts[1])
    except (AttributeError, IndexError, TypeError, ValueError):
        # Only parse failures map to None. A bare `except:` would also hide
        # KeyboardInterrupt / SystemExit, so those are left to propagate.
        return None
    return hours * 60 + minutes


def get_base_station_name(station: str):
    """
    Extracts the base station name (first whitespace-separated word) in uppercase.

    Args:
        station: station text such as "kkda dn pf".

    Returns:
        The first word uppercased (e.g. "KKDA"), or None when ``station`` is
        not a string (None, or the float NaN pandas uses for empty cells),
        is empty, or contains only whitespace.
    """
    # Non-string values have no .split(); treat them as "no station".
    if not isinstance(station, str):
        return None
    # split() with no argument already ignores leading/trailing whitespace.
    words = station.split()
    if not words:
        return None
    return words[0].upper()


def rake_feasible_connection(s1, s2):
    """
    Checks if service ``s2`` can directly follow service ``s1``.

    Rules:
    - ``s1`` must end at the station where ``s2`` starts.
    - 0 <= (s2.start_time - s1.end_time) <= MAX_CONNECTION_GAP_MINUTES
    - If the rake is the same: allowed.
    - If the rake is different: allowed only at ALLOWED_RAKE_CHANGE_STATIONS
      and only when the gap is >= MIN_RAKE_GAP_MINUTES.

    Args:
        s1: earlier service; reads ``end_station``, ``end_time``, ``rake_num``.
        s2: later service; reads ``start_station``, ``start_time``, ``rake_num``.

    Returns:
        bool. Services with a missing station or time (including the dummy
        start/end services) are never connectable.
    """
    # One guard covers both missing data and the dummy start/end services,
    # whose stations and times are all None.
    if not s1.end_station or not s2.start_station or s1.end_time is None or s2.start_time is None:
        return False

    if s1.end_station != s2.start_station:
        return False

    gap = s2.start_time - s1.end_time

    # Must be non-negative and within max connection window
    if gap < 0 or gap > MAX_CONNECTION_GAP_MINUTES:
        return False

    # Same rake: the gap window above is the only requirement
    if s1.rake_num == s2.rake_num:
        return True

    # Rake change allowed only at certain stations with min gap
    return (s1.end_station in ALLOWED_RAKE_CHANGE_STATIONS) and (gap >= MIN_RAKE_GAP_MINUTES)


## Graph building

In [5]:
def build_graph(services_list, start_service, end_service):
    """
    Build the directed connection graph used for duty enumeration.

    Nodes are keyed by ``service_id``; the Service object itself is stored
    in the node attribute ``"data"``. Services whose start or end time is
    None are left out entirely.

    Edges:
    - dummy start -> every service (color="black")
    - service A -> service B whenever ``rake_feasible_connection(A, B)``
      holds (A and B at different list positions)
    - every service -> dummy end (color="black")

    Returns:
        networkx.DiGraph
    """
    timed = [
        svc for svc in services_list
        if svc.start_time is not None and svc.end_time is not None
    ]
    source_id = start_service.service_id
    sink_id = end_service.service_id

    G = nx.DiGraph()

    # Real services first, then the two dummies
    G.add_nodes_from((svc.service_id, {"data": svc}) for svc in timed)
    G.add_node(source_id, data=start_service)
    G.add_node(sink_id, data=end_service)

    # Source fan-out
    G.add_edges_from(((source_id, svc.service_id) for svc in timed), color="black")

    # Feasible service-to-service connections (every ordered pair is examined)
    G.add_edges_from(
        (earlier.service_id, later.service_id)
        for i, earlier in enumerate(timed)
        for j, later in enumerate(timed)
        if i != j and rake_feasible_connection(earlier, later)
    )

    # Sink fan-in, added last so the sink stays the final successor of each service
    G.add_edges_from(((svc.service_id, sink_id) for svc in timed), color="black")

    return G


## Checking Current path

In [6]:
def is_path_acceptable(
    path, end_service,
    total_driving_time, DRIVING_TIME_LIMIT,
    DUTY_TIME_LIMIT
):
    """
    Checks whether a (possibly partial) path is still acceptable.

    Args:
        path: list whose first element is the dummy source, followed by the
            services driven so far; the last element may be ``end_service``.
        end_service: the dummy sink; compared against ``path[-1]``.
        total_driving_time: driving minutes accumulated by the caller.
        DRIVING_TIME_LIMIT: maximum allowed ``total_driving_time``.
        DUTY_TIME_LIMIT: duty-span limit for duties that start between
            MORNING_SHIFT_CUTOFF and EVENING_SHIFT_CUTOFF (inclusive).
            Duties starting outside that window use
            MORN_EVEN_DUTY_TIME_LIMIT instead.

    Returns:
        True when all three rules hold:
        1. driving time limit,
        2. duty time limit (last end time minus first start time),
        3. continuous driving never exceeds CONTINUOUS_DRIVE_LIMIT, where a
           gap shorter than SHORT_BREAK counts as driving and a gap of at
           least SHORT_BREAK resets the counter.
        A path with no real services is trivially acceptable.
    """
    # -----------------------------
    # Condition 1: Driving time limit
    # -----------------------------
    if total_driving_time > DRIVING_TIME_LIMIT:
        return False

    # Real services only: drop the dummy source, and the dummy sink if present
    if path and path[-1] == end_service:
        real_services = path[1:-1]
    else:
        real_services = path[1:]

    if not real_services:
        return True

    # -----------------------------
    # Condition 2: Duty time limit
    # -----------------------------
    first_service_start_time = real_services[0].start_time
    total_duty_time = real_services[-1].end_time - first_service_start_time

    # Morning/evening tighter limit
    if first_service_start_time < MORNING_SHIFT_CUTOFF or first_service_start_time > EVENING_SHIFT_CUTOFF:
        effective_duty_limit = MORN_EVEN_DUTY_TIME_LIMIT
    else:
        effective_duty_limit = DUTY_TIME_LIMIT

    if total_duty_time > effective_duty_limit:
        return False

    # -----------------------------
    # Condition 3: Continuous driving CONTINUOUS_DRIVE_LIMIT rule
    # -----------------------------
    continuous_drive = 0
    last_index = len(real_services) - 1

    for i, current_service in enumerate(real_services):
        continuous_drive += current_service.service_time or 0

        # Check BEFORE looking at the following gap: a qualifying break after
        # this service must not erase a stretch that already went over the limit.
        if continuous_drive > CONTINUOUS_DRIVE_LIMIT:
            return False

        if i < last_index:
            gap_btw_service = real_services[i + 1].start_time - current_service.end_time

            if gap_btw_service < SHORT_BREAK:
                # Too short to be a break: the waiting time counts as driving
                continuous_drive += gap_btw_service
                if continuous_drive > CONTINUOUS_DRIVE_LIMIT:
                    return False
            else:
                # A real break: start a new continuous-driving stretch
                continuous_drive = 0

    # -----------------------------
    # All conditions passed
    # -----------------------------
    return True


## Checking Final Path

In [7]:
from functools import lru_cache

@lru_cache(maxsize=None)
def get_jurisdiction_groups(station):
    """
    Look up which jurisdiction groups contain ``station``.

    Scans the global ``jurisdiction_dict`` and collects the id of every
    group whose station set contains ``station``.

    Results are memoised with ``lru_cache``, so each distinct station is
    resolved once; edits made to ``jurisdiction_dict`` afterwards are not
    reflected for stations that were already looked up.

    Returns:
        frozenset of group ids (empty when the station is in no group).
        A frozenset is immutable, so the cached value cannot be altered by
        callers and works with set operations such as ``isdisjoint``.
    """
    matching_ids = set()
    for group_id, member_stations in jurisdiction_dict.items():
        if station in member_stations:
            matching_ids.add(group_id)
    return frozenset(matching_ids)


def is_final_path_valid(path):
    """
    Final acceptance test for a complete path (dummy source ... dummy sink).

    A path is valid when both hold:
    1. The start station of the first service (``path[1]``) and the end
       station of the last service (``path[-2]``) share at least one
       jurisdiction group.
    2. ``has_required_breaks`` accepts the path.

    The jurisdiction test runs first because it is the cheaper of the two;
    the start groups it computes are handed to ``has_required_breaks`` so
    they are not looked up again.
    """
    first_duty_groups = get_jurisdiction_groups(path[1].start_station)
    last_duty_groups = get_jurisdiction_groups(path[-2].end_station)

    # Break rules are only evaluated when the jurisdictions overlap
    if not first_duty_groups.isdisjoint(last_duty_groups):
        if has_required_breaks(path, start_jurisdictions=first_duty_groups):
            return True

    return False


def has_required_breaks(path, start_jurisdictions=None):
    """
    Check the break rules for a complete path (dummy source ... dummy sink).

    A gap between two consecutive services counts as a break only when the
    earlier service ends at a station in BREAK_STATIONS and the gap is at
    least SHORT_BREAK minutes.

    The path is accepted when all of these hold:
    - the path has at least 6 elements (dummies included);
    - there is at least one break;
    - at least one break station shares a jurisdiction group with the
      start station of the first service;
    - the breaks sum to no more than CUMULATIVE_BREAKS_DURATION;
    - at least one break is >= LONG_BREAK.

    The last rule covers both documented cases: a single break must be a
    long one, and several breaks need at least one long one among them
    (every counted break is already >= SHORT_BREAK, so "no long break"
    means "only short breaks").

    Args:
        path: full path including both dummy nodes.
        start_jurisdictions: optional precomputed jurisdiction groups of the
            first service's start station; looked up via
            ``get_jurisdiction_groups`` when omitted.
    """
    if len(path) < 6:
        return False  # skip very short paths

    if start_jurisdictions is None:
        start_jurisdictions = get_jurisdiction_groups(path[1].start_station)

    duties = path[1:-1]  # real services only
    qualifying_breaks = []
    break_in_start_jurisdiction = False

    # Walk consecutive pairs; the break happens where the earlier service ends
    for earlier, later in zip(duties, duties[1:]):
        idle = later.start_time - earlier.end_time
        where = earlier.end_station

        if where in BREAK_STATIONS and idle >= SHORT_BREAK:
            qualifying_breaks.append(idle)
            if not start_jurisdictions.isdisjoint(get_jurisdiction_groups(where)):
                break_in_start_jurisdiction = True

    if not qualifying_breaks or not break_in_start_jurisdiction:
        return False

    if sum(qualifying_breaks) > CUMULATIVE_BREAKS_DURATION:
        return False

    return any(idle >= LONG_BREAK for idle in qualifying_breaks)


## Stack based DFS and saving feasible duties to CSV

In [None]:
def stack_based_all_paths(
    G, start_service, end_service,
    DRIVING_TIME_LIMIT, DUTY_TIME_LIMIT,
    max_paths=None,
    output_csv="valid_paths_using_Network_1010_1.csv"
):
    """
    Enumerates feasible paths from the dummy start to the dummy end using
    stack-based DFS and writes each valid one as a row of ``output_csv``.

    Args:
        G: graph whose nodes are service ids with the Service stored under
            the node attribute "data"; must provide ``successors(node_id)``
            and ``nodes[node_id]``.
        start_service: dummy source Service.
        end_service: dummy sink Service.
        DRIVING_TIME_LIMIT: forwarded to ``is_path_acceptable``.
        DUTY_TIME_LIMIT: forwarded to ``is_path_acceptable``.
        max_paths: stop after this many valid paths have been written.
            ``None`` (the default) means no cap. The builtin ``int`` type,
            which was the previous default, is also treated as "no cap".
        output_csv: destination file; overwritten on every call. Each row
            holds the service ids of one duty, without the dummy nodes.

    Returns:
        int: number of valid paths written.
    """
    unlimited = max_paths is None or max_paths is int

    valid_count = 0
    # Each stack entry: (node, path so far, accumulated driving time)
    stack = [(start_service, [start_service], 0)]

    with open(output_csv, mode='w', newline='') as csvfile:
        writer = csv.writer(csvfile)

        while stack and (unlimited or valid_count < max_paths):
            current_service, path, total_driving_time = stack.pop()

            for neighbor_id in G.successors(current_service.service_id):
                neighbor = G.nodes[neighbor_id]["data"]
                service_time = neighbor.service_time or 0

                # Gap between the path's last service and the candidate;
                # dummy nodes have no times, so their gap is taken as 0.
                if neighbor.start_time is not None and current_service.end_time is not None:
                    gap = neighbor.start_time - current_service.end_time
                else:
                    gap = 0

                # Gaps shorter than SHORT_BREAK count as driving time
                new_total_driving_time = total_driving_time + service_time
                if gap < SHORT_BREAK:
                    new_total_driving_time += gap

                new_path = path + [neighbor]

                if not is_path_acceptable(
                    new_path, end_service,
                    new_total_driving_time, DRIVING_TIME_LIMIT,
                    DUTY_TIME_LIMIT
                ):
                    continue

                if neighbor.service_id != end_service.service_id:
                    # Still a partial duty: keep exploring from here
                    stack.append((neighbor, new_path, new_total_driving_time))
                    continue

                if is_final_path_valid(new_path):
                    valid_count += 1
                    # One column per service, dummy S and T excluded
                    writer.writerow([s.service_id for s in new_path[1:-1]])

    print(f"Total valid paths found: {valid_count}")
    return valid_count


## Main Call

In [9]:
# Driver cell: load the timetable, build the connection graph, enumerate
# valid duties, and report how long each stage took.

# Start total timer
total_start_time = time.time()

# ================================
#  Load CSV services
# ================================
start_time = time.time()
# NOTE(review): absolute, machine-specific path; point this at your own copy
# of the timetable before running on another machine.
TIMETABLE_FILE = "C:/Users/srupe/Desktop/MTP/crew/mainLoop_aadesh.csv"
services = load_services(TIMETABLE_FILE)
end_time = time.time()
print(f"Loaded {len(services)} services in {end_time - start_time:.3f} seconds")

# ================================
# Build graph with Service objects
# ================================
start_time = time.time()
G = build_graph(services, start_service, end_service)
end_time = time.time()
print(f"Built graph with {len(G.nodes)} nodes and {len(G.edges)} edges in {end_time - start_time:.3f} seconds")

# ================================
# Call stack-based path finder
# ================================
start_time = time.time()
# Limits come from the configuration cell so a change there takes effect
# here too (previously the literals 360 and 445 were repeated).
valid_paths_count = stack_based_all_paths(
    G,
    start_service=start_service,
    end_service=end_service,
    DRIVING_TIME_LIMIT=DRIVING_TIME_LIMIT,
    DUTY_TIME_LIMIT=DUTY_TIME_LIMIT,
    max_paths=10000
)
end_time = time.time()
print(f"Found {valid_paths_count} valid paths in {end_time - start_time:.3f} seconds")

# ================================
# Total time
# ================================
total_end_time = time.time()
print(f"[Total Time] {total_end_time - total_start_time:.3f} seconds")


Loaded 944 services in 0.193 seconds
Built graph with 946 nodes and 23555 edges in 0.498 seconds
Total valid paths found: 10000
Found 10000 valid paths in 0.625 seconds
[Total Time] 1.316 seconds
