In [2]:
import pandas as pd
import numpy as np
from collections import deque
from datetime import datetime, timedelta

def main():
    print("\n=== Train Route Finder ===")
    print("Finds connecting routes between any two stations with distance, time, and cost calculations\n")

    # Load the data
    try:
        df = pd.read_csv('Train_details.csv')

        # Keep only trains with 5-digit train numbers
        df = df[df['Train No'].astype(str).str.len() == 5]

        # Add random Seat Availability (0 or 1)
        df['Seat Availability'] = np.random.choice([0, 1], size=len(df),p=[0,1])

        print("Successfully loaded train schedule data.")
    except FileNotFoundError:
        print("Error: Could not find 'Train_details.csv'. Please ensure the file is in the same directory.")
        return
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        return

    # Get user input
    while True:
        print("\nEnter station codes (3-4 letter codes like PGT, KOTA, etc.)")
        source = input("Enter origin station code: ").strip().upper()
        destination = input("Enter destination station code: ").strip().upper()

        if source not in df['Station Code'].values:
            print(f"Error: Station code '{source}' not found in database.")
            continue
        if destination not in df['Station Code'].values:
            print(f"Error: Station code '{destination}' not found in database.")
            continue
        break

    while True:
        try:
            max_connections = int(input("Enter maximum number of connections (1-4 recommended): "))
            if 0 <= max_connections <= 4:
                break
            print("Please enter a number between 0 and 4")
        except ValueError:
            print("Please enter a valid number")

    output_file = f"{source}_to_{destination}_routes.csv"
    routes_df = find_and_save_routes(df, source, destination, max_connections, output_file)

    if routes_df is not None:
        print("\nAnalysis of found routes:")
        analyze_routes(routes_df)
        print(f"\nDetailed routes saved to: {output_file}")

def find_and_save_routes(df, source, destination, max_connections=3, output_file='connecting_routes.csv'):
    graph = {}
    train_info = {}

    for _, row in df.iterrows():
        train_no = row['Train No']
        station = row['Station Code']
        dest_station = row['Destination Station']
        distance = row['Distance']

        if station not in graph:
            graph[station] = {}
        if train_no not in graph[station]:
            graph[station][train_no] = dest_station

        if train_no not in train_info:
            train_info[train_no] = {
                'name': row['Train Name'],
                'seq': {},
                'source': row['Source Station'],
                'dest': row['Destination Station'],
                'seat_availability': row['Seat Availability']
            }

        train_info[train_no]['seq'][row['SEQ']] = {
            'station': station,
            'arrival': row['Arrival time'],
            'departure': row['Departure Time'],
            'distance': distance
        }

    def calculate_time_diff(start_time, end_time):
        try:
            fmt = '%H:%M:%S'
            t1 = datetime.strptime(start_time, fmt)
            t2 = datetime.strptime(end_time, fmt)
            if t2 < t1:
                t2 += timedelta(days=1)
            return (t2 - t1).total_seconds() / 3600
        except:
            return 0

    def format_time(hours):
        """Formats time in hours to HHh MMm format."""
        hr = int(hours)
        mins = int((hours % 1) * 60)
        return f"{hr}h {mins}m"

    # Logic to find nearby stations with direct trains to destination within 100km range of palakad
    def find_nearby_stations_with_direct_trains(source, destination, max_distance=100):
        nearby_stations = set()

        # Get trains originating from palakad
        source_trains = df[df['Source Station'] == source]['Train No'].unique()

        # Iterate through each train originating from palakad
        for train in source_trains:
            # Get the sequence of stations for this train
            train_data = df[df['Train No'] == train].sort_values('Distance')

            # If palakad is in the sequence, find nearby stations
            if source in train_data['Station Code'].values:
                # Convert 'Distance' column to numeric, handling errors
                train_data['Distance'] = pd.to_numeric(train_data['Distance'], errors='coerce')
                source_distance = train_data[train_data['Station Code'] == source]['Distance'].values[0]

                # Find nearby stations within the max distance
                near_stations = train_data[abs(train_data['Distance'] - source_distance) <= max_distance]

                # Check if nearby stations have direct trains to the destination
                for station in near_stations['Station Code'].values:
                    if station != source and destination in df[df['Station Code'] == station]['Destination Station'].values:
                        nearby_stations.add(station)
        print(nearby_stations)

        return nearby_stations


    def bfs(start, end, max_depth):
        queue = deque()
        queue.append((start, [], 0))
        visited = set()
        routes = []

        while queue:
            current, path, depth = queue.popleft()
            if current == end:
                routes.append(path)
                continue
            if depth >= max_depth:
                continue
            if current in visited:
                continue
            visited.add(current)
            if current not in graph:
                continue
            for train_no, next_station in graph[current].items():
                seq_info = train_info[train_no]['seq']
                stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]
                try:
                    current_idx = stations_in_order.index(current)
                    next_idx = stations_in_order.index(next_station)
                    if next_idx > current_idx:
                        new_path = path + [(train_no, current, next_station)]
                        queue.append((next_station, new_path, depth + 1))
                except ValueError:
                    continue
        return routes

    all_routes = []
    for connections in range(1, max_connections + 1):
        routes = bfs(source, destination, connections)
        all_routes.extend(routes)

    csv_data = []
    for route_num, route in enumerate(all_routes, 1):
        total_trains = len(route)
        total_distance = 0
        total_duration_hours = 0
        route_segments = []

        for segment in route:
            train_no, from_station, to_station = segment
            train_name = train_info[train_no]['name']
            seat_available = train_info[train_no]['seat_availability']

            seq_info = train_info[train_no]['seq']
            for seq, info in seq_info.items():
                if info['station'] == from_station:
                    departure = info['departure']
                    from_distance = float(info['distance'])
                if info['station'] == to_station:
                    arrival = info['arrival']
                    to_distance = float(info['distance'])

            segment_distance = abs(to_distance - from_distance)
            total_distance += segment_distance

             # Calculate segment duration based on distance
            if segment_distance > 1800:
                segment_duration = segment_distance / 58  # Superfast long-distance
            elif segment_distance > 1000:
                segment_duration = segment_distance / 60
            elif segment_distance > 500:
                segment_duration = segment_distance / 55
            elif segment_distance > 300:
                segment_duration = segment_distance / 50
            elif segment_distance > 150:
                segment_duration = segment_distance / 45
            else:
                segment_duration = segment_distance / 38  # Local or short distance
            wait_time = 0
            total_duration_hours += segment_duration
            if route.index(segment) > 0:
                prev_arrival = route_segments[-1]['Arrival Time']
                wait_time = calculate_time_diff(prev_arrival, departure)
                if wait_time < 0:
                    wait_time += 24
                total_duration_hours += wait_time

            route_segments.append({
                'Train Number': train_no,
                'Train Name': train_name,
                'From Station': from_station,
                'Departure Time': departure,
                'To Station': to_station,
                'Arrival Time': arrival,
                'Segment Distance (km)': segment_distance,
                'Segment Duration (hrs)': format_time(segment_duration),
                'Seat Availability': seat_available,
                'Waiting Time Before Segment (hrs)': format_time(wait_time)


            })

        total_cost = total_distance * 1
        hours = int(total_duration_hours)
        minutes = int((total_duration_hours % 1) * 60)
        total_duration = f"{hours}h {minutes}m"

        for segment_num, segment in enumerate(route_segments, 1):
            csv_data.append({
                'Route Number': route_num,
                'Total Trains in Route': total_trains,
                'Segment Number': segment_num,
                'Train Number': segment['Train Number'],
                'Train Name': segment['Train Name'],
                'From Station': segment['From Station'],
                'Departure Time': segment['Departure Time'],
                'To Station': segment['To Station'],
                'Arrival Time': segment['Arrival Time'],
                'Segment Distance (km)': segment['Segment Distance (km)'],
                'Segment Duration (hrs)': segment['Segment Duration (hrs)'],
                'Seat Availability': segment['Seat Availability'],
                'Waiting Time Before Segment (hrs)': segment['Waiting Time Before Segment (hrs)'],
                'Total Route Distance (km)': total_distance,
                'Total Route Duration': total_duration,
                'Estimated Cost (₹)': total_cost
            })

    if csv_data:
        routes_df = pd.DataFrame(csv_data)
        columns = [
            'Route Number', 'Total Trains in Route', 'Segment Number',
            'Train Number', 'Train Name', 'From Station', 'Departure Time',
            'To Station', 'Arrival Time', 'Segment Distance (km)',
            'Segment Duration (hrs)', 'Seat Availability', 'Waiting Time Before Segment (hrs)',
            'Total Route Distance (km)', 'Total Route Duration', 'Estimated Cost (₹)'
        ]
        routes_df = routes_df[columns]
        routes_df.to_csv(output_file, index=False)
        print(f"\nSuccessfully saved {len(all_routes)} routes to {output_file}")
        return routes_df
    else:
        print("\nNo valid routes found with the given parameters.")
        return None

def analyze_routes(routes_df):
    unique_routes = routes_df['Route Number'].unique()
    print(f"\nFound {len(unique_routes)} unique routes:")

    for route_num in unique_routes:
        route_data = routes_df[routes_df['Route Number'] == route_num]
        first_row = route_data.iloc[0]
        print(f"\nRoute {route_num}:")
        print(f"- Trains required: {first_row['Total Trains in Route']}")
        print(f"- Total distance: {first_row['Total Route Distance (km)']} km")
        print(f"- Total duration: {first_row['Total Route Duration']}")
        print(f"- Estimated cost: ₹{first_row['Estimated Cost (₹)']}")

        print("  Segments:")
        for _, segment in route_data.iterrows():
            print(f"  {segment['Segment Number']}. {segment['Train Name']} ({segment['Train Number']})")
            print(f"     {segment['From Station']} ({segment['Departure Time']}) → {segment['To Station']} ({segment['Arrival Time']})")
            print(f"     Distance: {segment['Segment Distance (km)']} km | Duration: {segment['Segment Duration (hrs)']} hrs | Seat Available: {segment['Seat Availability']}")

    routes_summary = routes_df.drop_duplicates('Route Number')

    if len(routes_summary) > 1:
        fastest = routes_summary.loc[routes_summary['Total Route Duration'].idxmin()]
        shortest = routes_summary.loc[routes_summary['Total Route Distance (km)'].idxmin()]
        cheapest = routes_summary.loc[routes_summary['Estimated Cost (₹)'].idxmin()]

        print("\nRecommendations:")
        print(f"- Fastest route: Route {fastest['Route Number']} ({fastest['Total Route Duration']})")
        print(f"- Shortest route: Route {shortest['Route Number']} ({shortest['Total Route Distance (km)']} km)")
        print(f"- Cheapest route: Route {cheapest['Route Number']} (₹{cheapest['Estimated Cost (₹)']})")

if __name__ == "__main__":
    main()


=== Train Route Finder ===
Finds connecting routes between any two stations with distance, time, and cost calculations



  df = pd.read_csv('Train_details.csv')


Successfully loaded train schedule data.

Enter station codes (3-4 letter codes like PGT, KOTA, etc.)

Successfully saved 153 routes to PGT_to_KOTA_routes.csv

Analysis of found routes:

Found 153 unique routes:

Route 1:
- Trains required: 2
- Total distance: 3100.0 km
- Total duration: 55h 12m
- Estimated cost: ₹3100.0
  Segments:
  1. KERALA EXPRE (12625)
     PGT (18:45:00) → NDLS (13:45:00)
     Distance: 2643.0 km | Duration: 45h 34m hrs | Seat Available: 1
  2. SVDK-KOTA WE (19804)
     NDLS (14:15:00) → KOTA (23:50:00)
     Distance: 457.0 km | Duration: 9h 8m hrs | Seat Available: 1

Route 2:
- Trains required: 2
- Total distance: 3100.0 km
- Total duration: 68h 37m
- Estimated cost: ₹3100.0
  Segments:
  1. KERALA EXPRE (12625)
     PGT (18:45:00) → NDLS (13:45:00)
     Distance: 2643.0 km | Duration: 45h 34m hrs | Seat Available: 1
  2. UHP-KOTA WEE (19806)
     NDLS (03:40:00) → KOTA (11:10:00)
     Distance: 457.0 km | Duration: 9h 8m hrs | Seat Available: 1

Route 3:
- Tr

In [3]:
import pandas as pd
import numpy as np
from collections import deque
from datetime import datetime, timedelta

def main():
    print("\n=== Train Route Finder ===")
    print("Finds connecting routes between any two stations with distance, time, and cost calculations\n")

    # Load the data
    try:
        df = pd.read_csv('Train_details.csv')

        # Keep only trains with 5-digit train numbers
        df = df[df['Train No'].astype(str).str.len() == 5]

        # Add random Seat Availability (0 or 1)
        df['Seat Availability'] = np.random.choice([0, 1], size=len(df),p=[0.2,0.8])

        print("Successfully loaded train schedule data.")
    except FileNotFoundError:
        print("Error: Could not find 'Train_details.csv'. Please ensure the file is in the same directory.")
        return
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        return

    # Get user input
    while True:
        print("\nEnter station codes (3-4 letter codes like PGT, KOTA, etc.)")
        source = input("Enter origin station code: ").strip().upper()
        destination = input("Enter destination station code: ").strip().upper()

        if source not in df['Station Code'].values:
            print(f"Error: Station code '{source}' not found in database.")
            continue
        if destination not in df['Station Code'].values:
            print(f"Error: Station code '{destination}' not found in database.")
            continue
        break

    while True:
        try:
            max_connections = int(input("Enter maximum number of connections (1-4 recommended): "))
            if 0 <= max_connections <= 4:
                break
            print("Please enter a number between 0 and 4")
        except ValueError:
            print("Please enter a valid number")

    output_file = f"{source}_to_{destination}_routes.csv"
    routes_df = find_and_save_routes(df, source, destination, max_connections, output_file)

    if routes_df is not None:
        print("\nAnalysis of found routes:")
        analyze_routes(routes_df)
        print(f"\nDetailed routes saved to: {output_file}")
        print("\n=== Nearby 2-hour junctions to catch train to destination ===")
        suggest_nearby_junctions(df, source, destination)

# def suggest_nearby_junctions(df, source, destination):  # Define the function at the top level
#     # Get all station codes that appear in the data
#     station_codes = df['Station Code'].unique()

#     # Find distances from source to all other stations (ignoring direct train requirement)
#     source_rows = df[df['Station Code'] == source]
#     nearby_station_codes = set()

#     for train in source_rows['Train No'].unique():
#           train_data = df[df['Train No'] == train].sort_values('Distance')
#           if source in train_data['Station Code'].values:
#               # Convert 'Distance' column to numeric, handling errors
#               train_data['Distance'] = pd.to_numeric(train_data['Distance'], errors='coerce')
#               source_distance = train_data[train_data['Station Code'] == source]['Distance'].values[0]
#               train_data['DistanceFromSource'] = abs(train_data['Distance'] - source_distance)
#               near_stations = train_data[train_data['DistanceFromSource'] <= 150]
#               nearby_station_codes.update(near_stations['Station Code'].values)
#     # Now check if these nearby stations have a direct train to the destination
#     potential_junctions = []
#     for station in nearby_station_codes:
#         direct = df[(df['Station Code'] == station) & (df['Destination Station'] == destination)]
#         if not direct.empty:
#             potential_junctions.append((station, round(direct['Distance'].values[0], 2)))

#     if potential_junctions:
#         print("\nStations within ~2 hours (~150 km) that have direct train to destination:")
#         for junc, dist in potential_junctions:
#             print(f"- {junc} ({dist} km approx. from {source})")
#     else:
#         print("No nearby junctions found with direct train to destination within 2 hours travel.")

def find_and_save_routes(df, source, destination, max_connections=3, output_file='connecting_routes.csv'):
    graph = {}
    train_info = {}

    for _, row in df.iterrows():
        train_no = row['Train No']
        station = row['Station Code']
        dest_station = row['Destination Station']
        distance = row['Distance']

        if station not in graph:
            graph[station] = {}
        if train_no not in graph[station]:
            graph[station][train_no] = dest_station

        if train_no not in train_info:
            train_info[train_no] = {
                'name': row['Train Name'],
                'seq': {},
                'source': row['Source Station'],
                'dest': row['Destination Station'],
                'seat_availability': row['Seat Availability']
            }

        train_info[train_no]['seq'][row['SEQ']] = {
            'station': station,
            'arrival': row['Arrival time'],
            'departure': row['Departure Time'],
            'distance': distance
        }

    def calculate_time_diff(start_time, end_time):
        try:
            fmt = '%H:%M:%S'
            t1 = datetime.strptime(start_time, fmt)
            t2 = datetime.strptime(end_time, fmt)
            if t2 < t1:
                t2 += timedelta(days=1)
            return (t2 - t1).total_seconds() / 3600
        except:
            return 0

    def format_wait_time(hours):
      hr = int(hours)
      mins = int((hours % 1) * 60)
      return f"{hr}h {mins}m"

    def suggest_nearby_junctions(df, source, destination):
      # Get all station codes that appear in the data
      station_codes = df['Station Code'].unique()

      # Find distances from source to all other stations (ignoring direct train requirement)
      source_rows = df[df['Station Code'] == source]
      nearby_station_codes = set()

      for train in source_rows['Train No'].unique():
          train_data = df[df['Train No'] == train].sort_values('Distance')
          if source in train_data['Station Code'].values:
              source_distance = train_data[train_data['Station Code'] == source]['Distance'].values[0]
              train_data['DistanceFromSource'] = abs(train_data['Distance'] - source_distance)
              near_stations = train_data[train_data['DistanceFromSource'] <= 150]
              nearby_station_codes.update(near_stations['Station Code'].values)

      # Now check if these nearby stations have a direct train to the destination
      potential_junctions = []
      for station in nearby_station_codes:
          direct = df[(df['Station Code'] == station) & (df['Destination Station'] == destination)]
          if not direct.empty:
              potential_junctions.append((station, round(direct['Distance'].values[0], 2)))

      if potential_junctions:
          print("\nStations within ~2 hours (~150 km) that have direct train to destination:")
          for junc, dist in potential_junctions:
              print(f"- {junc} ({dist} km approx. from {source})")
      else:
          print("No nearby junctions found with direct train to destination within 2 hours travel.")


    def bfs(start, end, max_depth):
        queue = deque()
        queue.append((start, [], 0))
        visited = set()
        routes = []

        while queue:
            current, path, depth = queue.popleft()
            if current == end:
                routes.append(path)
                continue
            if depth >= max_depth:
                continue
            if current in visited:
                continue
            visited.add(current)
            if current not in graph:
                continue
            for train_no, next_station in graph[current].items():
                seq_info = train_info[train_no]['seq']
                stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]
                try:
                    current_idx = stations_in_order.index(current)
                    next_idx = stations_in_order.index(next_station)
                    if next_idx > current_idx:
                        new_path = path + [(train_no, current, next_station)]
                        queue.append((next_station, new_path, depth + 1))
                except ValueError:
                    continue
        return routes

    all_routes = []
    for connections in range(1, max_connections + 1):
        routes = bfs(source, destination, connections)
        all_routes.extend(routes)

    csv_data = []
    for route_num, route in enumerate(all_routes, 1):
        total_trains = len(route)
        total_distance = 0
        total_duration_hours = 0
        route_segments = []

        for segment in route:
            train_no, from_station, to_station = segment
            train_name = train_info[train_no]['name']
            seat_available = train_info[train_no]['seat_availability']

            seq_info = train_info[train_no]['seq']
            for seq, info in seq_info.items():
                if info['station'] == from_station:
                    departure = info['departure']
                    from_distance = float(info['distance'])
                if info['station'] == to_station:
                    arrival = info['arrival']
                    to_distance = float(info['distance'])

            segment_distance = abs(to_distance - from_distance)
            total_distance += segment_distance

             # Calculate segment duration based on distance
            if segment_distance > 1800:
                segment_duration = segment_distance / 58  # Superfast long-distance
            elif segment_distance > 1000:
                segment_duration = segment_distance / 60
            elif segment_distance > 500:
                segment_duration = segment_distance / 55
            elif segment_distance > 300:
                segment_duration = segment_distance / 50
            elif segment_distance > 150:
                segment_duration = segment_distance / 45
            else:
                segment_duration = segment_distance / 38  # Local or short distance
            wait_time = 0
            total_duration_hours += segment_duration
            if route.index(segment) > 0:
                prev_arrival = route_segments[-1]['Arrival Time']
                wait_time = calculate_time_diff(prev_arrival, departure)
                if wait_time < 0:
                    wait_time += 24
                total_duration_hours += wait_time

            route_segments.append({
                'Train Number': train_no,
                'Train Name': train_name,
                'From Station': from_station,
                'Departure Time': departure,
                'To Station': to_station,
                'Arrival Time': arrival,
                'Segment Distance (km)': segment_distance,
                'Segment Duration (hrs)': round(segment_duration, 2),
                'Seat Availability': seat_available,
                'Waiting Time Before Segment (hrs)': round(wait_time, 2)


            })

        total_cost = total_distance * 1
        hours = int(total_duration_hours)
        minutes = int((total_duration_hours % 1) * 60)
        total_duration = f"{hours}h {minutes}m"

        for segment_num, segment in enumerate(route_segments, 1):
            csv_data.append({
                'Route Number': route_num,
                'Total Trains in Route': total_trains,
                'Segment Number': segment_num,
                'Train Number': segment['Train Number'],
                'Train Name': segment['Train Name'],
                'From Station': segment['From Station'],
                'Departure Time': segment['Departure Time'],
                'To Station': segment['To Station'],
                'Arrival Time': segment['Arrival Time'],
                'Segment Distance (km)': segment['Segment Distance (km)'],
                'Segment Duration (hrs)': segment['Segment Duration (hrs)'],
                'Seat Availability': segment['Seat Availability'],
                'Waiting Time Before Segment (hrs)': segment['Waiting Time Before Segment (hrs)'],
                'Total Route Distance (km)': total_distance,
                'Total Route Duration': total_duration,
                'Estimated Cost (₹)': total_cost
            })

    if csv_data:
        routes_df = pd.DataFrame(csv_data)
        columns = [
            'Route Number', 'Total Trains in Route', 'Segment Number',
            'Train Number', 'Train Name', 'From Station', 'Departure Time',
            'To Station', 'Arrival Time', 'Segment Distance (km)',
            'Segment Duration (hrs)', 'Seat Availability', 'Waiting Time Before Segment (hrs)',
            'Total Route Distance (km)', 'Total Route Duration', 'Estimated Cost (₹)'
        ]
        routes_df = routes_df[columns]
        routes_df.to_csv(output_file, index=False)
        print(f"\nSuccessfully saved {len(all_routes)} routes to {output_file}")
        return routes_df
    else:
        print("\nNo valid routes found with the given parameters.")
        return None

def analyze_routes(routes_df):
    unique_routes = routes_df['Route Number'].unique()
    print(f"\nFound {len(unique_routes)} unique routes:")

    for route_num in unique_routes:
        route_data = routes_df[routes_df['Route Number'] == route_num]
        first_row = route_data.iloc[0]
        print(f"\nRoute {route_num}:")
        print(f"- Trains required: {first_row['Total Trains in Route']}")
        print(f"- Total distance: {first_row['Total Route Distance (km)']} km")
        print(f"- Total duration: {first_row['Total Route Duration']}")
        print(f"- Estimated cost: ₹{first_row['Estimated Cost (₹)']}")

        print("  Segments:")
        for _, segment in route_data.iterrows():
            print(f"  {segment['Segment Number']}. {segment['Train Name']} ({segment['Train Number']})")
            print(f"     {segment['From Station']} ({segment['Departure Time']}) → {segment['To Station']} ({segment['Arrival Time']})")
            print(f"     Distance: {segment['Segment Distance (km)']} km | Duration: {segment['Segment Duration (hrs)']} hrs | Seat Available: {segment['Seat Availability']}")

    routes_summary = routes_df.drop_duplicates('Route Number')

    if len(routes_summary) > 1:
        fastest = routes_summary.loc[routes_summary['Total Route Duration'].idxmin()]
        shortest = routes_summary.loc[routes_summary['Total Route Distance (km)'].idxmin()]
        cheapest = routes_summary.loc[routes_summary['Estimated Cost (₹)'].idxmin()]

        print("\nRecommendations:")
        print(f"- Fastest route: Route {fastest['Route Number']} ({fastest['Total Route Duration']})")
        print(f"- Shortest route: Route {shortest['Route Number']} ({shortest['Total Route Distance (km)']} km)")
        print(f"- Cheapest route: Route {cheapest['Route Number']} (₹{cheapest['Estimated Cost (₹)']})")

if __name__ == "__main__":
    main()



=== Train Route Finder ===
Finds connecting routes between any two stations with distance, time, and cost calculations



  df = pd.read_csv('Train_details.csv')


Successfully loaded train schedule data.

Enter station codes (3-4 letter codes like PGT, KOTA, etc.)

Successfully saved 153 routes to PGT_to_KOTA_routes.csv

Analysis of found routes:

Found 153 unique routes:

Route 1:
- Trains required: 2
- Total distance: 3100.0 km
- Total duration: 55h 12m
- Estimated cost: ₹3100.0
  Segments:
  1. KERALA EXPRE (12625)
     PGT (18:45:00) → NDLS (13:45:00)
     Distance: 2643.0 km | Duration: 45.57 hrs | Seat Available: 1
  2. SVDK-KOTA WE (19804)
     NDLS (14:15:00) → KOTA (23:50:00)
     Distance: 457.0 km | Duration: 9.14 hrs | Seat Available: 0

Route 2:
- Trains required: 2
- Total distance: 3100.0 km
- Total duration: 68h 37m
- Estimated cost: ₹3100.0
  Segments:
  1. KERALA EXPRE (12625)
     PGT (18:45:00) → NDLS (13:45:00)
     Distance: 2643.0 km | Duration: 45.57 hrs | Seat Available: 1
  2. UHP-KOTA WEE (19806)
     NDLS (03:40:00) → KOTA (11:10:00)
     Distance: 457.0 km | Duration: 9.14 hrs | Seat Available: 1

Route 3:
- Trains r

NameError: name 'suggest_nearby_junctions' is not defined


=== Train Route Finder ===
Finds connecting routes between any two stations with distance, time, and cost calculations



  df = pd.read_csv('Train_details.csv')


Successfully loaded train schedule data.

Enter station codes (3-4 letter codes like PGT, KOTA, etc.)
Enter origin station code: PGT
Enter destination station code: KOTA
Enter maximum number of connections (1-4 recommended): 4

Successfully saved 153 routes to PGT_to_KOTA_routes.csv

Analysis of found routes:

Found 153 unique routes:

Route 1:
- Trains required: 2
- Total distance: 3100.0 km
- Total duration: 55h 12m
- Estimated cost: ₹3100.0
  Segments:
  1. KERALA EXPRE (12625)
     PGT (18:45:00) → NDLS (13:45:00)
     Distance: 2643.0 km | Duration: 45h 34m hrs | Seat Available: 1
  2. SVDK-KOTA WE (19804)
     NDLS (14:15:00) → KOTA (23:50:00)
     Distance: 457.0 km | Duration: 9h 8m hrs | Seat Available: 1

Route 2:
- Trains required: 2
- Total distance: 3100.0 km
- Total duration: 68h 37m
- Estimated cost: ₹3100.0
  Segments:
  1. KERALA EXPRE (12625)
     PGT (18:45:00) → NDLS (13:45:00)
     Distance: 2643.0 km | Duration: 45h 34m hrs | Seat Available: 1
  2. UHP-KOTA WEE (1

In [None]:
# import pandas as pd
# import numpy as np
# import networkx as nx
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# from uuid import uuid4

# def convert_to_minutes(duration_str):
#     """Convert duration string (e.g., '53h 5m') to minutes."""
#     try:
#         if pd.isna(duration_str):
#             return np.nan
#         h, m = duration_str.lower().replace('h', '').replace('m', '').split()
#         return int(h) * 60 + int(m)
#     except:
#         return np.nan

# def load_and_clean_data(file_path):
#     """Load and preprocess train route data."""
#     try:
#         df = pd.read_csv(file_path)
#     except FileNotFoundError:
#         raise FileNotFoundError("CSV file not found.")

#     df_available = df[df["Seat Availability"] == 1].copy()
#     df_available["Total Duration (min)"] = df_available["Total Route Duration"].apply(convert_to_minutes)
#     df_available = df_available.dropna(subset=["Total Duration (min)"])

#     # Calculate waiting time between segments (if applicable)
#     df_available = df_available.sort_values(["Route Number", "Segment Number"])
#     df_available["Next Departure"] = df_available.groupby("Route Number")["Departure Time"].shift(-1)
#     df_available["Arrival Time"] = pd.to_datetime(df_available["Arrival Time"], format="%H:%M", errors="coerce")
#     df_available["Next Departure"] = pd.to_datetime(df_available["Next Departure"], format="%H:%M", errors="coerce")
#     df_available["Waiting Time (min)"] = (
#         (df_available["Next Departure"] - df_available["Arrival Time"]).dt.total_seconds() / 60
#     ).fillna(0)

#     return df_available

# def calculate_multi_objective_score(df_routes):
#     """Calculate a normalized multi-objective score for routes."""
#     # Normalize features (min-max normalization)
#     df_routes["Norm Duration"] = (
#         df_routes["Total Duration (min)"] - df_routes["Total Duration (min)"].min()
#     ) / (df_routes["Total Duration (min)"].max() - df_routes["Total Duration (min)"].min() + 1e-6)

#     df_routes["Norm Cost"] = (
#         df_routes["Estimated Cost (₹)"] - df_routes["Estimated Cost (₹)"].min()
#     ) / (df_routes["Estimated Cost (₹)"].max() - df_routes["Estimated Cost (₹)"].min() + 1e-6)

#     df_routes["Norm Transfers"] = (
#         df_routes["Num Transfers"] - df_routes["Num Transfers"].min()
#     ) / (df_routes["Num Transfers"].max() - df_routes["Num Transfers"].min() + 1e-6)

#     df_routes["Norm Waiting"] = (
#         df_routes["Total Waiting (min)"] - df_routes["Total Waiting (min)"].min()
#     ) / (df_routes["Total Waiting (min)"].max() - df_routes["Total Waiting (min)"].min() + 1e-6)

#     # Weighted score (adjust weights based on priority)
#     weights = {
#         "duration": 0.4,
#         "cost": 0.3,
#         "transfers": 0.2,
#         "waiting": 0.1
#     }
#     df_routes["Score"] = (
#         weights["duration"] * df_routes["Norm Duration"] +
#         weights["cost"] * df_routes["Norm Cost"] +
#         weights["transfers"] * df_routes["Norm Transfers"] +
#         weights["waiting"] * df_routes["Norm Waiting"]
#     )

#     return df_routes

# def optimize_routes(df_available):
#     """Find optimal and near-optimal routes using multi-objective optimization."""
#     # Aggregate route-level metrics
#     df_routes = df_available.groupby("Route Number").agg({
#         "Total Duration (min)": "first",
#         "Estimated Cost (₹)": "first",
#         "Total Route Distance (km)": "first",
#         "Segment Number": "count",
#         "Waiting Time (min)": "sum"
#     }).reset_index()

#     df_routes["Num Transfers"] = df_routes["Segment Number"] - 1
#     df_routes["Total Waiting (min)"] = df_routes["Waiting Time (min)"]

#     # Calculate multi-objective score
#     df_routes = calculate_multi_objective_score(df_routes)

#     # Find optimal route (lowest score)
#     optimal_route = df_routes.loc[df_routes["Score"].idxmin()]

#     # Find near-optimal routes (within 10% of best score)
#     threshold_score = optimal_route["Score"] * 1.1
#     near_optimal_routes = df_routes[
#         (df_routes["Score"] <= threshold_score) &
#         (df_routes["Route Number"] != optimal_route["Route Number"])
#     ].sort_values("Score").head(5)

#     return optimal_route, near_optimal_routes

# def visualize_route_interactive(df, optimal_route_number):
#     """Create an interactive visualization of the optimal route using Plotly."""
#     G = nx.DiGraph()
#     optimal_segments = df[df["Route Number"] == optimal_route_number]

#     for _, row in optimal_segments.iterrows():
#         G.add_edge(
#             row["From Station"],
#             row["To Station"],
#             label=f'{row["Train Name"]} ({row["Segment Duration (hrs)"]} hrs)',
#             duration=row["Segment Duration (hrs)"],
#             train=row["Train Name"]
#         )

#     # Linear layout for sequential stations
#     pos = {station: (i, 0) for i, station in enumerate(G.nodes())}

#     # Prepare Plotly traces
#     edge_x, edge_y = [], []
#     edge_text = []
#     for edge in G.edges():
#         x0, y0 = pos[edge[0]]
#         x1, y1 = pos[edge[1]]
#         edge_x.extend([x0, x1, None])
#         edge_y.extend([y0, y1, None])
#         edge_text.append(G[edge[0]][edge[1]]["label"])

#     edge_trace = go.Scatter(
#         x=edge_x, y=edge_y,
#         line=dict(width=2, color='blue'),
#         hoverinfo='none',
#         mode='lines'
#     )

#     node_x, node_y = [], []
#     for node in G.nodes():
#         x, y = pos[node]
#         node_x.append(x)
#         node_y.append(y)

#     node_trace = go.Scatter(
#         x=node_x, y=node_y,
#         mode='markers+text',
#         text=list(G.nodes()),
#         textposition="top center",
#         marker=dict(size=20, color='lightblue', line=dict(width=2, color='black')),
#         hoverinfo='text'
#     )

#     # Create figure
#     fig = go.Figure(data=[edge_trace, node_trace],
#                     layout=go.Layout(
#                         title="🚆 Optimal Train Route: Palakkad to Kota",
#                         showlegend=False,
#                         hovermode='closest',
#                         margin=dict(b=20, l=5, r=5, t=40),
#                         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
#                         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
#                     ))

#     # Add edge annotations
#     for i, edge in enumerate(G.edges()):
#         x0, y0 = pos[edge[0]]
#         x1, y1 = pos[edge[1]]
#         fig.add_annotation(
#             x=(x0 + x1) / 2, y=(y0 + y1) / 2 + 0.1,
#             text=G[edge[0]][edge[1]]["label"],
#             showarrow=False,
#             font=dict(size=10)
#         )

#     fig.show()

# def visualize_pareto_front(df_routes):
#     """Visualize the Pareto front for duration vs. cost."""
#     fig = go.Figure()

#     # Scatter plot of all routes
#     fig.add_trace(go.Scatter(
#         x=df_routes["Total Duration (min)"],
#         y=df_routes["Estimated Cost (₹)"],
#         mode='markers',
#         marker=dict(size=8, color=df_routes["Score"], colorscale='Viridis', showscale=True),
#         text=df_routes["Route Number"],
#         hovertemplate="Route: %{text}<br>Duration: %{x} min<br>Cost: ₹%{y}<br>Score: %{marker.color:.2f}"
#     ))

#     # Highlight optimal route
#     optimal_route = df_routes.loc[df_routes["Score"].idxmin()]
#     fig.add_trace(go.Scatter(
#         x=[optimal_route["Total Duration (min)"]],
#         y=[optimal_route["Estimated Cost (₹)"]],
#         mode='markers',
#         marker=dict(size=12, color='red', symbol='star'),
#         name='Optimal Route'
#     ))

#     fig.update_layout(
#         title="Pareto Front: Duration vs. Cost",
#         xaxis_title="Total Duration (min)",
#         yaxis_title="Estimated Cost (₹)",
#         showlegend=True
#     )

#     fig.show()

# def main(file_path):
#     # Load and clean data
#     df_available = load_and_clean_data(file_path)

#     # Optimize routes
#     optimal_route, near_optimal_routes = optimize_routes(df_available)

#     # Print results
#     print("\n✅ Optimal Route Details:")
#     print(optimal_route[["Route Number", "Total Duration (min)", "Estimated Cost (₹)",
#                         "Num Transfers", "Total Waiting (min)", "Score"]])

#     print("\n📌 Segments in Optimal Route:")
#     optimal_segments = df_available[df_available["Route Number"] == optimal_route["Route Number"]]
#     print(optimal_segments[[
#         "Segment Number", "Train Name", "From Station", "Departure Time",
#         "To Station", "Arrival Time", "Segment Distance (km)", "Segment Duration (hrs)"
#     ]])

#     print("\n📊 Near-Optimal Routes (within 10% of best score):")
#     print(near_optimal_routes[[
#         "Route Number", "Total Duration (min)", "Estimated Cost (₹)",
#         "Num Transfers", "Total Waiting (min)", "Score"
#     ]])

#     # Visualize optimal route
#     visualize_route_interactive(df_available, optimal_route["Route Number"])

#     # Visualize Pareto front
#     df_routes = df_available.groupby("Route Number").agg({
#         "Total Duration (min)": "first",
#         "Estimated Cost (₹)": "first",
#         "Segment Number": "count",
#         "Waiting Time (min)": "sum"
#     }).reset_index()
#     df_routes["Num Transfers"] = df_routes["Segment Number"] - 1
#     df_routes["Total Waiting (min)"] = df_routes["Waiting Time (min)"]
#     df_routes = calculate_multi_objective_score(df_routes)
#     visualize_pareto_front(df_routes)

# if __name__ == "__main__":
#     file_path = "PGT_to_KOTA_routes.csv"
#     main(file_path)

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

# Load the CSV file (update path as needed)
file_path = "PGT_to_KOTA_routes.csv"
df = pd.read_csv(file_path)

# Convert 'Total Route Duration' (like "53h 5m") to minutes
def convert_to_minutes(duration_str):
    try:
        h, m = duration_str.lower().replace('h', '').replace('m', '').split()
        return int(h) * 60 + int(m)
    except:
        return np.nan

# Filter only routes with seat availability
df_available = df[df["Seat Availability"] == 1].copy()
df_available["Total Duration (min)"] = df_available["Total Route Duration"].apply(convert_to_minutes)

# Group by route to get one row per route
df_routes = df_available.groupby("Route Number").agg({
    "Total Duration (min)": "first",
    "Estimated Cost (₹)": "first",
    "Total Route Distance (km)": "first"
}).reset_index()

# Find optimal route (min time, then min cost)
min_time = df_routes["Total Duration (min)"].min()
best_routes = df_routes[df_routes["Total Duration (min)"] == min_time]
optimal_route = best_routes.loc[best_routes["Estimated Cost (₹)"].idxmin()]
optimal_route_number = optimal_route["Route Number"]

print("\n✅ Optimal Route Details:")
print(optimal_route)

# Show full route segments
optimal_segments = df[df["Route Number"] == optimal_route_number]

print("\n📌 Segments in Optimal Route:")
print(optimal_segments[[
    "Segment Number", "Train Name", "From Station", "Departure Time",
    "To Station", "Arrival Time", "Segment Distance (km)", "Segment Duration (hrs)"
]])

# Find near-optimal routes (within 30 mins of best time)
threshold_time = min_time + 30
near_optimal_routes = df_routes[
    (df_routes["Total Duration (min)"] <= threshold_time) &
    (df_routes["Route Number"] != optimal_route_number)
].sort_values(["Total Duration (min)", "Estimated Cost (₹)"])

print("\n📊 Near-Optimal Routes (within 30 mins):")
print(near_optimal_routes)

# Visualize optimal route as a graph
G = nx.DiGraph()

for _, row in optimal_segments.iterrows():
    G.add_edge(
        row["From Station"],
        row["To Station"],
        label=f'{row["Train Name"]} ({row["Segment Duration (hrs)"]} hrs)'
    )

pos = nx.spring_layout(G, seed=42)
plt.figure(figsize=(10, 6))
nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', arrows=True)
edge_labels = nx.get_edge_attributes(G, 'label')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=9)
plt.title("🚆 Optimal Train Route: Palakkad to Kota")
plt.axis('off')
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'PGT_to_KOTA_routes.csv'

In [None]:
import pandas as pd
from collections import deque
from datetime import datetime, timedelta

def main():
    print("\n=== Train Route Finder ===")
    print("Finds connecting routes between any two stations with distance, time, and cost calculations\n")

    # Load the data
    try:
        df = pd.read_csv('Train_details.csv')
        print("Successfully loaded train schedule data.")
    except FileNotFoundError:
        print("Error: Could not find 'Train_details.csv'. Please ensure the file is in the same directory.")
        return
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        return

    # Get user input
    while True:
        print("\nEnter station codes (3-4 letter codes like PGT, KOTA, etc.)")
        source = input("Enter origin station code: ").strip().upper()
        destination = input("Enter destination station code: ").strip().upper()

        # Validate stations exist
        if source not in df['Station Code'].values:
            print(f"Error: Station code '{source}' not found in database.")
            continue
        if destination not in df['Station Code'].values:
            print(f"Error: Station code '{destination}' not found in database.")
            continue
        break

    while True:
        try:
            max_connections = int(input("Enter maximum number of connections (1-4 recommended): "))
            if 0 <= max_connections <= 4:
                break
            print("Please enter a number between 0 and 4")
        except ValueError:
            print("Please enter a valid number")

    output_file = f"{source}_to_{destination}_routes.csv"

    # Find and save routes
    routes_df = find_and_save_routes(df, source, destination, max_connections, output_file)

    if routes_df is not None:
        print("\nAnalysis of found routes:")
        analyze_routes(routes_df)
        print(f"\nDetailed routes saved to: {output_file}")

def find_and_save_routes(df, source, destination, max_connections=3, output_file='connecting_routes.csv'):
    """Find routes between stations and save to CSV with metrics"""

    # Create a graph: {station: {train_no: destination_station}}
    graph = {}
    train_info = {}

    # Build the graph and train information
    for _, row in df.iterrows():
        train_no = row['Train No']
        station = row['Station Code']
        dest_station = row['Destination Station']
        distance = row['Distance']

        if station not in graph:
            graph[station] = {}
        if train_no not in graph[station]:
            graph[station][train_no] = dest_station

        # Store train information
        if train_no not in train_info:
            train_info[train_no] = {
                'name': row['Train Name'],
                'seq': {},
                'source': row['Source Station'],
                'dest': row['Destination Station']
            }

        train_info[train_no]['seq'][row['SEQ']] = {
            'station': station,
            'arrival': row['Arrival time'],
            'departure': row['Departure Time'],
            'distance': distance
        }

    # Helper function to calculate time difference in hours
    def calculate_time_diff(start_time, end_time):
        try:
            fmt = '%H:%M:%S'
            t1 = datetime.strptime(start_time, fmt)
            t2 = datetime.strptime(end_time, fmt)

            # Handle overnight trains
            if t2 < t1:
                t2 += timedelta(days=1)

            return (t2 - t1).total_seconds() / 3600  # Convert to hours
        except:
            return 0

    # Find nearby junctions with direct trains to destination
    def find_nearby_junctions_with_direct_trains(source, destination, max_distance=100):
        nearby_junctions = set()

        # Find all stations within 100 km of source
        source_stations = set()
        for train_no, info in train_info.items():
            for seq, seq_info in info['seq'].items():
                if seq_info['station'] == source:
                    source_distance = float(seq_info['distance'])
                    # Find all stations on this train within 100 km
                    for other_seq, other_info in info['seq'].items():
                        other_distance = float(other_info['distance'])
                        if abs(other_distance - source_distance) <= max_distance:
                            source_stations.add(other_info['station'])

        # Check which of these stations have direct trains to destination
        for station in source_stations:
            if station in graph:
                for train_no in graph[station]:
                    # Check if this train goes to destination
                    seq_info = train_info[train_no]['seq']
                    stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]
                    if destination in stations_in_order:
                        from_idx = stations_in_order.index(station)
                        to_idx = stations_in_order.index(destination)
                        if to_idx > from_idx:
                            nearby_junctions.add(station)

        return nearby_junctions

    # Find nearby junctions with direct trains to destination
    nearby_junctions = find_nearby_junctions_with_direct_trains(source, destination)

    # BFS implementation to find routes with max_connections
    def bfs(start, end, max_depth):
        queue = deque()
        queue.append((start, [], 0))
        visited = set()
        routes = []

        while queue:
            current, path, depth = queue.popleft()

            if current == end:
                routes.append(path)
                continue

            if depth >= max_depth:
                continue

            if current in visited:
                continue

            visited.add(current)

            if current not in graph:
                continue

            for train_no, next_station in graph[current].items():
                seq_info = train_info[train_no]['seq']
                stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]

                try:
                    current_idx = stations_in_order.index(current)
                    next_idx = stations_in_order.index(next_station)
                    if next_idx > current_idx:
                        new_path = path + [(train_no, current, next_station)]
                        queue.append((next_station, new_path, depth + 1))
                except ValueError:
                    continue

        return routes

    # Find all possible routes (original + via nearby junctions)
    all_routes = []

    # 1. Find direct routes (original BFS)
    for connections in range(1, max_connections + 1):
        routes = bfs(source, destination, connections)
        all_routes.extend(routes)

    # 2. Find routes via nearby junctions with direct trains to destination
    for junction in nearby_junctions:
        # Route from source to junction (max 1 connection)
        to_junction_routes = bfs(source, junction, 1)

        # Direct route from junction to destination
        for train_no in graph.get(junction, {}):
            seq_info = train_info[train_no]['seq']
            stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]
            if destination in stations_in_order:
                from_idx = stations_in_order.index(junction)
                to_idx = stations_in_order.index(destination)
                if to_idx > from_idx:
                    direct_segment = [(train_no, junction, destination)]
                    # Combine with routes to junction
                    for route in to_junction_routes:
                        combined_route = route + direct_segment
                        all_routes.append(combined_route)

    # Prepare data for CSV
    csv_data = []
    for route_num, route in enumerate(all_routes, 1):
        total_trains = len(route)
        total_distance = 0
        total_duration_hours = 0
        route_segments = []

        # Calculate route statistics
        for segment in route:
            train_no, from_station, to_station = segment
            train_name = train_info[train_no]['name']

            # Get segment details
            seq_info = train_info[train_no]['seq']
            for seq, info in seq_info.items():
                if info['station'] == from_station:
                    departure = info['departure']
                    from_distance = float(info['distance']) # Convert to numeric type before operation
                if info['station'] == to_station:
                    arrival = info['arrival']
                    to_distance = float(info['distance']) # Convert to numeric type before operation

            # Calculate segment distance
            segment_distance = abs(to_distance - from_distance)
            total_distance += segment_distance


            if segment_distance >2500:
                segment_duration = segment_distance/65

            elif segment_distance > 2000:
                segment_duration = segment_distance/68.5

            elif segment_distance > 1450:
                segment_duration = segment_distance/72.5

            elif segment_distance > 800:
                segment_duration = segment_distance/80
            elif segment_distance > 300:
                segment_duration = segment_distance/60

            elif segment_distance > 300:
                segment_duration = segment_distance/60


            else:
                segment_duration = segment_distance/37.5


            seg_hours = int(segment_duration)
            seg_minutes = int((segment_duration % 1) * 60)
            segment_duration_str = f"{seg_hours}h {seg_minutes}m"

            total_duration_hours += segment_duration

             # Add waiting time for subsequent segments
            if route.index(segment) > 0:
                prev_arrival = route_segments[-1]['Arrival Time']
                wait_time = calculate_time_diff(prev_arrival, departure)
                if wait_time < 0:
                    wait_time += 24  # Next day
                total_duration_hours += wait_time

            route_segments.append({
                'Train Number': train_no,
                'Train Name': train_name,
                'From Station': from_station,
                'Departure Time': departure,
                'To Station': to_station,
                'Arrival Time': arrival,
                'Segment Distance (km)': segment_distance,
                'Segment Duration (hrs)': segment_duration_str,

            })


        # Calculate total route cost (₹1 per km)
        total_cost = total_distance * 1

       # Format total duration
        hours = int(total_duration_hours)
        minutes = int((total_duration_hours % 1) * 60)
        total_duration = f"{hours}h {minutes}m"

        # Add route summary and segments to CSV data
        for segment_num, segment in enumerate(route_segments, 1):
            csv_data.append({
                'Route Number': route_num,
                'Total Trains in Route': total_trains,
                'Segment Number': segment_num,
                'Train Number': segment['Train Number'],
                'Train Name': segment['Train Name'],
                'From Station': segment['From Station'],
                'Departure Time': segment['Departure Time'],
                'To Station': segment['To Station'],
                'Arrival Time': segment['Arrival Time'],
                'Segment Distance (km)': segment['Segment Distance (km)'],
                'Segment Duration (hrs)': segment['Segment Duration (hrs)'],
                'Total Route Distance (km)': total_distance,
                'Total Route Duration': total_duration,
                'Estimated Cost (₹)': total_cost
            })

    # Create DataFrame and save to CSV
    if csv_data:
        routes_df = pd.DataFrame(csv_data)

        # Reorder columns for better readability
        columns = [
            'Route Number', 'Total Trains in Route', 'Segment Number',
            'Train Number', 'Train Name', 'From Station', 'Departure Time',
            'To Station', 'Arrival Time', 'Segment Distance (km)',
            'Segment Duration (hrs)', 'Total Route Distance (km)',
            'Total Route Duration', 'Estimated Cost (₹)'
        ]
        routes_df = routes_df[columns]

        routes_df.to_csv(output_file, index=False)
        print(f"\nSuccessfully saved {len(all_routes)} routes to {output_file}")

        return routes_df
    else:
        print("\nNo valid routes found with the given parameters.")
        return None

def analyze_routes(routes_df):
    """Analyze and display summary statistics about found routes"""
    # Get unique routes
    unique_routes = routes_df['Route Number'].unique()

    print(f"\nFound {len(unique_routes)} unique routes:")

    # Print summary for each route
    for route_num in unique_routes:
        route_data = routes_df[routes_df['Route Number'] == route_num]
        first_row = route_data.iloc[0]

        print(f"\nRoute {route_num}:")
        print(f"- Trains required: {first_row['Total Trains in Route']}")
        print(f"- Total distance: {first_row['Total Route Distance (km)']} km")
        print(f"- Total duration: {first_row['Total Route Duration']}")
        print(f"- Estimated cost: ₹{first_row['Estimated Cost (₹)']}")

        # Print segments
        print("  Segments:")
        for _, segment in route_data.iterrows():
            print(f"  {segment['Segment Number']}. {segment['Train Name']} ({segment['Train Number']})")
            print(f"     {segment['From Station']} ({segment['Departure Time']}) → {segment['To Station']} ({segment['Arrival Time']})")
            print(f"     Distance: {segment['Segment Distance (km)']} km | Duration: {segment['Segment Duration (hrs)']} hrs")

    # Find fastest and shortest routes
    routes_summary = routes_df.drop_duplicates('Route Number')

    if len(routes_summary) > 1:
        fastest = routes_summary.loc[routes_summary['Total Route Duration'].idxmin()]
        shortest = routes_summary.loc[routes_summary['Total Route Distance (km)'].idxmin()]
        cheapest = routes_summary.loc[routes_summary['Estimated Cost (₹)'].idxmin()]

        print("\nRecommendations:")
        print(f"- Fastest route: Route {fastest['Route Number']} ({fastest['Total Route Duration']})")
        print(f"- Shortest route: Route {shortest['Route Number']} ({shortest['Total Route Distance (km)']} km)")
        print(f"- Cheapest route: Route {cheapest['Route Number']} (₹{cheapest['Estimated Cost (₹)']})")

if __name__ == "__main__":
    main()


=== Train Route Finder ===
Finds connecting routes between any two stations with distance, time, and cost calculations



  df = pd.read_csv('Train_details.csv')


Successfully loaded train schedule data.

Enter station codes (3-4 letter codes like PGT, KOTA, etc.)
Enter origin station code: PGT
Enter destination station code: GKP
Enter maximum number of connections (1-4 recommended): 2

Successfully saved 46 routes to PGT_to_GKP_routes.csv

Analysis of found routes:

Found 46 unique routes:

Route 1:
- Trains required: 1
- Total distance: 2889.0 km
- Total duration: 44h 26m
- Estimated cost: ₹2889.0
  Segments:
  1. RAPTISAGAR E (12512)
     PGT (14:05:00) → GKP (15:20:00)
     Distance: 2889.0 km | Duration: 44h 26m hrs

Route 2:
- Trains required: 1
- Total distance: 2889.0 km
- Total duration: 44h 26m
- Estimated cost: ₹2889.0
  Segments:
  1. RAPTISAGAR E (12512)
     PGT (14:05:00) → GKP (15:20:00)
     Distance: 2889.0 km | Duration: 44h 26m hrs

Route 3:
- Trains required: 2
- Total distance: 2890.0 km
- Total duration: 58h 4m
- Estimated cost: ₹2890.0
  Segments:
  1. ERS-MAS SPL (6006)
     PGT (23:10:00) → MAS (10:30:00)
     Distance:

In [None]:
import pandas as pd
from collections import deque
from datetime import datetime, timedelta

def main():
    print("\n=== Train Route Finder ===")
    print("Finds connecting routes between any two stations with distance, time, and cost calculations\n")

    # Load the data
    try:
        df = pd.read_csv('train_data.csv')
        print("Successfully loaded train schedule data.")
    except FileNotFoundError:
        print("Error: Could not find 'Train_details.csv'. Please ensure the file is in the same directory.")
        return
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        return

    # Get user input
    while True:
        print("\nEnter station codes (3-4 letter codes like PGT, KOTA, etc.)")
        source = input("Enter origin station code: ").strip().upper()
        destination = input("Enter destination station code: ").strip().upper()

        # Validate stations exist
        if source not in df['Station Code'].values:
            print(f"Error: Station code '{source}' not found in database.")
            continue
        if destination not in df['Station Code'].values:
            print(f"Error: Station code '{destination}' not found in database.")
            continue
        break

    while True:
        try:
            max_connections = int(input("Enter maximum number of connections (1-4 recommended): "))
            if 0 <= max_connections <= 4:
                break
            print("Please enter a number between 0 and 4")
        except ValueError:
            print("Please enter a valid number")

    output_file = f"{source}_to_{destination}_routes.csv"

    # Find and save routes
    routes_df = find_and_save_routes(df, source, destination, max_connections, output_file)

    if routes_df is not None:
        print("\nAnalysis of found routes:")
        analyze_routes(routes_df)
        print(f"\nDetailed routes saved to: {output_file}")

def find_and_save_routes(df, source, destination, max_connections=3, output_file='connecting_routes.csv'):
    """Find routes between stations and save to CSV with metrics"""

    # Create a graph: {station: {train_no: destination_station}}
    graph = {}
    train_info = {}

    # Build the graph and train information
    for _, row in df.iterrows():
        train_no = row['Train No']
        station = row['Station Code']
        dest_station = row['Destination Station']
        distance = row['Distance']

        if station not in graph:
            graph[station] = {}
        if train_no not in graph[station]:
            graph[station][train_no] = dest_station

        # Store train information
        if train_no not in train_info:
            train_info[train_no] = {
                'name': row['Train Name'],
                'seq': {},
                'source': row['Source Station'],
                'dest': row['Destination Station']
            }

        train_info[train_no]['seq'][row['SEQ']] = {
            'station': station,
            'arrival': row['Arrival time'],
            'departure': row['Departure Time'],
            'distance': distance
        }

    # Helper function to calculate time difference in hours
    def calculate_time_diff(start_time, end_time):
        try:
            fmt = '%H:%M:%S'
            t1 = datetime.strptime(start_time, fmt)
            t2 = datetime.strptime(end_time, fmt)

            # Handle overnight trains
            if t2 < t1:
                t2 += timedelta(days=1)

            return (t2 - t1).total_seconds() / 3600  # Convert to hours
        except:
            return 0

    # Find nearby junctions with direct trains to destination
    def find_nearby_junctions_with_direct_trains(source, destination, max_distance=100):
        nearby_junctions = set()

        # Find all stations within 100 km of source
        source_stations = set()
        for train_no, info in train_info.items():
            for seq, seq_info in info['seq'].items():
                if seq_info['station'] == source:
                    source_distance = float(seq_info['distance'])
                    # Find all stations on this train within 100 km
                    for other_seq, other_info in info['seq'].items():
                        other_distance = float(other_info['distance'])
                        if abs(other_distance - source_distance) <= max_distance:
                            source_stations.add(other_info['station'])

        # Check which of these stations have direct trains to destination
        for station in source_stations:
            if station in graph:
                for train_no in graph[station]:
                    # Check if this train goes to destination
                    seq_info = train_info[train_no]['seq']
                    stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]
                    if destination in stations_in_order:
                        from_idx = stations_in_order.index(station)
                        to_idx = stations_in_order.index(destination)
                        if to_idx > from_idx:
                            nearby_junctions.add(station)

        return nearby_junctions

    # Find nearby junctions with direct trains to destination
    nearby_junctions = find_nearby_junctions_with_direct_trains(source, destination)

    # BFS implementation to find routes with max_connections
    def bfs(start, end, max_depth):
        queue = deque()
        queue.append((start, [], 0))
        visited = set()
        routes = []

        while queue:
            current, path, depth = queue.popleft()

            if current == end:
                routes.append(path)
                continue

            if depth >= max_depth:
                continue

            if current in visited:
                continue

            visited.add(current)

            if current not in graph:
                continue

            for train_no, next_station in graph[current].items():
                seq_info = train_info[train_no]['seq']
                stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]

                try:
                    current_idx = stations_in_order.index(current)
                    next_idx = stations_in_order.index(next_station)
                    if next_idx > current_idx:
                        new_path = path + [(train_no, current, next_station)]
                        queue.append((next_station, new_path, depth + 1))
                except ValueError:
                    continue

        return routes

    # Find all possible routes (original + via nearby junctions)
    all_routes = []

    # 1. Find direct routes (original BFS)
    for connections in range(1, max_connections + 1):
        routes = bfs(source, destination, connections)
        all_routes.extend(routes)

    # 2. Find routes via nearby junctions with direct trains to destination
    for junction in nearby_junctions:
        # Route from source to junction (max 1 connection)
        to_junction_routes = bfs(source, junction, 1)

        # Direct route from junction to destination
        for train_no in graph.get(junction, {}):
            seq_info = train_info[train_no]['seq']
            stations_in_order = [v['station'] for k, v in sorted(seq_info.items(), key=lambda x: int(x[0]))]
            if destination in stations_in_order:
                from_idx = stations_in_order.index(junction)
                to_idx = stations_in_order.index(destination)
                if to_idx > from_idx:
                    direct_segment = [(train_no, junction, destination)]
                    # Combine with routes to junction
                    for route in to_junction_routes:
                        combined_route = route + direct_segment
                        all_routes.append(combined_route)

    # Prepare data for CSV
    csv_data = []
    for route_num, route in enumerate(all_routes, 1):
        total_trains = len(route)
        total_distance = 0
        total_duration_hours = 0
        route_segments = []

        # Calculate route statistics
        for segment in route:
            train_no, from_station, to_station = segment
            train_name = train_info[train_no]['name']

            # Get segment details
            seq_info = train_info[train_no]['seq']
            for seq, info in seq_info.items():
                if info['station'] == from_station:
                    departure = info['departure']
                    from_distance = float(info['distance'])
                if info['station'] == to_station:
                    arrival = info['arrival']
                    to_distance = float(info['distance'])

            # Calculate segment distance
            segment_distance = abs(to_distance - from_distance)
            total_distance += segment_distance

            # Calculate segment duration in hours
            segment_duration = segment_distance / train_info[train_no]['speed_kmph']


            # Convert to hours and minutes format
            segment_hours = int(segment_duration)
            segment_minutes = int((segment_duration % 1) * 60)
            segment_duration_str = f"{segment_hours}h {segment_minutes}m"

            total_duration_hours += segment_duration

            # Add waiting time between segments
            if route.index(segment) > 0:
                prev_arrival = route_segments[-1]['Arrival Time']
                wait_time = calculate_time_diff(prev_arrival, departure)
                if wait_time < 0:
                    wait_time += 24
                total_duration_hours += wait_time

            route_segments.append({
                'Train Number': train_no,
                'Train Name': train_name,
                'From Station': from_station,
                'Departure Time': departure,
                'To Station': to_station,
                'Arrival Time': arrival,
                'Segment Distance (km)': segment_distance,
                'Segment Duration': segment_duration_str
            })

        # Calculate total route cost (₹1 per km)
        total_cost = total_distance * 1

        # Format total duration
        total_hours = int(total_duration_hours)
        total_minutes = int((total_duration_hours % 1) * 60)
        total_duration = f"{total_hours}h {total_minutes}m"

        # Add route summary and segments to CSV data
        for segment_num, segment in enumerate(route_segments, 1):
            csv_data.append({
                'Route Number': route_num,
                'Total Trains in Route': total_trains,
                'Segment Number': segment_num,
                'Train Number': segment['Train Number'],
                'Train Name': segment['Train Name'],
                'From Station': segment['From Station'],
                'Departure Time': segment['Departure Time'],
                'To Station': segment['To Station'],
                'Arrival Time': segment['Arrival Time'],
                'Segment Distance (km)': segment['Segment Distance (km)'],
                'Segment Duration': segment['Segment Duration'],
                'Total Route Distance (km)': total_distance,
                'Total Route Duration': total_duration,
                'Estimated Cost (₹)': total_cost
            })

    # Create DataFrame and save to CSV
    if csv_data:
        routes_df = pd.DataFrame(csv_data)

        # Reorder columns for better readability
        columns = [
            'Route Number', 'Total Trains in Route', 'Segment Number',
            'Train Number', 'Train Name', 'From Station', 'Departure Time',
            'To Station', 'Arrival Time', 'Segment Distance (km)',
            'Segment Duration', 'Total Route Distance (km)',
            'Total Route Duration', 'Estimated Cost (₹)'
        ]
        routes_df = routes_df[columns]

        routes_df.to_csv(output_file, index=False)
        print(f"\nSuccessfully saved {len(all_routes)} routes to {output_file}")

        return routes_df
    else:
        print("\nNo valid routes found with the given parameters.")
        return None

def analyze_routes(routes_df):
    """Analyze and display summary statistics about found routes"""
    # Get unique routes
    unique_routes = routes_df['Route Number'].unique()

    print(f"\nFound {len(unique_routes)} unique routes:")

    # Print summary for each route
    for route_num in unique_routes:
        route_data = routes_df[routes_df['Route Number'] == route_num]
        first_row = route_data.iloc[0]

        print(f"\nRoute {route_num}:")
        print(f"- Trains required: {first_row['Total Trains in Route']}")
        print(f"- Total distance: {first_row['Total Route Distance (km)']} km")
        print(f"- Total duration: {first_row['Total Route Duration']}")
        print(f"- Estimated cost: ₹{first_row['Estimated Cost (₹)']}")

        # Print segments
        print("  Segments:")
        for _, segment in route_data.iterrows():
            print(f"  {segment['Segment Number']}. {segment['Train Name']} ({segment['Train Number']})")
            print(f"     {segment['From Station']} ({segment['Departure Time']}) → {segment['To Station']} ({segment['Arrival Time']})")
            print(f"     Distance: {segment['Segment Distance (km)']} km | Duration: {segment['Segment Duration']}")

    # Find fastest and shortest routes
    routes_summary = routes_df.drop_duplicates('Route Number')

    if len(routes_summary) > 1:
        fastest = routes_summary.loc[routes_summary['Total Route Duration'].idxmin()]
        shortest = routes_summary.loc[routes_summary['Total Route Distance (km)'].idxmin()]
        cheapest = routes_summary.loc[routes_summary['Estimated Cost (₹)'].idxmin()]

        print("\nRecommendations:")
        print(f"- Fastest route: Route {fastest['Route Number']} ({fastest['Total Route Duration']})")
        print(f"- Shortest route: Route {shortest['Route Number']} ({shortest['Total Route Distance (km)']} km)")
        print(f"- Cheapest route: Route {cheapest['Route Number']} (₹{cheapest['Estimated Cost (₹)']})")

if __name__ == "__main__":
    main()