<a href="https://colab.research.google.com/github/sbigstar0310/Term_project/blob/main/lp_modified.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install related libraries

In [None]:
!pip install -q pulp
!pip install numpy
!pip install tqdm

import pulp
import numpy as np
import tqdm

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m40.8 MB/s[0m eta [36m0:00:00[0m


## Get optimal time interval with dummy data

In [None]:
import pulp

# Cost-model parameters (how each one enters the objective is shown in the loop below).
train_capacity = 960               # per-train capacity: bounds the load and scales the discomfort cost
operating_cost_per_train = 10000   # multiplied by the number of trains per hour
waiting_cost_per_minute = 10       # multiplied by the interval (minutes) and the total boardings
discomfort_constant = 10           # multiplied by (per-train load / train_capacity)

# Station data (dummy values for one date / one time slot)
date = "2024-01-01"
time ="03~04"
stations = ['s1101', 's1102', 's1103', 's1104', 's1105', 's1106', 's1107', 's1108', 's1109', 's1110',
            's1111', 's1112', 's1113', 's1114', 's1115', 's1116', 's1117', 's1118', 's1119', 's1120',
            's1121', 's1222'
            ]
boarding = [150, 53, 124, 903, 474, 80, 175, 133, 221, 88, 166, 216,
            109, 178, 25, 439, 132, 139, 88, 160, 59, 210]
alighting = [159, 58, 112, 478, 435, 68, 273, 131, 279, 150, 242, 214,
             166, 200, 44, 568, 124, 77, 78, 188, 71, 237]

# The three input lists must describe the same stations, position by position.
if len(stations) != len(boarding) or len(stations) != len(alighting):
    print("Error: Input lists must have the same length.")
    print(f"Lengths -> Stations: {len(stations)}, Boarding: {len(boarding)}, Alighting: {len(alighting)}")
    raise IndexError("Input lists length mismatch.")

num_stations = len(stations)

# Candidate schedules: 1..60 trains per hour and the matching headway in minutes.
n_values = list(range(1, 61))  # Trains per hour
x_values = [60 / n for n in n_values]  # Corresponding intervals in minutes

# Total boarding passengers (drives the waiting cost)
total_boarding_passengers = sum(boarding)

# One result record per candidate n_t
results = []

for n_t in n_values:
    prob = pulp.LpProblem(f"Train_Interval_Optimization_n_{n_t}", pulp.LpMinimize)

    # Decision variables: cumulative on-board load after each station, and that load per train.
    final_boarding = pulp.LpVariable.dicts("Final_Boarding", range(num_stations), lowBound=0, cat="Continuous")
    passengers_per_train = pulp.LpVariable.dicts("Passengers_Per_Train", range(num_stations), lowBound=0, cat="Continuous")

    # Objective function components
    operating_cost = n_t * operating_cost_per_train
    waiting_cost = (60 / n_t) * waiting_cost_per_minute * total_boarding_passengers
    discomfort_cost = (discomfort_constant / train_capacity) * pulp.lpSum([passengers_per_train[i] for i in range(num_stations)])

    prob += operating_cost + waiting_cost + discomfort_cost, "Total_Cost"

    for i in range(num_stations):
        net_flow = boarding[i] - alighting[i]

        # Passenger flow: load after station i = load after station i-1 + net boardings.
        if i == 0:
            prob += final_boarding[i] == net_flow, f"Passenger_Flow_Station_{i+1}"
        else:
            prob += final_boarding[i] == final_boarding[i - 1] + net_flow, f"Passenger_Flow_Station_{i+1}"

        # The hourly load is shared evenly by the n_t trains.
        prob += final_boarding[i] == passengers_per_train[i] * n_t, f"Passengers_Per_Train_Constraint_{i+1}"

    # Capacity constraints: the hourly load may not exceed the capacity of n_t trains.
    for i in range(num_stations):
        prob += final_boarding[i] <= train_capacity * n_t, f"Capacity_Constraint_Station_{i+1}"

    prob.solve()

    results.append({
        "n_t": n_t,
        "status": pulp.LpStatus[prob.status],
        "total_cost": pulp.value(prob.objective),
        "passenger_load": [final_boarding[i].varValue for i in range(num_stations)]
    })


# Find the optimal solution.
# Only candidates the solver actually solved to optimality carry a meaningful cost;
# an infeasible model still yields an objective value, but it is computed from
# arbitrary variable values and must not win the comparison.
solved_results = [r for r in results if r["status"] == "Optimal" and r["total_cost"] is not None]
if solved_results:
    candidates = solved_results
else:
    print("Warning: no candidate schedule was solved to optimality "
          f"(solver statuses: {sorted({r['status'] for r in results})}); "
          "the values below come from unsolved models and are not reliable.")
    candidates = [r for r in results if r["total_cost"] is not None]
    if not candidates:
        raise ValueError("No candidate schedule produced a total cost.")

optimal_result = min(candidates, key=lambda x: x["total_cost"])

# Print results
print(f"Optimal Number of Trains per Hour (n_t): {optimal_result['n_t']}")
print(f"Total Cost: ${optimal_result['total_cost']:.2f}")
print("Passenger Load at Each Station:")
for i, load in enumerate(optimal_result["passenger_load"]):
    print(f"  Station {stations[i]}: {load:.2f} passengers")


Optimal Number of Trains per Hour (n_t): 16
Total Cost: $322077.53
Passenger Load at Each Station:
  Station s1101: 0.00 passengers
  Station s1102: 0.00 passengers
  Station s1103: 12.00 passengers
  Station s1104: 437.00 passengers
  Station s1105: 476.00 passengers
  Station s1106: 488.00 passengers
  Station s1107: 390.00 passengers
  Station s1108: 392.00 passengers
  Station s1109: 334.00 passengers
  Station s1110: 272.00 passengers
  Station s1111: 225.00 passengers
  Station s1112: 227.00 passengers
  Station s1113: 170.00 passengers
  Station s1114: 148.00 passengers
  Station s1115: 129.00 passengers
  Station s1116: 0.00 passengers
  Station s1117: 0.00 passengers
  Station s1118: 62.00 passengers
  Station s1119: 72.00 passengers
  Station s1120: 39.00 passengers
  Station s1121: 27.00 passengers
  Station s1222: 0.00 passengers


## Global Variables (train_capacity, operating cost, ...)

In [None]:
# Global cost-model parameters; later cells pass them as arguments to the
# interval-optimisation functions. Cost units are not stated in this notebook.
train_capacity = 960 # 21 train sets with 84 cars (4 cars per set), maximum capacity of 240 passengers per car => 960 passengers per set.
# Multiplied by the number of trains per hour in the objective.
operating_cost_per_train = 10000
# Multiplied by the train interval (minutes) and the total number of boarding passengers.
waiting_cost_per_minute = 10
# Divided by train_capacity and multiplied by the summed per-train passenger load.
discomfort_constant = 10

## Function that takes stations, boarding, and alighting data and returns the optimal train interval

In [None]:
def get_optimal_train_interval(stations, boarding, alighting, train_capacity, operating_cost_per_train, waiting_cost_per_minute, discomfort_constant):
    """Return the train interval (minutes) with the lowest total cost for one time slot.

    For every candidate interval (1, 3, ..., 19 minutes) a small LP is built with
    the corresponding number of trains per hour ``n_t = 60 / interval``. The
    objective is::

        n_t * operating_cost_per_train
        + (60 / n_t) * waiting_cost_per_minute * sum(boarding)
        + (discomfort_constant / train_capacity) * sum(passengers_per_train)

    subject to passenger-flow, per-train-load and capacity constraints.

    Args:
        stations: Sequence of station identifiers; only its length is used.
        boarding: Boarding counts, one per station, in line order.
        alighting: Alighting counts, one per station, in line order.
        train_capacity: Passenger capacity of one train.
        operating_cost_per_train: Cost factor applied per train per hour.
        waiting_cost_per_minute: Cost factor applied per passenger-minute of interval.
        discomfort_constant: Weight of the crowding term.

    Returns:
        The interval in minutes (float, computed as ``60 / n_t``) of the cheapest
        candidate. Candidates whose LP was solved to optimality are preferred;
        if none was, the cheapest unsolved candidate is returned and a
        ``RuntimeWarning`` is emitted, because that value is not reliable.

    Raises:
        IndexError: If the three input sequences differ in length.
        ValueError: If no candidate produced an objective value at all.
    """
    import warnings  # local import: keeps this cell self-contained

    # All three inputs must describe the same stations, position by position.
    if len(stations) != len(boarding) or len(stations) != len(alighting):
        print("Error: Input lists must have the same length.")
        print(f"Lengths -> Stations: {len(stations)}, Boarding: {len(boarding)}, Alighting: {len(alighting)}")
        raise IndexError("Input lists length mismatch.")

    num_stations = len(stations)

    # Candidate schedules
    x_values = list(range(1, 20, 2))      # train time intervals in minutes
    n_values = [60 / x for x in x_values] # Number of trains during 1 hour

    # Total boarding passengers (drives the waiting cost)
    total_boarding_passengers = sum(boarding)

    # One result record per candidate n_t
    results = []

    for n_t in n_values:
        prob = pulp.LpProblem(f"Train_Interval_Optimization_n_{n_t}", pulp.LpMinimize)

        # Decision variables: cumulative on-board load after each station, and that load per train.
        final_boarding = pulp.LpVariable.dicts("Final_Boarding", range(num_stations), lowBound=0, cat="Continuous")
        passengers_per_train = pulp.LpVariable.dicts("Passengers_Per_Train", range(num_stations), lowBound=0, cat="Continuous")

        # Objective function components
        operating_cost = n_t * operating_cost_per_train
        waiting_cost = (60 / n_t) * waiting_cost_per_minute * total_boarding_passengers
        discomfort_cost = (discomfort_constant / train_capacity) * pulp.lpSum([passengers_per_train[i] for i in range(num_stations)])

        prob += operating_cost + waiting_cost + discomfort_cost, "Total_Cost"

        for i in range(num_stations):
            net_flow = boarding[i] - alighting[i]

            # Passenger flow: load after station i = load after station i-1 + net boardings.
            if i == 0:
                prob += final_boarding[i] == net_flow, f"Passenger_Flow_Station_{i+1}"
            else:
                prob += final_boarding[i] == final_boarding[i - 1] + net_flow, f"Passenger_Flow_Station_{i+1}"

            # The hourly load is shared evenly by the n_t trains.
            prob += final_boarding[i] == passengers_per_train[i] * n_t, f"Passengers_Per_Train_Constraint_{i+1}"

        # Capacity constraints: the hourly load may not exceed the capacity of n_t trains.
        for i in range(num_stations):
            prob += final_boarding[i] <= train_capacity * n_t, f"Capacity_Constraint_Station_{i+1}"

        prob.solve()

        # Store results
        results.append({
            "n_t": n_t,
            "x_t": 60 / n_t,  # Corresponding interval for this train schedule
            "status": pulp.LpStatus[prob.status],
            "total_cost": pulp.value(prob.objective),
            "passenger_load": [final_boarding[i].varValue for i in range(num_stations)]
        })

    # Only candidates solved to optimality carry a meaningful cost. An infeasible
    # model still yields an objective value, but it is computed from arbitrary
    # variable values and must not win the comparison against real solutions.
    solved_results = [r for r in results if r["status"] == "Optimal" and r["total_cost"] is not None]
    if solved_results:
        candidates = solved_results
    else:
        warnings.warn(
            "No candidate interval was solved to optimality; "
            "falling back to the cheapest unsolved model, which is not reliable.",
            RuntimeWarning,
            stacklevel=2,
        )
        candidates = [r for r in results if r["total_cost"] is not None]
        if not candidates:
            raise ValueError("No candidate interval produced a total cost.")

    optimal_result = min(candidates, key=lambda x: x["total_cost"])

    return optimal_result['x_t']

## Get data from CSV file

In [None]:
# Load the uploaded ridership file into a DataFrame and preview its first rows.
import pandas as pd

# Step 1: Load Data
# NOTE(review): the absolute "/content/..." path presumably assumes a Colab runtime
# where the CSV has been uploaded — confirm before running elsewhere.
data = pd.read_csv("/content/train_dataset.csv", encoding='euc-kr') # Korean encoding.

data.head()

# Row layout: [date (날짜) | station number (역번호) | station name (역명) | boarding or alighting (구분) | one column per hourly time slot ...]


Unnamed: 0,날짜,역번호,역명,구분,03-04시,04-05시,05-06시,06-07시,07-08시,08-09시,...,17-18시,18-19시,19-20시,20-21시,21-22시,22-23시,23-00시,00-01시,01-02시,02-03시
0,2024-01-01,1101,판암,승차,0,66,61,55,50,87,...,120,129,54,47,33,13,13,2,0,0
1,2024-01-01,1101,판암,하차,0,53,50,74,48,51,...,145,130,111,85,73,94,62,17,0,0
2,2024-01-01,1102,신흥,승차,0,0,18,106,21,40,...,41,37,30,15,16,8,3,0,0,0
3,2024-01-01,1102,신흥,하차,0,0,9,104,17,15,...,45,69,39,37,27,39,20,6,0,0
4,2024-01-01,1103,대동,승차,0,27,94,31,51,77,...,116,84,102,52,49,37,28,0,0,0


## Separate data into weekdays and weekends

In [None]:
# Check your global variable
# print(train_capacity)
# print(operating_cost_per_train)
# print(waiting_cost_per_minute)
# print(discomfort_constant)

# Step 2: Prepare Data
data['날짜'] = pd.to_datetime(data['날짜']) # data['date']
data['요일'] = data['날짜'].dt.dayofweek    # data['dayOfWeek'] Monday=0, Sunday=6

# Separate weekdays (0-4) and weekends (5-6).
# .copy() makes each subset an independent DataFrame, so later column
# assignments on it do not trigger pandas' SettingWithCopyWarning.
weekdays_data = data[data['요일'] <= 4].copy()
weekends_data = data[data['요일'] >= 5].copy()

## Calculate the average time interval on weekdays and weekends

In [None]:
from tqdm import tqdm
import pandas as pd

def calculate_daily_time_period_averages(dataset, train_capacity, operating_cost_per_train, waiting_cost_per_minute, discomfort_constant):
    """Compute the optimal train interval for every date and hourly time column.

    Args:
        dataset: DataFrame with a date column ('날짜'), a station-name column
            ('역명'), a boarding/alighting marker column ('구분', values '승차'
            for boarding and '하차' for alighting) and one column per time
            period whose name ends with '시'. The DataFrame is not modified.
        train_capacity: Forwarded to ``get_optimal_train_interval``.
        operating_cost_per_train: Forwarded to ``get_optimal_train_interval``.
        waiting_cost_per_minute: Forwarded to ``get_optimal_train_interval``.
        discomfort_constant: Forwarded to ``get_optimal_train_interval``.

    Returns:
        A list with one dict per date (in sorted date order, as produced by
        ``groupby``). Each dict maps a time-column name to the interval returned
        by ``get_optimal_train_interval``, or to ``None`` when that call raised.
    """
    time_columns = [col for col in dataset.columns if col.endswith('시')]  # Extract time period columns
    daily_intervals = []

    # Convert the date column on the fly instead of writing it back, so the
    # caller's DataFrame (possibly a slice of a larger frame) is left untouched.
    dates = pd.to_datetime(dataset['날짜'])

    # Group data by date and iterate with tqdm to show progress
    for date, group in tqdm(dataset.groupby(dates), desc="Processing Dates", unit="date"):
        daily_time_intervals = {}

        # These selections do not depend on the time column, so compute them once per date.
        boarding_rows = group[group['구분'] == '승차']
        alighting_rows = group[group['구분'] == '하차']
        stations = group['역명'].unique()

        for time_col in time_columns:
            # Extract boarding and alighting data for the time period
            boarding = boarding_rows[time_col].tolist()
            alighting = alighting_rows[time_col].tolist()

            try:
                # Calculate the optimal interval for the current time period
                interval = get_optimal_train_interval(
                    stations=stations,
                    boarding=boarding,
                    alighting=alighting,
                    train_capacity=train_capacity,
                    operating_cost_per_train=operating_cost_per_train,
                    waiting_cost_per_minute=waiting_cost_per_minute,
                    discomfort_constant=discomfort_constant,
                )
                daily_time_intervals[time_col] = interval
            except Exception as e:
                # Best effort: report the failing slot and keep processing the rest.
                print(f"Error for {date} at {time_col}: {e}")
                daily_time_intervals[time_col] = None

        daily_intervals.append(daily_time_intervals)

    return daily_intervals

def calculate_final_averages(daily_intervals, is_weekend):
    """Average the per-day intervals of each time period across all dates.

    Args:
        daily_intervals: Iterable of dicts, one per date, mapping a time-period
            name to an interval (number) or ``None`` when no interval is
            available for that date.
        is_weekend: Accepted for interface compatibility; not used by the
            computation.

    Returns:
        A dict mapping each time-period name seen in ``daily_intervals`` to the
        mean of its non-``None`` intervals, or to ``None`` when every value for
        that period was ``None``.
    """
    aggregated_intervals = {}

    for daily_time_intervals in daily_intervals:
        for time_col, interval in daily_time_intervals.items():
            # Register the period even when this day's value is missing, so a
            # period with no valid interval is reported as None instead of
            # silently disappearing from the result.
            collected = aggregated_intervals.setdefault(time_col, [])
            if interval is not None:
                collected.append(interval)

    # Calculate the average for each time period
    final_averages = {
        time_col: (sum(intervals) / len(intervals) if intervals else None)
        for time_col, intervals in aggregated_intervals.items()
    }

    return final_averages


## Do calculations

In [None]:
# Step 1: per-date, per-time-period optimal intervals for each subset of the data.
# Keyword arguments make explicit which global parameter feeds which argument.
weekday_daily_intervals = calculate_daily_time_period_averages(
    dataset=weekdays_data,
    train_capacity=train_capacity,
    operating_cost_per_train=operating_cost_per_train,
    waiting_cost_per_minute=waiting_cost_per_minute,
    discomfort_constant=discomfort_constant,
)
weekend_daily_intervals = calculate_daily_time_period_averages(
    dataset=weekends_data,
    train_capacity=train_capacity,
    operating_cost_per_train=operating_cost_per_train,
    waiting_cost_per_minute=waiting_cost_per_minute,
    discomfort_constant=discomfort_constant,
)

# Step 2: collapse the daily values into one average per time period.
weekday_final_averages = calculate_final_averages(daily_intervals=weekday_daily_intervals, is_weekend=False)
weekend_final_averages = calculate_final_averages(daily_intervals=weekend_daily_intervals, is_weekend=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['날짜'] = pd.to_datetime(dataset['날짜'])
Processing Dates: 100%|██████████| 207/207 [10:02<00:00,  2.91s/date]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['날짜'] = pd.to_datetime(dataset['날짜'])
Processing Dates: 100%|██████████| 82/82 [03:57<00:00,  2.89s/date]


## Print out results

In [None]:
# The printed interval is doubled, on the assumption that the up and down
# directions carry equal numbers of passengers.

for header, averages in (("Weekdays:", weekday_final_averages), ("\nWeekends:", weekend_final_averages)):
    print(header)
    for time_period, avg_interval in averages.items():
        # Periods without an average are skipped rather than printed.
        if avg_interval is None:
            continue
        print(f"{time_period} -> Average {avg_interval*2:.10f} minutes")

Weekdays:
03-04시 -> Average 38.0000000000 minutes
04-05시 -> Average 37.9227053140 minutes
05-06시 -> Average 14.3091787440 minutes
06-07시 -> Average 10.3478260870 minutes
07-08시 -> Average 6.4057971014 minutes
08-09시 -> Average 6.2898550725 minutes
09-10시 -> Average 6.2125603865 minutes
10-11시 -> Average 6.1932367150 minutes
11-12시 -> Average 6.1739130435 minutes
12-13시 -> Average 6.1352657005 minutes
13-14시 -> Average 6.0966183575 minutes
14-15시 -> Average 6.0966183575 minutes
15-16시 -> Average 6.0772946860 minutes
16-17시 -> Average 6.0772946860 minutes
17-18시 -> Average 6.0772946860 minutes
18-19시 -> Average 6.0966183575 minutes
19-20시 -> Average 6.2318840580 minutes
20-21시 -> Average 8.2222222222 minutes
21-22시 -> Average 8.6859903382 minutes
22-23시 -> Average 9.9613526570 minutes
23-00시 -> Average 13.7874396135 minutes
00-01시 -> Average 37.3623188406 minutes
01-02시 -> Average 38.0000000000 minutes
02-03시 -> Average 38.0000000000 minutes

Weekends:
03-04시 -> Average 38.0000000000 min