# Step2: Capacity

#### Input：

    od_hourly_zip_poisson_daytype_1.csv

#### Output：

    Minlimit_capacity_daytype_1.csv


This step reads the hourly demand, builds an integer linear programming model subject to a minimum docking-capacity limit, and optimizes the initial number of bikes and the docking capacity of each station.

In [None]:

import pandas as pd
import numpy as np
import pulp
import math

# Wrap as a function: given an OD file path, output result file

def run_model_for_file(input_csv, output_csv):
    """Optimize initial bikes and docking capacity per station from hourly OD demand.

    Reads an origin-destination demand table, builds an integer linear
    program that minimizes total bike and docking cost subject to hourly
    bike/free-slot feasibility and per-station capacity bounds, solves it
    with CBC, and writes the resulting ``x_i`` (initial bikes) and ``y_i``
    (docking capacity) of every station to ``output_csv``.

    Args:
        input_csv: Path to a CSV with the columns ``start_station_id``,
            ``end_station_id``, ``hour`` and ``demand_zip_poisson``.
        output_csv: Path of the CSV to write (columns ``station_id``,
            ``x_i``, ``y_i``).

    Raises:
        ValueError: If the input has no rows or no hour >= 1.
        RuntimeError: If the solver does not return an optimal solution.
    """
    print("Running model: ", input_csv)

    # reading data
    df = pd.read_csv(input_csv)
    if df.empty:
        raise ValueError(f"No demand rows found in {input_csv}")

    # Ensure station IDs are strings
    start_ids = df['start_station_id'].astype(str)
    end_ids = df['end_station_id'].astype(str)

    # Parameters: cost
    r1 = 1       # Cost per bike: 365/365
    r2 = 1.83    # Cost per docking: 90000/27/(365*5)

    MAX_PARK = 30       # Station capacity upper bound
    MIN_PARK = 3        # Station capacity lower bound
    DEMAND_COL = "demand_zip_poisson"    # Use ZIP-predicted demand column

    # Build station index and mapping
    stations = sorted(pd.unique(pd.concat([start_ids, end_ids])))
    N = len(stations)
    station_to_idx = {sid: idx for idx, sid in enumerate(stations)}
    origin_idx = start_ids.map(station_to_idx).to_numpy()
    dest_idx = end_ids.map(station_to_idx).to_numpy()

    # Time periods = natural hours; the horizon is the largest hour present.
    T = int(df['hour'].max())
    if T < 1:
        raise ValueError(f"No hour >= 1 found in {input_csv}")
    print("Use nature hour K =", T)
    print("Total demand =", df[DEMAND_COL].sum())

    # Construct B[i,j,t] (demand from i to j in hour t).
    # Rows whose hour is outside 1..T are ignored.
    # NOTE(review): if the input hours are 0-based, hour-0 demand is dropped
    # here -- confirm against the upstream file.
    B = np.zeros((N, N, T + 1), dtype=int)
    for i, j, hour, demand in zip(origin_idx, dest_idx, df['hour'], df[DEMAND_COL]):
        t = int(hour)
        if 1 <= t <= T:
            B[i, j, t] = int(demand)

    # Compute R[i,j,t] = B[i,j,t - 1] for every t >= 2: bikes that left i for
    # j in one hour arrive at j in the next one.
    R = np.zeros_like(B)
    R[:, :, 2:] = B[:, :, 1:-1]

    # The demand sums are constants of the model, so aggregate them once:
    # departures[i, t] = sum_j B[i,j,t], arrivals[i, t] = sum_j R[j,i,t].
    departures = B.sum(axis=1)
    arrivals = R.sum(axis=0)

    # Build model
    model = pulp.LpProblem("Bike_Planning", pulp.LpMinimize)

    # x_i = initial bikes
    # y_i = total parking capacity
    xi = {i: pulp.LpVariable(f"x_{i}", lowBound=0, cat="Integer") for i in range(N)}
    yi = {i: pulp.LpVariable(f"y_{i}", lowBound=0, cat="Integer") for i in range(N)}

    # Objective: minimize total bike and parking costs
    model += pulp.lpSum([r1 * xi[i] + r2 * yi[i] for i in range(N)])

    # State variables for recursion
    Vi = {}
    Li = {}
    A = {}
    LA = {}

    for i in range(N):
        for t in range(1, T + 1):
            Vi[(i, t)] = pulp.LpVariable(f"V_{i}_{t}", lowBound=0, cat="Integer")
            Li[(i, t)] = pulp.LpVariable(f"L_{i}_{t}", lowBound=0, cat="Integer")
            A[(i, t)] = pulp.LpVariable(f"A_{i}_{t}", lowBound=0, cat="Integer")
            LA[(i, t)] = pulp.LpVariable(f"LA_{i}_{t}", lowBound=0, cat="Integer")

    # Constraints (core model)

    # 1) Initial inventory: V_{i,1} = x_i
    for i in range(N):
        model += Vi[(i, 1)] == xi[i]

    # 2) Inventory V recursion over time
    for i in range(N):
        for t in range(2, T + 1):
            model += Vi[(i, t)] == (
                Vi[(i, t - 1)]
                + int(arrivals[i, t - 1])
                - int(departures[i, t - 1])
            )

    # Constraints 3-6 must hold in EVERY hour, so each one is indexed by the
    # loop variable t itself.
    for i in range(N):
        for t in range(1, T + 1):
            out_t = int(departures[i, t])
            in_t = int(arrivals[i, t])

            # 3) Available bikes A = inventory V + arrivals
            model += A[(i, t)] == Vi[(i, t)] + in_t

            # 4) Free slots L_i,t = y_i - V_i,t
            model += Li[(i, t)] == yi[i] - Vi[(i, t)]

            # 5) LA = free slots + departures
            model += LA[(i, t)] == Li[(i, t)] + out_t

            # 6) Feasibility constraints: enough bikes and free slots
            model += A[(i, t)] >= out_t
            model += LA[(i, t)] >= in_t

    # 7) Initial bikes cannot exceed capacity: x_i <= y_i
    for i in range(N):
        model += xi[i] <= yi[i]

    # 8) Capacity bounds
    for i in range(N):
        model += yi[i] <= MAX_PARK
        model += yi[i] >= MIN_PARK

    # Solve the model
    solver = pulp.PULP_CBC_CMD(msg=False)
    model.solve(solver)

    # Only export a solution the solver reports as optimal; otherwise the
    # variable values are not a valid plan.
    status = pulp.LpStatus[model.status]
    if model.status != pulp.LpStatusOptimal:
        print("Status:", status)
        raise RuntimeError(f"Solver finished with status '{status}' for {input_csv}")

    # Solver values come back as floats (e.g. 2.9999999); round instead of
    # truncating so they map to the intended integers.
    x_opt = [int(round(pulp.value(xi[i]))) for i in range(N)]
    y_opt = [int(round(pulp.value(yi[i]))) for i in range(N)]

    print("Total bikes (sum of x_i) =", sum(x_opt))
    print("Total dockings (sum of y_i) =", sum(y_opt))
    print("Status:", status)
    print("Objective =", pulp.value(model.objective))

    # Export results
    res = pd.DataFrame({
        "station_id": stations,
        "x_i": x_opt,
        "y_i": y_opt
    })
    res.to_csv(output_csv, index=False)
    print(res.head())
    print("Output: ", output_csv)


# Run the model for day_type  1

# Input: hourly OD demand for day type 1; output: optimized capacities.
run_model_for_file(
    input_csv="od_hourly_zip_poisson_daytype_1.csv",
    output_csv="Minlimit_capacity_daytype_1.csv",
)

Running model:  od_hourly_zip_poisson_daytype_1.csv
Use nature hour K = 23
Total demand = 355
Total bikes (sum of x_i) = 225
Total dockings (sum of y_i) = 722
Status: Optimal
Objective = 1546.260000000001
  station_id  x_i  y_i
0       1017    1    3
1       1018    0    3
2       1019    6    7
3       1024    4    6
4       1025    9    9
Output:  Minlimit_capacity_daytype_1.csv
