In [2]:
import geopy.distance
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
from itertools import combinations, permutations
import folium
import csv

In [3]:
def distance(station1, station2, stationsDF):

    #Measures how far apart two potential station locations are
    #Inputs:
    #     station1 (string): the CCC-TTTTTT-BBBB (county, track, block) unique id of the first location; must be a row label of stationsDF
    #     station2 (string): the CCC-TTTTTT-BBBB unique id of the second location; must be a row label of stationsDF
    #     stationsDF (pandas DataFrame): station data indexed by id, with 'Latitude' and 'Longitude' columns
    #Outputs:
    #     (float): the distance between the two locations in miles, as computed by geopy.distance.distance

    #build one (latitude, longitude) pair per station, then let geopy measure between them
    origin = (stationsDF.at[station1, 'Latitude'], stationsDF.at[station1, 'Longitude'])
    destination = (stationsDF.at[station2, 'Latitude'], stationsDF.at[station2, 'Longitude'])
    return geopy.distance.distance(origin, destination).mi

In [4]:
#=====================================================
#City Subway Network Optimization - Point Selection
#=====================================================

def point_selection(all_stations, nearbyList, num_stations, mandatory_points):

    #Selects the optimal set of stations, maximizing the city population within a half-mile radius of a station
    #Inputs:
    #     all_stations (pandas DataFrame): the DataFrame containing information for all potential stations (internal points of census blocks).
    #          The columns "Name", "Latitude", "Longitude" and "Nearby_Population" are read.
    #     nearbyList (list of lists of ints, those ints being 0 or 1):
    #          The big list contains n lists, where n is the number of potential stations.
    #          Each smaller list contains n integers, all being either 0 or 1.
    #          Each integer represents whether the station at that index is too close (1) to the station represented by the list, or
    #               whether it is sufficiently far away (0). An entry on the diagonal (a station compared with itself) is ignored.
    #     num_stations (int): the number of stations that the model must place
    #     mandatory_points (list of tuples of 2 floats): latitude and longitude coordinates for all points that must be within a half-mile
    #          of a station, or if that is not possible, have a station placed at the absolute closest possible location
    #Outputs:
    #     points_selected (list of strings): the CCC-TTTTTT-BBBB id of all stations placed
    #     people_reached (int): the objective value, i.e. the summed "Nearby_Population" of the stations placed
    #     If the solver does not finish with an optimal solution (for example, the model is infeasible), ([], 0) is returned.

    half_mile = 0.5

    #model creation
    point_selection_mod = gp.Model("City Subway Point Selection")

    #pulling each needed column out once, rather than re-slicing the DataFrame row by row
    station_names = all_stations["Name"].tolist()
    nearby_populations = all_stations["Nearby_Population"].tolist()
    station_coordinates = list(zip(all_stations["Latitude"].tolist(), all_stations["Longitude"].tolist()))

    #one binary variable per potential station; its objective coefficient is the number of people reached (within a half-mile radius)
    points = [point_selection_mod.addVar(obj = population, vtype = "B", name = name)
              for name, population in zip(station_names, nearby_populations)]

    #the model may only distribute the parameter-defined number of stations
    point_selection_mod.addConstr(gp.quicksum(points) == num_stations)

    #mandating that each (lat, long) tuple in mandatory_points is within half a mile of a selected station
    for mandatory_point in mandatory_points:
        target = (mandatory_point[0], mandatory_point[1])
        milesFromMandatoryPoint = [geopy.distance.distance(target, coordinates).mi for coordinates in station_coordinates]
        if not milesFromMandatoryPoint:
            raise ValueError("all_stations is empty, so no station can serve the mandatory points")

        #the blocks within a half-mile radius of the mandatory point
        eligible = [index for index, miles in enumerate(milesFromMandatoryPoint) if miles <= half_mile]

        #if no internal point of a census block is within a half-mile of a mandatory point, choose the single nearest census block
        if not eligible:
            eligible = [milesFromMandatoryPoint.index(min(milesFromMandatoryPoint))]

        #at least one of the eligible stations must be selected, so that the mandatory point is covered
        point_selection_mod.addConstr(gp.quicksum(points[index] for index in eligible) >= 1)

    #no two stations flagged as too close to each other may both be selected.
    #Each conflicting pair gets one linear constraint (x_i + x_j <= 1); a pair is conflicting if either row flags it.
    conflicting_pairs = set()
    for first, row in enumerate(nearbyList):
        for second, too_close in enumerate(row):
            if too_close and first != second:
                conflicting_pairs.add((min(first, second), max(first, second)))
    for first, second in sorted(conflicting_pairs):
        point_selection_mod.addConstr(points[first] + points[second] <= 1)

    #maximize population reached
    point_selection_mod.ModelSense = gp.GRB.MAXIMIZE
    point_selection_mod.optimize()

    #without an optimal solution there is nothing to report
    if point_selection_mod.Status != gp.GRB.OPTIMAL:
        return [], 0

    #a selected binary may come back as e.g. 0.9999999, so compare against 0.5 instead of testing for exactly 1
    points_selected = [point.VarName for point in points if point.X > 0.5]

    #rounding for the same reason: the objective may sit a hair below the true integer total
    people_reached = int(round(point_selection_mod.ObjVal))

    return points_selected, people_reached


In [5]:
#=====================================================
#City Subway Network Optimization - Station Mapping
#=====================================================

def station_mapping(points_selected, all_stations):

    #Produces data relevant to the route optimization process
    #Inputs:
    #     points_selected (list of strings): contains the CCC-TTTTTT-BBBB ids of all the points to be mapped; each must be a row label of all_stations
    #     all_stations (pandas DataFrame): the DataFrame containing information for all potential stations (internal points of census blocks),
    #          indexed by id; only the "Latitude" and "Longitude" columns are read
    #Outputs:
    #     station_indexes (pandas Index): the ids in points_selected, as the index of points_selected_df
    #     points_selected_df (pandas DataFrame): "Name", "Latitude" and "Longitude" for all the selected points, indexed by id (index named 'ID')
    #     distances (dictionary: keys are tuples of form (string, string), values are floats): the distance for every ordered pair of selected points

    selected_ids = list(points_selected)

    #reading the coordinates of all selected stations in one label-based lookup
    selected_coordinates = all_stations.loc[selected_ids, ["Latitude", "Longitude"]]

    #Creating the pandas dataframe to run the route optimization from
    stationsDoL = {"Name": selected_ids,
                   "Latitude": selected_coordinates["Latitude"].tolist(),
                   "Longitude": selected_coordinates["Longitude"].tolist()}
    points_selected_df = pd.DataFrame(stationsDoL, index = selected_ids).rename_axis('ID')

    station_indexes = points_selected_df.index

    #getting the distance of all possible station-to-station links (both directions of every pair)
    distances = {(station1, station2): distance(station1, station2, points_selected_df)
                 for station1, station2 in permutations(station_indexes, 2)}

    return station_indexes, points_selected_df, distances

In [6]:
#=====================================================
#City Subway Network Optimization - Subtour Elimination
#=====================================================

#Create subtour elimination constraint function (this code chunk originates from Dr. Anthony Bonifonte at Denison University)
#These two functions allow the route optimization function to eliminate subtours, which are isolated sub-loops that do not connect to the rest of the route

## Calculate shortest subtour for subtour elimination constraints
def subtour(edges):
    #Finds the shortest chain of nodes that can be walked along the given edges
    #Inputs:
    #     edges: a collection of (from, to) pairs that also offers select(from, '*') returning the pairs leaving a node
    #          (the callback below passes a gurobipy tuplelist)
    #Outputs:
    #     (list): the nodes of the shortest walk found, in visiting order; when several walks tie, the one found last is kept
    all_nodes = set(endpoint for edge in edges for endpoint in edge)
    remaining = list(all_nodes)
    shortest = list(all_nodes)  # starting bound: nothing can be longer than every node
    while remaining:
        walk = []
        here = remaining[0]  # start a new walk at the first node not yet visited
        while True:
            walk.append(here)
            remaining.remove(here)
            # follow the first outgoing edge that leads to a node not yet visited
            onward = [dest for _, dest in edges.select(here, '*') if dest in remaining]
            if not onward:
                break
            here = onward[0]
        if len(walk) <= len(shortest):
            shortest = walk
    return shortest

## Subtour elimination function
def subtourelim(model, where):
    #Callback handed to optimize(): whenever a new integer solution is found, checks it for a subtour and, if there is one,
    #adds a lazy constraint that forbids it
    #Inputs:
    #     model: the model being solved; its edge variables are read from model._vars (keyed by (from, to) pairs)
    #     where: the callback code supplied by the solver
    if where != GRB.Callback.MIPSOL:
        return

    # the edges switched on in the candidate solution
    solution = model.cbGetSolution(model._vars)
    chosen = gp.tuplelist((src, dst) for src, dst in model._vars.keys()
                          if solution[src, dst] > 0.5)

    shortest = subtour(chosen)
    # len(chosen) is the total number of edges; a shorter walk means the solution is split into separate loops
    if len(shortest) < len(chosen):
        edges_inside = gp.quicksum(model._vars[src, dst] for src, dst in permutations(shortest, 2))
        model.cbLazy(edges_inside <= len(shortest) - 1)

In [7]:
#=====================================================
#City Subway Network Optimization - Route Optimization
#=====================================================

#This code chunk is adapted from work produced by Dr. Anthony Bonifonte at Denison University

def routeOptimization(station_indexes, points_selected_df, distances):

    #Finds the optimal route that connects all selected points in a continuous, non-repeating loop while minimizing total track mileage
    #Inputs:
    #     station_indexes (pandas Index or other iterable): the CCC-TTTTTT-BBBB ids for all selected stations
    #     points_selected_df (pandas DataFrame): indexed by station id, with 'Name', 'Latitude' and 'Longitude' columns for all the selected points
    #     distances (dictionary: keys are tuples of form (string, string), values are floats): the distance for every ordered pair of selected points
    #Outputs:
    #     map (folium Map object): the visual map of the optimized route, zoomed to the plotted stations
    #     total_track_mileage (float): the total mileage of the complete, optimized route
    #Note: with only one or two stations the constraints below cannot all hold, so the model has no solution to read back.

    station_model = gp.Model("Subway Route Design")

    # Variables: is station 'i' immediately followed by station 'j' on the route?
    edge_vars = station_model.addVars(distances.keys(), obj=distances, vtype=GRB.BINARY, name='Station Pairs')

    # Constraints: exactly one outgoing and one incoming edge for each station
    station_model.addConstrs(edge_vars.sum(m, '*') == 1 for m in station_indexes)
    station_model.addConstrs(edge_vars.sum('*', m) == 1 for m in station_indexes)

    # Rule out two-station loops up front: a pair may be linked in at most one direction (one constraint per unordered pair)
    station_model.addConstrs(edge_vars[(m1, m2)] + edge_vars[(m2, m1)] <= 1 for m1, m2 in combinations(station_indexes, 2))

    # Longer subtours are removed lazily by the callback, which reads the variables from model._vars
    station_model._vars = edge_vars
    station_model.Params.lazyConstraints = 1
    station_model.optimize(subtourelim)

    vals = station_model.getAttr('x', edge_vars)
    #specifying selected edges, relative to distances dictionary, which contained all possible options
    selected = gp.tuplelist((i, j) for i, j in vals.keys() if vals[i, j] > 0.5)

    #Visual map creation
    route_map = folium.Map(location=[40, -95], zoom_start=4)
    for idx, station in points_selected_df.iterrows():
        folium.Marker([station['Latitude'], station['Longitude']],
                      popup=station['Name'],
                      icon=folium.Icon(color='blue')).add_to(route_map)

    #Visual addition of the selected edges
    for start_id, end_id in selected:
        folium.PolyLine([[points_selected_df.at[start_id, 'Latitude'], points_selected_df.at[start_id, 'Longitude']],
                         [points_selected_df.at[end_id, 'Latitude'], points_selected_df.at[end_id, 'Longitude']]]).add_to(route_map)

    #zoom the view to the stations instead of leaving it on the default starting location
    if len(points_selected_df) > 0:
        latitudes = points_selected_df['Latitude']
        longitudes = points_selected_df['Longitude']
        route_map.fit_bounds([[float(latitudes.min()), float(longitudes.min())],
                              [float(latitudes.max()), float(longitudes.max())]])

    total_track_mileage = float(station_model.ObjVal)

    return route_map, total_track_mileage

In [8]:
#=====================================================
#City Subway Network Optimization - Validation
#=====================================================

def existingStationPopulations(stationsCoordinates, blocksDF):

    #checks how many people live within a half-mile of a station in a city with existing stations on a real rail transit line
    #Inputs:
    #     stationsCoordinates (list of tuples of form (float, float)): the latitude and longitude coordinates for all real-world stations
    #     blocksDF (pandas DataFrame): one row per census block; the "Latitude", "Longitude" and "Block_Population" columns are read
    #Outputs:
    #     population (int): the summed "Block_Population" of every block within a half-mile of a station's home block,
    #          where the home block is the block nearest to that station; each block is counted once. 0 if blocksDF has no rows.

    half_mile = 0.5

    #pulling the coordinates out once, rather than re-slicing the DataFrame inside the loops
    block_points = list(zip(blocksDF["Latitude"].tolist(), blocksDF["Longitude"].tolist()))
    if not block_points:
        return 0

    #row positions of every block reachable from at least one station
    accessible_rows = set()

    for station in stationsCoordinates:
        station_point = (station[0], station[1])

        #find the nearest census block to the current station; that block stands in for the station
        miles_from_station = [geopy.distance.distance(station_point, block_point).mi for block_point in block_points]
        home_point = block_points[miles_from_station.index(min(miles_from_station))]

        #add all blocks within a half-mile of the home block to the set of accessible blocks
        for row_number, block_point in enumerate(block_points):
            if row_number in accessible_rows:
                continue
            #.mi is required here: comparing the geopy distance object itself to a number compares kilometers
            if geopy.distance.distance(home_point, block_point).mi <= half_mile:
                accessible_rows.add(row_number)

    #getting the sum of populations in all accessible blocks
    block_populations = blocksDF["Block_Population"].tolist()
    population = sum(int(block_populations[row_number]) for row_number in accessible_rows)

    return population