In [1]:
import geopy.distance
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
from itertools import combinations, permutations
import folium
import csv

In [3]:
#=====================================================
#City Subway Network Optimization - Data Reading
#=====================================================

def dataReading(censusFileName, countySplitText, countyCode,
               geodataFileName):
    """Load the census blocks shared by a county census file and a state geodata file.

    Parameters
    ----------
    censusFileName : str
        Path to the US Census block-population CSV. Row 0 holds one label per
        block (containing "Block NNNN" and "Tract T" or "Tract T.TT"); row 1
        holds the populations, preceded by two leading cells that are skipped.
    countySplitText : str
        Text that ends every block label in row 0; the header row is split on it.
    countyCode : str
        County code used as the first component of every unique id, so that
        the ids match the ones derived from the state geodata.
    geodataFileName : str
        Path to the state geodata CSV. It must contain the quoted columns
        "Land/Water Block Type", "Geographic Identifier",
        "Internal Point Latitude" and "Internal Point Longitude".

    Returns
    -------
    tuple of four parallel lists
        ids (str, "CCC-TTTTTT-BBBB"), block_populations (int),
        latitudes (float) and longitudes (float) for every block present in
        both files. Land-only blocks are preferred; mixed land/water blocks
        are used only when no land-only block has the same id.
    """

    #Reading data downloaded from the US Census website for all census blocks in the county.
    #The whole file is read inside a `with` block so the handle is closed even if parsing fails.
    with open(censusFileName, 'r') as cityCensusBlockPopulations:
        censusRows = cityCensusBlockPopulations.read().split("\n")

    headers = censusRows[0].split(countySplitText)

    #the piece after the final countySplitText is not a block label, so it is dropped
    headers.pop()

    #The first two cells of the population row are "Total" and the value for the entire area, which are not needed.
    #NOTE(review): the row is split on "," so a population written with a thousands separator would be split too.
    cityBlockPopulations = censusRows[1].split(",")[2:]

    #Mapping each unique tract-block identifier to the position of its population.
    #setdefault keeps the first position when an identifier repeats.
    cityIdToIndex = {}
    for index, label in enumerate(headers):
        blockStart = label.index("Block") + 6
        block = label[blockStart:blockStart + 4]
        tract = label[label.index("Tract") + 6:]

        #some tracts are represented as whole numbers - they should have two trailing zeros,
        #which are the digits that come after the decimal point
        if "." not in tract:
            tract = tract + "00"
        else: #remove the (first) decimal point
            tract = tract.replace(".", "", 1)

        #adding leading zeroes when necessary for consistent formatting with the geodata ids
        tract = tract.rjust(6, "0")

        cityIdToIndex.setdefault(countyCode + "-" + tract + "-" + block, index)

    #================================================================================================================================

    #Reading in the geodata for the entire state
    with open(geodataFileName, 'r') as stateGeodata:
        stateRows = stateGeodata.read().split("\n")

    #Only genuinely empty trailing rows are dropped, so a file that does not end
    #with a newline no longer loses its last block.
    while stateRows and stateRows[-1].strip() == "":
        stateRows.pop()

    headers = stateRows[0].split(",")
    blockTypeIndex = headers.index('"Land/Water Block Type"')
    idIndex = headers.index('"Geographic Identifier"')
    internal_point_latitude_index = headers.index('"Internal Point Latitude"')
    internal_point_longitude_index = headers.index('"Internal Point Longitude"')

    #Some blocks share a tract/block id but have a different block type (land, water, both).
    #We want land when possible and land+water as a backup; any other type is ignored.
    #Dicts keep insertion order, and setdefault keeps the first row seen for an id.
    landCoordinates = {}
    landAndWaterCoordinates = {}

    for row in stateRows[1:]:
        rowData = row.split(",")

        #the identifier cell ends with a closing quote; the 13 characters before it are county(3) + tract(6) + block(4)
        geoId = rowData[idIndex]
        unique_id = geoId[-14:-11] + "-" + geoId[-11:-5] + "-" + geoId[-5:-1]
        coordinates = (rowData[internal_point_latitude_index],
                       rowData[internal_point_longitude_index])

        blockType = rowData[blockTypeIndex]
        if blockType == '"L"':
            landCoordinates.setdefault(unique_id, coordinates)
        elif blockType == '"B"':
            landAndWaterCoordinates.setdefault(unique_id, coordinates)

    #adding the mixed land/water blocks after the land ones, only when the id is not already present
    stateCoordinates = landCoordinates
    for unique_id, coordinates in landAndWaterCoordinates.items():
        stateCoordinates.setdefault(unique_id, coordinates)

    #assembling matching lists:
    #ids: unique ids in the form CCC-TTTTTT-BBBB (county, tract, block), all shared between the city and state data
    #block_populations: the populations for all unique blocks in the same order as ids
    #latitudes / longitudes: the coordinates of the census block internal points
    ids = []
    block_populations = []
    latitudes = []
    longitudes = []

    for unique_id, (latitude, longitude) in stateCoordinates.items():
        popIndex = cityIdToIndex.get(unique_id)
        if popIndex is None:
            continue

        ids.append(unique_id)
        latitudes.append(float(latitude))
        longitudes.append(float(longitude))

        #Extracting the numeric value from the cell by dropping any quote characters
        numbers = cityBlockPopulations[popIndex].replace("'", "").replace('"', "")
        if len(numbers) == 0:
            numbers = "0"

        if numbers.isdigit():
            block_populations.append(int(numbers))
        else:
            block_populations.append(int(float(numbers)))

    return ids, block_populations, latitudes, longitudes
    

In [4]:
#Function to calculate distance between stations
def distance(station1, station2, stationsDF):
    """Return the distance in miles between two stations.

    station1 and station2 are index labels of stationsDF, which must have
    'Latitude' and 'Longitude' columns.
    """
    origin = (stationsDF.at[station1, 'Latitude'], stationsDF.at[station1, 'Longitude'])
    destination = (stationsDF.at[station2, 'Latitude'], stationsDF.at[station2, 'Longitude'])
    return geopy.distance.distance(origin, destination).mi

In [5]:
#=====================================================
#City Subway Network Optimization - Creation of All-Stations DataFrame
#=====================================================

def all_stations_creation(ids, block_populations, latitudes, longitudes):
    """Build the DataFrame of all potential station locations.

    Parameters
    ----------
    ids, block_populations, latitudes, longitudes : parallel lists
        One entry per census block, as returned by dataReading.

    Returns
    -------
    pandas.DataFrame
        Indexed by block id (index named 'ID') with columns 'Name',
        'Latitude', 'Longitude', 'Block_Population' and 'Nearby_Population'.
        'Nearby_Population' is the block's own population plus the population
        of every other block whose internal point is within half a mile.
    """
    #Creating all_stations, a pandas dataframe that contains data for all potential station locations
    all_stations = pd.DataFrame({"Name": ids,
                                 "Latitude": latitudes,
                                 "Longitude": longitudes,
                                 "Block_Population": block_populations},
                                index = ids)
    all_stations = all_stations.rename_axis('ID')

    block_count = len(all_stations)
    coordinates = list(zip(latitudes, longitudes))

    #Every block is within range of itself, so it starts with its own population
    nearby_populations = [block_populations[index] for index in range(block_count)]

    #The distance between two blocks is the same in both directions, so each
    #pair is measured once and credited to both blocks.
    for index1 in range(block_count):
        for index2 in range(index1 + 1, block_count):
            miles = geopy.distance.distance(coordinates[index1], coordinates[index2]).mi

            if miles <= 0.5:
                nearby_populations[index1] += block_populations[index2]
                nearby_populations[index2] += block_populations[index1]

    all_stations["Nearby_Population"] = nearby_populations

    return all_stations

In [6]:
#=====================================================
#City Subway Network Optimization - Station Proximity Constraint
#=====================================================

#Creating a list of lists: nearbyList
#This LoL contains n lists of length n, where n is the number of potential station points.
#nearbyList[i][j] is 1 if point j is within a mile of point i, and 0 if it is not.
#nearbyList[x][x] is always 0, so that a point is never counted as its own neighbor by the proximity constraint in the optimization model
def nearbyListCreation(stations):
    """Build the n x n 0/1 proximity matrix for the potential station points.

    Parameters
    ----------
    stations : pandas.DataFrame
        Must have 'Latitude' and 'Longitude' columns; row order defines the
        matrix order.

    Returns
    -------
    list of lists of int
        nearbyList[i][j] is 1 when points i and j are different rows and at
        most one mile apart, otherwise 0. The diagonal is always 0.
    """
    #Reading the coordinates once instead of doing a DataFrame lookup inside the double loop
    coordinates = list(zip(stations["Latitude"].tolist(), stations["Longitude"].tolist()))
    station_count = len(coordinates)

    nearbyList = [[0] * station_count for _ in range(station_count)]

    #The distance is the same in both directions, so each pair is measured once
    for i in range(station_count):
        for j in range(i + 1, station_count):
            if geopy.distance.distance(coordinates[i], coordinates[j]).mi <= 1:
                nearbyList[i][j] = 1
                nearbyList[j][i] = 1

    return nearbyList

In [7]:
#=====================================================
#City Subway Network Optimization - Point Selection
#=====================================================
def point_selection(all_stations, nearbyList, num_stations, mandatory_points):
    """Choose the station locations that maximize the population reached.

    Parameters
    ----------
    all_stations : pandas.DataFrame
        Candidate locations with 'Name', 'Latitude', 'Longitude' and
        'Nearby_Population' columns.
    nearbyList : list of lists of int
        0/1 matrix in the same order as all_stations; a 1 at [i][j] means
        candidates i and j are too close to both be selected.
    num_stations : int
        Exact number of stations to select.
    mandatory_points : iterable of (latitude, longitude)
        Points that must each have a selected station among the candidates
        within half a mile (or, if there are none, the single nearest candidate).

    Returns
    -------
    (points_selected, objective)
        points_selected is the list of selected candidate names (empty unless
        the solve ended with an optimal solution); objective is the model's
        objective value, or None when the solver found no solution.
    """

    #model creation
    point_selection_mod = gp.Model("City Subway Point Selection")

    #Reading the columns once rather than doing a DataFrame lookup per candidate
    names = all_stations["Name"].tolist()
    populations_reached = all_stations["Nearby_Population"].tolist()
    coordinates = list(zip(all_stations["Latitude"].tolist(), all_stations["Longitude"].tolist()))
    candidate_count = len(all_stations)

    #creating the point variables; the objective coefficient of each candidate is
    #the number of people reached (within a half-mile radius)
    points = [point_selection_mod.addVar(obj = populations_reached[blockIndex], vtype = "B", name = names[blockIndex])
              for blockIndex in range(candidate_count)]

    #the model must place exactly num_stations stations
    point_selection_mod.addConstr(gp.quicksum(points) == num_stations)

    #mandating that each (lat, long) tuple in mandatory_points is served by a selected station
    for mandatory_point in mandatory_points:
        origin = (mandatory_point[0], mandatory_point[1])
        distancesFromMandatoryPoint = [geopy.distance.distance(origin, coordinates[option]).mi
                                       for option in range(candidate_count)]

        #the candidates within a half mile radius of the mandatory point
        servingOptions = [option for option in range(candidate_count)
                          if distancesFromMandatoryPoint[option] <= 0.5]

        #when no candidate is in range, fall back to the single closest candidate
        if not servingOptions:
            servingOptions = [distancesFromMandatoryPoint.index(min(distancesFromMandatoryPoint))]

        point_selection_mod.addConstr(gp.quicksum(points[option] for option in servingOptions) == 1)

    #No two stations can be within a mile of each other. For binary variables this linear
    #pairwise form admits exactly the same selections as requiring
    #points[i] * sum(nearby points) == 0, without adding quadratic constraints.
    for first in range(len(nearbyList)):
        for second in range(first + 1, len(nearbyList)):
            if nearbyList[first][second] or nearbyList[second][first]:
                point_selection_mod.addConstr(points[first] + points[second] <= 1)

    #maximize population reached (ModelSense is 1 for minimize and -1 for maximize)
    point_selection_mod.ModelSense = gp.GRB.MAXIMIZE
    point_selection_mod.optimize()

    #creating the list of points that were selected
    points_selected = []
    if point_selection_mod.Status == gp.GRB.OPTIMAL:
        for point in points:
            #solver values are floats, so a binary 1 may come back as 0.999999...
            if point.X > 0.5:
                points_selected.append(point.VarName)

    #ObjVal is only available when the solver holds at least one solution
    objective = point_selection_mod.ObjVal if point_selection_mod.SolCount > 0 else None

    return points_selected, objective


In [8]:
#=====================================================
#City Subway Network Optimization - Station Mapping
#=====================================================
def station_mapping(points_selected, all_stations):
    """Build the selected-stations DataFrame and the pairwise distance table.

    Returns a tuple (station_indexes, points_selected_df, distances) where
    points_selected_df is indexed by the selected ids (index named 'ID') with
    'Name', 'Latitude' and 'Longitude' columns, station_indexes is that
    frame's index, and distances maps every ordered pair of distinct
    positions in the index to the distance between the two stations.
    """
    #Looking up the coordinates of every selected point, in selection order
    latitudesColumn, longitudesColumn = [], []
    for chosen_id in points_selected:
        #the row's "Name" value is used as the label for the coordinate lookup
        row_label = all_stations.loc[chosen_id].loc["Name"]
        station_row = all_stations.loc[row_label]
        latitudesColumn.append(station_row.loc["Latitude"])
        longitudesColumn.append(station_row.loc["Longitude"])

    #Creating the pandas dataframe to run the route optimization from
    points_selected_df = pd.DataFrame(
        {"Name": points_selected,
         "Latitude": latitudesColumn,
         "Longitude": longitudesColumn},
        index = points_selected,
    ).rename_axis('ID')

    station_indexes = points_selected_df.index

    #One entry per ordered pair of stations
    distances = {}
    for station1, station2 in permutations(station_indexes, 2):
        distances[(station1, station2)] = distance(station1, station2, points_selected_df)

    return station_indexes, points_selected_df, distances

In [9]:
#=====================================================
#City Subway Network Optimization - Subtour Elimination
#=====================================================

#Create subtour elimination constraint function (this code chunk originates from Dr. Anthony Bonifonte at Denison University)

## Calculate shortest subtour for subtour elimination constraints
def subtour(edges):
    """Return the nodes of the shortest cycle found among the selected edges.

    edges must be iterable as (from, to) pairs and provide
    select(node, '*') returning the pairs that leave `node`. When two cycles
    have the same length, the one found later is returned.
    """
    all_nodes = set(node for edge in edges for node in edge)
    remaining = list(all_nodes)
    shortest = list(all_nodes)
    while remaining:  # keep going until every node has been placed in a cycle
        walk = []
        frontier = remaining  # the first step starts from the first node still unvisited
        while frontier:
            here = frontier[0]
            walk.append(here)
            remaining.remove(here)
            # follow the outgoing edges to nodes that have not been visited yet
            frontier = [target for _, target in edges.select(here, '*') if target in remaining]
        if len(walk) <= len(shortest):
            shortest = walk  # new shortest subtour
    return shortest

## Subtour elimination function
def subtourelim(model, where):
    """Gurobi callback that cuts off solutions containing a subtour.

    Acts only when a new integer solution is found (MIPSOL). It reads the
    edge variables stored on model._vars, finds the shortest cycle among the
    selected edges and, if that cycle does not use every selected edge, adds a
    lazy constraint limiting the edges among the cycle's nodes to
    len(cycle) - 1.
    """
    if where != GRB.Callback.MIPSOL:
        return

    # make a list of edges selected in the solution
    vals = model.cbGetSolution(model._vars)
    selected = gp.tuplelist((i, j) for i, j in model._vars.keys()
        if vals[i, j] > 0.5)

    tour = subtour(selected)
    if len(tour) < len(selected): #len(selected) is total number of edges
        model.cbLazy(gp.quicksum(model._vars[i, j] for i, j in permutations(tour, 2)) <= len(tour)-1)

In [13]:
#=====================================================
#City Subway Network Optimization - Route Optimization
#=====================================================

def routeOptimization(station_indexes, points_selected_df, distances):
    """Find the shortest closed route through all selected stations and map it.

    Parameters
    ----------
    station_indexes : iterable
        The station ids (the index of points_selected_df).
    points_selected_df : pandas.DataFrame
        Selected stations with 'Name', 'Latitude' and 'Longitude' columns.
    distances : dict
        Maps each ordered pair (station1, station2) to the distance between them.

    Returns
    -------
    (folium.Map, float)
        A map with one marker per station and one line per selected
        connection, and the objective value (total length) of the route.
    """

    station_model = gp.Model("Subway Route Design")

    # Variables: is station 'i' adjacent to station 'j' on the route?
    edge_vars = station_model.addVars(distances.keys(), obj=distances, vtype=GRB.BINARY, name='Station Pairs')

    # Constraints: one outgoing and one incoming edge for each station
    station_model.addConstrs(edge_vars.sum(m, '*') == 1 for m in station_indexes)
    station_model.addConstrs(edge_vars.sum('*', m) == 1 for m in station_indexes)

    # Forbid two-station subtours. Each unordered pair needs the constraint only once,
    # because vars[a, b] + vars[b, a] is the same expression for either ordering.
    station_model.addConstrs(edge_vars[(m1, m2)] + edge_vars[(m2, m1)] <= 1
                             for m1, m2 in combinations(station_indexes, 2) if m1 != m2)

    # Longer subtours are removed lazily by the callback, which reads model._vars
    station_model._vars = edge_vars
    station_model.Params.lazyConstraints = 1
    station_model.optimize(subtourelim)

    vals = station_model.getAttr('x', edge_vars)
    selected = gp.tuplelist((i, j) for i, j in vals.keys() if vals[i, j] > 0.5)

    # NOTE(review): the initial view is a fixed country-wide location and zoom rather than
    # one derived from the station coordinates - confirm this is intended.
    route_map = folium.Map(location=[40, -95], zoom_start=4)
    for idx, station in points_selected_df.iterrows():
        folium.Marker([station['Latitude'],station['Longitude']],
                                popup=station['Name'],
                                 icon=folium.Icon(color= 'blue')).add_to(route_map)

    for loc in selected:
        folium.PolyLine([[points_selected_df.at[loc[0],'Latitude'],points_selected_df.at[loc[0],'Longitude']],
                         [points_selected_df.at[loc[1],'Latitude'],points_selected_df.at[loc[1],'Longitude']]]).add_to(route_map)

    return route_map, station_model.ObjVal

In [30]:
#=====================================================
#City Subway Network Optimization - Validation
#=====================================================

def existingStationPopulations(stationsCoordinates, stationsDF):
    """Total the population reached by a set of existing stations.

    Parameters
    ----------
    stationsCoordinates : iterable of (latitude, longitude)
        Locations of the existing stations.
    stationsDF : pandas.DataFrame
        Census blocks with 'Latitude', 'Longitude' and 'Nearby_Population'
        columns.

    Returns
    -------
    The sum, over all stations, of the 'Nearby_Population' of the census block
    closest to each station. Raises ValueError if stationsDF is empty and at
    least one station is given.
    """
    #Everything is read from the stationsDF argument (not from a notebook-level variable),
    #so the function works on whichever frame the caller passes in.
    block_coordinates = list(zip(stationsDF["Latitude"].tolist(), stationsDF["Longitude"].tolist()))
    nearby_populations = stationsDF["Nearby_Population"].tolist()

    population = 0
    for station in stationsCoordinates:
        origin = (station[0], station[1])

        #find the nearest census block
        distances = [geopy.distance.distance(origin, block).mi for block in block_coordinates]
        nearestIndex = distances.index(min(distances))

        #adding the nearby population of the closest census block to population
        population += nearby_populations[nearestIndex]

    return population