In [None]:
import geopy.distance
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
from itertools import combinations, permutations
import folium
import csv

In [None]:
#=====================================================
#City Subway Network Optimization - Data Reading
#=====================================================

def _parse_census_ids(header_row, countySplitText, countyCode):
    """Build "CCC-TTTTTT-BBBB" ids from the header row of the Census population file."""
    labels = header_row.split(countySplitText)

    #the piece after the final county suffix is not a block label, so it is dropped
    labels.pop()

    city_ids = []
    for label in labels:
        #the four characters following the first "Block " are the block number
        block_start = label.index("Block") + 6
        block = label[block_start:block_start + 4]

        #everything following "Tract " is the tract number
        tract = label[label.index("Tract") + 6:]

        #whole-number tracts get the two digits that would follow the decimal point; otherwise the decimal point itself is removed
        if "." in tract:
            tract = tract.replace(".", "", 1)
        else:
            tract += "00"

        #left-padding with zeroes to six characters for consistent formatting with the geodata ids
        #Tract-Block is not unique on its own, so the county FIPS code is included as well
        city_ids.append(countyCode + "-" + tract.rjust(6, "0") + "-" + block)

    return city_ids


def _parse_population(cell):
    """Convert one raw population cell (possibly wrapped in stray quotation marks) to an int."""
    digits = cell.replace("'", "").replace('"', "")

    #an empty cell counts as a population of zero
    if not digits:
        return 0
    if digits.isdigit():
        return int(digits)

    #anything else (e.g. "12.0") goes through float first
    return int(float(digits))


def dataReading(censusFileName, countySplitText, countyCode, geodataFileName):
    """Use a Census block population file and a geodata file to create parallel lists of block data.

    The Census file is read as text: its first line holds the block labels (each ending in
    countySplitText) and its second line holds the comma-separated populations, whose first two
    cells are skipped. The geodata file is read as comma-separated text with the quoted headers
    "Land/Water Block Type", "Geographic Identifier", "Internal Point Latitude" and
    "Internal Point Longitude"; its last line is expected to be empty.

    Inputs:
         censusFileName (string): the name of the local file containing US Census block populations for the city in question
         countySplitText (string): the phrase contained in the headers of the relevant data in the Census file that looks like ", X County, State"
         countyCode (string): the FIPS county code for the county of the state that the city is in
         geodataFileName (string): the name of the local file containing geodata that encompasses the city in question
    Outputs:
         ids (list of strings): unique ids (in form "CCC-TTTTTT-BBBB" where C = County FIPS code, T = Tract, and B = Block)
              present in both files, land-only blocks first and then land+water blocks, each in geodata file order
         block_populations (list of integers): populations of those blocks, in the same order as ids
         latitudes (list of floats): internal point latitudes of those blocks, in the same order as ids
         longitudes (list of floats): internal point longitudes of those blocks, in the same order as ids
    """

    #Reading the Census data; the context manager closes the file even if parsing fails later
    with open(censusFileName, 'r') as census_file:
        census_rows = census_file.read().split("\n")

    city_ids = _parse_census_ids(census_rows[0], countySplitText, countyCode)

    #the first two cells are the row label and the total for the whole area, which are not needed
    population_cells = census_rows[1].split(",")[2:]

    #position of the FIRST occurrence of every city id, so each lookup is constant time
    population_index = {}
    for index, block_id in enumerate(city_ids):
        population_index.setdefault(block_id, index)

    #================================================================================================================================

    #Reading in the geodata (usually for the entire state, at least for an area that fully encompasses the city metropolitan area)
    with open(geodataFileName, 'r') as geodata_file:
        geo_rows = geodata_file.read().split("\n")

    geo_rows.pop() #the last row is just empty and indexing later would be an issue if it stayed
    geo_headers = geo_rows[0].split(",")
    type_col = geo_headers.index('"Land/Water Block Type"')
    id_col = geo_headers.index('"Geographic Identifier"')
    lat_col = geo_headers.index('"Internal Point Latitude"')
    lon_col = geo_headers.index('"Internal Point Longitude"')

    #some blocks share a county/tract/block id but differ in block type. Land ("L") is preferred and
    #land+water ("B") is the backup; every other type is skipped
    land_points = []
    mixed_points = []
    for row in geo_rows[1:]:
        cells = row.split(",")

        #the identifier ends with a closing quotation mark preceded by 3 county, 6 tract and 4 block digits
        geoid = cells[id_col]
        point = (geoid[-14:-11] + "-" + geoid[-11:-5] + "-" + geoid[-5:-1],
                 cells[lat_col],
                 cells[lon_col])

        block_type = cells[type_col]
        if block_type == '"L"':
            land_points.append(point)
        elif block_type == '"B"':
            mixed_points.append(point)

    #assembling the parallel lists: land blocks are visited first so that they win over
    #land+water blocks with the same id, and only the first row seen for an id is kept
    ids = []
    block_populations = []
    latitudes = []
    longitudes = []
    seen = set()

    for points in (land_points, mixed_points):
        for block_id, latitude, longitude in points:
            if block_id in seen or block_id not in population_index:
                continue

            seen.add(block_id)
            ids.append(block_id)
            latitudes.append(float(latitude))
            longitudes.append(float(longitude))
            block_populations.append(_parse_population(population_cells[population_index[block_id]]))

    return ids, block_populations, latitudes, longitudes

In [None]:
#=====================================================
#City Subway Network Optimization - Creation of All-Stations DataFrame
#=====================================================

def all_stations_creation(ids, block_populations, latitudes, longitudes):
    """Create all_stations, a pandas DataFrame that contains data for all potential station locations.

    Inputs:
         ids (list of strings): all of the unique ids (in form "CCC-TTTTTT-BBBB" where C = County FIPS code, T = Tract, and B = Block) for the blocks in the city
         block_populations (list of integers): populations of all blocks in the city
         latitudes (list of floats): latitude values for internal points of all census blocks of the city
         longitudes (list of floats): longitude values for internal points of all census blocks of the city
    Outputs:
         all_stations (pandas DataFrame): indexed by id (index named "ID"), with columns Name, Latitude, Longitude,
              Block_Population and Nearby_Population for all potential stations
    """

    #Dictionary of Lists containing the data to be converted into a DataFrame
    all_stations_DoL = {"Name": ids,
                        "Latitude": latitudes,
                        "Longitude": longitudes,
                        "Block_Population": block_populations}

    all_stations = pd.DataFrame(all_stations_DoL, index = ids).rename_axis('ID')

    #nearby_populations holds, for each block, its own population plus the population of every other
    #block whose internal point lies within a half-mile radius.
    #A block is always within range of itself, so each total starts at the block's own population.
    nearby_populations = list(block_populations)
    points = list(zip(latitudes, longitudes))
    station_count = len(points)

    #the distance between two points does not depend on their order, so each unordered pair is
    #measured once and credited to both blocks
    for index1 in range(station_count):
        for index2 in range(index1 + 1, station_count):

            distance = geopy.distance.distance(points[index1], points[index2]).mi

            if distance <= 0.5:
                nearby_populations[index1] += block_populations[index2]
                nearby_populations[index2] += block_populations[index1]

    all_stations["Nearby_Population"] = nearby_populations

    return all_stations

In [None]:
#=====================================================
#City Subway Network Optimization - Station Proximity Constraint
#=====================================================

def nearbyListCreation(stations):
    """Create the 0/1 proximity matrix used for the constraint that stations cannot be within a mile of each other.

    Inputs:
         stations (pandas DataFrame): the DataFrame containing information for all potential stations
              (internal points of census blocks); its "Latitude" and "Longitude" columns are read
    Outputs:
         nearbyList (list of lists of ints, those ints being 0 or 1):
              The big list contains n lists, where n is the number of potential stations.
              Each smaller list contains n integers, all being either 0 or 1.
              Each integer represents whether the station at that index is too close (1) to the station represented by the list, or
                   whether it is sufficiently far away (0).
              At nearbyList[x][x], the value is always zero, for purposes of running the matrix multiplication in the
                   optimization model - every station needs to be eligible to be placed in conjunction with itself.
    """
    station_count = len(stations)
    if station_count == 0:
        return []

    #reading the coordinates once instead of doing a DataFrame row lookup for every pair
    station_latitudes = stations["Latitude"].tolist()
    station_longitudes = stations["Longitude"].tolist()

    #starting from all zeroes covers both the diagonal and every pair that is far enough apart
    nearbyList = [[0] * station_count for _ in range(station_count)]

    #the distance between two stations does not depend on their order, so each unordered pair is
    #measured once and both mirrored entries are set together
    for i in range(station_count):
        point1 = (station_latitudes[i], station_longitudes[i])
        for j in range(i + 1, station_count):
            point2 = (station_latitudes[j], station_longitudes[j])

            #1 when the two stations are a mile or less apart
            if geopy.distance.distance(point1, point2).mi <= 1:
                nearbyList[i][j] = 1
                nearbyList[j][i] = 1

    return nearbyList