In [1]:
import numpy as np
import pandas as pd
import random
import warnings
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
import plotly.express as px
%run data_cleaning.ipynb

In [2]:
centroid_locations = pd.read_csv("/Users/dylanmack/Library/CloudStorage/OneDrive-WashingtonUniversityinSt.Louis/ESE 499/trio-capstone/data/CensusTractCentroids.csv")
metro_locations = pd.read_csv("/Users/dylanmack/Library/CloudStorage/OneDrive-WashingtonUniversityinSt.Louis/ESE 499/trio-capstone/data/MetroLinkStations_REGISTERED.csv")
north_south_locations = pd.read_csv("/Users/dylanmack/Library/CloudStorage/OneDrive-WashingtonUniversityinSt.Louis/ESE 499/trio-capstone/data/MetroLinkStations_NS.csv")
census_data = data = pd.read_csv("/Users/dylanmack/Library/CloudStorage/OneDrive-WashingtonUniversityinSt.Louis/ESE 499/trio-capstone/data/B08119_stl_city.csv")
race_data = pd.read_csv("/Users/dylanmack/Library/CloudStorage/OneDrive-WashingtonUniversityinSt.Louis/ESE 499/trio-capstone/data/B08105_stl_city.csv")

In [3]:
by_tract = clean(census_data, race_data)
centroid_locations_small = centroid_locations[['NAMELSAD','INTPTLAT','INTPTLON']]
centroid_locations_small = centroid_locations_small.rename(columns = {"NAMELSAD": "location", "INTPTLAT" : "LAT", "INTPTLON" : "LON"})
full_data = by_tract.merge(centroid_locations_small, on='location')

In [4]:
k = 12
n = len(full_data)
x0 = []
for i in range(k):
    new_tract = random.randint(0,n-1)
    x0.append(full_data['LON'][new_tract])
    x0.append(full_data['LAT'][new_tract])

lat_bounds = (min(full_data['LAT']),max(full_data['LAT']))
lon_bounds = (min(full_data['LON']),max(full_data['LON']))
bnds = [val for pair in zip([lon_bounds]*12, [lat_bounds]*12) for val in pair]

In [10]:
def distances_to_nearest_stop(tract_locations, metro_locations):
    
    shortest_distance = np.zeros(len(tract_locations))
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    
    for i in range(len(tract_locations)):
        cent = np.array([tract_locations['LON'][i],tract_locations['LAT'][i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array(metro_locations[j])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

def orth_dist(x):
    x = [[x[i], x[i + 1]] for i in range(0, len(x), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in x]).reshape(-1,1)
    lats = np.array([s[1] for s in x])
    model = LinearRegression().fit(longs,lats)
    b = model.intercept_
    m = model.coef_[0]
    dists = 0
    for stop in x:
        n = abs(-1*m*stop[0] + stop[1] - b)
        d = np.sqrt(m**2+1)
        dists = dists + n/d
    return dists


def distances_between_stops(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    shortest_distance = np.zeros(len(metro_locations))
    total_distance = 0
    for i in range(len(metro_locations)):
        metro_current = np.array([metro_locations[i][0],metro_locations[i][1]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            if (i != j):
                metro_compared = np.array(metro_locations[j])
                distance = np.linalg.norm(metro_current-metro_compared)
                if (distance < closest_distance):
                    closest_distance = distance
        total_distance = total_distance + closest_distance
    return total_distance


def distance_to_mean_x(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in metro_locations])
    x_avg = np.mean(longs)
    deviation_from_avg = 0
    for i in range(len(metro_locations)):
        deviation_from_avg = deviation_from_avg + np.abs(x_avg - metro_locations[i][0])
    return deviation_from_avg


def fun(new_stops):
    #print(f'shape of new stops: {np.shape(new_stops)}')
    #weights = full_data['race weight']
    weights = np.ones(len(full_data))
    #print(weights)
    dist_to_stops = np.dot(weights, distances_to_nearest_stop(full_data, new_stops))/len(weights)
    linearity = orth_dist(new_stops) 
    dist_btw_stops = distances_between_stops(new_stops)
    distance_to_mean = distance_to_mean_x(new_stops)
    #print(linearity)
    #print(f'dist to stops: {dist_to_stops:3.3f}, linearity: {linearity:3.3f}')
    return 1*dist_to_stops + 0*linearity - 0.0*dist_btw_stops + 0*distance_to_mean



In [11]:
result = minimize(fun, x0, bounds=bnds)
all_centroids = result.x
lp_results = [[all_centroids[i], all_centroids[i + 1]] for i in range(0, len(all_centroids), 2)]

In [32]:
print(f'Unweighted Final Objective Function Value: {result.fun}')

def dist_to_nearest_stop_eval(centroid_locations, metro_locations):
    shortest_distance = np.zeros(len(centroid_locations))
    
    for i in range(len(centroid_locations)):
        cent = np.array([centroid_locations.INTPTLAT[i],centroid_locations.INTPTLON[i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array([metro_locations[j][1],metro_locations[j][0]])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

shortest_distance = dist_to_nearest_stop_eval(centroid_locations, lp_results)
print(f'Unweighted Evaluation: {np.dot(shortest_distance, np.ones(len(full_data)))/len(shortest_distance)}')
print(f'Transit weight Evaluation: {np.dot(shortest_distance, full_data["transit weight"])}')
print(f'Transit weight (pop) Evaluation: {np.dot(shortest_distance, full_data["transit weight (pop)"])/len(shortest_distance)}')
print(f'Income weight Evaluation: {np.dot(shortest_distance, full_data["income weight"])}')
print(f'Income weight (pop) Evaluation: {np.dot(shortest_distance, full_data["income weight (pop)"])/len(shortest_distance)}')
print(f'Race weight Evaluation: {np.dot(shortest_distance, full_data["race weight"])}')
print(f'Race weight (pop) Evaluation: {np.dot(shortest_distance, full_data["race weight (pop)"])/len(shortest_distance)}')

Unweighted Final Objective Function Value: 0.8632891204799855
Unweighted Evaluation: 0.8830244309649059
Transit weight Evaluation: 0.8725430887216501
Transit weight (pop) Evaluation: 1.121281849667557
Income weight Evaluation: 0.8826650219342462
Income weight (pop) Evaluation: 1.2885227114064195
Race weight Evaluation: 0.9157970797219064
Race weight (pop) Evaluation: 0.8792837274637763


In [36]:
def distances_to_nearest_stop(tract_locations, metro_locations):
    
    shortest_distance = np.zeros(len(tract_locations))
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    
    for i in range(len(tract_locations)):
        cent = np.array([tract_locations['LON'][i],tract_locations['LAT'][i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array(metro_locations[j])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

def orth_dist(x):
    x = [[x[i], x[i + 1]] for i in range(0, len(x), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in x]).reshape(-1,1)
    lats = np.array([s[1] for s in x])
    model = LinearRegression().fit(longs,lats)
    b = model.intercept_
    m = model.coef_[0]
    dists = 0
    for stop in x:
        n = abs(-1*m*stop[0] + stop[1] - b)
        d = np.sqrt(m**2+1)
        dists = dists + n/d
    return dists


def distances_between_stops(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    shortest_distance = np.zeros(len(metro_locations))
    total_distance = 0
    for i in range(len(metro_locations)):
        metro_current = np.array([metro_locations[i][0],metro_locations[i][1]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            if (i != j):
                metro_compared = np.array(metro_locations[j])
                distance = np.linalg.norm(metro_current-metro_compared)
                if (distance < closest_distance):
                    closest_distance = distance
        total_distance = total_distance + closest_distance
    return total_distance


def distance_to_mean_x(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in metro_locations])
    x_avg = np.mean(longs)
    deviation_from_avg = 0
    for i in range(len(metro_locations)):
        deviation_from_avg = deviation_from_avg + np.abs(x_avg - metro_locations[i][0])
    return deviation_from_avg


def fun(new_stops):
    #print(f'shape of new stops: {np.shape(new_stops)}')
    weights = full_data['race weight']
    #weights = np.ones(len(full_data))
    #print(weights)
    dist_to_stops = np.dot(weights, distances_to_nearest_stop(full_data, new_stops))/len(weights)
    linearity = orth_dist(new_stops) 
    dist_btw_stops = distances_between_stops(new_stops)
    distance_to_mean = distance_to_mean_x(new_stops)
    #print(linearity)
    #print(f'dist to stops: {dist_to_stops:3.3f}, linearity: {linearity:3.3f}')
    return 1*dist_to_stops + 0*linearity - 0.0*dist_btw_stops + 0*distance_to_mean



In [37]:
result = minimize(fun, x0, bounds=bnds)
all_centroids = result.x
lp_results = [[all_centroids[i], all_centroids[i + 1]] for i in range(0, len(all_centroids), 2)]

In [43]:
print(f'Unweighted Final Objective Function Value: {result.fun}')

def dist_to_nearest_stop_eval(centroid_locations, metro_locations):
    shortest_distance = np.zeros(len(centroid_locations))
    
    for i in range(len(centroid_locations)):
        cent = np.array([centroid_locations.INTPTLAT[i],centroid_locations.INTPTLON[i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array([metro_locations[j][1],metro_locations[j][0]])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

shortest_distance = dist_to_nearest_stop_eval(centroid_locations, lp_results)
print(f'Unweighted Evaluation: {np.dot(shortest_distance, np.ones(len(full_data)))/len(shortest_distance)}')
print(f'Transit weight Evaluation: {np.dot(shortest_distance, full_data["transit weight"])}')
print(f'Transit weight (pop) Evaluation: {np.dot(shortest_distance, full_data["transit weight (pop)"])/len(shortest_distance)}')
print(f'Income weight Evaluation: {np.dot(shortest_distance, full_data["income weight"])}')
print(f'Income weight (pop) Evaluation: {np.dot(shortest_distance, full_data["income weight (pop)"])/len(shortest_distance)}')
print(f'Race weight Evaluation: {np.dot(shortest_distance, full_data["race weight"])}')
print(f'Race weight (pop) Evaluation: {np.dot(shortest_distance, full_data["race weight (pop)"])/len(shortest_distance)}')

Unweighted Final Objective Function Value: 0.008545135884211534
Unweighted Evaluation: 0.9067648175993386
Transit weight Evaluation: 0.8814200875678959
Transit weight (pop) Evaluation: 1.1417635010998042
Income weight Evaluation: 0.906451340700517
Income weight (pop) Evaluation: 1.3206043850196598
Race weight Evaluation: 0.9312703809916218
Race weight (pop) Evaluation: 0.9016352976473995


In [44]:
def distances_to_nearest_stop(tract_locations, metro_locations):
    
    shortest_distance = np.zeros(len(tract_locations))
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    
    for i in range(len(tract_locations)):
        cent = np.array([tract_locations['LON'][i],tract_locations['LAT'][i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array(metro_locations[j])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

def orth_dist(x):
    x = [[x[i], x[i + 1]] for i in range(0, len(x), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in x]).reshape(-1,1)
    lats = np.array([s[1] for s in x])
    model = LinearRegression().fit(longs,lats)
    b = model.intercept_
    m = model.coef_[0]
    dists = 0
    for stop in x:
        n = abs(-1*m*stop[0] + stop[1] - b)
        d = np.sqrt(m**2+1)
        dists = dists + n/d
    return dists


def distances_between_stops(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    shortest_distance = np.zeros(len(metro_locations))
    total_distance = 0
    for i in range(len(metro_locations)):
        metro_current = np.array([metro_locations[i][0],metro_locations[i][1]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            if (i != j):
                metro_compared = np.array(metro_locations[j])
                distance = np.linalg.norm(metro_current-metro_compared)
                if (distance < closest_distance):
                    closest_distance = distance
        total_distance = total_distance + closest_distance
    return total_distance


def distance_to_mean_x(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in metro_locations])
    x_avg = np.mean(longs)
    deviation_from_avg = 0
    for i in range(len(metro_locations)):
        deviation_from_avg = deviation_from_avg + np.abs(x_avg - metro_locations[i][0])
    return deviation_from_avg


def fun(new_stops):
    #print(f'shape of new stops: {np.shape(new_stops)}')
    weights = full_data['income weight']
    #weights = np.ones(len(full_data))
    #print(weights)
    dist_to_stops = np.dot(weights, distances_to_nearest_stop(full_data, new_stops))/len(weights)
    linearity = orth_dist(new_stops) 
    dist_btw_stops = distances_between_stops(new_stops)
    distance_to_mean = distance_to_mean_x(new_stops)
    #print(linearity)
    #print(f'dist to stops: {dist_to_stops:3.3f}, linearity: {linearity:3.3f}')
    return 1*dist_to_stops + 0*linearity - 0.0*dist_btw_stops + 0*distance_to_mean



In [45]:
result = minimize(fun, x0, bounds=bnds)
all_centroids = result.x
lp_results = [[all_centroids[i], all_centroids[i + 1]] for i in range(0, len(all_centroids), 2)]

In [46]:
print(f'Unweighted Final Objective Function Value: {result.fun}')

def dist_to_nearest_stop_eval(centroid_locations, metro_locations):
    shortest_distance = np.zeros(len(centroid_locations))
    
    for i in range(len(centroid_locations)):
        cent = np.array([centroid_locations.INTPTLAT[i],centroid_locations.INTPTLON[i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array([metro_locations[j][1],metro_locations[j][0]])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

shortest_distance = dist_to_nearest_stop_eval(centroid_locations, lp_results)
print(f'Unweighted Evaluation: {np.dot(shortest_distance, np.ones(len(full_data)))/len(shortest_distance)}')
print(f'Transit weight Evaluation: {np.dot(shortest_distance, full_data["transit weight"])}')
print(f'Transit weight (pop) Evaluation: {np.dot(shortest_distance, full_data["transit weight (pop)"])/len(shortest_distance)}')
print(f'Income weight Evaluation: {np.dot(shortest_distance, full_data["income weight"])}')
print(f'Income weight (pop) Evaluation: {np.dot(shortest_distance, full_data["income weight (pop)"])/len(shortest_distance)}')
print(f'Race weight Evaluation: {np.dot(shortest_distance, full_data["race weight"])}')
print(f'Race weight (pop) Evaluation: {np.dot(shortest_distance, full_data["race weight (pop)"])/len(shortest_distance)}')

Unweighted Final Objective Function Value: 0.008545135884211534
Unweighted Evaluation: 0.9067648175993386
Transit weight Evaluation: 0.8814200875678959
Transit weight (pop) Evaluation: 1.1417635010998042
Income weight Evaluation: 0.906451340700517
Income weight (pop) Evaluation: 1.3206043850196598
Race weight Evaluation: 0.9312703809916218
Race weight (pop) Evaluation: 0.9016352976473995


In [48]:
def distances_to_nearest_stop(tract_locations, metro_locations):
    
    shortest_distance = np.zeros(len(tract_locations))
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    
    for i in range(len(tract_locations)):
        cent = np.array([tract_locations['LON'][i],tract_locations['LAT'][i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array(metro_locations[j])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

def orth_dist(x):
    x = [[x[i], x[i + 1]] for i in range(0, len(x), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in x]).reshape(-1,1)
    lats = np.array([s[1] for s in x])
    model = LinearRegression().fit(longs,lats)
    b = model.intercept_
    m = model.coef_[0]
    dists = 0
    for stop in x:
        n = abs(-1*m*stop[0] + stop[1] - b)
        d = np.sqrt(m**2+1)
        dists = dists + n/d
    return dists


def distances_between_stops(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    shortest_distance = np.zeros(len(metro_locations))
    total_distance = 0
    for i in range(len(metro_locations)):
        metro_current = np.array([metro_locations[i][0],metro_locations[i][1]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            if (i != j):
                metro_compared = np.array(metro_locations[j])
                distance = np.linalg.norm(metro_current-metro_compared)
                if (distance < closest_distance):
                    closest_distance = distance
        total_distance = total_distance + closest_distance
    return total_distance


def distance_to_mean_x(metro_locations):
    metro_locations = [[metro_locations[i], metro_locations[i + 1]] for i in range(0, len(metro_locations), 2)] # reshape back into 2d array
    longs = np.array([s[0] for s in metro_locations])
    x_avg = np.mean(longs)
    deviation_from_avg = 0
    for i in range(len(metro_locations)):
        deviation_from_avg = deviation_from_avg + np.abs(x_avg - metro_locations[i][0])
    return deviation_from_avg


def fun(new_stops):
    #print(f'shape of new stops: {np.shape(new_stops)}')
    weights = full_data['transit weight']
    #weights = np.ones(len(full_data))
    #print(weights)
    dist_to_stops = np.dot(weights, distances_to_nearest_stop(full_data, new_stops))/len(weights)
    linearity = orth_dist(new_stops) 
    dist_btw_stops = distances_between_stops(new_stops)
    distance_to_mean = distance_to_mean_x(new_stops)
    #print(linearity)
    #print(f'dist to stops: {dist_to_stops:3.3f}, linearity: {linearity:3.3f}')
    return 1*dist_to_stops + 0*linearity - 0.0*dist_btw_stops + 0*distance_to_mean



In [49]:
result = minimize(fun, x0, bounds=bnds)
all_centroids = result.x
lp_results = [[all_centroids[i], all_centroids[i + 1]] for i in range(0, len(all_centroids), 2)]

In [1]:
print(f'Unweighted Final Objective Function Value: {result.fun}')

def dist_to_nearest_stop_eval(centroid_locations, metro_locations):
    shortest_distance = np.zeros(len(centroid_locations))
    
    for i in range(len(centroid_locations)):
        cent = np.array([centroid_locations.INTPTLAT[i],centroid_locations.INTPTLON[i]])
        closest_distance = float('inf') # initialize to infinity
        for j in range(len(metro_locations)):
            metro = np.array([metro_locations[j][1],metro_locations[j][1]])
            diff_latlon = cent-metro
            diff_miles = np.copy(diff_latlon)
            diff_miles[0] = 69*diff_latlon[0]
            diff_miles[1] = 54.6*diff_latlon[1]
            distance = np.linalg.norm(diff_miles)
            if (distance < closest_distance):
                closest_distance = distance
        shortest_distance[i] = closest_distance
    
    return shortest_distance

shortest_distance = dist_to_nearest_stop_eval(centroid_locations, lp_results)
print(f'Unweighted Evaluation: {np.dot(shortest_distance, np.ones(len(full_data)))/len(shortest_distance)}')
print(f'Transit weight Evaluation: {np.dot(shortest_distance, full_data["transit weight"])}')
print(f'Transit weight (pop) Evaluation: {np.dot(shortest_distance, full_data["transit weight (pop)"])/len(shortest_distance)}')
print(f'Income weight Evaluation: {np.dot(shortest_distance, full_data["income weight"])}')
print(f'Income weight (pop) Evaluation: {np.dot(shortest_distance, full_data["income weight (pop)"])/len(shortest_distance)}')
print(f'Race weight Evaluation: {np.dot(shortest_distance, full_data["race weight"])}')
print(f'Race weight (pop) Evaluation: {np.dot(shortest_distance, full_data["race weight (pop)"])/len(shortest_distance)}')

NameError: name 'result' is not defined