# Comparing All Optimization Results

In [1]:
import math
import random
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
import networkx as nx
import warnings
import geopandas as gpd
from shapely.geometry import Point, LineString
from matplotlib.colors import ListedColormap
from shapely.geometry import MultiPoint
import matplotlib.image as mpimg

%run data_cleaning.ipynb
%run optimization_functions.ipynb

### Import all necessary data

In [2]:
# Read every input table and the street network from the local data/ folder.
# Forward slashes resolve on Windows, macOS and Linux; backslash separators
# are only understood as path separators on Windows.
centroid_locations = pd.read_csv("data/CensusTractCentroids.csv")
pop_centers = pd.read_csv("data/PopCenters.csv")
metro_locations = pd.read_csv("data/MetroLinkStations_REGISTERED.csv")
north_south_locations = pd.read_csv("data/MetroLinkStations_NS.csv")
census_data = pd.read_csv("data/B08119_stl_city.csv")
# `data` is a second name for the same frame as `census_data`. It is kept in
# case code loaded via %run still reads it -- TODO confirm and remove.
data = census_data
race_data = pd.read_csv("data/B08105_stl_city.csv")
# Weighted edge list with '%' as the field delimiter and string node ids.
stl = nx.read_weighted_edgelist("data/weighted_dual_list", delimiter='%', nodetype=str)

### Clean the data and set constants

# Population Centers

In [3]:
k = 14 #number of clusters/metro stops for all algorithms
center = 'POP' # population centers vs geometric centers

# Adjust edge weights for walking distance (inverse): every weight w becomes
# (sum of all weights) - w, so the largest original weight ends up the smallest.
# The rewrite mutates `stl` in place, so it is guarded by a flag stored on the
# graph itself: re-running this cell (e.g. after an error further down) must
# not invert the already-inverted weights a second time. A freshly loaded
# graph has no flag and is therefore processed again.
if not stl.graph.get('weights_inverted', False):
    total_weight = 0
    for _, _, weight in stl.edges(data='weight'):
        total_weight = total_weight + weight
    for u, v, weight in stl.edges(data='weight'):
        stl[u][v]['weight'] = total_weight - weight
    stl.graph['weights_inverted'] = True


# Build the working table for the selected kind of center.
# NOTE(review): the saved output of this cell is a TypeError about split()
# receiving too many positional arguments; presumably it is raised inside
# clean() -- confirm against data_cleaning.ipynb.
full_data = clean(census_data, race_data, pop_centers, centroid_locations, center)
full_data.head()

TypeError: split() takes from 1 to 2 positional arguments but 3 positional arguments (and 1 keyword-only argument) were given

### Original Stops

In [None]:
# Each stop is stored as a [LON, LAT] pair, i.e. x first and y second.
orig_stops = [
    [lon, lat]
    for lon, lat in zip(north_south_locations['LON'], north_south_locations['LAT'])
]
orig_labels = graph_from_lp(full_data, orig_stops)

# Attach the label of every node to a copy of the street graph.
orig_graph = stl.copy()
cluster_dict = dict(zip(full_data['node id'], orig_labels))
nx.set_node_attributes(orig_graph, cluster_dict, 'cluster')

# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(orig_graph, "capstone.gexf")
graph_results_lp(orig_graph, orig_stops, 'Original Plan', center)


### K-Means

In [None]:
# Third argument of weighted_kmeans; presumably the full_data column used as
# the sample weight -- confirm in optimization_functions.ipynb.
k_means_weight = 'estimate'
k_means_stops, labels = weighted_kmeans(full_data, k, k_means_weight)

# Attach the label of every node to a copy of the street graph.
k_means_graph = stl.copy()
cluster_dict = dict(zip(full_data['node id'], labels))
nx.set_node_attributes(k_means_graph, cluster_dict, 'cluster')

# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(k_means_graph, "capstone.gexf")
graph_results_lp(k_means_graph, k_means_stops, 'K-Means', center)


### Linear Programming

In [None]:
# Compute the stop locations with linear_programming(), which is not defined
# in this notebook (presumably loaded by the %run of
# optimization_functions.ipynb). The result is consumed by the next cell.
lp_stops = linear_programming(full_data)

In [None]:
# Derive a label per row of full_data from the LP stops.
labels = graph_from_lp(full_data, lp_stops)

# Attach the label of every node to a copy of the street graph.
lp_graph = stl.copy()
cluster_dict = dict(zip(full_data['node id'], labels))
nx.set_node_attributes(lp_graph, cluster_dict, 'cluster')

# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(lp_graph, "capstone.gexf")
graph_results_lp(lp_graph, lp_stops, 'Linear Programming', center)


### Modularity Maximization

In [None]:
# Run mod_max_weighted() on the street graph with k as its second argument.
# The result is handed to nx.write_gexf, so it is expected to be a networkx graph.
mod_max = mod_max_weighted(stl,k)
# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(mod_max, "capstone.gexf")
# graph_results() returns the stops for this plan; they are evaluated and
# exported further down.
mm_stops = graph_results(mod_max, 'Modularity Maximization', center)


# Evaluating Methods Against One Another

In [None]:
def report_plan_eval(full_data, stops, label):
    """Score one plan, print the four scores, and return them.

    Parameters
    ----------
    full_data : table providing the 'race weight', 'income weight' and
        'transit weight' columns; also passed to dist_to_nearest_stop_eval.
    stops : stop locations of the plan, passed to dist_to_nearest_stop_eval.
    label : text placed at the start of every printed line.

    Returns
    -------
    tuple
        (distances, unweighted, race, income, transit): the raw result of
        dist_to_nearest_stop_eval, its np.mean, and its dot product with
        each of the three weight columns.
    """
    distances = dist_to_nearest_stop_eval(full_data, stops)
    unweighted = np.mean(distances)
    race = np.dot(full_data['race weight'], distances)
    income = np.dot(full_data['income weight'], distances)
    transit = np.dot(full_data['transit weight'], distances)
    print(f'{label} unweighted eval: {unweighted}')
    print(f'{label} race eval: {race}')
    print(f'{label} income eval: {income}')
    # The trailing newline leaves a blank line between plans.
    print(f'{label} transit eval: {transit}\n')
    return distances, unweighted, race, income, transit


# One call per plan replaces four copy-pasted stanzas. The results are
# unpacked into the same variable names the notebook has always exposed.
(total_orig_dist, orig_unweighted_eval, orig_race_eval,
 orig_income_eval, orig_transit_eval) = report_plan_eval(full_data, orig_stops, 'Original plan')

(total_lp_dist, lp_unweighted_eval, lp_race_eval,
 lp_income_eval, lp_transit_eval) = report_plan_eval(full_data, lp_stops, 'LP')

(total_km_dist, km_unweighted_eval, km_race_eval,
 km_income_eval, km_transit_eval) = report_plan_eval(full_data, k_means_stops, 'K-Means')

(total_mm_dist, mm_unweighted_eval, mm_race_eval,
 mm_income_eval, mm_transit_eval) = report_plan_eval(full_data, mm_stops, 'Mod Max')

In [None]:
def arr2SHP(np_array, output_shapefile):
    """Write a sequence of coordinate pairs to a point shapefile.

    Parameters
    ----------
    np_array : iterable of coordinate pairs. Each pair is passed unchanged to
        shapely's Point, so the first value is x and the second is y.
    output_shapefile : path of the .shp file to write.

    The geometries are tagged with CRS EPSG:4326 and written with the
    "ESRI Shapefile" driver. The parent directory of the output path is
    created first if it does not exist yet.
    """
    from pathlib import Path  # local import keeps the function self-contained

    # Create the destination folder up front rather than relying on it existing.
    Path(output_shapefile).parent.mkdir(parents=True, exist_ok=True)

    gdf = gpd.GeoDataFrame(geometry=[Point(lonlat) for lonlat in np_array], crs="EPSG:4326")
    gdf.to_file(output_shapefile, driver="ESRI Shapefile")
    
# Export the population-center plans (file names carry the _p suffix).
# Forward slashes are used on purpose: in a normal (non-raw) string a
# backslash followed by O, K, L or M is an invalid escape sequence, and a
# backslash is only a path separator on Windows.
arr2SHP(orig_stops, "plans/OrigStops_p.shp")
arr2SHP(k_means_stops, "plans/KMeansStops_p.shp")
arr2SHP(lp_stops, "plans/LPStops_p.shp")
arr2SHP(mm_stops, "plans/MMStops_p.shp")

# Geometric Centers

In [None]:
# Switch to geometric centers and rebuild the working table. This rebinds the
# `center` and `full_data` names that the population-center cells above used,
# so the cells below operate on the 'GEO' variant from here on.
center = 'GEO' # population centers vs geometric centers
full_data = clean(census_data, race_data, pop_centers, centroid_locations, center)

### Original Stops

In [None]:
# Each stop is stored as a [LON, LAT] pair, i.e. x first and y second.
orig_stops = [
    [lon, lat]
    for lon, lat in zip(north_south_locations['LON'], north_south_locations['LAT'])
]
orig_labels = graph_from_lp(full_data, orig_stops)

# Attach the label of every node to a copy of the street graph.
orig_graph = stl.copy()
cluster_dict = dict(zip(full_data['node id'], orig_labels))
nx.set_node_attributes(orig_graph, cluster_dict, 'cluster')

# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(orig_graph, "capstone.gexf")
graph_results_lp(orig_graph, orig_stops, 'Original Plan', center)

### K-Means

In [None]:
# Third argument of weighted_kmeans; presumably the full_data column used as
# the sample weight -- confirm in optimization_functions.ipynb.
k_means_weight = 'estimate'
k_means_stops, labels = weighted_kmeans(full_data, k, k_means_weight)

# Attach the label of every node to a copy of the street graph.
k_means_graph = stl.copy()
cluster_dict = dict(zip(full_data['node id'], labels))
nx.set_node_attributes(k_means_graph, cluster_dict, 'cluster')

# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(k_means_graph, "capstone.gexf")
graph_results_lp(k_means_graph, k_means_stops, 'K-Means', center)


### Linear Programming

In [None]:
# Recompute the LP stop locations for the current full_data (rebuilt with
# center = 'GEO' earlier in this section). This replaces the lp_stops value
# from the population-center run.
lp_stops = linear_programming(full_data)

In [None]:
# Derive a label per row of full_data from the LP stops.
labels = graph_from_lp(full_data, lp_stops)

# Attach the label of every node to a copy of the street graph.
lp_graph = stl.copy()
cluster_dict = dict(zip(full_data['node id'], labels))
nx.set_node_attributes(lp_graph, cluster_dict, 'cluster')

# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(lp_graph, "capstone.gexf")
graph_results_lp(lp_graph, lp_stops, 'Linear Programming', center)


### Modularity Maximization

In [None]:
# Run mod_max_weighted() on the street graph with k as its second argument.
# The inputs are the same as in the population-center section; only `center`,
# passed to graph_results() below, differs.
mod_max = mod_max_weighted(stl,k)
# Every plan cell in this notebook writes to this same file name.
nx.write_gexf(mod_max, "capstone.gexf")
# graph_results() returns the stops for this plan; they are evaluated and
# exported further down.
mm_stops = graph_results(mod_max, 'Modularity Maximization', center)


# Evaluating Methods Against One Another

In [None]:
def report_plan_eval(full_data, stops, label):
    """Score one plan, print the four scores, and return them.

    Parameters
    ----------
    full_data : table providing the 'race weight', 'income weight' and
        'transit weight' columns; also passed to dist_to_nearest_stop_eval.
    stops : stop locations of the plan, passed to dist_to_nearest_stop_eval.
    label : text placed at the start of every printed line.

    Returns
    -------
    tuple
        (distances, unweighted, race, income, transit): the raw result of
        dist_to_nearest_stop_eval, its np.mean, and its dot product with
        each of the three weight columns.
    """
    distances = dist_to_nearest_stop_eval(full_data, stops)
    unweighted = np.mean(distances)
    race = np.dot(full_data['race weight'], distances)
    income = np.dot(full_data['income weight'], distances)
    transit = np.dot(full_data['transit weight'], distances)
    print(f'{label} unweighted eval: {unweighted}')
    print(f'{label} race eval: {race}')
    print(f'{label} income eval: {income}')
    # The trailing newline leaves a blank line between plans.
    print(f'{label} transit eval: {transit}\n')
    return distances, unweighted, race, income, transit


# One call per plan replaces four copy-pasted stanzas. The results are
# unpacked into the same variable names the notebook has always exposed.
(total_orig_dist, orig_unweighted_eval, orig_race_eval,
 orig_income_eval, orig_transit_eval) = report_plan_eval(full_data, orig_stops, 'Original plan')

(total_lp_dist, lp_unweighted_eval, lp_race_eval,
 lp_income_eval, lp_transit_eval) = report_plan_eval(full_data, lp_stops, 'LP')

(total_km_dist, km_unweighted_eval, km_race_eval,
 km_income_eval, km_transit_eval) = report_plan_eval(full_data, k_means_stops, 'K-Means')

(total_mm_dist, mm_unweighted_eval, mm_race_eval,
 mm_income_eval, mm_transit_eval) = report_plan_eval(full_data, mm_stops, 'Mod Max')

## Save plans as point shapefiles


In [None]:
# Export the geometric-center plans.
# Forward slashes are used on purpose: in a normal (non-raw) string a
# backslash followed by O, K, L or M is an invalid escape sequence, and a
# backslash is only a path separator on Windows.
arr2SHP(orig_stops, "plans/OrigStops.shp")
arr2SHP(k_means_stops, "plans/KMeansStops.shp")
arr2SHP(lp_stops, "plans/LPStops.shp")
arr2SHP(mm_stops, "plans/MMStops.shp")