# Imports

In [None]:
import requests
import json
import folium
import numpy as np
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import euclidean_distances
from scipy.cluster.hierarchy import linkage, cut_tree
import matplotlib.pyplot as plt
import pandas as pd
import overpy
import os
import alphashape
import shapely
from matplotlib.patches import Polygon

# 1. Fetch Data

## 1.1.Data preprocessing

We use the [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API) to query the locations that fulfill the given conditions. In this case we select all [amenities](https://wiki.openstreetmap.org/wiki/Key:amenity), although all bus stops could be selected instead.

In [None]:
# Client object through which the Overpass request below is sent.
api = overpy.Overpass()

location = "Granada"
road_distance = "2000"  # (m); not referenced by either query below

# Alternative selection: every bus stop inside the area named `location`.
# This string is overwritten by the amenity query right after it, so it is
# never sent; it is kept as a ready-made alternative.
query = f'''
[out:json];
area[name={location}][admin_level=8]->.where;
node(area.where)[highway=bus_stop];
out body;
>;
out skel qt;'''


# Selecting ALL amenities
# The trailing tuple on the node filter is an Overpass bounding box that
# further restricts the nodes found inside the area.
query = f'''
[out:json];
area[name={location}][admin_level=8]->.granada;
(
  node(area.granada)[amenity](37.120, -3.650, 37.300, -3.570);
);
out body;
>;
out skel qt;
'''

# Run the (amenity) query; the parsed result is consumed by the next cell.
response = api.query(query)

In [None]:
# Preparing the dataframe [id,latitude,longitude]
# Collect one record per node and build the frame in a single call. Growing a
# DataFrame with pd.concat inside the loop copies every previously added row
# on each iteration (quadratic cost) and concatenates onto an empty frame.
records = [
    {"id": node.id, "lat": node.lat, "lon": node.lon}
    for node in response.get_nodes()
]

# Passing `columns` keeps the three expected columns even when the query
# returned no nodes. The frame gets a fresh 0..n-1 index, so no reset_index
# is required afterwards.
df = pd.DataFrame(records, columns=["id", "lat", "lon"])

# Formatted information into a DataFrame, only for convenience
print(df)

In [None]:
# Quick visual check of the fetched locations: longitude on the x axis,
# latitude on the y axis, drawn with very small markers.
longitudes = df["lon"]
latitudes = df["lat"]
plt.scatter(x=longitudes, y=latitudes, s=0.5)
plt.xlabel("lon")
plt.ylabel("lat")

# One row of `df` per fetched location.
print("Number of detected possible places:", len(df.index))

## 1.2.Algorithm

Preprocessing: We will use a clustering algorithm to organize the bus stops hierarchically, so that the QUBO can be implemented layer by layer following the same hierarchy. This way, we exploit the fractal-like structure of the bus stop network.

## 1.2.1. Data preprocessing

We create a class with all the desired functionality implemented, together with some functions that return distance matrices, etc.

In [None]:
# Project-local helper; its implementation is not part of this notebook.
from deliverable.main.tree.linkageTree import linkageCut

# Build the linkage object from the fetched locations. Later cells rely on its
# `top_down_view_recur` and `give_centers_level` methods and on its `data` and
# `data_lon_lat` attributes.
linkage_matrix = linkageCut(df)

The first step is to apply hierarchical clustering (Ward distances) and classify all fetched candidate 
locations into clusters. We want a recursively defined structure: N level-0 districts and N level-1 sub-districts
per district. The function `top_down_view_recur` achieves exactly that classification.

In [None]:
# Shape of the hierarchy requested from the linkage object.
n_clusters = 6
levels = 2

# We hierarchically distribute all points
top_down = linkage_matrix.top_down_view_recur(n_clusters, levels)
X = linkage_matrix.data

# Column 0 of `top_down` holds the level-0 label of every point.
agg_labels = top_down[:,0] # level 0

fig, ax = plt.subplots()
fig.set_size_inches(5, 5)

# Colour every point by its level-0 label.
first_coord, second_coord = X.T[0], X.T[1]
ax.scatter(first_coord, second_coord, c=agg_labels)
ax.set_title(f'Ward clustering, level 0: {n_clusters} clusters ')
plt.show()

We can also see how each cluster in the previous figure is subsequently divided into N clusters. The tiny variations in hue indicate the different level-1 clusters.

In [None]:
# Column 1 of `top_down` holds the level-1 label of every point.
agg_labels = top_down[:,1]

# Same scatter as for level 0, now coloured by the level-1 label.
fig, ax = plt.subplots()
first_coord, second_coord = X.T[0], X.T[1]
ax.scatter(first_coord, second_coord, c=agg_labels)
ax.set_title(f'Ward clustering, level 1: {n_clusters*n_clusters} clusters ')
plt.show()

As the locations for the bus trajectory, we will use the actual amenity closest to the centroid of 
each cluster (at every level). The result is presented in the next figure, where the red lines
approximately mark the limits of the level-0 clusters. The colors indicate the level-1 clusters and the crosses the 
corresponding centroid locations.

In [None]:
# Fancy scatter: points coloured by level-1 cluster, a red outline around
# every level-0 cluster and crosses on the cluster centres of both levels.
fig, ax = plt.subplots()
fig.set_size_inches((5,5))
X = np.array(linkage_matrix.data_lon_lat, dtype=np.float64)

# The level-1 label is folded with %10 before being mapped onto 'Accent'.
ax.scatter(X[:,0], X[:,1], c=top_down[:,1]%10, cmap='Accent')

# One alpha-shape parameter per level-0 cluster, so that a single outline can
# be tuned without touching the others.
alpha_list = np.ones(n_clusters)*100

# Level-0 labels are matched against 1..n_clusters.
for i in range(1, n_clusters+1):
    cluster = X[top_down[:,0] == i]
    hull = alphashape.alphashape(cluster, alpha_list[i-1])

    # NOTE: `Polygon` in this notebook is matplotlib's patch class, so the
    # shapely geometry classes are always referenced through `shapely.geometry`.
    if isinstance(hull, shapely.geometry.MultiPolygon):
        # Select the component with larger area
        hull = max(hull.geoms, key=lambda geom: geom.area)

    if not isinstance(hull, shapely.geometry.Polygon) or hull.is_empty:
        # Only a non-empty polygon has an exterior ring to draw. Any other
        # result (presumably a degenerate cluster or an unsuitable alpha --
        # TODO confirm) is reported and skipped instead of failing on
        # `.exterior`.
        print(f"No polygon outline for level-0 cluster {i}; skipping it")
        continue

    hull_pts = hull.exterior.coords.xy
    poly_patch = Polygon(np.array(hull_pts).T, facecolor='none', edgecolor='red')
    ax.add_patch(poly_patch)

# Everything is drawn through `ax` so the figure does not depend on which
# axes pyplot currently considers active.
centers = linkage_matrix.give_centers_level(0)
ax.scatter(*centers.T, marker='X', edgecolors='black', color='red', linewidth=0.5, label='level 0')
centers = linkage_matrix.give_centers_level(1)
ax.scatter(*centers.T, marker='X', edgecolors='black', linewidth=0.5, label='level 1')
fig.suptitle('Hierarchical division')

ax.set_xlabel(r'lon (deg)')
ax.set_ylabel(r'lat (deg)')
ax.legend()
plt.show()


## 1.3. Locations in the map

Let's visualize the bus stops in their real geographical location. To do that, we use folium.

In [None]:
def map_show_array(data, labels, loc_coords, color, map=None):
    """Add one folium marker per point of `data` and return the map.

    Args:
        data: Iterable of points. Element 0 of every point is placed in the
            second slot of the marker position and element 1 in the first,
            i.e. points are read as (longitude, latitude).
        labels: Indexable collection holding one popup label per point, in
            the same order as `data`.
        loc_coords: [latitude, longitude] used to centre a newly created map.
            Ignored when `map` is supplied.
        color: Colour passed to `folium.Icon` for every marker.
        map: Existing map to draw on. When omitted, a new `folium.Map` with
            zoom level 12 is created. (The name shadows the builtin but is
            kept so existing keyword callers keep working.)

    Returns:
        The map that received the markers.
    """
    # Identity test: only a missing map triggers the creation of a new one,
    # regardless of how the supplied object implements `==`.
    if map is None:
        map = folium.Map(location=loc_coords, zoom_start=12)

    # Loop through the data and add markers for each location
    for i, point in enumerate(data):
        folium.Marker(
            [point[1], point[0]],
            popup=labels[i],
            icon=folium.Icon(color=color),
        ).add_to(map)
    return map

# Level-0 centres, drawn in red on a new map centred on their mean position.
centers_0 = linkage_matrix.give_centers_level(0)
means = centers_0.mean(axis=0)
# Swap the two components: the map centre is given as [latitude, longitude]
# (presumably the centres are stored as (lon, lat) -- verify in linkageCut).
means_lat_lon = [means[1], means[0]]
labels_0 = range(1, len(centers_0) + 1)
map = map_show_array(centers_0, labels_0, means_lat_lon, color='red')

# Level-1 centres, drawn in blue on the same map. Their label is the pair of
# level-0 numbers (i, j) written one after the other as a single integer.
centers_1 = linkage_matrix.give_centers_level(1)
means = centers_1.mean(axis=0)
means_lat_lon = [means[1], means[0]]
labels = [int(f"{i}{j}") for i in labels_0 for j in labels_0]
print(labels)
map_show_array(centers_1, labels, means_lat_lon, color='blue', map=map)



# 2. Solve with QUBOSolver

In [None]:
#HERE EXECUTE QUBOSOLVER

## 2.1 Visualize QUBOSolver

In [None]:
def sample_data(df, labels, n_sampled_stops, seed = 140421):
    """Draw a reproducible random sample of rows, balanced across clusters.

    The budget `n_sampled_stops` is divided evenly among the distinct labels
    (rounded down). From every cluster, visited in sorted label order, at
    most that many rows are drawn without replacement; a smaller cluster
    contributes all of its rows.

    Args:
        df: DataFrame with four columns. The values are taken positionally
            and the result columns are always named id, lat, lon, index.
        labels: NumPy array holding one cluster label per row of `df`.
        n_sampled_stops: Total number of rows requested.
        seed: Seed for NumPy's global random generator.

    Returns:
        DataFrame with columns ['id', 'lat', 'lon', 'index'] and a fresh
        0..n-1 index. The values come from `df.to_numpy()`, so all columns
        share one dtype. Empty when `labels` is empty.
    """
    # The *global* generator is seeded on purpose: moving to a local
    # Generator would select different rows for the same seed, and results
    # computed on an earlier sample refer to rows by their position in it.
    np.random.seed(seed)

    # We extract n samples from each cluster
    unique_labels = np.unique(labels)
    if unique_labels.size == 0:
        # Nothing to sample; also avoids dividing by zero clusters below.
        return pd.DataFrame(columns=['id', 'lat', 'lon', 'index'])

    max_cluster_size = int(n_sampled_stops / len(unique_labels))

    sample_indx = []
    for label in unique_labels:
        # Row positions of the members of this cluster.
        cluster_indices = np.nonzero(labels == label)[0]

        # Now we sample max_cluster_size (or the whole cluster if smaller)
        extract_size = min(max_cluster_size, cluster_indices.size)
        sample_indx.extend(
            np.random.choice(cluster_indices, size=extract_size, replace=False)
        )

    # Explicit integer dtype keeps the row selection valid when nothing was drawn.
    rows = np.asarray(sample_indx, dtype=np.intp)
    final_data = pd.DataFrame(df.to_numpy()[rows], columns=['id', 'lat', 'lon', 'index'])
    return final_data

# Driving distances are expensive to compute, so only a manageable subset of
# the locations is kept for the following steps.
n_sampled_stops = 100

# 1-based running number of every location, stored as an extra column.
df['index'] = range(1, len(df) + 1)

# Level-0 cluster label of every location; the sample is balanced over these.
labels = top_down[:,0]

final_data = sample_data(df, labels, n_sampled_stops)
print(final_data)

In [None]:
def draw_line(df, line, color, map=None):
    """Draw the connections of one line on a folium map.

    Args:
        df: DataFrame with 'lat' and 'lon' columns. Rows are addressed by
            position, matching the row/column numbers of `line`.
        line: 2-D adjacency matrix; every non-zero entry (i, j) is drawn as a
            segment between row i and row j of `df`.
        color: Colour of the drawn segments.
        map: Existing map to draw on. When omitted, a new `folium.Map`
            centred on the mean position of `df` (zoom level 8) is created.
            (The name shadows the builtin but is kept for existing callers.)

    Returns:
        The map that received the markers and segments.
    """
    # Read the coordinates once, by column name, as plain floats. This
    # replaces a per-edge `df.iloc[...]` lookup and no longer depends on
    # 'lat' and 'lon' sitting at fixed column positions.
    coords = df[['lat', 'lon']].to_numpy(dtype=float)

    if map is None:
        # The centre is only needed when a new map has to be created.
        loc_coords = coords.mean(axis=0).tolist()
        map = folium.Map(location=loc_coords, zoom_start=8)

    # Get all connected positions from line adj matrix
    for indx1, indx2 in zip(*np.nonzero(line)):
        pos_1 = coords[indx1].tolist()
        pos_2 = coords[indx2].tolist()
        folium.Marker(pos_1).add_to(map)
        folium.Marker(pos_2).add_to(map)
        # `folium.PolyLine` is the public top-level name of the class.
        folium.PolyLine([pos_1, pos_2], color=color).add_to(map)
    return map
# Overlay the three stored lines on one shared map, one colour per line.
# The first call receives no map, so draw_line creates it; the following
# calls keep drawing on that same map.
map = None
for file_name, color in (('Line0.dat', 'red'), ('Line1.dat', 'blue'), ('Line2.dat', 'green')):
    line = np.genfromtxt(os.path.join("results", file_name))
    map = draw_line(final_data, line, color, map)
map