# Edge Dataset

This notebook will convert the `Graph*.edges` files into `.csv` files.

In [1]:
import pandas as pd
import numpy as np
import edge_dataset_utils
import os

Define necessary directory paths.

In [2]:
GRAPHS_DIR = '../2_graphgen/Graphs'
EDGES_DIR = '../3_edgegen/'

#### Convert these edges files to CSVs

Open edge file.

In [3]:
df_edges = pd.read_csv(EDGES_DIR + 'Graph0.edges', delimiter=' ', skiprows=[0], names=['Node1_ID', 'Node2_ID', 'EUCLIDEAN_ROUNDED_DISTANCE'])
df_edges.head()

Unnamed: 0,Node1_ID,Node2_ID,EUCLIDEAN_ROUNDED_DISTANCE
0,0,1,2
1,0,2,1
2,0,3,2
3,0,4,5
4,0,5,1


Open Graph file to get coordinates.

In [4]:
df_graph = pd.read_csv(GRAPHS_DIR + '/Graph0.tsp', delimiter=' ', 
                       skiprows=[i for i in range(6)], 
                       names=['NODE_ID', 'LONGITUDE', 'LATITUDE'])
df_graph.head()

Unnamed: 0,NODE_ID,LONGITUDE,LATITUDE
0,0,-0.591962,52.293442
1,1,-2.275903,53.441334
2,2,0.202122,51.089753
3,3,-2.159202,51.792456
4,4,-5.071282,50.156301


Join coordinates on the node IDs

In [5]:
df_edges = df_edges.join(df_graph, how='left', on='Node1_ID') \
        .rename(columns={'LONGITUDE': 'LONGITUDE_NODE_1', 
                         'LATITUDE': 'LATITUDE_NODE_1'})      \
        .drop('NODE_ID', axis=1)
df_edges.head()

Unnamed: 0,Node1_ID,Node2_ID,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_1,LATITUDE_NODE_1
0,0,1,2,-0.591962,52.293442
1,0,2,1,-0.591962,52.293442
2,0,3,2,-0.591962,52.293442
3,0,4,5,-0.591962,52.293442
4,0,5,1,-0.591962,52.293442


Repeat for the other node in each edge.

In [6]:
df_edges = df_edges.join(df_graph, how='left', on='Node2_ID') \
        .rename(columns={'LONGITUDE': 'LONGITUDE_NODE_2', 
                         'LATITUDE': 'LATITUDE_NODE_2'})      \
        .drop('NODE_ID', axis=1)
df_edges.head()

Unnamed: 0,Node1_ID,Node2_ID,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_1,LATITUDE_NODE_1,LONGITUDE_NODE_2,LATITUDE_NODE_2
0,0,1,2,-0.591962,52.293442,-2.275903,53.441334
1,0,2,1,-0.591962,52.293442,0.202122,51.089753
2,0,3,2,-0.591962,52.293442,-2.159202,51.792456
3,0,4,5,-0.591962,52.293442,-5.071282,50.156301
4,0,5,1,-0.591962,52.293442,-0.332657,53.742919


Combine columns to make tuples that will be be our edges.

In [7]:
df_edges['EDGE(Node1_ID, Node2_ID)'] = list(zip(df_edges['Node1_ID'], df_edges['Node2_ID']))
df_edges = df_edges.drop(['Node1_ID', 'Node2_ID'], axis=1)
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_1,LATITUDE_NODE_1,LONGITUDE_NODE_2,LATITUDE_NODE_2,"EDGE(Node1_ID, Node2_ID)"
0,2,-0.591962,52.293442,-2.275903,53.441334,"(0, 1)"
1,1,-0.591962,52.293442,0.202122,51.089753,"(0, 2)"
2,2,-0.591962,52.293442,-2.159202,51.792456,"(0, 3)"
3,5,-0.591962,52.293442,-5.071282,50.156301,"(0, 4)"
4,1,-0.591962,52.293442,-0.332657,53.742919,"(0, 5)"


Convert Longitude and Latitiude columns to tuples.

In [8]:
df_edges['NODE1_COORDS'] = list(zip(df_edges['LONGITUDE_NODE_1'], df_edges['LATITUDE_NODE_1']))
df_edges = df_edges.drop(['LONGITUDE_NODE_1', 'LATITUDE_NODE_1'], axis=1)
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_2,LATITUDE_NODE_2,"EDGE(Node1_ID, Node2_ID)",NODE1_COORDS
0,2,-2.275903,53.441334,"(0, 1)","(-0.591962, 52.293442)"
1,1,0.202122,51.089753,"(0, 2)","(-0.591962, 52.293442)"
2,2,-2.159202,51.792456,"(0, 3)","(-0.591962, 52.293442)"
3,5,-5.071282,50.156301,"(0, 4)","(-0.591962, 52.293442)"
4,1,-0.332657,53.742919,"(0, 5)","(-0.591962, 52.293442)"


Repeat for Node2's coordinates.

In [9]:
df_edges['NODE2_COORDS'] = list(zip(df_edges['LONGITUDE_NODE_2'], df_edges['LATITUDE_NODE_2']))
df_edges = df_edges.drop(['LONGITUDE_NODE_2', 'LATITUDE_NODE_2'], axis=1)
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,"EDGE(Node1_ID, Node2_ID)",NODE1_COORDS,NODE2_COORDS
0,2,"(0, 1)","(-0.591962, 52.293442)","(-2.275903, 53.441334)"
1,1,"(0, 2)","(-0.591962, 52.293442)","(0.202122, 51.089753)"
2,2,"(0, 3)","(-0.591962, 52.293442)","(-2.159202, 51.792456)"
3,5,"(0, 4)","(-0.591962, 52.293442)","(-5.071282, 50.156301)"
4,1,"(0, 5)","(-0.591962, 52.293442)","(-0.33265700000000004, 53.74291899999999)"


Compute the unrounded distance between the nodes.

In [10]:
from scipy.spatial import distance
from geopy.distance import geodesic
# df_edges['EUCLIDEAN_DISTANCE'] = pd.Series(list(zip(df_edges['NODE1_COORDS'], df_edges['NODE2_COORDS']))).apply(lambda x: distance.euclidean(x[0], x[1])) # Euclidean distance between Coordinates
df_edges['GEODESIC_DISTANCE_KM'] = pd.Series(list(zip(df_edges['NODE1_COORDS'], df_edges['NODE2_COORDS']))).apply(lambda x: geodesic(x[1], x[0]).kilometers) # Geodesic distance between coordinates

df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,"EDGE(Node1_ID, Node2_ID)",NODE1_COORDS,NODE2_COORDS,GEODESIC_DISTANCE_KM
0,2,"(0, 1)","(-0.591962, 52.293442)","(-2.275903, 53.441334)",225.805756
1,1,"(0, 2)","(-0.591962, 52.293442)","(0.202122, 51.089753)",160.19888
2,2,"(0, 3)","(-0.591962, 52.293442)","(-2.159202, 51.792456)",182.044779
3,5,"(0, 4)","(-0.591962, 52.293442)","(-5.071282, 50.156301)",549.333412
4,1,"(0, 5)","(-0.591962, 52.293442)","(-0.33265700000000004, 53.74291899999999)",163.877493


Clean up any floating point erros.

In [11]:
df_edges['NODE1_COORDS'] = df_edges['NODE1_COORDS'].apply(lambda x: tuple(np.round(x, decimals=6)))
df_edges['NODE2_COORDS'] = df_edges['NODE2_COORDS'].apply(lambda x: tuple(np.round(x, decimals=6)))
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,"EDGE(Node1_ID, Node2_ID)",NODE1_COORDS,NODE2_COORDS,GEODESIC_DISTANCE_KM
0,2,"(0, 1)","(-0.591962, 52.293442)","(-2.275903, 53.441334)",225.805756
1,1,"(0, 2)","(-0.591962, 52.293442)","(0.202122, 51.089753)",160.19888
2,2,"(0, 3)","(-0.591962, 52.293442)","(-2.159202, 51.792456)",182.044779
3,5,"(0, 4)","(-0.591962, 52.293442)","(-5.071282, 50.156301)",549.333412
4,1,"(0, 5)","(-0.591962, 52.293442)","(-0.332657, 53.742919)",163.877493


Save as `.csv` file.

In [12]:
df_edges.to_csv('./Graph0.csv', index=False, float_format='%.6f')

---
Apply this procedure to all the edge files.

In [13]:
edge_dataset_utils.edges2csv(GRAPHS_DIR, EDGES_DIR, metric='geodesic')

Completed Graph0.csv
Completed Graph1.csv
Completed Graph2.csv
Completed Graph3.csv
Completed Graph4.csv
Completed Graph5.csv
Completed Graph6.csv
Completed Graph7.csv
Completed Graph8.csv
Completed Graph9.csv
