# Edge Dataset

This notebook will convert the `Graph*.edges` files into `.csv` files.

In [1]:
import pandas as pd
import numpy as np

Define necessary directory paths.

In [2]:
# Input directories, relative to the notebook's working directory.
# NOTE: the two constants follow different conventions. GRAPHS_DIR has no
# trailing slash (the cell that reads the graph file adds '/' itself), while
# EDGES_DIR ends with '/' (the cell that reads the edge file concatenates the
# file name directly). Keep that in mind when changing either value.
GRAPHS_DIR = '../graphgen/Graphs'
EDGES_DIR = '../edgegen/'

#### Convert the edge files to CSVs

Open edge file.

In [3]:
# Read the edge list for Graph0: the first line of the file is skipped and the
# remaining space-separated rows are parsed into three named columns.
df_edges = pd.read_csv(
    EDGES_DIR + 'Graph0.edges',
    sep=' ',
    skiprows=1,
    names=['Node1_ID', 'Node2_ID', 'EUCLIDEAN_ROUNDED_DISTANCE'],
)
df_edges.head()

Unnamed: 0,Node1_ID,Node2_ID,EUCLIDEAN_ROUNDED_DISTANCE
0,0,1,2
1,0,2,0
2,0,3,3
3,0,4,5
4,0,5,4


Open Graph file to get coordinates.

In [4]:
# Read the node table for Graph0. The first six lines of the .tsp file are
# skipped (presumably a header section -- confirm against the file format);
# the remaining space-separated rows hold a node ID and its coordinates.
df_graph = pd.read_csv(
    GRAPHS_DIR + '/Graph0.tsp',
    sep=' ',
    skiprows=6,
    names=['NODE_ID', 'LONGITUDE', 'LATITUDE'],
)
df_graph.head()

Unnamed: 0,NODE_ID,LONGITUDE,LATITUDE
0,0,-0.216122,51.499286
1,1,-1.761813,52.483136
2,2,-0.27598,51.941321
3,3,-2.246072,53.479275
4,4,-3.013794,56.186123


Join coordinates on the node IDs

In [5]:
# Attach the coordinates of the first endpoint of every edge.
#
# DataFrame.join(..., on=...) matches the left-hand column against the *index*
# of the right-hand frame. df_graph is therefore indexed by NODE_ID here so the
# lookup uses the real node ID rather than the row position; the two only
# coincide when the node file happens to list IDs as 0, 1, 2, ... in order.
# Because NODE_ID becomes the index, it is not copied into the result and no
# extra drop is needed.
df_edges = (
    df_edges
    .join(df_graph.set_index('NODE_ID'), how='left', on='Node1_ID')
    .rename(columns={'LONGITUDE': 'LONGITUDE_NODE_1',
                     'LATITUDE': 'LATITUDE_NODE_1'})
)
df_edges.head()

Unnamed: 0,Node1_ID,Node2_ID,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_1,LATITUDE_NODE_1
0,0,1,2,-0.216122,51.499286
1,0,2,0,-0.216122,51.499286
2,0,3,3,-0.216122,51.499286
3,0,4,5,-0.216122,51.499286
4,0,5,4,-0.216122,51.499286


Repeat for the other node in each edge.

In [6]:
# Attach the coordinates of the second endpoint of every edge.
#
# DataFrame.join(..., on=...) matches the left-hand column against the *index*
# of the right-hand frame. df_graph is therefore indexed by NODE_ID here so the
# lookup uses the real node ID rather than the row position; the two only
# coincide when the node file happens to list IDs as 0, 1, 2, ... in order.
# Because NODE_ID becomes the index, it is not copied into the result and no
# extra drop is needed.
df_edges = (
    df_edges
    .join(df_graph.set_index('NODE_ID'), how='left', on='Node2_ID')
    .rename(columns={'LONGITUDE': 'LONGITUDE_NODE_2',
                     'LATITUDE': 'LATITUDE_NODE_2'})
)
df_edges.head()

Unnamed: 0,Node1_ID,Node2_ID,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_1,LATITUDE_NODE_1,LONGITUDE_NODE_2,LATITUDE_NODE_2
0,0,1,2,-0.216122,51.499286,-1.761813,52.483136
1,0,2,0,-0.216122,51.499286,-0.27598,51.941321
2,0,3,3,-0.216122,51.499286,-2.246072,53.479275
3,0,4,5,-0.216122,51.499286,-3.013794,56.186123
4,0,5,4,-0.216122,51.499286,-3.606638,50.530432


Combine the two node ID columns into tuples that will be our edges.

In [7]:
# Collapse the two endpoint ID columns into a single column of
# (Node1_ID, Node2_ID) tuples, then remove the now-redundant ID columns.
edge_pairs = list(zip(df_edges['Node1_ID'], df_edges['Node2_ID']))
df_edges = (
    df_edges
    .assign(**{'EDGE(Node1_ID, Node2_ID)': edge_pairs})
    .drop(columns=['Node1_ID', 'Node2_ID'])
)
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_1,LATITUDE_NODE_1,LONGITUDE_NODE_2,LATITUDE_NODE_2,"EDGE(Node1_ID, Node2_ID)"
0,2,-0.216122,51.499286,-1.761813,52.483136,"(0, 1)"
1,0,-0.216122,51.499286,-0.27598,51.941321,"(0, 2)"
2,3,-0.216122,51.499286,-2.246072,53.479275,"(0, 3)"
3,5,-0.216122,51.499286,-3.013794,56.186123,"(0, 4)"
4,4,-0.216122,51.499286,-3.606638,50.530432,"(0, 5)"


Convert the Longitude and Latitude columns of Node1 to tuples.

In [8]:
# Pack Node1's longitude and latitude into one (longitude, latitude) tuple per
# row, then remove the two separate coordinate columns.
node1_points = list(zip(df_edges['LONGITUDE_NODE_1'], df_edges['LATITUDE_NODE_1']))
df_edges = (
    df_edges
    .assign(NODE1_COORDS=node1_points)
    .drop(columns=['LONGITUDE_NODE_1', 'LATITUDE_NODE_1'])
)
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,LONGITUDE_NODE_2,LATITUDE_NODE_2,"EDGE(Node1_ID, Node2_ID)",NODE1_COORDS
0,2,-1.761813,52.483136,"(0, 1)","(-0.21612199999999998, 51.499286)"
1,0,-0.27598,51.941321,"(0, 2)","(-0.21612199999999998, 51.499286)"
2,3,-2.246072,53.479275,"(0, 3)","(-0.21612199999999998, 51.499286)"
3,5,-3.013794,56.186123,"(0, 4)","(-0.21612199999999998, 51.499286)"
4,4,-3.606638,50.530432,"(0, 5)","(-0.21612199999999998, 51.499286)"


Repeat for Node2's coordinates.

In [9]:
# Pack Node2's longitude and latitude into one (longitude, latitude) tuple per
# row, then remove the two separate coordinate columns.
node2_points = list(zip(df_edges['LONGITUDE_NODE_2'], df_edges['LATITUDE_NODE_2']))
df_edges = (
    df_edges
    .assign(NODE2_COORDS=node2_points)
    .drop(columns=['LONGITUDE_NODE_2', 'LATITUDE_NODE_2'])
)
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,"EDGE(Node1_ID, Node2_ID)",NODE1_COORDS,NODE2_COORDS
0,2,"(0, 1)","(-0.21612199999999998, 51.499286)","(-1.761813, 52.483136)"
1,0,"(0, 2)","(-0.21612199999999998, 51.499286)","(-0.27598, 51.941321)"
2,3,"(0, 3)","(-0.21612199999999998, 51.499286)","(-2.2460720000000003, 53.479275)"
3,5,"(0, 4)","(-0.21612199999999998, 51.499286)","(-3.013794, 56.186122999999995)"
4,4,"(0, 5)","(-0.21612199999999998, 51.499286)","(-3.606638, 50.530432)"


Compute the unrounded distance between the nodes.

In [10]:
from scipy.spatial import distance

# Pair up the two endpoint coordinate tuples of every edge and compute the
# plain Euclidean distance between them. The inputs are the raw
# (longitude, latitude) values, so the result is in the same units as those
# coordinates and is not rounded.
# The helper Series gets a fresh default integer index; the assignment below
# aligns on that index, which matches df_edges as long as df_edges also has
# the default index.
coord_pairs = pd.Series(list(zip(df_edges['NODE1_COORDS'], df_edges['NODE2_COORDS'])))
df_edges['EUCLIDEAN_DISTANCE'] = coord_pairs.apply(lambda pair: distance.euclidean(*pair))
df_edges.head()

Unnamed: 0,EUCLIDEAN_ROUNDED_DISTANCE,"EDGE(Node1_ID, Node2_ID)",NODE1_COORDS,NODE2_COORDS,EUCLIDEAN_DISTANCE
0,2,"(0, 1)","(-0.21612199999999998, 51.499286)","(-1.761813, 52.483136)",1.832245
1,0,"(0, 2)","(-0.21612199999999998, 51.499286)","(-0.27598, 51.941321)",0.446069
2,3,"(0, 3)","(-0.21612199999999998, 51.499286)","(-2.2460720000000003, 53.479275)",2.835675
3,5,"(0, 4)","(-0.21612199999999998, 51.499286)","(-3.013794, 56.186122999999995)",5.458334
4,4,"(0, 5)","(-0.21612199999999998, 51.499286)","(-3.606638, 50.530432)",3.526227


Save as `.csv` file.

In [11]:
# Write the edge table to the current directory without the row index.
# NOTE(review): float_format is expected to affect only float-typed columns
# (here EUCLIDEAN_DISTANCE). The coordinate columns hold Python tuples, so
# they are presumably written with their full, unrounded text form -- inspect
# the output file to confirm.
df_edges.to_csv(
    './Graph0.csv',
    float_format='%.6f',
    index=False,
)

In [19]:
# Scratch check: rounds a sample (float, int) tuple to 6 decimal places.
# np.round returns a float array, so the integer 2 comes back as 2.0.
# The result is not assigned and no other visible cell uses it.
# NOTE(review): the execution count (19) is out of sequence with the cells
# above; consider removing this cell before sharing the notebook.
np.round((0.21612199999999998, 2), decimals=6)

array([0.216122, 2.      ])