# Edge Dataset

This notebook will convert the `Graph*.edges` files into `.csv` files.

In [None]:
import os

import numpy as np
import pandas as pd
from geopy.distance import geodesic
from scipy.spatial import distance

import edge_dataset_utils

Define necessary directory paths.

In [None]:
# Directory containing the .tsp graph files (e.g. Graph0.tsp). No trailing
# slash: code that reads from it prepends '/' to the file name.
GRAPHS_DIR = '../2_graphgen/Graphs'
# Directory containing the Graph*.edges files. Trailing slash included: file
# names are appended to it directly.
EDGES_DIR = '../3_edgegen/'

#### Convert the edge files to CSVs

Open edge file.

In [None]:
# Rows are space-separated and read as (Node1_ID, Node2_ID, DISTANCE_KM).
# The first line of the file is skipped rather than parsed as data
# (presumably a header line — confirm against the .edges format).
df_edges = pd.read_csv(
    EDGES_DIR + 'Graph0.edges',
    sep=' ',
    skiprows=1,
    names=['Node1_ID', 'Node2_ID', 'DISTANCE_KM'],
)
df_edges.head()

Open Graph file to get coordinates.

In [None]:
# The first six lines of the .tsp file are skipped; the remaining
# space-separated rows are read as (NODE_ID, LATITUDE, LONGITUDE).
df_graph = pd.read_csv(
    GRAPHS_DIR + '/Graph0.tsp',
    sep=' ',
    skiprows=6,
    names=['NODE_ID', 'LATITUDE', 'LONGITUDE'],
)
df_graph.head()

Join coordinates on the node IDs

In [None]:
# Attach the first endpoint's coordinates; the left join keeps every edge row.
# NOTE(review): DataFrame.join matches `on` against df_graph's index (its
# default row numbers), not its NODE_ID column — confirm that the edge IDs are
# meant to be row positions in the graph file.
df_edges = (
    df_edges
    .join(df_graph, on='Node1_ID', how='left')
    .rename(columns={'LATITUDE': 'LATITUDE_NODE_1',
                     'LONGITUDE': 'LONGITUDE_NODE_1'})
    .drop(columns='NODE_ID')
)
df_edges.head()

Repeat for the other node in each edge.

In [None]:
# Attach the second endpoint's coordinates; the left join keeps every edge row.
# NOTE(review): DataFrame.join matches `on` against df_graph's index (its
# default row numbers), not its NODE_ID column — confirm that the edge IDs are
# meant to be row positions in the graph file.
df_edges = (
    df_edges
    .join(df_graph, on='Node2_ID', how='left')
    .rename(columns={'LATITUDE': 'LATITUDE_NODE_2',
                     'LONGITUDE': 'LONGITUDE_NODE_2'})
    .drop(columns='NODE_ID')
)
df_edges.head()

Combine the two node ID columns into tuples that will be our edges.

In [None]:
# Pair the two node IDs of each row into one (Node1_ID, Node2_ID) tuple column,
# then drop the separate ID columns. The column name is not a valid keyword,
# hence the dict unpacking into assign().
df_edges = (
    df_edges
    .assign(**{
        'EDGE(Node1_ID, Node2_ID)': list(zip(df_edges['Node1_ID'], df_edges['Node2_ID'])),
    })
    .drop(columns=['Node1_ID', 'Node2_ID'])
)
df_edges.head()

Convert the Latitude and Longitude columns of Node1 to (latitude, longitude) tuples.

In [None]:
# Collapse the first endpoint's coordinates into a (latitude, longitude) tuple
# column and drop the two scalar columns it was built from.
df_edges = (
    df_edges
    .assign(NODE1_COORDS=list(zip(df_edges['LATITUDE_NODE_1'], df_edges['LONGITUDE_NODE_1'])))
    .drop(columns=['LONGITUDE_NODE_1', 'LATITUDE_NODE_1'])
)
df_edges.head()

Repeat for Node2's coordinates.

In [None]:
# Collapse the second endpoint's coordinates into a (latitude, longitude) tuple
# column and drop the two scalar columns it was built from.
df_edges = (
    df_edges
    .assign(NODE2_COORDS=list(zip(df_edges['LATITUDE_NODE_2'], df_edges['LONGITUDE_NODE_2'])))
    .drop(columns=['LONGITUDE_NODE_2', 'LATITUDE_NODE_2'])
)
df_edges.head()

Compute the geodesic distance between the two nodes of each edge, both unrounded and rounded to the nearest kilometre.

In [None]:
# `geodesic` comes from the notebook's import cell.
# Each NODE*_COORDS value is a (latitude, longitude) tuple, which is the point
# order geodesic() expects.
# The Series is built on df_edges.index so rows stay aligned even if the frame's
# index is not the default 0..n-1; dtype=float keeps the column numeric when
# there are no edges.
df_edges['GEODESIC_DISTANCE_KM'] = pd.Series(
    [
        geodesic(node1, node2).kilometers
        for node1, node2 in zip(df_edges['NODE1_COORDS'], df_edges['NODE2_COORDS'])
    ],
    index=df_edges.index,
    dtype=float,
)
# Same distance rounded to the nearest whole kilometre.
df_edges['GEODESIC_ROUNDED_DISTANCE_KM'] = df_edges['GEODESIC_DISTANCE_KM'].round()

df_edges.head()

Clean up any floating-point errors by rounding the coordinates to six decimal places.

In [None]:
# Round every coordinate to 6 decimals. np.round returns NumPy scalars, whose
# repr under NumPy >= 2 is "np.float64(...)"; because the tuples are written to
# CSV through their string form, .tolist() converts them back to built-in
# floats so the output stays "(lat, lon)".
df_edges['NODE1_COORDS'] = df_edges['NODE1_COORDS'].apply(
    lambda coords: tuple(np.round(coords, decimals=6).tolist())
)
df_edges['NODE2_COORDS'] = df_edges['NODE2_COORDS'].apply(
    lambda coords: tuple(np.round(coords, decimals=6).tolist())
)
df_edges.head()

Save as `.csv` file.

In [None]:
# Write the frame to the current directory without its index; floating-point
# values are formatted with six decimal places.
df_edges.to_csv(
    './Graph0.csv',
    float_format='%.6f',
    index=False,
)

---
Apply this procedure to all the edge files.

In [None]:
# Batch conversion through the project helper, given the graph and edge
# directories. NOTE(review): edges2csv is defined in edge_dataset_utils and is
# not shown here — presumably it repeats the steps above for every graph/edge
# file pair; confirm where it writes its output.
edge_dataset_utils.edges2csv(GRAPHS_DIR, EDGES_DIR)