In [1]:
from graphframes import GraphFrame
from pixiedust.display import *
import pyspark.sql.functions as SF 

Pixiedust database opened successfully


In [2]:
# Load the transport graph inputs as Spark DataFrames; the first CSV row is the header.
# The data directory defaults to the original location but can be overridden with
# the GRAFOS_DATA_DIR environment variable, so the notebook is not tied to one
# machine's absolute path.
# No schema is passed to the reader here; the numeric columns that are needed
# are cast explicitly in the following cells.
import os

DATA_DIR = os.environ.get("GRAFOS_DATA_DIR", "/home/maria/Documents/grafos")

nodes = spark.read.csv(os.path.join(DATA_DIR, "transport-nodes.csv"), header=True)
rels = spark.read.csv(os.path.join(DATA_DIR, "transport-relationships.csv"), header=True)


In [3]:
# Replace the "population" column with its value cast to double.
nodes = nodes.withColumn("population", SF.col("population").cast("double"))

In [4]:
# Replace the "cost" column with its value cast to an integer type.
rels = rels.withColumn("cost", SF.col("cost").cast("Int"))

In [5]:
# Peek at the first two relationships without truncating long cell values.
rels.show(n=2, truncate=False)

+---------+--------+------------+----+
|src      |dst     |relationship|cost|
+---------+--------+------------+----+
|Amsterdam|Utrecht |EROAD       |46  |
|Amsterdam|Den Haag|EROAD       |59  |
+---------+--------+------------+----+
only showing top 2 rows



In [5]:
# Build the GraphFrame: `nodes` supplies the vertices and `rels` the edges.
g = GraphFrame(nodes, rels)

In [7]:
# Print the vertex table, the edge table and the per-vertex degree table, in that order.
for frame in (g.vertices, g.edges, g.degrees):
    frame.show()


+----------------+---------+---------+----------+
|              id| latitude|longitude|population|
+----------------+---------+---------+----------+
|       Amsterdam|52.379189| 4.899431|  821752.0|
|         Utrecht|52.092876| 5.104480|  334176.0|
|        Den Haag|52.078663| 4.288788|  514861.0|
|       Immingham| 53.61239| -0.22219|    9642.0|
|       Doncaster| 53.52285| -1.13116|  302400.0|
|Hoek van Holland|  51.9775|  4.13333|    9382.0|
|      Felixstowe| 51.96375|   1.3511|   23689.0|
|         Ipswich| 52.05917|  1.15545|  133384.0|
|      Colchester| 51.88921|  0.90421|  104390.0|
|          London|51.509865|-0.118092| 8787892.0|
|       Rotterdam|  51.9225|  4.47917|  623652.0|
|           Gouda| 52.01667|  4.70833|   70939.0|
+----------------+---------+---------+----------+

+----------------+----------------+------------+----+
|             src|             dst|relationship|cost|
+----------------+----------------+------------+----+
|       Amsterdam|         Utrecht|  

In [55]:
# Render the graph with `display` (brought in by the wildcard import from pixiedust.display).
display(g)

In [11]:
# Per-vertex degree tables taken from the GraphFrame: total, incoming and outgoing.
Degree, InDegree, OutDegree = g.degrees, g.inDegrees, g.outDegrees

In [12]:
# Degree centrality table: one row per vertex with degree, inDegree and outDegree.
# The join starts from the vertex list rather than from `Degree`, because the
# GraphFrames degree tables leave out vertices that have no matching edges; a
# vertex with no edges at all would otherwise be missing from the result.
# Missing counts produced by the left joins are replaced with 0.
CentralityDegree = (
    g.vertices.select("id")
    .join(Degree, ["id"], "left")
    .join(InDegree, ["id"], "left")
    .join(OutDegree, ["id"], "left")
    .fillna(0)
)

In [16]:
# Show the ten vertices with the highest in-degree, without truncating values.
by_in_degree = SF.col("inDegree").desc()
CentralityDegree.orderBy(by_in_degree).show(n=10, truncate=False)

+----------+------+--------+---------+
|id        |degree|inDegree|outDegree|
+----------+------+--------+---------+
|Rotterdam |3     |3       |0        |
|Utrecht   |2     |2       |0        |
|Den Haag  |4     |2       |2        |
|Felixstowe|2     |1       |1        |
|Doncaster |2     |1       |1        |
|London    |2     |1       |1        |
|Gouda     |3     |1       |2        |
|Ipswich   |2     |1       |1        |
|Immingham |2     |1       |1        |
|Colchester|2     |1       |1        |
+----------+------+--------+---------+
only showing top 10 rows



In [30]:
import networkx as nw
import pandas as pd
import pprint as pp


In [42]:
# Collect the edge list into a pandas DataFrame, renaming src/dst to the
# "source"/"target" column names used for the NetworkX edge list below.
df = (
    rels.withColumnRenamed("src", "source")
    .withColumnRenamed("dst", "target")
    .toPandas()
)

# Build the graph with the explicit edge-list constructor. Passing the frame
# straight to nw.Graph(...) makes NetworkX guess the format from the frame's
# shape, and a frame with as many rows as columns is read as an adjacency
# matrix instead of an edge list.
# edge_attr=True keeps the remaining columns as edge attributes, which is what
# the implicit conversion did.
# NOTE: the result is an undirected nw.Graph, so the src -> dst direction of
# the relationships is not preserved here.
df1 = nw.from_pandas_edgelist(df, source="source", target="target", edge_attr=True)

In [54]:
# Harmonic centrality for every node of `df1` (which holds the NetworkX graph).
# Only the graph is passed, so every other option keeps its NetworkX default.
HarmonicCentrality = nw.harmonic_centrality(df1)
print(pp.pformat(HarmonicCentrality))

{'Amsterdam': 6.166666666666666,
 'Colchester': 4.816666666666667,
 'Den Haag': 6.666666666666666,
 'Doncaster': 5.0,
 'Felixstowe': 5.25,
 'Gouda': 5.566666666666666,
 'Hoek van Holland': 5.999999999999999,
 'Immingham': 5.333333333333333,
 'Ipswich': 4.95,
 'London': 4.816666666666667,
 'Rotterdam': 5.866666666666666,
 'Utrecht': 5.066666666666667}


In [53]:
# Closeness centrality for every node of `df1` (which holds the NetworkX graph).
# Only the graph is passed, so every other option keeps its NetworkX default.
WassermanCentrality = nw.closeness_centrality(df1)
print(pp.pformat(WassermanCentrality))

{'Amsterdam': 0.44,
 'Colchester': 0.3235294117647059,
 'Den Haag': 0.4583333333333333,
 'Doncaster': 0.3548387096774194,
 'Felixstowe': 0.3793103448275862,
 'Gouda': 0.3548387096774194,
 'Hoek van Holland': 0.4230769230769231,
 'Immingham': 0.39285714285714285,
 'Ipswich': 0.34375,
 'London': 0.3235294117647059,
 'Rotterdam': 0.39285714285714285,
 'Utrecht': 0.34375}


In [52]:
# Betweenness centrality for every node of `df1` (which holds the NetworkX graph).
# Only the graph is passed, so every other option keeps its NetworkX default.
BetweennessCentrality = nw.betweenness_centrality(df1)
print(pp.pformat(BetweennessCentrality))

{'Amsterdam': 0.3090909090909091,
 'Colchester': 0.11818181818181818,
 'Den Haag': 0.28181818181818186,
 'Doncaster': 0.17272727272727273,
 'Felixstowe': 0.20909090909090908,
 'Gouda': 0.06363636363636364,
 'Hoek van Holland': 0.2636363636363636,
 'Immingham': 0.22727272727272727,
 'Ipswich': 0.15454545454545454,
 'London': 0.12727272727272726,
 'Rotterdam': 0.05454545454545454,
 'Utrecht': 0.03636363636363636}
