In [1]:
# Run this if something is not working
#!pip install folium
#!pip install ipyleaflet
#!pip install geojson
# in terminal: jupyter nbextension enable --py --sys-prefix ipyleaflet

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark.sql import Row, DataFrame
from pyspark.sql.functions import *

In [3]:
# Get or create the SparkSession for this notebook: local master (`local[*]`),
# application name 'Traffic Graph'.
spark = (
    SparkSession.builder
    .master('local[*]')
    .appName('Traffic Graph')
    .getOrCreate()
)

In [4]:
# Display the version string reported by the active Spark session.
spark.version

'2.3.0'

## Parameters

In [5]:
# Partition size selector (presumably minutes, given the 'min' suffix in the
# file names it is spliced into). It picks which partition CSV files are loaded
# and is used as the prefix of the exported HTML map file names.
partition_min = 10

## Import Data

In [6]:
# Schema applied to both partition CSV files: three columns, all declared
# non-nullable.
partition_schema = StructType([
    StructField('node', StringType(), False),
    StructField('partition', IntegerType(), False),
    StructField('group', StringType(), False),
])

In [7]:
# Load the forward partition assignments for the configured partition size.
# The file is ';'-separated, has a header row, and is read with the explicit
# partition_schema rather than an inferred one.
forward_partitions_path = (
    '../../data/partitions/forward_partitions-' + str(partition_min) + 'min.csv'
)
df_forward_partitions_raw = spark.read.csv(
    forward_partitions_path,
    schema=partition_schema,
    sep=';',
    header=True,
    ignoreLeadingWhiteSpace=True,
    ignoreTrailingWhiteSpace=True,
    timestampFormat='yyyy/MM/dd HH:mm:ss.SSS',
)

In [8]:
# Load the backward partition assignments for the configured partition size.
# The file is ';'-separated, has a header row, and is read with the explicit
# partition_schema rather than an inferred one.
backward_partitions_path = (
    '../../data/partitions/backward_partitions-' + str(partition_min) + 'min.csv'
)
df_backward_partitions_raw = spark.read.csv(
    backward_partitions_path,
    schema=partition_schema,
    sep=';',
    header=True,
    ignoreLeadingWhiteSpace=True,
    ignoreTrailingWhiteSpace=True,
    timestampFormat='yyyy/MM/dd HH:mm:ss.SSS',
)

In [9]:
# Read every parquet file under the nodes directory into one DataFrame,
# then show its row count as the cell output.
nodes = spark.read.parquet('../../data/nodes-parquet/*')
nodes.count()

857

In [10]:
# Read every parquet file under the weighted-edges directory into one DataFrame,
# then show its row count as the cell output.
edges = spark.read.parquet('../../data/edges_with_weight-parquet/*')
edges.count()

868

## Assign nodes to partitions

In [46]:
# Attach each node's X/Y columns (taken from `nodes`; the partition file only
# carries node, partition and group) to its forward partition assignment,
# ordered by partition id.
forward_join_condition = col('p.node') == col('n.node')
forward_partitions = (
    df_forward_partitions_raw.alias('p')
    .join(nodes.alias('n'), forward_join_condition, "inner")
    .select(
        col('n.node').alias("node"),
        col('partition'),
        col('group'),
        col('X'),
        col('Y'),
    )
    .sort(col("partition"))
)

# Row counts before and after the inner join, printed side by side.
print(df_forward_partitions_raw.count(), forward_partitions.count())

857 857


In [47]:
# Attach each node's X/Y columns (taken from `nodes`; the partition file only
# carries node, partition and group) to its backward partition assignment,
# ordered by partition id.
backward_partitions = (
    df_backward_partitions_raw.alias('p')
    .join(
        nodes.alias('n'),
        col('p.node') == col('n.node'),
        "inner"
    )
    .select(col('n.node').alias("node"), col('partition'), col('group'), col('X'), col('Y'))
    .sort(col("partition"))
)

# Row counts before and after the inner join for the *backward* table.
# The second count must come from backward_partitions (not forward_partitions),
# otherwise this check says nothing about the backward join.
print(df_backward_partitions_raw.count(), backward_partitions.count())

857 857


In [32]:
# Order both partition tables by partition id.
# NOTE(review): both frames are already sorted by 'partition' where they are
# built, so this re-sort looks redundant — confirm before removing.
forward_partitions = forward_partitions.orderBy('partition')
backward_partitions = backward_partitions.orderBy('partition')

In [33]:
# Highest forward partition id. `max` here is the Spark SQL aggregate brought in
# by `from pyspark.sql.functions import *`, which shadows Python's built-in max.
forward_partitions.agg(max('partition')).collect()

[Row(max(partition)=14)]

In [34]:
# Highest backward partition id. `max` here is the Spark SQL aggregate brought in
# by `from pyspark.sql.functions import *`, which shadows Python's built-in max.
backward_partitions.agg(max('partition')).collect()

[Row(max(partition)=12)]

### Check for missing nodes

In [35]:
# Nodes without a forward partition: left-outer-join nodes onto the partition
# table and keep only the rows where the partition side found no match.
nodes_vs_forward = nodes.alias('n').join(
    forward_partitions.alias('p'),
    col('n.node') == col('p.node'),
    "leftouter",
)
missing_nodes = (
    nodes_vs_forward
    .where(col('p.node').isNull())
    .select(col('n.node').alias('node'))
)
missing_nodes.count()

0

In [36]:
# Bring the missing-node rows (forward check) to the driver for inspection.
missing_nodes.collect()

[]

In [37]:
# Nodes without a backward partition: left-outer-join nodes onto the partition
# table and keep only the rows where the partition side found no match.
# Note that this rebinds `missing_nodes`, replacing the forward result.
nodes_vs_backward = nodes.alias('n').join(
    backward_partitions.alias('p'),
    col('n.node') == col('p.node'),
    "leftouter",
)
missing_nodes = (
    nodes_vs_backward
    .where(col('p.node').isNull())
    .select(col('n.node').alias('node'))
)
missing_nodes.count()

0

In [38]:
# Bring the missing-node rows (backward check) to the driver for inspection.
missing_nodes.collect()

[]

### Filter partitions

In [39]:
#forward_partitions = forward_partitions.where(col('partition') == 3).alias('p').join(
#    edges.alias('e'),
#    col('e.src') == col('p.node'),
#    'inner'
#).alias('m').join(
#    partitions.where(col('partition') == 3).alias('p'),
#    col('m.dest') == col('p.node'),
#    'inner'
#).select("m.src", "m.dest", "m.weight")
#
#forward_partitions.sort('src').where(~col('dest').like('%E4_A%')).agg(sum('weight')).show()

In [40]:
# Filter partitions
@udf(BooleanType())
def check_if_in_long_path(node):
    """Return True when `node` is one of the hard-coded "long path" node ids.

    Registered as a Spark UDF returning a boolean, so it can be used as a
    column expression inside DataFrame filters.
    """
    long_path = {
        'E20W-63160',
        'E20W-63260',
        'E20W-62390',
        'E20W-62835',
        'E20W-63715',
        'E20W-63560',
        'E20W-63045',
        'E20W-62945',
        'E20W-62635',
        'E20W-63410',
        'E20W-62060',
    }
    return node in long_path

#forward_partitions = forward_partitions.where(col('partition') !=  20)
#forward_partitions = forward_partitions.where(col('partition') !=  25)
#forward_partitions = forward_partitions.where(col('partition') !=  27)
#forward_partitions = forward_partitions.where(col('partition') !=  34)
#forward_partitions = forward_partitions.where(col('partition') !=  41)
#forward_partitions = forward_partitions.where(col('partition') !=  43)
#forward_partitions = forward_partitions.where(col('partition') !=  44)

# Exclude backward partitions 11 and 12 from everything that follows.
backward_partitions = backward_partitions.where(
    (col('partition') != 11) & (col('partition') != 12)
)

#partitions = partitions.filter(check_if_in_long_path('node'))

## Plot sensors with popups

In [57]:
import folium
import os

In [58]:
# Two maps with identical centre and zoom: m_f receives the forward partition
# markers, m_b the backward partition markers.
m_f = folium.Map([59.304591, 17.703240], zoom_start=10)
m_b = folium.Map([59.304591, 17.703240], zoom_start=10)
# Three parallel palettes of 15 hex colours each, one per `group` value.
# The plotting cells pick an entry by partition id, so the same position in
# each list is the colour used for the same partition.
# Palette for nodes whose group is 'start'.
start_colors = [
    '#38ff8a', '#5dbcfc', '#d379f7', '#ffe377', '#fcaf6c',
    '#f49389', '#5f87af', '#9fb3b5', '#fcd6f7', '#5f27cd',
    '#af6487', '#b2698b', '#535d60', '#a69ee2', '#dda394'
]
# Palette for nodes whose group is 'center'.
# NOTE(review): '#b33771' appears at positions 10 and 11, and '#5f27cd' at
# position 9 is identical to start_colors[9] — confirm these are intended.
center_colors = [
    '#2ecc71', '#3498db', '#9b59b6', '#f1c40f', '#e67e22',
    '#e74c3c', '#34495e', '#7f8c8d', '#ff9ff3', '#5f27cd',
    '#b33771', '#b33771', '#2d3436', '#6c5ce7', '#e17055'
]
# Palette for every other group value (the plotting cells' `else` branch).
end_colors = [
    '#1c934e', '#1e587f', '#6d3f7f', '#b59d41', '#b76721',
    '#ba2d1d', '#17324c', '#3c4444', '#fc32e1', '#5114cc',
    '#aa195d', '#af0c58', '#1d3238', '#321cdb', '#d63d17'
]

In [59]:
# Draw one circle marker per node on the forward map (m_f). The colour is
# chosen by partition id from the palette that matches the node's group.
forward_palettes = {'start': start_colors, 'center': center_colors}
for data in forward_partitions.toLocalIterator():
    # Any group other than 'start' or 'center' is drawn with the end palette.
    palette = forward_palettes.get(data['group'], end_colors)
    # Wrap on the chosen palette's own length. The previous `len(colors)`
    # referenced a name that is never defined in this notebook, which raises
    # NameError on a fresh kernel.
    marker_color = palette[data['partition'] % len(palette)]
    folium.CircleMarker(
        location=[data['Y'], data['X']],
        color=marker_color,
        fill_color=marker_color,
        fill_opacity=1,
        fill=True,
        radius=5,
        popup=folium.Popup(data['node'] + " is in partition: " + str(data['partition']))
    ).add_to(m_f)

In [60]:
# Draw one circle marker per node on the backward map (m_b). The colour is
# chosen by partition id from the palette that matches the node's group.
backward_palettes = {'start': start_colors, 'center': center_colors}
for data in backward_partitions.toLocalIterator():
    # Any group other than 'start' or 'center' is drawn with the end palette.
    palette = backward_palettes.get(data['group'], end_colors)
    # Wrap on the chosen palette's own length. The previous `len(colors)`
    # referenced a name that is never defined in this notebook, which raises
    # NameError on a fresh kernel.
    marker_color = palette[data['partition'] % len(palette)]
    folium.CircleMarker(
        location=[data['Y'], data['X']],
        color=marker_color,
        fill_color=marker_color,
        fill_opacity=1,
        fill=True,
        radius=5,
        popup=folium.Popup(data['node'] + " is in partition: " + str(data['partition']))
    ).add_to(m_b)

In [61]:
# Render the forward-partition map inline as the cell output.
m_f

In [62]:
# Export the forward-partition map as a standalone HTML file in the current
# working directory, named '<partition_min>min_partition.html'.
m_f.save(str(partition_min) + 'min_partition.html')

In [63]:
# Render the backward-partition map inline as the cell output.
m_b

In [64]:
# Export the backward-partition map as a standalone HTML file under its own
# name. Saving it as '<partition_min>min_partition.html' would overwrite the
# forward map exported to that same path.
m_b.save(str(partition_min) + 'min_backward_partition.html')