In [1]:
# Uncomment and run the lines below if one of these packages is missing
#!pip install folium
#!pip install ipyleaflet
#!pip install geojson
# in terminal: jupyter nbextension enable --py --sys-prefix ipyleaflet

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark.sql import Row, DataFrame
from pyspark.sql.functions import *

In [3]:
# Create (or reuse) a Spark session that runs locally under the
# application name 'Traffic Graph'.
spark = (
    SparkSession.builder
    .master('local[*]')
    .appName('Traffic Graph')
    .getOrCreate()
)

In [4]:
# Display the version of Spark that backs this session.
spark.version

'2.3.0'

## Parameters

In [5]:
# Parameters of the partitioning run to visualise. Their values are embedded
# in the file name of the overlapping-partitions CSV that is read below.
base_weight, forward, backward = 2, 3, 10

## Import Data

In [6]:
# Explicit schema for the overlapping-partitions CSV: three non-nullable columns.
partition_schema = StructType([
    StructField('node', StringType(), False),
    StructField('partition', IntegerType(), False),
    StructField('type', StringType(), False),
])

In [7]:
# Build the CSV file name from the partitioning parameters, then load it with
# the explicit schema (the file is ';'-separated and has a header row).
overlapping_partitions_path = (
    '../../data/partitions/overlapping_partitions-base_weight_' + str(base_weight)
    + "_min-forward_" + str(forward)
    + "-backward_" + str(backward)
    + ".csv"
)
df_overlapping_partitions_raw = spark.read.csv(
    overlapping_partitions_path,
    schema=partition_schema,
    sep=';',
    header=True,
    ignoreLeadingWhiteSpace=True,
    ignoreTrailingWhiteSpace=True,
    timestampFormat='yyyy/MM/dd HH:mm:ss.SSS'
)

In [8]:
# Load the node table from parquet and display its row count.
nodes = spark.read.parquet('../../data/nodes-parquet/*')
nodes.count()

857

In [9]:
# Load the weighted edge table from parquet and display its row count.
edges = spark.read.parquet('../../data/edges_with_weight-parquet/*')
edges.count()

868

## Assign nodes to partitions

In [10]:
# Rows of the raw partition assignment tagged as 'backward'.
backward_assignments = df_overlapping_partitions_raw.where(col('type') == 'backward')

# Attach each node's X/Y columns from `nodes` and order the result by partition.
backward_partitions = (
    backward_assignments.alias('p')
    .join(nodes.alias('n'), col('p.node') == col('n.node'), "inner")
    .select(col('n.node').alias("node"), col('partition'), col('X'), col('Y'))
    .sort(col("partition"))
)

# Sanity check: print the row count before and after the join so that any
# dropped or duplicated assignment shows up as a mismatch.
print(backward_assignments.count(), backward_partitions.count())

13752 13752


In [11]:
# Rows of the raw partition assignment tagged as 'critical'.
critical_assignments = df_overlapping_partitions_raw.where(col('type') == 'critical')

# Attach each node's X/Y columns from `nodes` and order the result by partition.
critical_partitions = (
    critical_assignments.alias('p')
    .join(nodes.alias('n'), col('p.node') == col('n.node'), "inner")
    .select(col('n.node').alias("node"), col('partition'), col('X'), col('Y'))
    .sort(col("partition"))
)

# Sanity check: print the row count before and after the join so that any
# dropped or duplicated assignment shows up as a mismatch.
print(critical_assignments.count(), critical_partitions.count())

857 857


In [12]:
# Highest partition id among the critical partitions.
# `max` here is the Spark aggregate brought in by the star import of
# pyspark.sql.functions, not Python's built-in max.
critical_partitions.agg(max('partition')).collect()

[Row(max(partition)=61)]

In [13]:
# Rows of the raw partition assignment tagged as 'forward'.
forward_assignments = df_overlapping_partitions_raw.where(col('type') == 'forward')

# Attach each node's X/Y columns from `nodes` and order the result by partition.
forward_partitions = (
    forward_assignments.alias('p')
    .join(nodes.alias('n'), col('p.node') == col('n.node'), "inner")
    .select(col('n.node').alias("node"), col('partition'), col('X'), col('Y'))
    .sort(col("partition"))
)

# Sanity check: print the row count before and after the join so that any
# dropped or duplicated assignment shows up as a mismatch.
print(forward_assignments.count(), forward_partitions.count())

2063 2063


### Check for missing nodes

In [14]:
# Left-outer join every node against the critical partitions; a node without a
# match comes back with a null partition-side key.
nodes_with_critical = nodes.alias('n').join(
    critical_partitions.alias('p'),
    col('n.node') == col('p.node'),
    "leftouter"
)

# Keep only the unmatched nodes and display how many there are.
missing_nodes = (
    nodes_with_critical
    .where(col('p.node').isNull())
    .select(col('n.node').alias('node'))
)
missing_nodes.count()

0

In [15]:
# List the nodes that have no critical partition.
missing_nodes.collect()

[]

## Plot base partitions

In [16]:
import folium

In [17]:
# Base map that the critical-partition markers are added to.
m = folium.Map(location=[59.304591, 17.703240], zoom_start=10)
# Marker palette: a partition id is mapped to a colour by taking it modulo the
# palette length. Every entry is distinct so that two different palette slots
# never render identically (the list previously contained '#b33771' twice).
colors = ['#2ecc71', '#3498db', '#9b59b6', '#f1c40f', '#e67e22',
          '#e74c3c', '#34495e', '#7f8c8d', '#ff9ff3', '#5f27cd',
          '#b33771', '#00cec9', '#2d3436', '#6c5ce7', '#e17055']

In [18]:
# Draw one filled circle per node, coloured by its critical partition.
palette_size = len(colors)
for point in critical_partitions.toLocalIterator():
    marker_color = colors[point['partition'] % palette_size]
    label = point['node'] + " is in partition: " + str(point['partition'])
    folium.CircleMarker(
        location=[point['Y'], point['X']],
        radius=5,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=1,
        popup=folium.Popup(label)
    ).add_to(m)

In [19]:
# Render the map of critical partitions inline.
m

## Plot overlapping partition

In [51]:
# Id of the partition whose backward/critical/forward nodes are plotted below.
partition_id = 32

In [52]:
# Separate map for a single partition and its overlapping node sets.
m_o = folium.Map(location=[59.304591, 17.703240], zoom_start=10)

# One fixed colour per node role.
critical_color = '#e74c3c'  # red
forward_color = '#3498db'   # blue
backward_color = '#2ecc71'  # green

In [53]:
# Add the selected partition's backward nodes to the overlap map.
backward_nodes = backward_partitions.where(col('partition') == partition_id)
for point in backward_nodes.toLocalIterator():
    label = point['node'] + " is in partition: " + str(point['partition'])
    folium.CircleMarker(
        location=[point['Y'], point['X']],
        radius=5,
        color=backward_color,
        fill=True,
        fill_color=backward_color,
        fill_opacity=1,
        popup=folium.Popup(label)
    ).add_to(m_o)

In [54]:
# Add the selected partition's critical nodes to the overlap map.
critical_nodes = critical_partitions.where(col('partition') == partition_id)
for point in critical_nodes.toLocalIterator():
    label = point['node'] + " is in partition: " + str(point['partition'])
    folium.CircleMarker(
        location=[point['Y'], point['X']],
        radius=5,
        color=critical_color,
        fill=True,
        fill_color=critical_color,
        fill_opacity=1,
        popup=folium.Popup(label)
    ).add_to(m_o)

In [55]:
# Add the selected partition's forward nodes to the overlap map.
forward_nodes = forward_partitions.where(col('partition') == partition_id)
for point in forward_nodes.toLocalIterator():
    label = point['node'] + " is in partition: " + str(point['partition'])
    folium.CircleMarker(
        location=[point['Y'], point['X']],
        radius=5,
        color=forward_color,
        fill=True,
        fill_color=forward_color,
        fill_opacity=1,
        popup=folium.Popup(label)
    ).add_to(m_o)

In [56]:
# Render the overlap map inline.
m_o

In [57]:
# Save the overlap map as an HTML file (path is relative to the working directory).
m_o.save("overlapping_map.html")