In [1]:
# Run this if something is not working
#!pip install folium
#!pip install ipyleaflet
#!pip install geojson
# in terminal: jupyter nbextension enable --py --sys-prefix ipyleaflet

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark.sql import Row, DataFrame
from pyspark.sql.functions import *

In [3]:
# Create (or reuse) a Spark session that runs locally on all available cores.
spark = (
    SparkSession.builder
    .master('local[*]')
    .appName('Traffic Graph')
    .getOrCreate()
)

In [4]:
# Show the version of the running Spark session (recorded for reproducibility).
spark.version

'2.3.0'

In [5]:
# Explicit schema for the detector metadata CSV.
# Each add() call takes (column name, Spark type, nullable flag).
metadata_schema = (
    StructType()
    # Y / X are used later as the marker location on the map.
    .add('Y', DoubleType(), False)
    .add('X', DoubleType(), False)
    # Detector / lane identifiers and the reference string the node id is built from.
    .add('DetectorId', ShortType(), False)
    .add('McsDetecto', ShortType(), False)
    .add('McsDsRefer', StringType(), False)
    .add('LaneId', ShortType(), False)
    # Optional descriptive columns (nullable).
    .add('Bearing', ShortType(), True)
    .add('Location', StringType(), True)
    .add('RegionId', ShortType(), False)
    .add('Entreprene', StringType(), True)
    .add('StationId', ShortType(), False)
    .add('SiteId', ShortType(), False)
    # Timestamp columns; parsed with the timestampFormat given to the CSV reader.
    .add('SiteValidF', TimestampType(), False)
    .add('SiteValidT', TimestampType(), False)
    .add('DetectorVa', TimestampType(), False)
    .add('Detector_1', TimestampType(), False)
)

In [6]:
# Read the semicolon-separated detector metadata file using the explicit
# schema; the first line is a header and surrounding whitespace in values
# is trimmed.
df_metadata_raw = (
    spark.read
    .schema(metadata_schema)
    .options(
        sep=';',
        header=True,
        ignoreLeadingWhiteSpace=True,
        ignoreTrailingWhiteSpace=True,
        timestampFormat='yyyy/MM/dd HH:mm:ss.SSS',
    )
    .csv('../../data/StockholmDetectorMetadata.csv')
)

### Remove invalid sensors

In [7]:
# Keep only rows whose 'Detector_1' value contains "9999" and print the row
# count before and after, next to the expected totals.
# NOTE(review): presumably a year of 9999 marks a detector that is still
# valid (open-ended end date) -- confirm against the data description.
# NOTE: this rebinds df_metadata_raw, so re-running the cell reports the
# already-filtered count as "Before removal".
print("Before removal:", df_metadata_raw.count(), "should be 2059")
has_9999_marker = col('Detector_1').like('%9999%')
df_metadata_raw = df_metadata_raw.filter(has_9999_marker)
print("After removal:", df_metadata_raw.count(), "should be 2037")

Before removal: 2059 should be 2059
After removal: 2037 should be 2037


In [8]:
@udf(StringType())
def generate_node_id(reference):
    """Build a node identifier from a sensor reference string.

    Spaces are replaced by hyphens and commas are removed.

    Args:
        reference: value of the reference column for one row, or None.

    Returns:
        The normalised identifier, or None when ``reference`` is None.
    """
    # The schema declares the column non-nullable, but that may not be
    # enforced when reading from a file, so guard against nulls instead of
    # failing with AttributeError inside the Python worker.
    if reference is None:
        return None
    return reference.replace(' ', '-').replace(',', '')


# Add the derived 'node' column; all lanes of one sensor location share it.
df_metadata = df_metadata_raw.withColumn('node', generate_node_id('McsDsRefer'))

## Extract Nodes

In [9]:
# Some sensor locations have a LaneId that does not start from 1, so the
# smallest LaneId of every node is computed first and then joined back to
# the metadata to pick that lane's row (and its X / Y) for the node.
# `min` here is pyspark.sql.functions.min (star import), not the builtin.
min_lane_per_node = (
    df_metadata
    .groupBy("node")
    .agg(min('LaneId').alias('MinLaneId'))
    .alias("m")
)
sensor_rows = df_metadata.alias("n")

is_min_lane = col('m.MinLaneId') == col('n.LaneId')
is_same_node = col('m.node') == col('n.node')

nodes = (
    min_lane_per_node
    .join(sensor_rows, is_min_lane & is_same_node, "inner")
    .select(col("n.node").alias("node"), "X", "Y")
)

In [10]:
# Number of rows in `nodes` (triggers execution of the join above).
nodes.count()

859

In [11]:
#highlight nodes
#highlight = ['E4N-23570']
#highlight_nodes = nodes.where(col("node").isin(highlight))
#nodes = nodes.filter(col("node").isin(highlight) == False)

## Plot sensors with popups

In [69]:
import folium
import os

In [70]:
# Base map with a hard-coded centre and zoom level.
# An alternative tile set was tried previously: tiles='Stamen Terrain'
m = folium.Map(location=[59.304591, 17.703240], zoom_start=10)

In [71]:
# Add one small, fully filled red circle per node; the popup shows the node id.
# toLocalIterator() streams rows to the driver instead of collecting them all.
marker_style = dict(
    color='#e74c3c',
    fill=True,
    fill_color='#e74c3c',
    fill_opacity=1,
    radius=2,
)
for node_row in nodes.toLocalIterator():
    marker = folium.CircleMarker(
        location=[node_row['Y'], node_row['X']],
        popup=folium.Popup(node_row['node']),
        **marker_style
    )
    marker.add_to(m)

In [72]:
# Bare last expression: the notebook renders the map inline.
m

In [73]:
# Write the map to an HTML file; the path is relative, so the file is
# created in the notebook's current working directory.
path = 'sensor_map.html'
m.save(path)