In [1]:
import pandas as pd

In [2]:
import json
from pathlib import Path

import folium
import matplotlib.pyplot as plt
import pandas as pd
from folium import FeatureGroup, LayerControl
from folium.plugins import TimestampedGeoJson

In [3]:
from datetime import datetime as dt
from datetime import timedelta

# Load data

Load the test data. For now we only load a local test file.

In [4]:
# Path to fake data file. It is resolved against the current user's home
# directory so the notebook is not tied to one machine's absolute path.
path_to_data = Path.home() / 'Desktop' / 'test_data.jsonl'

In [5]:
# lines=True makes pandas parse the file as one JSON object per line
df = pd.read_json(path_or_buf=path_to_data, lines=True)

# Preprocessing

Extract the latitude and longitude from each record's `coordinates` field; the map visualisation needs them as separate columns.

In [6]:
def extract_lat_lng_from_row(row):
    """Return the ``(latitude, longitude)`` pair stored in a row.

    The row must provide a 'coordinates' entry that itself maps 'lat' and
    'lng' to the two values.
    """
    coords = row['coordinates']
    return coords['lat'], coords['lng']

In [7]:
# Build one (lat, lng) tuple per row, then transpose the pairs into two columns
lat_lng_pairs = df.apply(extract_lat_lng_from_row, axis=1)
latitudes, longitudes = zip(*lat_lng_pairs)
df['location_latitude'] = latitudes
df['location_longitude'] = longitudes

---

Placeholder: the current date and time is used for every row. It is to be replaced with the time at which the API call was initiated.

In [8]:
# Stamp every row with the same current local timestamp (naive, no tzinfo)
current_timestamp = dt.now()
df['date'] = current_timestamp

In [9]:
def hour_rounder(t):
    """Round a datetime to the nearest full hour.

    Minutes 0-29 round down to the start of the current hour; minutes 30-59
    round up to the start of the next hour (which may fall on the next day).
    Seconds and microseconds are discarded and do not influence the rounding.
    """
    start_of_hour = t.replace(minute=0, second=0, microsecond=0)
    round_up = timedelta(hours=1) if t.minute >= 30 else timedelta(0)
    return start_of_hour + round_up

In [10]:
# Round the timestamp first and derive BOTH fields from the rounded value.
# Otherwise a time such as 23:45 rounds to hour 0 of the next day while the
# weekday would still refer to the previous day, and the lookup would read
# the wrong day's slot.
rounded_date = df['date'].apply(hour_rounder)
df['weekday'] = rounded_date.apply(lambda x: x.weekday())
df['hour_of_day'] = rounded_date.apply(lambda x: x.hour)

This lookup returns the usual popularity for the same day of the week and hour of day as the API call.

In [11]:
def populartimes_lookup(row):
    """Return the usual popularity for the row's weekday and hour of day.

    ``row['populartimes']`` is indexed by ``row['weekday']``; the selected
    entry's 'data' sequence is then indexed by ``row['hour_of_day']``.
    """
    day_index, hour_index = row['weekday'], row['hour_of_day']
    hourly_popularity = row['populartimes'][day_index]['data']
    return hourly_popularity[hour_index]

In [12]:
# Row-wise lookup of the usual popularity for each station
usual_popularity = df.apply(populartimes_lookup, axis=1)
df['usual_popularity_at_day_hour'] = usual_popularity

---

Now we have the current popularity of each station and its usual popularity at that day and time. We want to visualise where stations are more crowded than usual, so we map each station into one of three buckets using the following thresholds:
- Green: current popularity is below 30% of the usual popularity
- Yellow: current popularity is between 30% and 60% of the usual popularity
- Red: current popularity is above 60% of the usual popularity

In [13]:
# Fractions of the usual popularity that separate the three buckets.
# The key spelling 'low_treshold' is kept as-is because it is the key that
# the classification code reads.
thresholds = dict(
    low_treshold=0.3,
    high_threshold=0.6,
)

In [14]:
def calculate_critical_level_from_thresholds(row, threshold_config=None):
    """Classify current popularity relative to the usual popularity.

    The usual popularity is scaled by the low and high threshold fractions to
    obtain two cut-off values, and the current popularity is bucketed against
    them:

    - 'low':    current popularity is below the low cut-off
    - 'medium': current popularity lies between the two cut-offs (inclusive)
    - 'high':   current popularity is above the high cut-off

    Values that sit exactly on a cut-off are classified as 'medium', so every
    comparable pair of values yields a bucket.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'usual_popularity_at_day_hour' and 'current_popularity'.
    threshold_config : dict, optional
        Mapping with the keys 'low_treshold' and 'high_threshold'. Defaults to
        the notebook-level ``thresholds`` dict.

    Returns
    -------
    str or None
        'low', 'medium' or 'high'; None when either popularity value is
        missing (None/NaN), because no comparison is possible then.
    """
    if threshold_config is None:
        threshold_config = thresholds

    usual_popularity = row['usual_popularity_at_day_hour']
    current_popularity = row['current_popularity']

    # Missing data cannot be bucketed; report that explicitly
    if pd.isna(usual_popularity) or pd.isna(current_popularity):
        return None

    low_value = threshold_config['low_treshold'] * usual_popularity
    high_value = threshold_config['high_threshold'] * usual_popularity

    if current_popularity < low_value:
        return 'low'
    if current_popularity > high_value:
        return 'high'
    return 'medium'

In [15]:
# Bucket every station row into its critical level
critical_levels = df.apply(calculate_critical_level_from_thresholds, axis=1)
df['critical_level'] = critical_levels

In [16]:
# Marker colour used on the map for each critical level
critical_mapping = dict(high='red', medium='yellow', low='green')

---

# Visualisation

Add a circle to the map for each station, using the colour scheme explained above. The visualisation is created with folium. Clicking a circle opens a popup with the station name.

In [17]:
def add_circle_to_map(map_object, row):
    """Draw one station as a coloured circle marker on a folium map.

    Parameters
    ----------
    map_object : folium map (or other folium container)
        The object the marker is added to.
    row : pandas.Series
        Must provide 'critical_level', 'name', 'location_latitude' and
        'location_longitude'.

    Raises
    ------
    KeyError
        If the row's critical level has no entry in ``critical_mapping``.
    """
    color = critical_mapping[row['critical_level']]

    folium.CircleMarker(
        location=[row.location_latitude, row.location_longitude],
        radius=15,
        # Item access on purpose: on a pandas Series the attribute `row.name`
        # is the row's index label, not the 'name' column.
        popup=row['name'],
        color=color,
        fill=True,
        fill_color=color
    ).add_to(map_object)  # add to the map that was passed in, not a global

In [18]:
# initial map location as [latitude, longitude]; this is to be replaced
initial_latitude, initial_longitude = 51.515289, -0.071977
initial_location = [initial_latitude, initial_longitude]

In [19]:
natal = folium.Map(location=initial_location, zoom_start=13)

# Exactly one marker per station row. There is deliberately no extra call
# after the loop: it would draw the last station twice and would fail on an
# empty frame because `row` would be undefined.
for index, row in df.iterrows():
    add_circle_to_map(natal, row)

# Last expression of the cell: renders the map inline
natal