In [5]:
# Generate a map with bubbles whose size varies according to each station's count.
import pandas as pd
import folium
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
from IPython.display import display
import numpy as np


def generate_map(df, station_name, count_column, station_latitude, station_longitude, bubble_colour, bubble_radius, num_top_labels=None, num_top_colored=None, colored_bubble_colour=None, num_bottom_colored=None, bottom_colored_bubble_colour=None, imagename=None):
    """Build a folium map with one bubble per row of ``df``, sized by count.

    Every row becomes a ``folium.CircleMarker`` whose radius is
    ``(count / max count) * bubble_radius`` and whose popup shows the station
    name. Rows are dense-ranked by ``count_column`` (ties share a rank), so
    more than ``num_top_colored`` / ``num_bottom_colored`` rows can be
    highlighted when counts are tied.

    The input frame is only read: the relative size and the two rankings are
    kept in local Series instead of being written back as new columns.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per station.
    station_name, count_column, station_latitude, station_longitude : str
        Names of the columns holding the popup text, the count, and the
        coordinates.
    bubble_colour : str
        Colour for stations that are not highlighted.
    bubble_radius : float
        Radius given to the station with the largest count.
    num_top_labels : int, optional
        Stations with rank <= this value get their rank drawn as a text label.
    num_top_colored : int, optional
        Stations with rank <= this value use ``colored_bubble_colour``.
    colored_bubble_colour : str, optional
        Colour for the top-ranked stations.
    num_bottom_colored : int, optional
        Stations with reverse rank <= this value use
        ``bottom_colored_bubble_colour`` (top colouring takes precedence).
    bottom_colored_bubble_colour : str, optional
        Colour for the bottom-ranked stations.
    imagename : str, optional
        If given, the map is saved to ``imagename + ".html"``.

    Returns
    -------
    folium.Map
        The populated map.
    """
    counts = df[count_column]

    # Same guard as visualize_routes_on_map: an all-zero column would
    # otherwise give 0 / 0 = NaN radii.
    max_count = counts.max()
    if max_count == 0:
        max_count = 1

    relative_sizes = counts / max_count
    ranks = counts.rank(ascending=False, method='dense')
    reverse_ranks = counts.rank(ascending=True, method='dense')

    # Create a folium map centered at the average latitude and longitude
    map_center = [df[station_latitude].mean(), df[station_longitude].mean()]
    bike_map = folium.Map(location=map_center, zoom_start=14)

    station_rows = zip(
        df[station_latitude],
        df[station_longitude],
        df[station_name],
        relative_sizes,
        ranks,
        reverse_ranks,
    )
    for station_lat, station_lon, station, relative_size, rank, reverse_rank in station_rows:
        rank = int(rank)
        reverse_rank = int(reverse_rank)

        # Determine the color based on top or bottom coloring
        if num_top_colored and rank <= num_top_colored:
            color = colored_bubble_colour
        elif num_bottom_colored and reverse_rank <= num_bottom_colored:
            color = bottom_colored_bubble_colour
        else:
            color = bubble_colour

        # Create a circle marker for each station with a popup
        marker = folium.CircleMarker(
            location=[station_lat, station_lon],
            radius=relative_size * bubble_radius,
            color=color,
            fill=True,
            fill_opacity=1,
        )

        # Coerce to str so that non-string station identifiers (e.g. numeric
        # ids) are still shown as popup text.
        marker.add_child(folium.Popup(str(station), parse_html=True))
        marker.add_to(bike_map)

        # Draw the rank as a text label slightly north of the top stations
        if num_top_labels and rank <= num_top_labels:
            label_lat = station_lat + 0.0005  # Adjust the label position as needed
            label_lon = station_lon
            label = f"{rank}"
            folium.Marker(
                location=[label_lat, label_lon],
                icon=folium.DivIcon(html=f'<div style="font-size: 10pt; color: black;">{label}</div>')
            ).add_to(bike_map)

    # Save the map to HTML if imagename is provided
    if imagename:
        bike_map.save(imagename + ".html")

    return bike_map

In [3]:
# Functions that draw routes on a map as colour-graded lines

def interpolate_color_along_line(color1, color2, num_points):
    """Return ``num_points`` hex colours blending linearly from color1 to color2.

    Parameters
    ----------
    color1, color2 : colour spec accepted by ``matplotlib.colors.to_rgb``
        Start and end colours (hex strings or named colours such as 'blue').
    num_points : int
        Number of colours to produce.

    Returns
    -------
    list of str
        Hex strings; the first equals ``color1`` and the last ``color2``.
        ``num_points == 1`` yields just the start colour (the original code
        divided by zero here) and ``num_points <= 0`` yields an empty list.
    """
    start_rgb = mcolors.to_rgb(color1)
    end_rgb = mcolors.to_rgb(color2)

    if num_points == 1:
        # A single point has no span to interpolate over.
        return [mcolors.to_hex(start_rgb)]

    last_index = num_points - 1
    interpolated_colors = [
        mcolors.to_hex(tuple(
            (1 - i / last_index) * c1 + (i / last_index) * c2
            for c1, c2 in zip(start_rgb, end_rgb)
        ))
        for i in range(num_points)
    ]

    return interpolated_colors

def visualize_routes_on_map(input_df):
    """Draw every route in ``input_df`` as a blue-to-red line and display the map.

    Each row is drawn as ``num_points - 1`` short segments whose colour moves
    from blue (start station) towards red (end station). Line weight is
    proportional to the route's share of the largest count, and each route
    gets a single text label at the middle of the line showing its rank by
    count (1 = busiest, ties take the highest rank number of the tie group).

    The input frame is only read; the rank labels are kept in a local Series
    rather than written back as a 'label' column.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain the columns 'start station latitude',
        'start station longitude', 'end station latitude',
        'end station longitude' and 'count'.

    Returns
    -------
    None
        The map is shown with ``IPython.display.display``.
    """
    # Create a map centered around the average latitude and longitude
    avg_lat = input_df[['start station latitude', 'end station latitude']].mean().mean()
    avg_lon = input_df[['start station longitude', 'end station longitude']].mean().mean()
    bike_map = folium.Map(location=[avg_lat, avg_lon], zoom_start=14)

    counts = input_df['count']

    # Check if the maximum count is zero to avoid division by zero
    max_count = counts.max()
    if max_count == 0:
        max_count = 1

    labels = counts.rank(ascending=False, method='max').astype(int)

    # The gradient is the same for every route, so build it once.
    num_points = 10  # You can adjust the number of points for smoother color transition
    line_colors = interpolate_color_along_line('blue', 'red', num_points)

    route_rows = zip(
        input_df['start station latitude'],
        input_df['start station longitude'],
        input_df['end station latitude'],
        input_df['end station longitude'],
        counts,
        labels,
    )
    for start_lat, start_lon, end_lat, end_lon, count, label in route_rows:
        # Calculate the line width based on the count ratio
        line_width_ratio = count * 1.5 / max_count

        # Evenly spaced points from the start station to the end station
        segment_coords = list(zip(
            np.linspace(start_lat, end_lat, num_points),
            np.linspace(start_lon, end_lon, num_points),
        ))

        # Draw a series of small line segments, each coloured by its end point
        for i in range(num_points - 1):
            folium.PolyLine(
                locations=segment_coords[i:i+2],
                color=line_colors[i + 1],
                weight=line_width_ratio * 10,  # Adjusting line width based on count ratio (you can adjust the scaling factor)
                opacity=0.7
            ).add_to(bike_map)

        # One label per route at the middle of the line. Previously this sat
        # inside the segment loop and stacked num_points - 1 identical markers.
        label_lat, label_lon = segment_coords[num_points // 2]
        folium.Marker(
            location=[label_lat, label_lon],
            icon=folium.DivIcon(html=f'<div style="font-size: 10pt; color: black;">{label}</div>')
        ).add_to(bike_map)

    # Display the map in the Python environment
    display(bike_map)

In [35]:
def connections_map(df=None, start_lat=None, start_long=None, end_lat=None, end_long=None, stations=None):
    """Map every station as a green dot and optionally draw route connections.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Routes to draw. Lines are only drawn when ``df`` and all four
        column-name arguments are supplied.
    start_lat, start_long, end_lat, end_long : str, optional
        Names of the columns in ``df`` holding the route end-point coordinates.
    stations : pandas.DataFrame, optional
        Stations to plot, with 'latitude', 'longitude' and 'station name'
        columns. Defaults to the notebook-level ``unique_stations`` frame, so
        existing calls behave as before; passing it explicitly removes the
        dependency on that global being defined.

    Returns
    -------
    folium.Map
        Map centred on the mean station coordinates.
    """
    if stations is None:
        stations = unique_stations

    bike_map = folium.Map(
        location=[stations['latitude'].mean(), stations['longitude'].mean()],
        zoom_start=14,
    )

    station_rows = zip(stations['latitude'], stations['longitude'], stations['station name'])
    for latitude, longitude, name in station_rows:
        marker = folium.CircleMarker(
            location=[latitude, longitude],
            radius=7,
            color='green',
            fill=True,
            fill_opacity=1,
        )
        # Coerce to str so non-string station names still show as popup text.
        marker.add_child(folium.Popup(str(name), parse_html=True))
        marker.add_to(bike_map)

    route_columns = (start_lat, start_long, end_lat, end_long)
    if df is not None and all(column is not None for column in route_columns):
        route_rows = zip(df[start_lat], df[start_long], df[end_lat], df[end_long])
        for lat_from, lon_from, lat_to, lon_to in route_rows:
            # Create a PolyLine connecting the start and end stations
            folium.PolyLine(
                locations=[(lat_from, lon_from), (lat_to, lon_to)],
                color='blue',
            ).add_to(bike_map)

    return bike_map

In [37]:
def filter_summed_routes(summed_routes_df, desired_station, threshold):
    """Keep a station's leading routes up to a share of its total count.

    Rows that start or end at ``desired_station`` are selected, and their
    'count' values are accumulated in the order the rows appear in
    ``summed_routes_df``. A row is kept while that running total is at most
    ``threshold * (sum of the station's counts)``.

    Parameters
    ----------
    summed_routes_df : pandas.DataFrame
        Needs 'start station name', 'end station name' and 'count' columns.
    desired_station : str
        Station name to match against either end of a route.
    threshold : float
        Fraction of the station's total count to retain.

    Returns
    -------
    pandas.DataFrame
        New frame with the retained rows; the input frame is not modified.
    """
    touches_station = (
        (summed_routes_df['start station name'] == desired_station)
        | (summed_routes_df['end station name'] == desired_station)
    )
    station_routes = summed_routes_df.loc[touches_station]

    station_counts = station_routes['count']
    running_total = station_counts.cumsum()
    cutoff = threshold * station_counts.sum()

    within_cutoff = running_total <= cutoff

    # The result never carries a 'cumulative_sum' column, whether or not the
    # input happened to have one.
    return station_routes.loc[within_cutoff].drop(columns=['cumulative_sum'], errors='ignore')

In [6]:
def filter_summed_routes(summed_routes_df, desired_station, threshold):
    """Select a station's routes until their running count passes a cutoff.

    NOTE(review): this cell repeats the ``filter_summed_routes`` definition
    from the preceding cell; consider keeping only one of them.

    Routes whose start or end station name equals ``desired_station`` are
    taken in their existing row order. Their 'count' values are summed
    cumulatively, and rows are returned while the cumulative value is no
    greater than ``threshold`` times the station's total count.

    Parameters
    ----------
    summed_routes_df : pandas.DataFrame
        Needs 'start station name', 'end station name' and 'count' columns.
    desired_station : str
        Station name to look for at either end of a route.
    threshold : float
        Fraction of the station's total count to retain.

    Returns
    -------
    pandas.DataFrame
        A new frame of the retained rows; the input is left untouched.
    """
    starts_here = summed_routes_df['start station name'] == desired_station
    ends_here = summed_routes_df['end station name'] == desired_station
    station_routes = summed_routes_df.loc[starts_here | ends_here]

    route_counts = station_routes['count']
    count_limit = threshold * route_counts.sum()
    keep_row = route_counts.cumsum() <= count_limit

    kept_routes = station_routes.loc[keep_row]

    # Make sure no 'cumulative_sum' column is present in the returned frame.
    return kept_routes.drop(columns=['cumulative_sum'], errors='ignore')