In this notebook, we analyze the prescience (forecast accuracy) of each forecaster, and then optimize the given map to make it more readable.

## Prescience

In [1]:
import pandas as pd
import numpy as np

# Sample data: three yearly forecasts per forecaster alongside the realised outcome
data = {
    'forecaster': [f'Forecaster {letter}' for letter in 'ABC' for _ in range(3)],
    'year': [2021, 2022, 2023] * 3,
    # Forecasted percentage changes
    'predicted_percentage': [5.5, 6.0, 7.2, 6.0, 7.0, 8.0, 5.8, 6.5, 7.5],
    # Actual market outcomes
    'actual_percentage': [5.0, 6.3, 7.0, 5.8, 6.8, 7.5, 6.0, 6.4, 7.1],
}

# Build the frame and attach the per-prediction absolute and squared errors
df = pd.DataFrame(data).assign(
    absolute_error=lambda frame: (
        frame['predicted_percentage'] - frame['actual_percentage']
    ).abs(),
    squared_error=lambda frame: (
        frame['predicted_percentage'] - frame['actual_percentage']
    ) ** 2,
)

# Per-forecaster Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE),
# ranked by MAE (lower MAE = better accuracy) and sorted best-first
grouped = (
    df.groupby('forecaster')
    .agg(
        mae=('absolute_error', 'mean'),
        rmse=('squared_error', lambda squared: np.sqrt(squared.mean())),
    )
    .reset_index()
    .assign(rank=lambda summary: summary['mae'].rank(method='min'))
    .sort_values('rank')
)

# Display the results: forecaster ranking with MAE and RMSE
print(grouped)


     forecaster       mae      rmse  rank
2  Forecaster C  0.233333  0.264575   1.0
1  Forecaster B  0.300000  0.331662   2.0
0  Forecaster A  0.333333  0.355903   3.0


## Mapping

### Import libraries

In [2]:
import folium
import pandas as pd
from geopy.distance import geodesic

### Optimization
- Different Colors for Distance Ranges: We'll categorize distances and assign different colors for short, medium, and long connections.
- Interactive Popups: Distances will be shown only when hovering over or clicking on the lines.

In [3]:
# Define a function to categorize distances and assign colors
def get_color(distance):
    """Return the line colour for a distance.

    Distances up to and including 2 map to 'green' (short), distances above 2
    and up to and including 5 map to 'blue' (medium), and everything else maps
    to 'red' (long).
    """
    # Inclusive upper bounds, checked in ascending order
    for upper_bound, color in ((2, 'green'), (5, 'blue')):
        if distance <= upper_bound:
            return color
    return 'red'

# Define a function to create a folium map and plot locations with categorized lines and popups
def plot_map(locations):
    """Plot every location on a folium map and connect each pair with a line.

    Each location gets a blue marker whose popup is its name. Every unordered
    pair of locations is joined by a line coloured by ``get_color`` according
    to the geodesic distance in kilometres (rounded to 2 decimals). The
    distance is shown both on hover (tooltip) and on click (popup).

    Parameters
    ----------
    locations : pandas.DataFrame
        Table with 'name', 'latitude' and 'longitude' columns, one row per
        location.

    Returns
    -------
    folium.Map
        The map, centred on the mean latitude/longitude of the locations.
    """
    # Initialize folium map centered at the mean location
    center = [locations['latitude'].mean(), locations['longitude'].mean()]
    m = folium.Map(location=center, zoom_start=13, tiles='CartoDB positron')

    # Extract the coordinates once instead of re-indexing the frame inside
    # the pairwise loop below
    coords = [
        (float(lat), float(lon))
        for lat, lon in zip(locations['latitude'], locations['longitude'])
    ]

    # Add markers for each location
    for (lat, lon), name in zip(coords, locations['name']):
        folium.Marker([lat, lon], popup=name,
                      icon=folium.Icon(color='blue')).add_to(m)

    # Draw lines between all points with categorized colors
    for i, point1 in enumerate(coords):
        for point2 in coords[i + 1:]:
            # Calculate the geodesic distance between the two points
            distance = round(geodesic(point1, point2).kilometers, 2)
            label = f"Distance: {distance} km"

            line = folium.PolyLine([point1, point2], color=get_color(distance),
                                   weight=2.5, opacity=0.7)
            # A Popup only opens on click; the Tooltip is what makes the
            # distance appear while hovering over the line
            line.add_child(folium.Tooltip(label, sticky=True))
            line.add_child(folium.Popup(label, sticky=True))
            m.add_child(line)

    return m

# Precomputed coordinates, written as one (name, latitude, longitude) record
# per stop and then transposed into the column-oriented dict
data = dict(zip(
    ('name', 'latitude', 'longitude'),
    map(list, zip(
        ('Stop 1 of University of Melbourne', -37.7963, 144.9614),
        ('Monument Park in Docklands', -37.8166, 144.9425),
        ('Melbourne Central', -37.8100, 144.9629),
        ('Southbank', -37.8226, 144.9641),
        ('South Yarra', -37.8397, 144.9927),
        ('Prahran', -37.8510, 144.9937),
        ('Richmond Library', -37.8240, 144.9984),
        ('Aldi Johnston Street, Fitzroy', -37.7980, 144.9850),
        ('Fitzroy', -37.7931, 144.9787),
    )),
))

# One row per stop, columns 'name', 'latitude', 'longitude'
locations = pd.DataFrame(data)

# Build the map with distance-coloured lines between every pair of stops
map_with_lines = plot_map(locations)

# Bare last expression so the notebook renders the map
map_with_lines