 # Import libraries

In [1]:
import sys
import os
import pandas as pd
import folium
from folium.plugins import MarkerCluster

# Add project directory path, establish database connection, load data, create data frame

In [2]:
# Add the parent of the current working directory to sys.path so that the
# project-local `database` package can be imported below.
sys.path.append(os.path.abspath(".."))

# This import has to stay below the sys.path tweak above; it cannot move to
# the import cell without moving the path setup as well.
from database.db_connection import DatabaseConnector

# Establish the database connection.
# On failure we print a short, readable message and then re-raise: calling
# sys.exit() here would replace the original exception with SystemExit and
# discard its traceback, which makes notebook failures hard to diagnose.
try:
    db_connector = DatabaseConnector()
    engine = db_connector.get_engine()
except Exception as e:
    print(f"Error connecting to the database: {e}")
    raise

# Load the forecast and the observed 2 m temperature series.
try:
    forecast_df = pd.read_sql("SELECT datetime, temperature_2m FROM historical_forecast;", engine)
    weather_df = pd.read_sql("SELECT datetime, temperature_2m FROM historical_weather;", engine)
except Exception as e:
    print(f"Error loading data: {e}")
    raise

# Parse the datetime columns so both frames can be joined on real timestamps.
try:
    forecast_df['datetime'] = pd.to_datetime(forecast_df['datetime'])
    weather_df['datetime'] = pd.to_datetime(weather_df['datetime'])
except Exception as e:
    print(f"Error in datetime formatting: {e}")
    raise

# Manually maintained reference table: one (city, latitude, longitude) entry
# per city shown on the map. `cities_coords` keeps the list-of-dicts form,
# `city_mapping` is the same data as a DataFrame for joins.
cities_coords = [
    dict(city=name, latitude=lat, longitude=lon)
    for name, lat, lon in (
        ("Zürich", 47.3769, 8.5417),
        ("Bern", 46.9481, 7.4474),
        ("Genf", 46.2044, 6.1432),
        ("Luzern", 47.0502, 8.3093),
        ("Basel", 47.5596, 7.5886),
        ("Lausanne", 46.5197, 6.6323),
        ("St. Gallen", 47.4245, 9.3767),
        ("Chur", 46.8508, 9.5311),
        ("Lugano", 46.0037, 8.9511),
        ("Neuenburg", 46.9899, 6.9293),
    )
]
city_mapping = pd.DataFrame(cities_coords)

# Attach a city label to every row.
# NOTE(review): the SQL queries select only `datetime` and `temperature_2m`,
# so no city/location information comes from the database. Assigning
# `city_mapping['city']` aligns on the row index, which labels at most the
# first len(city_mapping) rows and leaves every other row as NaN. The proper
# fix is to load the city (or a location key) from the tables themselves;
# that needs the table schema, which is not visible here - TODO confirm.
forecast_df['city'] = city_mapping['city']
weather_df['city'] = city_mapping['city']

# Make the limitation above visible instead of silently analysing a handful
# of rows.
unlabelled_forecast = int(forecast_df['city'].isna().sum())
unlabelled_weather = int(weather_df['city'].isna().sum())
if unlabelled_forecast or unlabelled_weather:
    print(
        f"Warning: {unlabelled_forecast} forecast rows and {unlabelled_weather} weather rows "
        "have no city label and are excluded from the comparison."
    )

# Merge forecast and weather data for comparison.
# Rows without a city are dropped first: pandas matches NaN join keys with
# each other, which would otherwise create meaningless cross-product rows
# that the per-city grouping below discards anyway.
merged_df = pd.merge(
    forecast_df.dropna(subset=['city']),
    weather_df.dropna(subset=['city']),
    how='inner',
    on=['datetime', 'city'],
    suffixes=('_forecast', '_actual'),
)

# Signed difference (forecast minus actual) shows the bias; its absolute
# value is the size of the error regardless of direction.
merged_df['temperature_difference'] = merged_df['temperature_2m_forecast'] - merged_df['temperature_2m_actual']
merged_df['absolute_temperature_difference'] = merged_df['temperature_difference'].abs()

# Per-city statistics. The maximum is taken over the absolute difference so
# that a large under-forecast counts as a large error too; taking max() of
# the signed difference would rank a -5 °C miss below a +0.8 °C miss.
city_stats = merged_df.groupby('city', as_index=False).agg(
    mean_temperature_difference=('temperature_difference', 'mean'),
    max_temperature_difference=('absolute_temperature_difference', 'max'),
    max_actual_temperature=('temperature_2m_actual', 'max'),
)

# idxmax() on an empty frame fails with an unhelpful message; fail clearly.
if city_stats.empty:
    raise ValueError("No matching forecast/weather rows were found; cannot compute city statistics.")

# Display city with highest forecast error
most_error_city = city_stats.loc[city_stats['max_temperature_difference'].idxmax()]
print(f"City with highest forecast error: {most_error_city['city']} with a max difference of {most_error_city['max_temperature_difference']}°C.")

# Attach latitude/longitude to the statistics so they can be placed on a map.
city_stats = city_stats.merge(city_mapping, on='city', how='left')

City with highest forecast error: Zürich with a max difference of 0.7999999999999998°C.


# Visualize geographical data

In [3]:
from IPython.display import display

# Create a map centered around Switzerland.
# The variable is named `swiss_map` rather than `map` so that Python's
# built-in map() is not shadowed for the rest of the notebook session.
swiss_map = folium.Map(location=[46.8182, 8.2275], zoom_start=8)
marker_cluster = MarkerCluster().add_to(swiss_map)

# Highlight exactly one city: the one reported as having the highest error.
# Comparing by city name avoids float-equality checks on the error value,
# which would colour several markers red whenever two cities tie.
worst_city_name = most_error_city['city']

# Add one marker per city; the popup shows that city's forecast statistics.
for _, row in city_stats.iterrows():
    marker_color = 'red' if row['city'] == worst_city_name else 'blue'
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=(f"City: {row['city']}<br>Mean Difference: {row['mean_temperature_difference']:.2f}°C<br>Max Difference: {row['max_temperature_difference']}°C"),
        icon=folium.Icon(color=marker_color, icon="info-sign")
    ).add_to(marker_cluster)

# Display the map
print("Displaying the map...")
display(swiss_map)

Displaying the map...


# Interpretation

The map illustrates the accuracy of weather forecasts across Swiss cities by showing, for each city, the mean and maximum temperature difference between forecast and observed values. Each city is represented by a marker: Zürich is highlighted in red as the city with the highest forecast error, while blue markers indicate the other cities, which show relatively smaller discrepancies. This visualization helps identify geographical patterns in forecasting accuracy and points to areas where the forecast models may need refinement.