In [17]:
!pip install pandas



## Configuration

# Imports and config

In [18]:
import pandas as pd

## API Function Definitions

In [19]:
import requests
from datetime import datetime, timedelta
import time

def get_weather_for_location(lat, lon, date=None):
    """
    Fetch weather data from Open-Meteo API (free, no API key required)
    
    Args:
        lat: Latitude
        lon: Longitude
        date: datetime object for historical weather (if None, gets current weather)
    
    Returns:
        Dictionary with weather data or None if request fails
    """
    try:
        if date:
            # Convert datetime to date-only format (API only accepts YYYY-MM-DD)
            date_str = date.strftime('%Y-%m-%d')
            # Historical weather - use archive API
            url = "https://archive-api.open-meteo.com/v1/archive"
            params = {
                "latitude": lat,
                "longitude": lon,
                "start_date": date_str,
                "end_date": date_str,
                "daily": "temperature_2m_max,temperature_2m_min,temperature_2m_mean,precipitation_sum,windspeed_10m_max",
                "timezone": "Europe/Malta"
            }
        else:
            # Current weather
            url = "https://api.open-meteo.com/v1/forecast"
            params = {
                "latitude": lat,
                "longitude": lon,
                "current": "temperature_2m,precipitation,rain,windspeed_10m",
                "timezone": "Europe/Malta"
            }
        
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error fetching weather for ({lat}, {lon}): {e}")
        return None

# Test with Valletta coordinates
test_weather = get_weather_for_location(35.8989, 14.5136)
if test_weather:
    print("Successfully connected to weather API!")
    print(f"Sample response structure: {list(test_weather.keys())}")
else:
    print("Failed to connect to weather API")

Successfully connected to weather API!
Sample response structure: ['latitude', 'longitude', 'generationtime_ms', 'utc_offset_seconds', 'timezone', 'timezone_abbreviation', 'elevation', 'current_units', 'current']


## Weather Data for Accident Locations
Let's fetch weather data for the towns where accidents occurred to analyze potential correlations between weather conditions and accident frequency.

In [20]:

# Load the pre-deduplication CSV which has the coordinates

pre_dedup_path = '../../../data/staging/deduplication/deduplicated_with_coords.csv'
deduplicated_with_coords_df = pd.read_csv(pre_dedup_path)

print(f"Loaded {len(deduplicated_with_coords_df)} accidents with coordinates")
print(f"Columns: {deduplicated_with_coords_df.columns.tolist()}")
print(f"\nSample data:")
display(deduplicated_with_coords_df.head())

Loaded 232 accidents with coordinates
Columns: ['id', 'street', 'city', 'accident_datetime', 'street_latitude', 'street_longitude']

Sample data:


Unnamed: 0,id,street,city,accident_datetime,street_latitude,street_longitude
0,article_496362.0,triq l-imgarr,ghajnsielem,2025-10-10 09:00:00,36.02573,14.29241
1,article_496274.0,triq sant anna,floriana,2025-10-09 13:00:00,35.892863,14.507252
2,article_496202.0,triq il-belt valletta,zurrieq,2025-10-09 09:30:00,35.835041,14.474832
3,article_496206.0,paola roundabout,paola,2025-10-09 00:00:00,,
4,release_52,triq il-kappella ta xaghra,naxxar,2025-10-06 09:30:00,35.91592,14.44063


In [None]:
# Fetch weather data for each accident location and datetime
from datetime import datetime, timedelta
import pandas as pd

print(f"Total accidents to process: {len(deduplicated_with_coords_df)}")
print(f"Fetching weather for each accident location and datetime...\n")

results = []
failed = []

for idx, row in deduplicated_with_coords_df.iterrows():
    accident_id = row['id']
    lat = row['street_latitude']
    lon = row['street_longitude']
    accident_dt = row['accident_datetime']
    
    # Skip if missing coordinates or datetime
    if pd.isna(lat) or pd.isna(lon) or pd.isna(accident_dt):
        failed.append({
            'id': accident_id,
            'reason': 'missing_coordinates_or_datetime'
        })
        continue
    
    # Convert accident_datetime string to datetime object if needed
    if isinstance(accident_dt, str):
        try:
            accident_dt = pd.to_datetime(accident_dt)
        except:
            failed.append({
                'id': accident_id,
                'reason': 'invalid_datetime_format'
            })
            continue
    
    # Fetch weather data
    weather_data = get_weather_for_location(lat, lon, date=accident_dt)
    
    weather_info = {
        'id': accident_id,
        'latitude': lat,
        'longitude': lon,
        'accident_datetime': accident_dt,
        'temperature_max': None,
        'temperature_min': None,
        'temperature_mean': None,
        'precipitation_sum': None,
        'windspeed_max': None,
        'is_raining': False
    }
    
    # Extract weather information
    if weather_data and 'daily' in weather_data:
        daily = weather_data['daily']
        
        # Extract temperature data
        if 'temperature_2m_max' in daily and daily['temperature_2m_max']:
            weather_info['temperature_max'] = daily['temperature_2m_max'][0]
        if 'temperature_2m_min' in daily and daily['temperature_2m_min']:
            weather_info['temperature_min'] = daily['temperature_2m_min'][0]
        if 'temperature_2m_mean' in daily and daily['temperature_2m_mean']:
            weather_info['temperature_mean'] = daily['temperature_2m_mean'][0]
        
        # Extract precipitation data
        rain_amount = 0.0
        if 'precipitation_sum' in daily and daily['precipitation_sum'] is not None:
            precip_amount = daily['precipitation_sum'][0] if daily['precipitation_sum'][0] is not None else 0.0
            weather_info['precipitation_sum'] = precip_amount
            weather_info['is_raining'] = precip_amount > 0
        
        # Extract wind data
        if 'windspeed_10m_max' in daily and daily['windspeed_10m_max']:
            weather_info['windspeed_max'] = daily['windspeed_10m_max'][0]
        
    else:
        failed.append({
            'id': accident_id,
            'reason': 'weather_fetch_failed'
        })
    
    results.append(weather_info)
    
    if (idx + 1) % 50 == 0:
        print(f"Processed {idx + 1} / {len(deduplicated_with_coords_df)} accidents")

# Create results dataframe
weather_df = pd.DataFrame(results)

print(f"\n✓ Successfully fetched weather for {len(weather_df)} accidents")
print(f"✗ Failed for {len(failed)} accidents")

if failed:
    print("\nFailed accidents:")
    for f in failed[:10]:  # Show first 10 failures
        print(f"  {f}")

print(f"\nWeather statistics:")
print(f"  Rainy days: {weather_df['is_raining'].sum()}")
print(f"  Clear days: {(~weather_df['is_raining']).sum()}")
print(f"  Average temperature: {weather_df['temperature_mean'].mean():.1f}°C")
print(f"  Average wind speed: {weather_df['windspeed_max'].mean():.1f} km/h")


display(weather_df.head(10))

Total accidents to process: 232
Fetching weather for each accident location and datetime...

Processed 50 / 232 accidents
Processed 100 / 232 accidents
Processed 150 / 232 accidents
Processed 200 / 232 accidents

✓ Successfully fetched weather for 230 accidents
✗ Failed for 2 accidents

Failed accidents:
  {'id': 'article_496206.0', 'reason': 'missing_coordinates_or_datetime'}
  {'id': 'article_1034.0', 'reason': 'missing_coordinates_or_datetime'}

Weather statistics:
  Rainy days: 75
  Clear days: 155
  Average temperature: 21.2°C
  Average wind speed: 20.4 km/h


Unnamed: 0,id,latitude,longitude,accident_datetime,temperature_max,temperature_min,temperature_mean,precipitation_sum,rain_sum,windspeed_max,is_raining
0,article_496362.0,36.02573,14.29241,2025-10-10 09:00:00,22.7,21.3,22.0,0.0,0.0,13.8,False
1,article_496274.0,35.892863,14.507252,2025-10-09 13:00:00,24.5,18.1,21.2,0.0,0.0,6.7,False
2,article_496202.0,35.835041,14.474832,2025-10-09 09:30:00,23.8,17.4,20.6,0.0,0.0,9.8,False
3,release_52,35.91592,14.44063,2025-10-06 09:30:00,23.4,20.0,22.2,0.3,0.3,35.9,True
4,article_496006.0,35.88498,14.39863,2025-10-05 00:00:00,24.5,18.6,22.2,0.1,0.1,24.9,True
5,release_28,35.84692,14.49538,2025-10-04 23:00:00,22.3,18.2,20.2,0.0,0.0,23.1,False
6,release_93,35.83388,14.43712,2025-10-04 14:30:00,22.1,18.1,20.1,0.0,0.0,26.1,False
7,release_94,35.923432,14.482421,2025-10-04 08:45:00,22.6,18.4,20.4,0.0,0.0,23.1,False
8,release_87,36.02573,14.29241,2025-10-02 05:30:00,25.1,21.3,23.1,1.7,1.7,44.9,True
9,article_495442.0,35.888194,14.463934,2025-10-02 00:00:00,24.0,20.2,22.7,0.1,0.1,30.8,True


In [22]:
# Merge weather data with deduplicated accidents data
deduplicated_with_weather_df = deduplicated_with_coords_df.merge(
    weather_df,
    on='id',
    how='left'
)

# Display sample with weather data
print("\nSample of deduplicated data with weather information:")
display(deduplicated_with_weather_df.head(10))

# Save to CSV
deduplication_data_folder = '../../../data/staging/deduplication'
deduplicated_with_weather_csv = f"{deduplication_data_folder}/deduplicated_with_weather.csv"
deduplicated_with_weather_df.to_csv(deduplicated_with_weather_csv, index=False)

print(f"\n✓ Saved {len(deduplicated_with_weather_df)} incidents to {deduplicated_with_weather_csv}")
print(f"  Columns: {len(deduplicated_with_weather_df.columns)}")
print(f"  Accidents with weather data: {deduplicated_with_weather_df['is_raining'].notna().sum()}")


Sample of deduplicated data with weather information:


Unnamed: 0,id,street,city,accident_datetime_x,street_latitude,street_longitude,latitude,longitude,accident_datetime_y,temperature_max,temperature_min,temperature_mean,precipitation_sum,rain_sum,windspeed_max,is_raining
0,article_496362.0,triq l-imgarr,ghajnsielem,2025-10-10 09:00:00,36.02573,14.29241,36.02573,14.29241,2025-10-10 09:00:00,22.7,21.3,22.0,0.0,0.0,13.8,False
1,article_496274.0,triq sant anna,floriana,2025-10-09 13:00:00,35.892863,14.507252,35.892863,14.507252,2025-10-09 13:00:00,24.5,18.1,21.2,0.0,0.0,6.7,False
2,article_496202.0,triq il-belt valletta,zurrieq,2025-10-09 09:30:00,35.835041,14.474832,35.835041,14.474832,2025-10-09 09:30:00,23.8,17.4,20.6,0.0,0.0,9.8,False
3,article_496206.0,paola roundabout,paola,2025-10-09 00:00:00,,,,,NaT,,,,,,,
4,release_52,triq il-kappella ta xaghra,naxxar,2025-10-06 09:30:00,35.91592,14.44063,35.91592,14.44063,2025-10-06 09:30:00,23.4,20.0,22.2,0.3,0.3,35.9,True
5,article_496006.0,saqqajja hill roundabout,rabat,2025-10-05 00:00:00,35.88498,14.39863,35.88498,14.39863,2025-10-05 00:00:00,24.5,18.6,22.2,0.1,0.1,24.9,True
6,release_28,triq dawret il-gudja,gudja,2025-10-04 23:00:00,35.84692,14.49538,35.84692,14.49538,2025-10-04 23:00:00,22.3,18.2,20.2,0.0,0.0,23.1,False
7,release_93,triq ta lawrenti,siggiewi,2025-10-04 14:30:00,35.83388,14.43712,35.83388,14.43712,2025-10-04 14:30:00,22.1,18.1,20.1,0.0,0.0,26.1,False
8,release_94,triq sant andrija,st julians,2025-10-04 08:45:00,35.923432,14.482421,35.923432,14.482421,2025-10-04 08:45:00,22.6,18.4,20.4,0.0,0.0,23.1,False
9,release_87,triq l-imgarr,qala,2025-10-02 05:30:00,36.02573,14.29241,36.02573,14.29241,2025-10-02 05:30:00,25.1,21.3,23.1,1.7,1.7,44.9,True



✓ Saved 264 incidents to ../../../data/staging/deduplication/deduplicated_with_weather.csv
  Columns: 16
  Accidents with weather data: 262
