<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Turku_Finland_Meteorological_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from datetime import datetime, timedelta

# --- CONFIGURATION ---
# FMI station identifier (Turku Artukainen).
FMISID = "100949"

# 1. DEFINE TASKS
# Weather and radiation observations live behind different FMI
# "Stored Queries", so each dataset is described by its own task:
#   type         - label used in progress messages
#   stored_query - FMI stored query id to call
#   parameters   - comma-separated FMI parameter names to request
#   col_map      - FMI parameter name -> output column name
queries = [
    {
        "type": "Weather",
        "stored_query": "fmi::observations::weather::simple",
        # Temp (C), Wind Speed (m/s)
        "parameters": "t2m,ws_10min",
        "col_map": {"t2m": "Temperature_C", "ws_10min": "Wind_Speed_ms"},
    },
    {
        "type": "Radiation",
        "stored_query": "fmi::observations::radiation::simple",
        # Global Radiation 1-min
        "parameters": "GLOB_1MIN",
        "col_map": {"GLOB_1MIN": "Global_Radiation_W/m2"},
    },
]

# Requested period: [start_date, end_date).
start_date = datetime(2024, 1, 1)
end_date = datetime(2025, 1, 1)

# Split the period into consecutive 7-day windows (FMI limits request size
# strictly). Each entry is a (start, end) pair of timestamp strings; the last
# window is clipped so it never runs past end_date.
_STAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
_WINDOW = timedelta(days=7)
# Ceiling division on timedeltas: number of windows needed to cover the period.
_window_count = -((start_date - end_date) // _WINDOW)
chunks = [
    (
        (start_date + i * _WINDOW).strftime(_STAMP_FORMAT),
        min(start_date + (i + 1) * _WINDOW, end_date).strftime(_STAMP_FORMAT),
    )
    for i in range(_window_count)
]

# Collects one DataFrame per successfully fetched task.
dfs = []

print("Fetching 2024 Data from Turku Artukainen (FMI)...")

# --- FETCH FUNCTION ---
# Seconds to wait for the FMI server before giving up on a single request.
# Without a timeout, requests.get() can block forever on a stalled connection.
_FMI_TIMEOUT_S = 30


def _local_tag(element):
    """Return the element's tag without its ``{namespace}`` prefix.

    Non-string tags (e.g. comment nodes) yield an empty string so callers can
    compare the result safely.
    """
    tag = element.tag
    return tag.rsplit('}', 1)[-1] if isinstance(tag, str) else ''


def _parse_fmi_rows(xml_bytes, col_map):
    """Extract observation rows from an FMI 'simple' WFS response.

    The response is scanned for ``BsWfsElement`` nodes, each of which is
    expected to carry ``Time``, ``ParameterName`` and ``ParameterValue``
    children.

    Args:
        xml_bytes: Raw XML body of the response.
        col_map: Mapping of FMI parameter name -> output column name.
            Parameters missing from the mapping are ignored.

    Returns:
        A list of ``{'Time', 'Type', 'Value'}`` dicts, one per usable
        observation. Observations with a missing timestamp, an unmapped
        parameter, or a missing / non-numeric / NaN value are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_bytes`` is not valid XML.
    """
    rows = []
    root = ET.fromstring(xml_bytes)

    for element in root.iter():
        if _local_tag(element) != 'BsWfsElement':
            continue

        fields = {_local_tag(child): child.text for child in element}
        timestamp = fields.get('Time')
        friendly_name = col_map.get(fields.get('ParameterName'))
        raw_value = fields.get('ParameterValue')
        if not timestamp or not friendly_name or raw_value is None:
            continue

        try:
            value = float(raw_value)
        except ValueError:
            continue
        # FMI reports missing observations as the literal text "NaN".
        if pd.isna(value):
            continue

        rows.append({'Time': timestamp, 'Type': friendly_name, 'Value': value})

    return rows


def get_fmi_data(task):
    """Download one FMI dataset for every time window in ``chunks``.

    Reads the module-level ``chunks`` (list of ``(start, end)`` timestamp
    strings) and ``FMISID`` (station id). A failed request, a non-200
    response, or an unparsable body is reported on stdout and that window is
    skipped; the remaining windows are still fetched.

    Args:
        task: Dict with the keys ``type`` (label for messages),
            ``stored_query`` (FMI stored query id), ``parameters``
            (comma-separated FMI parameter names) and ``col_map``
            (FMI parameter name -> output column name).

    Returns:
        A DataFrame indexed by ``Time`` with one column per mapped parameter,
        or an empty DataFrame when no observation could be retrieved.
    """
    all_rows = []
    print(f" > Fetching {task['type']} data...")

    # Everything except the time window is identical for all requests.
    url = "https://opendata.fmi.fi/wfs"
    base_params = {
        "service": "WFS",
        "version": "2.0.0",
        "request": "getFeature",
        "storedquery_id": task['stored_query'],
        "fmisid": FMISID,
        "parameters": task['parameters'],
    }

    for start, end in chunks:
        params = {**base_params, "starttime": start, "endtime": end}

        try:
            r = requests.get(url, params=params, timeout=_FMI_TIMEOUT_S)
        except requests.RequestException as e:
            print(f"    [!] Request failed for {start}: {e}")
        else:
            if r.status_code == 200:
                try:
                    all_rows.extend(_parse_fmi_rows(r.content, task['col_map']))
                except ET.ParseError as e:
                    print(f"    Error parsing XML: {e}")
            else:
                print(f"    [!] Error {r.status_code} for {start}")

        # Be polite to the API
        time.sleep(0.1)

    if not all_rows:
        print(f"    WARNING: No data found for {task['type']}")
        return pd.DataFrame()

    df = pd.DataFrame(all_rows)
    df['Time'] = pd.to_datetime(df['Time'])

    # Long -> wide: one row per timestamp, one column per parameter.
    # pivot_table aggregates duplicates with its default (mean), so a
    # timestamp that appears in two adjacent windows collapses to one row.
    return df.pivot_table(index='Time', columns='Type', values='Value')

# --- EXECUTE ---
# Run every configured query in order and keep only the non-empty results.
dfs.extend(frame for frame in map(get_fmi_data, queries) if not frame.empty)

# --- MERGE & SAVE ---
if not dfs:
    print("Failed to retrieve data.")
else:
    print("Merging datasets...")
    df_final = (
        # Outer join keeps every timestamp seen in any dataset, so datasets
        # sampled at different rates can share one table.
        pd.concat(dfs, axis=1, join='outer')
        .sort_index()
        # Fill gaps linearly. `limit` counts consecutive missing ROWS per
        # column (not minutes): at most 15 rows in a run are filled.
        .interpolate(method='linear', limit=15)
        # Turn the Time index back into a regular column.
        .reset_index()
    )

    # Split the timestamp into separate date and time-of-day text columns.
    stamps = df_final['Time']
    df_final['Date'] = stamps.dt.strftime('%Y-%m-%d')
    df_final['Hour'] = stamps.dt.strftime('%H:%M')

    # Fix the output column order; columns whose dataset was not retrieved
    # are simply left out.
    wanted = ('Date', 'Hour', 'Temperature_C', 'Global_Radiation_W/m2', 'Wind_Speed_ms')
    present = [name for name in wanted if name in df_final.columns]
    df_final = df_final[present]

    # Keep 2024 only (drops rows on the far edge of the last window).
    in_2024 = df_final['Date'].str.startswith('2024')
    df_final = df_final[in_2024]

    print(df_final.head())
    print(f"Rows: {len(df_final)}")

    filename = "Turku_Finland_2024.csv"
    df_final.to_csv(filename, index=False)
    print(f"\nSuccess! Saved to {filename}")

Fetching 2024 Data from Turku Artukainen (FMI)...
 > Fetching Weather data...
